git.phdru.name Git - phdru.name/phdru.name.git/commitdiff
Fix(reindex_blog): Fix encodings
author Oleg Broytman <phd@phdru.name>
Sun, 12 Dec 2021 23:17:14 +0000 (02:17 +0300)
committer Oleg Broytman <phd@phdru.name>
Sun, 12 Dec 2021 23:35:52 +0000 (02:35 +0300)
reindex_blog.py

index 291e182eee8ddffff655114f347e50b9da54b97a..0f1a8f5f9b185fe34ccecbdf8f2b297f3bd0b3d0 100755 (executable)
@@ -2,7 +2,7 @@
 # -*- coding: koi8-r -*-
 
 __author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2006-2017 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2006-2021 PhiloSoft Design"
 
 import sys, os
 from Cheetah.Template import Template
@@ -482,6 +482,8 @@ class AbsURLHTMLParser(_HTMLParser):
       self.accumulator += '<a'
       for attrname, value in attrs:
          value = cgi.escape(value, True)
+         if isinstance(value, unicode):
+            value = value.encode('koi8-r')
          if attrname == 'href':
             self.accumulator += ' href="%s"' % urljoin(self.base, value)
          else:
@@ -546,8 +548,17 @@ for item in tuple(reversed(all_titles_tags))[:10]:
    item.categoryList = tags
    body = bodies[(year, month, day, file)]
    body = absolute_urls(body, baseURL + url_path)
+   try:
+       body.decode('utf-8')
+   except UnicodeDecodeError:
+       body = body.decode('koi8-r').encode('utf-8')
    item.body = body
-   item.excerpt = get_first_p(body)
+   excerpt = get_first_p(body)
+   try:
+       excerpt.decode('utf-8')
+   except UnicodeDecodeError:
+       excerpt = excerpt.decode('koi8-r').encode('utf-8')
+   item.excerpt = excerpt
 
 namespace = {
    "title": "Oleg Broytman's blog",