X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;f=reindex_blog.py;h=4674ae0ea575628fb73864acf5fdc686fb6e4807;hb=1bccabe63f4f3ddc450c1e37edd606b36671ae21;hp=0ba0804d4c1d458628744a4c9d8ba5f6c2781744;hpb=251dd1739e176805010998623456b6672283159e;p=phdru.name%2Fphdru.name.git diff --git a/reindex_blog.py b/reindex_blog.py index 0ba0804..4674ae0 100755 --- a/reindex_blog.py +++ b/reindex_blog.py @@ -1,12 +1,8 @@ #! /usr/bin/env python # -*- coding: koi8-r -*- -__version__ = "$Revision$"[11:-2] -__revision__ = "$Id$"[5:-2] -__date__ = "$Date$"[7:-2] -__author__ = "Oleg Broytman " -__copyright__ = "Copyright (C) 2006-2010 PhiloSoft Design" - +__author__ = "Oleg Broytman " +__copyright__ = "Copyright (C) 2006-2013 PhiloSoft Design" import sys, os @@ -39,6 +35,10 @@ else: blog = {} years = {} +# bodies is a dictionary mapping file => body + +bodies = {} + # Walk the directory recursively for dirpath, dirs, files in os.walk(blog_root): d = os.path.basename(dirpath) @@ -49,13 +49,14 @@ for dirpath, dirs, files in os.walk(blog_root): continue fullpath = os.path.join(dirpath, file) template = Template(file=fullpath) - title_parts = template.Title.split() + title_parts = template.Title.decode('utf-8').encode('koi8-r').split() title = ' '.join(title_parts[6:]) - lead = getattr(template, "Lead", None) + lead = template.Lead.decode('utf-8').encode('koi8-r') tags = template.Tag if isinstance(tags, basestring): tags = (tags,) + tags = [tag.decode('utf-8').encode('koi8-r') for tag in tags] if title: key = year, month, day = tuple(dirpath[len(blog_root):].split(os.sep)[1:]) @@ -77,6 +78,12 @@ for dirpath, dirs, files in os.walk(blog_root): if day not in days: days.append(day) + file = file[:-len("tmpl")] + "html" + key = (year, month, day, file) + body = template.body() + if isinstance(body, unicode): + body = body.encode('koi8-r') + bodies[key] = body # Need to save the blog? if blog <> old_blog: @@ -121,7 +128,8 @@ def write_template(level, year, month, day, titles, tags=None): new_text = ["""\ ## THIS FILE IS AUTOMATICALLY GENERATED. DO NOT EDIT. -#extends phd_pp_ru +#encoding koi8-r +#extends phd_site #implements respond """] @@ -130,8 +138,12 @@ def write_template(level, year, month, day, titles, tags=None): #attr $Title = "Oleg Broytman's blog" #attr $Description = "Broytman Russian Blog Index Document" #attr $Copyright = %(cyear)s -#attr $alternates = (("News [Atom 1.0]", "application/atom+xml", "atom_10.xml"), - ("News [RSS 2.0]", "application/rss+xml", "rss_20.xml") +#attr $alternates = (("îÏ×ÏÓÔÉ [Atom 1.0] ÔÏÌØËÏ ÚÁÇÏÌÏ×ËÉ", "application/atom+xml", "atom_10_titles.xml"), + ("îÏ×ÏÓÔÉ [Atom 1.0]", "application/atom+xml", "atom_10.xml"), + ("îÏ×ÏÓÔÉ [Atom 1.0] ÐÏÌÎÙÅ ÔÅËÓÔÙ", "application/atom+xml", "atom_10_full.xml"), + ("îÏ×ÏÓÔÉ [RSS 2.0] ÔÏÌØËÏ ÚÁÇÏÌÏ×ËÉ", "application/rss+xml", "rss_20_titles.xml"), + ("îÏ×ÏÓÔÉ [RSS 2.0]", "application/rss+xml", "rss_20.xml"), + ("îÏ×ÏÓÔÉ [RSS 2.0] ÐÏÌÎÙÅ ÔÅËÓÔÙ", "application/rss+xml", "rss_20_full.xml"), ) ## #def body_html @@ -169,7 +181,7 @@ def write_template(level, year, month, day, titles, tags=None): if len(titles) == 1: new_text.append("""\ -#attr $refresh = "0; URL=%s" +#attr $Refresh = "0; URL=%s" """ % titles[0][3]) new_text.append("""\ @@ -207,23 +219,25 @@ def write_template(level, year, month, day, titles, tags=None): else: new_text.append('\n

%s %s

' % (day, months_names_ru[int(month)])) save_date = year, month, day - if lead: - lead = lead + ' ' - else: - lead = '' new_text.append('''

%s%s.

-''' % (lead, href, title)) +''' % (lead+' ' if lead else '', href, title)) if level == 0: new_text.append("""

îÏ×ÏÓÔÅ×ÁÑ ÌÅÎÔÁ × ÆÏÒÍÁÔÁÈ -Atom 1.0 RSS 2.0 . + +Atom 1.0 ÔÏÌØËÏ ÚÁÇÏÌÏ×ËÉ / +Atom 1.0 / +Atom 1.0 ÐÏÌÎÙÅ ÔÅËÓÔÙ +RSS 2.0 ÔÏÌØËÏ ÚÁÇÏÌÏ×ËÉ / +RSS 2.0 / +RSS 2.0 ÐÏÌÎÙÅ ÔÅËÓÔÙ.

""") @@ -245,18 +259,25 @@ def write_template(level, year, month, day, titles, tags=None): ''') max_year = int(sorted(years.keys())[-1]) - years = range(2005, max_year+1) + years = range(max_year, 2005, -1) new_text.append('''

ðÏ ÇÏÄÁÍ: ''') + + year_counts = {} + for year, month, day, file, title, lead in all_titles: + year_counts[year] = 0 + for year, month, day, file, title, lead in all_titles: + year_counts[year] += 1 + first_year = True for year in years: if first_year: first_year = False else: new_text.append(' - ') - new_text.append('%s' % (year, year)) + new_text.append('%s (%d)' % (year, year, year_counts[str(year)])) new_text.append('''

''') @@ -268,7 +289,7 @@ def write_template(level, year, month, day, titles, tags=None): new_text.append("""\ #end def -$phd_pp_ru.respond(self) +$phd_site.respond(self) """) write_if_changed(index_name, ''.join(new_text)) @@ -321,7 +342,8 @@ write_template(0, year, month, day, all_titles[-20:], all_tags) new_text = ["""\ ## THIS FILE IS AUTOMATICALLY GENERATED. DO NOT EDIT. -#extends phd_pp_ru +#encoding koi8-r +#extends phd_site #implements respond #attr $Title = "Oleg Broytman's blog: tags" #attr $Description = "Broytman Russian Blog Tags Index Document" @@ -330,6 +352,27 @@ new_text = ["""\ #def body_html

ôÅÇÉ

+

+æÏÒÍÁ ÐÏÉÓËÁ ÐÏÚ×ÏÌÑÅÔ ÉÓËÁÔØ ÓÏÏÂÝÅÎÉÑ × ÂÌÏÇÅ, ÓÏÏÔ×ÅÔÓÔ×ÕÀÝÉÅ ×ÙÒÁÖÅÎÉÀ. +óÉÎÔÁËÓÉÓ ×ÙÒÁÖÅÎÉÑ:

+ðÒÏÂÅÌÙ ÎÅ ÄÏÐÕÓËÁÀÔÓÑ. ðÒÉÍÅÒÙ ×ÙÒÁÖÅÎÉÊ: linux - ÐÒÏÉÚÏÊÄ£Ô ÐÅÒÅÎÁÐÒÁ×ÌÅÎÉÅ +ÎÁ ÓÔÒÁÎÉÃÕ linux.html; linux&!audio - ÉÓËÁÔØ ÚÁÐÉÓÉ × ËÏÔÏÒÙÈ ÅÓÔØ ÔÅÇ +linux É ÎÅÔ ÔÅÇÁ audio. +

+ +
+
+ + +
+
+

"""] @@ -352,7 +395,8 @@ for i, (count, tag, links) in enumerate(all_tags): tag_text = ["""\ ## THIS FILE IS AUTOMATICALLY GENERATED. DO NOT EDIT. -#extends phd_pp_ru +#encoding koi8-r +#extends phd_site #implements respond #attr $Title = "Oleg Broytman's blog: tag %s" #attr $Description = "Broytman Russian Blog Tag %s Index Document" @@ -388,12 +432,8 @@ for i, (count, tag, links) in enumerate(all_tags): count = 0 for year, month, day, filename, title, lead in reversed(links): - if lead: - lead = lead + ' ' - else: - lead = '' link = "../%s/%s/%s/%s" % (year, month, day, filename) - item_text = """
  • %s/%s/%s: %s%s
  • """ % (link, year, month, day, lead, title) + item_text = """
  • %s/%s/%s: %s%s
  • """ % (link, year, month, day, lead+' ' if lead else '', title) count += 1 if count <= 5: @@ -405,7 +445,7 @@ for i, (count, tag, links) in enumerate(all_tags):

    #end def -$phd_pp_ru.respond(self) +$phd_site.respond(self) """) write_if_changed(os.path.join(blog_root, "tags", tag+".tmpl"), ''.join(tag_text)) @@ -413,35 +453,121 @@ new_text.append("""\

    #end def -$phd_pp_ru.respond(self) +$phd_site.respond(self) """) write_if_changed(os.path.join(blog_root, "tags", "index.tmpl"), ''.join(new_text)) +from HTMLParser import HTMLParseError +import cgi +from urlparse import urljoin +from m_lib.net.www.html import HTMLParser as _HTMLParser + +class HTMLDone(Exception): pass + + +class FirstPHTMLParser(_HTMLParser): + def __init__(self): + _HTMLParser.__init__(self) + self.first_p = None + + def start_p(self, attrs): + self.accumulator = '

    ' + + def end_p(self): + self.first_p = self.accumulator + '

    ' + raise HTMLDone() + +def get_first_p(body): + parser = FirstPHTMLParser() + + try: + parser.feed(body) + except (HTMLParseError, HTMLDone): + pass + + try: + parser.close() + except (HTMLParseError, HTMLDone): + pass + + return parser.first_p + + +class AbsURLHTMLParser(_HTMLParser): + def __init__(self, base): + _HTMLParser.__init__(self) + self.base = base + + def start_a(self, attrs): + self.accumulator += '