X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;ds=sidebyside;f=reindex_blog.py;h=beba7c12823ae151a8f5de6619c9776758b9d713;hb=6922322a54539e39cbba59653868fa9d749fa274;hp=a57f1d2e5db28e9225889729f603ae872af7de52;hpb=a024788d10259bae7f6cd138435959167cdce7ce;p=phdru.name%2Fphdru.name.git diff --git a/reindex_blog.py b/reindex_blog.py index a57f1d2..beba7c1 100755 --- a/reindex_blog.py +++ b/reindex_blog.py @@ -39,6 +39,10 @@ else: blog = {} years = {} +# bodies is a dictionary mapping file => body + +bodies = {} + # Walk the directory recursively for dirpath, dirs, files in os.walk(blog_root): d = os.path.basename(dirpath) @@ -51,7 +55,7 @@ for dirpath, dirs, files in os.walk(blog_root): template = Template(file=fullpath) title_parts = template.Title.split() title = ' '.join(title_parts[6:]) - lead = getattr(template, "Lead", None) + lead = template.Lead tags = template.Tag if isinstance(tags, basestring): @@ -77,6 +81,9 @@ for dirpath, dirs, files in os.walk(blog_root): if day not in days: days.append(day) + file = file[:-len("tmpl")] + "html" + key = (year, month, day, file) + bodies[key] = template.body() # Need to save the blog? if blog <> old_blog: @@ -130,8 +137,12 @@ def write_template(level, year, month, day, titles, tags=None): #attr $Title = "Oleg Broytman's blog" #attr $Description = "Broytman Russian Blog Index Document" #attr $Copyright = %(cyear)s -#attr $alternates = (("News [Atom 1.0]", "application/atom+xml", "atom_10.xml"), - ("News [RSS 2.0]", "application/rss+xml", "rss_20.xml") +#attr $alternates = (("îÏ×ÏÓÔÉ [Atom 1.0] ÔÏÌØËÏ ÚÁÇÏÌÏ×ËÉ", "application/atom+xml", "atom_10_titles.xml"), + ("îÏ×ÏÓÔÉ [Atom 1.0]", "application/atom+xml", "atom_10.xml"), + ("îÏ×ÏÓÔÉ [Atom 1.0] ÐÏÌÎÙÅ ÔÅËÓÔÙ", "application/atom+xml", "atom_10_full.xml"), + ("îÏ×ÏÓÔÉ [RSS 2.0] ÔÏÌØËÏ ÚÁÇÏÌÏ×ËÉ", "application/rss+xml", "rss_20_titles.xml"), + ("îÏ×ÏÓÔÉ [RSS 2.0]", "application/rss+xml", "rss_20.xml"), + ("îÏ×ÏÓÔÉ [RSS 2.0] ÐÏÌÎÙÅ ÔÅËÓÔÙ", "application/rss+xml", "rss_20_full.xml"), ) ## #def body_html @@ -167,11 +178,6 @@ def write_template(level, year, month, day, titles, tags=None): iday = int(day) imonth = int(month) - new_text.append("""\ -#attr $Next = "%s" -""" % titles[0][3]) - - if len(titles) == 1: new_text.append("""\ #attr $refresh = "0; URL=%s" @@ -212,23 +218,25 @@ def write_template(level, year, month, day, titles, tags=None): else: new_text.append('\n
%s%s.
-''' % (lead, href, title)) +''' % (lead+' ' if lead else '', href, title)) if level == 0: new_text.append("""îÏ×ÏÓÔÅ×ÁÑ ÌÅÎÔÁ × ÆÏÒÍÁÔÁÈ
-Atom 1.0
-É RSS 2.0
.
+
+Atom 1.0 ÔÏÌØËÏ ÚÁÇÏÌÏ×ËÉ /
+Atom 1.0 /
+Atom 1.0 ÐÏÌÎÙÅ ÔÅËÓÔÙ
+É
+RSS 2.0 ÔÏÌØËÏ ÚÁÇÏÌÏ×ËÉ /
+RSS 2.0 /
+RSS 2.0 ÐÏÌÎÙÅ ÔÅËÓÔÙ.
' + + def end_p(self): + self.first_p = self.accumulator + '
' + raise HTMLDone() + +def get_first_p(body): + parser = FirstPHTMLParser() + + try: + parser.feed(body) + except (HTMLParseError, HTMLDone): + pass + + try: + parser.close() + except (HTMLParseError, HTMLDone): + pass + + return parser.first_p + + +class AbsURLHTMLParser(_HTMLParser): + def __init__(self, base): + _HTMLParser.__init__(self) + self.base = base + + def start_a(self, attrs): + self.accumulator += '' + + def end_a(self): + self.accumulator += '' + +def absolute_urls(body, base): + parser = AbsURLHTMLParser(base) + + try: + parser.feed(body) + except HTMLParseError: + pass + + try: + parser.close() + except HTMLParseError: + pass + + return parser.accumulator + + from atom_10 import atom_10 from rss_20 import rss_20 from news import NewsItem @@ -403,18 +509,18 @@ else: items = [] for item in tuple(reversed(all_titles_tags))[:10]: year, month, day, file, title, lead, tags = item - if lead: - lead = lead + ' ' - else: - lead = '' + url_path = "%s/%s/%s/%s" % (year, month, day, file) item = NewsItem( "%s-%s-%s" % (year, month, day), - "%s%s" % (lead, title), - "%s/%s/%s/%s" % (year, month, day, file) - ) + "%s%s" % (lead+' ' if lead else '', title), + url_path) items.append(item) item.baseURL = baseURL item.categoryList = tags + body = bodies[(year, month, day, file)] + body = absolute_urls(body, baseURL + url_path) + item.body = body + item.excerpt = get_first_p(body) namespace = { "title": "Oleg Broytman's blog", @@ -435,3 +541,19 @@ atom_tmpl = str(atom_10(searchList=[namespace])) write_if_changed(os.path.join(blog_root, "atom_10.xml"), atom_tmpl) rss_tmpl = str(rss_20(searchList=[namespace])) write_if_changed(os.path.join(blog_root, "rss_20.xml"), rss_tmpl) + +for item in items: + item.excerpt = None + +atom_tmpl = str(atom_10(searchList=[namespace])) +write_if_changed(os.path.join(blog_root, "atom_10_titles.xml"), atom_tmpl) +rss_tmpl = str(rss_20(searchList=[namespace])) +write_if_changed(os.path.join(blog_root, "rss_20_titles.xml"), rss_tmpl) + +for item in items: + item.content = item.body + +atom_tmpl = str(atom_10(searchList=[namespace])) +write_if_changed(os.path.join(blog_root, "atom_10_full.xml"), atom_tmpl) +rss_tmpl = str(rss_20(searchList=[namespace])) +write_if_changed(os.path.join(blog_root, "rss_20_full.xml"), rss_tmpl)