X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;ds=sidebyside;f=reindex_blog.py;h=0b7c2a6dd9fbf39af81f038f172c0b998470b84b;hb=0c0058d611f55ddae8c08726acb9952e5984f8da;hp=b75104df2c2a18f4be44d37672580e63d7590589;hpb=0d40d95940a4d1000134b9ede06cf6cff512dc1b;p=phdru.name%2Fphdru.name.git
diff --git a/reindex_blog.py b/reindex_blog.py
index b75104d..0b7c2a6 100755
--- a/reindex_blog.py
+++ b/reindex_blog.py
@@ -1,36 +1,21 @@
-#! /usr/local/bin/python -O
+#! /usr/bin/env python
# -*- coding: koi8-r -*-
-__version__ = "$Revision$"[11:-2]
-__revision__ = "$Id$"[5:-2]
-__date__ = "$Date$"[7:-2]
-__author__ = "Oleg BroytMann
%s%s.
îÏ×ÏÓÔÅ×ÁÑ ÌÅÎÔÁ × ÆÏÒÍÁÔÁÈ
-Atom 1.0
-É RSS 2.0 .
+
+Atom 1.0 ÔÏÌØËÏ ÚÁÇÏÌÏ×ËÉ /
+Atom 1.0 /
+Atom 1.0 ÐÏÌÎÙÅ ÔÅËÓÔÙ
+É
+RSS 2.0 ÔÏÌØËÏ ÚÁÇÏÌÏ×ËÉ /
+RSS 2.0 /
+RSS 2.0 ÐÏÌÎÙÅ ÔÅËÓÔÙ.
öÕÒÎÁÌ
+öÕÒÎÁÌ
""" % {"cyear": year or 2005})
elif level == 1:
new_text.append("""\
-#attr $Title = "Oleg BroytMann's blog: %(year)s"
-#attr $Description = "BroytMann Russian Blog %(year)s Index Document"
+#attr $Title = "Oleg Broytman's blog: %(year)s"
+#attr $Description = "Broytman Russian Blog %(year)s Index Document"
#attr $Copyright = %(cyear)s
##
#def body_html
-öÕÒÎÁÌ: %(year)s
+öÕÒÎÁÌ: %(year)s
""" % {"year": year, "cyear": year or 2005})
elif level == 2:
imonth = int(month)
new_text.append("""\
-#attr $Title = "Oleg BroytMann's blog: %(month_abbr_en)s %(year)s"
-#attr $Description = "BroytMann Russian Blog %(month_name_en)s %(year)s Index Document"
+#attr $Title = "Oleg Broytman's blog: %(month_abbr_en)s %(year)s"
+#attr $Description = "Broytman Russian Blog %(month_name_en)s %(year)s Index Document"
#attr $Copyright = %(cyear)s
##
#def body_html
-öÕÒÎÁÌ: %(month_name_ru0)s %(year)s
+öÕÒÎÁÌ: %(month_name_ru0)s %(year)s
""" % {
"year": year, "cyear": year or 2005,
"month_abbr_en": months_abbrs_en[imonth], "month_name_en": months_names_en[imonth],
@@ -164,34 +167,29 @@ def write_template(level, year, month, day, titles, tags=None):
iday = int(day)
imonth = int(month)
- new_text.append("""\
-#attr $Next = "%s"
-""" % titles[0][3])
-
-
if len(titles) == 1:
new_text.append("""\
-#attr $refresh = "0; URL=%s"
+#attr $Refresh = "0; URL=%s"
""" % titles[0][3])
new_text.append("""\
-#attr $Title = "Oleg BroytMann's blog: %(day)d %(month_abbr_en)s %(year)s"
-#attr $Description = "BroytMann Russian Blog %(day)d %(month_name_en)s %(year)s Index Document"
+#attr $Title = "Oleg Broytman's blog: %(day)d %(month_abbr_en)s %(year)s"
+#attr $Description = "Broytman Russian Blog %(day)d %(month_name_en)s %(year)s Index Document"
#attr $Copyright = %(cyear)s
##
#def body_html
-öÕÒÎÁÌ: %(day)d %(month_name_ru0)s %(year)s
+öÕÒÎÁÌ: %(day)d %(month_name_ru)s %(year)s
""" % {
"year": year, "cyear": year or 2005,
"month_abbr_en": months_abbrs_en[imonth], "month_name_en": months_names_en[imonth],
- "month_name_ru0": months_names_ru0[imonth],
+ "month_name_ru": months_names_ru[imonth],
"day": iday
})
save_titles = titles[:]
titles.reverse()
- save_day = None
+ save_date = None
for year, month, day, file, title, lead in titles:
href = []
if level == 0:
@@ -203,29 +201,31 @@ def write_template(level, year, month, day, titles, tags=None):
href.append(file)
href = '/'.join(href)
if day[0] == '0': day = day[1:]
- if save_day <> day:
+ if save_date != (year, month, day):
if level == 0:
new_text.append('\n%s %s %s
' % (day, months_names_ru[int(month)], year))
else:
new_text.append('\n%s %s
' % (day, months_names_ru[int(month)]))
- save_day = day
- if lead:
- lead = lead + ' '
- else:
- lead = ''
+ save_date = year, month, day
new_text.append('''
ðÏ ÇÏÄÁÍ: ''') + + year_counts = {} + for year, month, day, file, title, lead in all_titles: + year_counts[year] = 0 + for year, month, day, file, title, lead in all_titles: + year_counts[year] += 1 + first_year = True - for year in sorted(years.keys()): + for year in years: if first_year: first_year = False else: new_text.append(' - ') - new_text.append('%s' % (year, year)) + new_text.append('%s (%d)' % (year, year, year_counts[str(year)])) new_text.append('''
''') + new_text.append(""" +öö +""") + new_text.append("""\ #end def -$phd_pp_ru.respond(self) +$phd_site.respond(self) """) write_if_changed(index_name, ''.join(new_text)) @@ -299,54 +315,114 @@ for year in sorted(years.keys()): write_template(2, year, month, day, month_titles) write_template(1, year, month, day, year_titles) +def by_count_rev_tag_link(t1, t2): + """Sort all_tags by count in descending order, + and by tags and links in ascending order + """ + r = cmp(t1[0], t2[0]) + if r: + return -r + return cmp((t1[1], t1[2]), (t2[1], t2[2])) + all_tags = [(len(links), tag, links) for (tag, links) in all_tags.items()] -all_tags.sort() +all_tags.sort(by_count_rev_tag_link) write_template(0, year, month, day, all_titles[-20:], all_tags) new_text = ["""\ ## THIS FILE IS AUTOMATICALLY GENERATED. DO NOT EDIT. -#extends phd_pp_ru +#encoding koi8-r +#extends phd_site #implements respond -#attr $Title = "Oleg BroytMann's blog: tags" -#attr $Description = "BroytMann Russian Blog Tags Index Document" +#attr $Title = "Oleg Broytman's blog: tags" +#attr $Description = "Broytman Russian Blog Tags Index Document" #attr $Copyright = 2006 ## #def body_html -
+æÏÒÍÁ ÐÏÉÓËÁ ÐÏÚ×ÏÌÑÅÔ ÉÓËÁÔØ ÓÏÏÂÝÅÎÉÑ × ÂÌÏÇÅ, ÓÏÏÔ×ÅÔÓÔ×ÕÀÝÉÅ ×ÙÒÁÖÅÎÉÀ. +óÉÎÔÁËÓÉÓ ×ÙÒÁÖÅÎÉÑ:
++ðÒÉÍÅÒÙ ×ÙÒÁÖÅÎÉÊ: linux - ÐÒÏÉÚÏÊÄ£Ô ÐÅÒÅÎÁÐÒÁ×ÌÅÎÉÅ +ÎÁ ÓÔÒÁÎÉÃÕ linux.html; linux&!audio - ÉÓËÁÔØ ÚÁÐÉÓÉ × ËÏÔÏÒÙÈ ÅÓÔØ ÔÅÇ +linux É ÎÅÔ ÔÅÇÁ audio; linux and not audio - ÔÏ ÖÅ ÓÁÍÏÅ. +
+ +
' + + def end_p(self): + self.first_p = self.accumulator + '
' + raise HTMLDone() + +def get_first_p(body): + parser = FirstPHTMLParser() + + try: + parser.feed(body) + except (HTMLParseError, HTMLDone): + pass + + try: + parser.close() + except (HTMLParseError, HTMLDone): + pass + + return parser.first_p + + +class AbsURLHTMLParser(_HTMLParser): + def __init__(self, base): + _HTMLParser.__init__(self) + self.base = base + + def start_a(self, attrs): + self.accumulator += '' + + def end_a(self): + self.accumulator += '' + + def start_img(self, attrs): + self.accumulator += '' + + def end_img(self): + pass + +def absolute_urls(body, base): + parser = AbsURLHTMLParser(base) + + try: + parser.feed(body) + except HTMLParseError: + pass + + try: + parser.close() + except HTMLParseError: + pass + + return parser.accumulator + + from atom_10 import atom_10 from rss_20 import rss_20 from news import NewsItem -baseURL = "http://phd.pp.ru/Russian/blog/" +if blog_root: + blog_root_url = blog_root[ + blog_root.find('/htdocs/phdru.name/') + len('/htdocs/phdru.name/'):] + baseURL = "https://phdru.name/%s/" % blog_root_url +else: + baseURL = "https://phdru.name/" items = [] for item in tuple(reversed(all_titles_tags))[:10]: year, month, day, file, title, lead, tags = item - if lead: - lead = lead + ' ' - else: - lead = '' + lead = lead.decode('koi8-r').encode('utf-8') + title = title.decode('koi8-r').encode('utf-8') + url_path = "%s/%s/%s/%s" % (year, month, day, file) item = NewsItem( "%s-%s-%s" % (year, month, day), - "%s%s" % (lead, title), - "%s/%s/%s/%s" % (year, month, day, file) - ) + "%s%s" % (lead+' ' if lead else '', title), + url_path) items.append(item) item.baseURL = baseURL - item.categoryList = tags + item.categoryList = [t.decode('koi8-r').encode('utf-8') for t in tags] + body = bodies[(year, month, day, file)] + body = absolute_urls(body, baseURL + url_path) + try: + body.decode('utf-8') + except UnicodeDecodeError: + body = body.decode('koi8-r').encode('utf-8') + item.body = body + excerpt = get_first_p(body) + try: + excerpt.decode('utf-8') + except UnicodeDecodeError: + excerpt = excerpt.decode('koi8-r').encode('utf-8') + item.excerpt = excerpt namespace = { - "title": "Oleg Broytmann's blog", + "title": "Oleg Broytman's blog", "baseURL": baseURL, "indexFile": "", "description": "", "lang": "ru", - "author": "Oleg Broytmann", - "email": "phd@phd.pp.ru", + "author": "Oleg Broytman", + "email": "phd@phdru.name", "generator": os.path.basename(sys.argv[0]), "posts": items, } @@ -408,7 +585,23 @@ namespace = { # For english dates locale.setlocale(locale.LC_TIME, 'C') -atom_tmpl = str(atom_10(searchList=[namespace])) +atom_tmpl = unicode(atom_10(searchList=[namespace])).encode('koi8-r') write_if_changed(os.path.join(blog_root, "atom_10.xml"), atom_tmpl) -rss_tmpl = str(rss_20(searchList=[namespace])) +rss_tmpl = unicode(rss_20(searchList=[namespace])).encode('koi8-r') write_if_changed(os.path.join(blog_root, "rss_20.xml"), rss_tmpl) + +for item in items: + item.excerpt = None + +atom_tmpl = unicode(atom_10(searchList=[namespace])).encode('koi8-r') +write_if_changed(os.path.join(blog_root, "atom_10_titles.xml"), atom_tmpl) +rss_tmpl = unicode(rss_20(searchList=[namespace])).encode('koi8-r') +write_if_changed(os.path.join(blog_root, "rss_20_titles.xml"), rss_tmpl) + +for item in items: + item.content = item.body + +atom_tmpl = unicode(atom_10(searchList=[namespace])).encode('koi8-r') +write_if_changed(os.path.join(blog_root, "atom_10_full.xml"), atom_tmpl) +rss_tmpl = unicode(rss_20(searchList=[namespace])).encode('koi8-r') +write_if_changed(os.path.join(blog_root, "rss_20_full.xml"), rss_tmpl)