X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;ds=inline;f=reindex_blog.py;h=bc34887d4076fe419cde94e4f7f1de2700e11749;hb=HEAD;hp=8d19837099f99795f0a8689213c333d56185e031;hpb=e3e8157280fba8ddfa3adb6d7c15265b01deef94;p=phdru.name%2Fphdru.name.git
diff --git a/reindex_blog.py b/reindex_blog.py
index 8d19837..3370d6f 100755
--- a/reindex_blog.py
+++ b/reindex_blog.py
@@ -1,36 +1,26 @@
-#! /usr/local/bin/python -O
+#! /usr/bin/env python3
# -*- coding: koi8-r -*-
-__version__ = "$Revision$"[11:-2]
-__revision__ = "$Id$"[5:-2]
-__date__ = "$Date$"[7:-2]
-__author__ = "Oleg BroytMann îÏ×ÏÓÔÅ×ÁÑ ÌÅÎÔÁ × ÆÏÒÍÁÔÁÈ
-Atom 1.0
-É RSS 2.0 .
+
+Atom 1.0 ÔÏÌØËÏ ÚÁÇÏÌÏ×ËÉ /
+Atom 1.0 /
+Atom 1.0 ÐÏÌÎÙÅ ÔÅËÓÔÙ
+É
+RSS 2.0 ÔÏÌØËÏ ÚÁÇÏÌÏ×ËÉ /
+RSS 2.0 /
+RSS 2.0 ÐÏÌÎÙÅ ÔÅËÓÔÙ.
ôÅÇÉ:
''')
- first_tag = True
- for count, tag, links in all_tags:
- if first_tag:
- first_tag = False
- else:
- new_text.append(' - ')
- new_text.append("""%s (%d)""" % (tag, tag, count))
- new_text.append('''
+ first_tag = True
+ for count, tag, links in all_tags:
+ if first_tag:
+ first_tag = False
+ else:
+ new_text.append(' - ')
+ new_text.append("""%s (%d)""" % (
+ encode_tag(tag), tag, count))
+ new_text.append('''
ðÏ ÇÏÄÁÍ:
''')
- first_year = True
- for year in sorted(years.keys()):
- if first_year:
- first_year = False
- else:
- new_text.append(' - ')
- new_text.append('%s' % (year, year))
- new_text.append('''
+
+ year_counts = {}
+ for year, month, day, file, title, lead in all_titles:
+ year_counts[year] = 0
+ for year, month, day, file, title, lead in all_titles:
+ year_counts[year] += 1
+
+ first_year = True
+ for year in years:
+ if first_year:
+ first_year = False
+ else:
+ new_text.append(' - ')
+ new_text.append('%s (%d)' % (year, year, year_counts[str(year)]))
+ new_text.append('''
öö
+""")
+
+ new_text.append("""\
#end def
-$phd_pp_ru.respond(self)
+$phd_site.respond(self)
""")
- write_if_changed(index_name, ''.join(new_text))
+ write_if_changed(index_name, ''.join(new_text))
all_tags = {}
@@ -270,141 +291,300 @@ all_titles = []
all_titles_tags = []
for year in sorted(years.keys()):
- year_titles = []
- months = years[year]
- for month in sorted(months.keys()):
- month_titles = []
- for day in sorted(months[month]):
- day_titles = []
- key = year, month, day
- if key in blog:
- for file, title, lead, tags in blog[key]:
- if file.endswith(".tmpl"): file = file[:-len("tmpl")] + "html"
- value = (year, month, day, file, title, lead)
- all_titles_tags.append((year, month, day, file, title, lead, tags))
- all_titles.append(value)
- year_titles.append(value)
- month_titles.append(value)
- day_titles.append(value)
- for tag in tags:
- if tag in all_tags:
- tag_links = all_tags[tag]
- else:
- tag_links = all_tags[tag] = []
- tag_links.append(value)
- write_template(3, year, month, day, day_titles)
- write_template(2, year, month, day, month_titles)
- write_template(1, year, month, day, year_titles)
+ year_titles = []
+ months = years[year]
+ for month in sorted(months.keys()):
+ month_titles = []
+ for day in sorted(months[month]):
+ day_titles = []
+ key = year, month, day
+ if key in blog:
+ for file, title, lead, tags in blog[key]:
+ if file.endswith(".tmpl"): file = file[:-len("tmpl")] + "html"
+ value = (year, month, day, file, title, lead)
+ all_titles_tags.append((year, month, day, file, title, lead, tags))
+ all_titles.append(value)
+ year_titles.append(value)
+ month_titles.append(value)
+ day_titles.append(value)
+ for tag in tags:
+ if tag in all_tags:
+ tag_links = all_tags[tag]
+ else:
+ tag_links = all_tags[tag] = []
+ tag_links.append(value)
+ write_template(3, year, month, day, day_titles)
+ write_template(2, year, month, day, month_titles)
+ write_template(1, year, month, day, year_titles)
+
+def by_count_rev_tag_link(tag):
+ """Sort all_tags by count in descending order,
+ and by tags and links in ascending order
+ """
+ return tag[:3]
all_tags = [(len(links), tag, links) for (tag, links) in all_tags.items()]
-all_tags.sort()
+all_tags.sort(key=by_count_rev_tag_link)
write_template(0, year, month, day, all_titles[-20:], all_tags)
new_text = ["""\
## THIS FILE IS AUTOMATICALLY GENERATED. DO NOT EDIT.
-#extends phd_pp_ru
+#encoding koi8-r
+#extends phd_site
#implements respond
-#attr $Title = "Oleg BroytMann's blog: tags"
-#attr $Description = "BroytMann Russian Blog Tags Index Document"
+#attr $Title = "Oleg Broytman's blog: tags"
+#attr $Description = "Broytman Russian Blog Tags Index Document"
#attr $Copyright = 2006
##
#def body_html
-
+æÏÒÍÁ ÐÏÉÓËÁ ÐÏÚ×ÏÌÑÅÔ ÉÓËÁÔØ ÓÏÏÂÝÅÎÉÑ × ÂÌÏÇÅ, ÓÏÏÔ×ÅÔÓÔ×ÕÀÝÉÅ ×ÙÒÁÖÅÎÉÀ.
+óÉÎÔÁËÓÉÓ ×ÙÒÁÖÅÎÉÑ:
+ðÒÉÍÅÒÙ ×ÙÒÁÖÅÎÉÊ: linux - ÐÒÏÉÚÏÊÄ£Ô ÐÅÒÅÎÁÐÒÁ×ÌÅÎÉÅ
+ÎÁ ÓÔÒÁÎÉÃÕ Linux.html; linux&!debian - ÉÓËÁÔØ ÚÁÐÉÓÉ × ËÏÔÏÒÙÈ ÅÓÔØ ÔÅÇ
+Linux É ÎÅÔ ÔÅÇÁ Debian; Linux and not Debian - ÔÏ ÖÅ ÓÁÍÏÅ. åÓÌÉ × ÔÅÇÅ ÅÓÔØ
+ÐÒÏÂÅÌ ("í£ÒÔ×ÏÅ ÍÏÒÅ", "þ£ÒÎÏÅ ÍÏÒÅ") - ÅÇÏ ÎÁÄÏ ÚÁÍÅÎÉÔØ ÎÁ ÐÏÄÞ£ÒËÉ×ÁÎÉÅ;
+ÎÁÐÒÉÍÅÒ: "éÚÒÁÉÌØ é îå Í£ÒÔ×ÏÅ_ÍÏÒÅ", "ëÁ×ËÁÚ É ÎÅ Þ£ÒÎÏÅ_ÍÏÒÅ".
+
öÕÒÎÁÌ
+öÕÒÎÁÌ
""" % {"cyear": year or 2005})
- elif level == 1:
- new_text.append("""\
-#attr $Title = "Oleg BroytMann's blog: %(year)s"
-#attr $Description = "BroytMann Russian Blog %(year)s Index Document"
+ elif level == 1:
+ new_text.append("""\
+#attr $Title = "Oleg Broytman's blog: %(year)s"
+#attr $Description = "Broytman Russian Blog %(year)s Index Document"
#attr $Copyright = %(cyear)s
##
#def body_html
-öÕÒÎÁÌ: %(year)s
+öÕÒÎÁÌ: %(year)s
""" % {"year": year, "cyear": year or 2005})
- elif level == 2:
- imonth = int(month)
- new_text.append("""\
-#attr $Title = "Oleg BroytMann's blog: %(month_abbr_en)s %(year)s"
-#attr $Description = "BroytMann Russian Blog %(month_name_en)s %(year)s Index Document"
+ elif level == 2:
+ imonth = int(month)
+ new_text.append("""\
+#attr $Title = "Oleg Broytman's blog: %(month_abbr_en)s %(year)s"
+#attr $Description = "Broytman Russian Blog %(month_name_en)s %(year)s Index Document"
#attr $Copyright = %(cyear)s
##
#def body_html
-öÕÒÎÁÌ: %(month_name_ru0)s %(year)s
+öÕÒÎÁÌ: %(month_name_ru0)s %(year)s
""" % {
- "year": year, "cyear": year or 2005,
- "month_abbr_en": months_abbrs_en[imonth], "month_name_en": months_names_en[imonth],
- "month_name_ru0": months_names_ru0[imonth],
- })
-
- elif level == 3:
- iday = int(day)
- imonth = int(month)
-
- new_text.append("""\
-#attr $Next = "%s"
-""" % titles[0][3])
-
-
- if len(titles) == 1:
- new_text.append("""\
-#attr $refresh = "0; URL=%s"
+ "year": year, "cyear": year or 2005,
+ "month_abbr_en": months_abbrs_en[imonth], "month_name_en": months_names_en[imonth],
+ "month_name_ru0": months_names_ru0[imonth],
+ })
+
+ elif level == 3:
+ iday = int(day)
+ imonth = int(month)
+
+ if len(titles) == 1:
+ new_text.append("""\
+#attr $Refresh = "0; URL=%s"
""" % titles[0][3])
- new_text.append("""\
-#attr $Title = "Oleg BroytMann's blog: %(day)d %(month_abbr_en)s %(year)s"
-#attr $Description = "BroytMann Russian Blog %(day)d %(month_name_en)s %(year)s Index Document"
+ new_text.append("""\
+#attr $Title = "Oleg Broytman's blog: %(day)d %(month_abbr_en)s %(year)s"
+#attr $Description = "Broytman Russian Blog %(day)d %(month_name_en)s %(year)s Index Document"
#attr $Copyright = %(cyear)s
##
#def body_html
-öÕÒÎÁÌ: %(day)d %(month_name_ru0)s %(year)s
+öÕÒÎÁÌ: %(day)d %(month_name_ru)s %(year)s
""" % {
- "year": year, "cyear": year or 2005,
- "month_abbr_en": months_abbrs_en[imonth], "month_name_en": months_names_en[imonth],
- "month_name_ru0": months_names_ru0[imonth],
- "day": iday
- })
-
- save_titles = titles[:]
- titles.reverse()
-
- save_day = None
- for year, month, day, file, title, lead in titles:
- href = []
- if level == 0:
- href.append(year)
- if level <= 1:
- href.append(month)
- if level <= 2:
- href.append(day)
- href.append(file)
- href = '/'.join(href)
- if day[0] == '0': day = day[1:]
- if save_day <> day:
- if level == 0:
- new_text.append('\n%s %s %s
' % (day, months_names_ru[int(month)], year))
- else:
- new_text.append('\n%s %s
' % (day, months_names_ru[int(month)]))
- save_day = day
- if lead:
- lead = lead + ' '
- else:
- lead = ''
- new_text.append('''
+ "year": year, "cyear": year or 2005,
+ "month_abbr_en": months_abbrs_en[imonth], "month_name_en": months_names_en[imonth],
+ "month_name_ru": months_names_ru[imonth],
+ "day": iday
+ })
+
+ save_titles = titles[:]
+ titles.reverse()
+
+ save_date = None
+ for year, month, day, file, title, lead in titles:
+ href = []
+ if level == 0:
+ href.append(year)
+ if level <= 1:
+ href.append(month)
+ if level <= 2:
+ href.append(day)
+ href.append(file)
+ href = '/'.join(href)
+ if day[0] == '0': day = day[1:]
+ if save_date != (year, month, day):
+ if level == 0:
+ new_text.append('\n%s %s %s
' % (day, months_names_ru[int(month)], year))
+ else:
+ new_text.append('\n%s %s
' % (day, months_names_ru[int(month)]))
+ save_date = year, month, day
+ new_text.append('''
-''' % (lead, href, title))
+''' % (lead+' ' if lead else '', href, title))
- if level == 0:
- new_text.append("""
+ if level == 0:
+ new_text.append("""
+ôÅÇÉ
+ôÅÇÉ
+
+
+
+
"""]
-for count, tag, links in all_tags:
- new_text.append("""\
-
%s
+%s
-
-""" % (tag, tag, tag)]
+""" % tag)
- count = 0
- for year, month, day, filename, title, lead in reversed(links):
- if lead:
- lead = lead + ' '
- else:
- lead = ''
- link = "../%s/%s/%s/%s" % (year, month, day, filename)
- item_text = """
-
' + + def end_p(self): + self.first_p = self.accumulator + '
' + raise HTMLDone() + +def get_first_p(body): + parser = FirstPHTMLParser() + + try: + parser.feed(body) + except HTMLDone: + pass + + try: + parser.close() + except HTMLDone: + pass + + return parser.first_p -baseURL = "http://phd.pp.ru/Russian/blog/" + +class AbsURLHTMLParser(_HTMLParser): + def __init__(self, base): + _HTMLParser.__init__(self) + self.base = base + + def start_a(self, attrs): + self.accumulator += '' + + def end_a(self): + self.accumulator += '' + + def start_img(self, attrs): + self.accumulator += '' + + def end_img(self): + pass + +def absolute_urls(body, base): + parser = AbsURLHTMLParser(base) + + try: + parser.feed(body) + except Exception: + pass + + try: + parser.close() + except Exception: + pass + + return parser.accumulator + + +if blog_root: + blog_root_url = blog_root[ + blog_root.find('/htdocs/phdru.name/') + len('/htdocs/phdru.name/'):] + baseURL = "https://phdru.name/%s/" % blog_root_url +else: + baseURL = "https://phdru.name/" items = [] for item in tuple(reversed(all_titles_tags))[:10]: - year, month, day, file, title, lead, tags = item - if lead: - lead = lead + ' ' - else: - lead = '' - item = NewsItem( - "%s-%s-%s" % (year, month, day), - "%s%s" % (lead, title), - "%s/%s/%s/%s" % (year, month, day, file) - ) - items.append(item) - item.baseURL = baseURL - item.categoryList = tags + year, month, day, file, title, lead, tags = item + url_path = "%s/%s/%s/%s" % (year, month, day, file) + item = NewsItem( + "%s-%s-%s" % (year, month, day), + "%s%s" % (lead+' ' if lead else '', title), + url_path) + items.append(item) + item.baseURL = baseURL + item.categoryList = tags + body = bodies[(year, month, day, file)] + body = absolute_urls(body, baseURL + url_path) + item.body = body + excerpt = get_first_p(body) + item.excerpt = excerpt namespace = { - "title": "Oleg Broytmann's blog", - "baseURL": baseURL, - "indexFile": "", - "description": "", - "lang": "ru", - "author": "Oleg Broytmann", - "email": "phd@phd.pp.ru", - "posts": items, + "title": "Oleg Broytman's blog", + "baseURL": baseURL, + "indexFile": "", + "description": "", + "lang": "ru", + "author": "Oleg Broytman", + "email": "phd@phdru.name", + "generator": os.path.basename(sys.argv[0]), + "posts": items, } # For english dates locale.setlocale(locale.LC_TIME, 'C') -atom_tmpl = str(atom_10(searchList=[namespace])) -write_if_changed(os.path.join(blog_root, "atom_10.xml"), atom_tmpl) -rss_tmpl = str(rss_20(searchList=[namespace])) -write_if_changed(os.path.join(blog_root, "rss_20.xml"), rss_tmpl) +atom_tmpl = atom_10(searchList=[namespace]) +write_if_changed(os.path.join(blog_root, "atom_10.xml"), str(atom_tmpl)) +rss_tmpl = rss_20(searchList=[namespace]) +write_if_changed(os.path.join(blog_root, "rss_20.xml"), str(rss_tmpl)) + +for item in items: + item.excerpt = None + +atom_tmpl = atom_10(searchList=[namespace]) +write_if_changed(os.path.join(blog_root, "atom_10_titles.xml"), str(atom_tmpl)) +rss_tmpl = rss_20(searchList=[namespace]) +write_if_changed(os.path.join(blog_root, "rss_20_titles.xml"), str(rss_tmpl)) + +for item in items: + item.content = item.body + +atom_tmpl = atom_10(searchList=[namespace]) +write_if_changed(os.path.join(blog_root, "atom_10_full.xml"), str(atom_tmpl)) +rss_tmpl = rss_20(searchList=[namespace]) +write_if_changed(os.path.join(blog_root, "rss_20_full.xml"), str(rss_tmpl)) + +# vim: set ts=8 sts=4 sw=4 et :