X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;ds=sidebyside;f=reindex_blog.py;h=0f1a8f5f9b185fe34ccecbdf8f2b297f3bd0b3d0;hb=refs%2Fheads%2Fmaster;hp=be27835e9a9140c3988bc4a39009bd816a5a644d;hpb=6aa7ab00b5dc30b6221d0d55fecc1b0b957cdd79;p=phdru.name%2Fphdru.name.git diff --git a/reindex_blog.py b/reindex_blog.py index be27835..3370d6f 100755 --- a/reindex_blog.py +++ b/reindex_blog.py @@ -1,212 +1,590 @@ -#! /usr/local/bin/python -O +#! /usr/bin/env python3 # -*- coding: koi8-r -*- -import sys, os -from glob import glob +__author__ = "Oleg Broytman " +__copyright__ = "Copyright (C) 2006-2024 PhiloSoft Design" -try: - import cPickle as pickle -except ImportError: - import pickle +from calendar import _localized_month +from html import escape +import locale +import sys, os +from urllib.parse import quote, urljoin from Cheetah.Template import Template +from Cheetah.compat import string_type +from m_lib.net.www.html import HTMLParser as _HTMLParser -blog_filename = sys.argv[1] -try: - blog_file = open(blog_filename, "rb") -except IOError: - blog = {} -else: - blog = pickle.load(blog_file) - blog_file.close() +from atom_10 import atom_10 +from blog_db import blog_root, load_blog, save_blog +from news import NewsItem, write_if_changed +from rss_20 import rss_20 + +old_blog = load_blog() + +# blog is a dictionary mapping +# (year, month, day) => [list of (file, title, lead, tags)] + +blog = {} years = {} -months = sys.argv[2:] -isdir = os.path.isdir - -if not months: - for year in os.listdir(os.curdir): - if isdir(year): - years[year] = {} - for month in os.listdir(year): - m = os.path.join(year, month) - if isdir(m): - months.append(m) - -days = [] -for month in months: - year, m = month.split(os.sep) - if month not in years: - years[year] = {} - years[year][m] = days_of_month = [] - for day in os.listdir(month): - d = os.path.join(month, day) - if isdir(d): - days.append(d) - days_of_month.append(d) - -for day in days: - for tmpl in glob(os.path.join(day, "*.tmpl")): - template = Template(file=tmpl) - title_parts = template.Title.split() - title = ' '.join(title_parts[6:]) - lead = getattr(template, "Lead", None) - - if title: - day_parts = day.split(os.sep) - blog[tuple(day_parts)] = (title, os.path.basename(tmpl), lead) - -blog_file = open(blog_filename, "wb") -pickle.dump(blog, blog_file, pickle.HIGHEST_PROTOCOL) -blog_file.close() +# bodies is a dictionary mapping file => body -import locale -locale.setlocale(locale.LC_ALL, '') -from calendar import _localized_day, _localized_month +bodies = {} + +# Walk the directory recursively +for dirpath, dirs, files in os.walk(blog_root): + d = os.path.basename(dirpath) + if not d.startswith("20") and not d.isdigit(): + continue + for file in files: + if not file.endswith(".tmpl"): + continue + fullpath = os.path.join(dirpath, file) + template = Template(file=fullpath) + title_parts = template.Title.split() + title = ' '.join(title_parts[6:]) + lead = template.Lead + + tags = template.Tag + if isinstance(tags, string_type): + tags = (tags,) + + if title: + key = year, month, day = \ + tuple(dirpath[len(blog_root):].split(os.sep)[1:]) + if key in blog: + days = blog[key] + else: + days = blog[key] = [] + days.append((file, title, lead, tags)) + + if year in years: + months = years[year] + else: + months = years[year] = {} + + if month in months: + days = months[month] + else: + days = months[month] = [] + + if day not in days: days.append(day) + + file = file[:-len("tmpl")] + "html" + key = (year, month, day, file) + body = template.body() + bodies[key] = body + +# Need to save the blog? +if blog != old_blog: + save_blog(blog) + +# Localized month names +locale.setlocale(locale.LC_ALL, "ru_RU.KOI8-R") locale.setlocale(locale.LC_TIME, 'C') months_names_en = list(_localized_month('%B')) months_abbrs_en = list(_localized_month('%b')) -locale.setlocale(locale.LC_TIME, '') -months_names_ru = [month.lower() for month in _localized_month('%B')] +locale.setlocale(locale.LC_TIME, "ru_RU.KOI8-R") +# months_names_ru = list(_localized_month('%B')) + +months_names_ru = [ + '', "ÑÎ×ÁÒÑ", "ÆÅ×ÒÁÌÑ", "ÍÁÒÔÁ", "ÁÐÒÅÌÑ", "ÍÁÑ", "ÉÀÎÑ", + "ÉÀÌÑ", "Á×ÇÕÓÔÁ", "ÓÅÎÔÑÂÒÑ", "ÏËÔÑÂÒÑ", "ÎÏÑÂÒÑ", "ÄÅËÁÂÒÑ" +] -months_names_ru0 = ['', "ÑÎ×ÁÒØ", "ÆÅ×ÒÁÌØ", "ÍÁÒÔ", "ÁÐÒÅÌØ", "ÍÁÊ", "ÉÀÎØ", - "ÉÀÌØ", "Á×ÇÕÓÔ", "ÓÅÎÔÑÂÒØ", "ÏËÔÑÂÒØ", "ÎÏÑÂÒØ", "ÄÅËÁÂÒØ" +months_names_ru0 = [ + '', "ÑÎ×ÁÒØ", "ÆÅ×ÒÁÌØ", "ÍÁÒÔ", "ÁÐÒÅÌØ", "ÍÁÊ", "ÉÀÎØ", + "ÉÀÌØ", "Á×ÇÕÓÔ", "ÓÅÎÔÑÂÒØ", "ÏËÔÑÂÒØ", "ÎÏÑÂÒØ", "ÄÅËÁÂÒØ" ] -def write_template(show_year, show_month, year, month, titles, cut=None, show_years=False): - index_tmpl = open(os.path.join(year, month, "index.tmpl"), 'w') - if show_year: - index_tmpl.write("""\ -#extends phd_pp_ru +def encode_tag(tag): + return quote(tag.replace(' ', '_'), encoding='koi8-r') + + +def write_template(level, year, month, day, titles, tags=None): + path = [blog_root] + if level >= 1: + path.append(year) + if level >= 2: + path.append(month) + if level == 3: + path.append(day) + path.append("index.tmpl") + index_name = os.path.join(*path) + + new_text = ["""\ +## THIS FILE IS AUTOMATICALLY GENERATED. DO NOT EDIT. +#encoding koi8-r +#extends phd_site #implements respond -#attr $Title = "Oleg BroytMann's blog" -#attr $Description = "BroytMann Russian Blog Index Document" +"""] + + if level == 0: + new_text.append("""\ +#attr $Title = "Oleg Broytman's blog" +#attr $Description = "Broytman Russian Blog Index Document" #attr $Copyright = %(cyear)s +#attr $alternates = (("îÏ×ÏÓÔÉ [Atom 1.0] ÔÏÌØËÏ ÚÁÇÏÌÏ×ËÉ", "application/atom+xml", "atom_10_titles.xml"), + ("îÏ×ÏÓÔÉ [Atom 1.0]", "application/atom+xml", "atom_10.xml"), + ("îÏ×ÏÓÔÉ [Atom 1.0] ÐÏÌÎÙÅ ÔÅËÓÔÙ", "application/atom+xml", "atom_10_full.xml"), + ("îÏ×ÏÓÔÉ [RSS 2.0] ÔÏÌØËÏ ÚÁÇÏÌÏ×ËÉ", "application/rss+xml", "rss_20_titles.xml"), + ("îÏ×ÏÓÔÉ [RSS 2.0]", "application/rss+xml", "rss_20.xml"), + ("îÏ×ÏÓÔÉ [RSS 2.0] ÐÏÌÎÙÅ ÔÅËÓÔÙ", "application/rss+xml", "rss_20_full.xml"), +) ## #def body_html -

öÕÒÎÁÌ

+

öÕÒÎÁÌ

""" % {"cyear": year or 2005}) - elif show_month: - - index_tmpl.write("""\ -#extends phd_pp_ru -#implements respond -#attr $Title = "Oleg BroytMann's blog: %(year)s" -#attr $Description = "BroytMann Russian Blog %(year)s Index Document" + elif level == 1: + new_text.append("""\ +#attr $Title = "Oleg Broytman's blog: %(year)s" +#attr $Description = "Broytman Russian Blog %(year)s Index Document" #attr $Copyright = %(cyear)s ## #def body_html -

öÕÒÎÁÌ: %(year)s

+

öÕÒÎÁÌ: %(year)s

""" % {"year": year, "cyear": year or 2005}) - else: + elif level == 2: + imonth = int(month) + new_text.append("""\ +#attr $Title = "Oleg Broytman's blog: %(month_abbr_en)s %(year)s" +#attr $Description = "Broytman Russian Blog %(month_name_en)s %(year)s Index Document" +#attr $Copyright = %(cyear)s +## +#def body_html +

öÕÒÎÁÌ: %(month_name_ru0)s %(year)s

+""" % { + "year": year, "cyear": year or 2005, + "month_abbr_en": months_abbrs_en[imonth], "month_name_en": months_names_en[imonth], + "month_name_ru0": months_names_ru0[imonth], + }) - month = int(month) - index_tmpl.write("""\ -#extends phd_pp_ru -#implements respond -#attr $Title = "Oleg BroytMann's blog: %(month_abbr_en)s %(year)s" -#attr $Description = "BroytMann Russian Blog %(month_name_en)s %(year)s Index Document" + elif level == 3: + iday = int(day) + imonth = int(month) + + if len(titles) == 1: + new_text.append("""\ +#attr $Refresh = "0; URL=%s" +""" % titles[0][3]) + + new_text.append("""\ +#attr $Title = "Oleg Broytman's blog: %(day)d %(month_abbr_en)s %(year)s" +#attr $Description = "Broytman Russian Blog %(day)d %(month_name_en)s %(year)s Index Document" #attr $Copyright = %(cyear)s ## #def body_html -

öÕÒÎÁÌ: %(month_name_ru0)s %(year)s

+

öÕÒÎÁÌ: %(day)d %(month_name_ru)s %(year)s

""" % { - "year": year, "cyear": year or 2005, - "month_abbr_en": months_abbrs_en[month], "month_name_en": months_names_en[month], - "month_name_ru0": months_names_ru0[month], - }) - - save_titles = titles[:] - titles.reverse() - if cut: - titles = titles[:cut] - - save_day = None - for key, tmpl, title, lead in titles: - year, month, day = key - href = [] - if show_year: - href.append(year) - if show_month: - href.append(month) - href.append(day) - href.append(tmpl) - href = '/'.join(href) - if day[0] == '0': day = day[1:] - if save_day <> day: - if show_year: - index_tmpl.write('\n

%s %s %s

' % (day, months_names_ru[int(month)], year)) - else: - index_tmpl.write('\n

%s %s

' % (day, months_names_ru[int(month)])) - save_day = day - if lead: - lead = lead + ' ' - else: - lead = '' - index_tmpl.write(''' + "year": year, "cyear": year or 2005, + "month_abbr_en": months_abbrs_en[imonth], "month_name_en": months_names_en[imonth], + "month_name_ru": months_names_ru[imonth], + "day": iday + }) + + save_titles = titles[:] + titles.reverse() + + save_date = None + for year, month, day, file, title, lead in titles: + href = [] + if level == 0: + href.append(year) + if level <= 1: + href.append(month) + if level <= 2: + href.append(day) + href.append(file) + href = '/'.join(href) + if day[0] == '0': day = day[1:] + if save_date != (year, month, day): + if level == 0: + new_text.append('\n

%s %s %s

' % (day, months_names_ru[int(month)], year)) + else: + new_text.append('\n

%s %s

' % (day, months_names_ru[int(month)])) + save_date = year, month, day + new_text.append('''

- %s%s. + %s%s.

-''' % (lead, href, title)) - - if show_years: - years = {} - for key, tmpl, title, lead in save_titles: - year, month, day = key - years[year] = True - first_year = True - index_tmpl.write(''' +''' % (lead+' ' if lead else '', href, title)) + + if level == 0: + new_text.append("""
-

+

îÏ×ÏÓÔÅ×ÁÑ ÌÅÎÔÁ × ÆÏÒÍÁÔÁÈ + +Atom 1.0 ÔÏÌØËÏ ÚÁÇÏÌÏ×ËÉ / +Atom 1.0 / +Atom 1.0 ÐÏÌÎÙÅ ÔÅËÓÔÙ +RSS 2.0 ÔÏÌØËÏ ÚÁÇÏÌÏ×ËÉ / +RSS 2.0 / +RSS 2.0 ÐÏÌÎÙÅ ÔÅËÓÔÙ. +

+""") + + years = {} + for year, month, day, file, title, lead in save_titles: + years[year] = True + new_text.append(''' +

ôÅÇÉ: ''') - for year in sorted(years.keys()): - if first_year: - first_year = False - else: - index_tmpl.write(' - ') - index_tmpl.write('%s' % (year, year)) - index_tmpl.write(''' + first_tag = True + for count, tag, links in all_tags: + if first_tag: + first_tag = False + else: + new_text.append(' - ') + new_text.append("""%s (%d)""" % ( + encode_tag(tag), tag, count)) + new_text.append('''

''') - index_tmpl.write("""\ -#end def -$phd_pp_ru.respond(self) + max_year = int(sorted(years.keys())[-1]) + years = range(max_year, 2005, -1) + + new_text.append(''' +

ðÏ ÇÏÄÁÍ: +''') + + year_counts = {} + for year, month, day, file, title, lead in all_titles: + year_counts[year] = 0 + for year, month, day, file, title, lead in all_titles: + year_counts[year] += 1 + + first_year = True + for year in years: + if first_year: + first_year = False + else: + new_text.append(' - ') + new_text.append('%s (%d)' % (year, year, year_counts[str(year)])) + new_text.append(''' +

+''') + + new_text.append(""" +
+

öö """) - index_tmpl.close() + new_text.append("""\ +#end def +$phd_site.respond(self) +""") -def translate(tmpl): - if tmpl == "index.tmpl": tmpl = '' - if tmpl.endswith(".tmpl"): tmpl = tmpl[:-len("tmpl")] + "html" - return tmpl + write_if_changed(index_name, ''.join(new_text)) +all_tags = {} all_titles = [] -for key in sorted(blog.keys()): - title, tmpl, lead = blog[key] - all_titles.append((key, translate(tmpl), title, lead)) +all_titles_tags = [] for year in sorted(years.keys()): - year_titles = [] - months = years[year] - for month in sorted(months.keys()): - month_titles = [] - for day in sorted(months[month]): - day_parts = day.split(os.sep) - key = tuple(day_parts) - if key in blog: - title, tmpl, lead = blog[key] - tmpl = translate(tmpl) - year_titles.append((key, tmpl, title, lead)) - month_titles.append((key, tmpl, title, lead)) - write_template(False, False, year, month, month_titles) - write_template(False, True, year, '', year_titles) -write_template(True, True, '', '', all_titles, 5, True) + year_titles = [] + months = years[year] + for month in sorted(months.keys()): + month_titles = [] + for day in sorted(months[month]): + day_titles = [] + key = year, month, day + if key in blog: + for file, title, lead, tags in blog[key]: + if file.endswith(".tmpl"): file = file[:-len("tmpl")] + "html" + value = (year, month, day, file, title, lead) + all_titles_tags.append((year, month, day, file, title, lead, tags)) + all_titles.append(value) + year_titles.append(value) + month_titles.append(value) + day_titles.append(value) + for tag in tags: + if tag in all_tags: + tag_links = all_tags[tag] + else: + tag_links = all_tags[tag] = [] + tag_links.append(value) + write_template(3, year, month, day, day_titles) + write_template(2, year, month, day, month_titles) + write_template(1, year, month, day, year_titles) + +def by_count_rev_tag_link(tag): + """Sort all_tags by count in descending order, + and by tags and links in ascending order + """ + return tag[:3] + +all_tags = [(len(links), tag, links) for (tag, links) in all_tags.items()] +all_tags.sort(key=by_count_rev_tag_link) + +write_template(0, year, month, day, all_titles[-20:], all_tags) + +new_text = ["""\ +## THIS FILE IS AUTOMATICALLY GENERATED. DO NOT EDIT. +#encoding koi8-r +#extends phd_site +#implements respond +#attr $Title = "Oleg Broytman's blog: tags" +#attr $Description = "Broytman Russian Blog Tags Index Document" +#attr $Copyright = 2006 +## +#def body_html +

ôÅÇÉ

+ +

+æÏÒÍÁ ÐÏÉÓËÁ ÐÏÚ×ÏÌÑÅÔ ÉÓËÁÔØ ÓÏÏÂÝÅÎÉÑ × ÂÌÏÇÅ, ÓÏÏÔ×ÅÔÓÔ×ÕÀÝÉÅ ×ÙÒÁÖÅÎÉÀ. +óÉÎÔÁËÓÉÓ ×ÙÒÁÖÅÎÉÑ:

+ +

+ðÒÉÍÅÒÙ ×ÙÒÁÖÅÎÉÊ: linux - ÐÒÏÉÚÏÊÄ£Ô ÐÅÒÅÎÁÐÒÁ×ÌÅÎÉÅ +ÎÁ ÓÔÒÁÎÉÃÕ Linux.html; linux&!debian - ÉÓËÁÔØ ÚÁÐÉÓÉ × ËÏÔÏÒÙÈ ÅÓÔØ ÔÅÇ +Linux É ÎÅÔ ÔÅÇÁ Debian; Linux and not Debian - ÔÏ ÖÅ ÓÁÍÏÅ. åÓÌÉ × ÔÅÇÅ ÅÓÔØ +ÐÒÏÂÅÌ ("í£ÒÔ×ÏÅ ÍÏÒÅ", "þ£ÒÎÏÅ ÍÏÒÅ") - ÅÇÏ ÎÁÄÏ ÚÁÍÅÎÉÔØ ÎÁ ÐÏÄÞ£ÒËÉ×ÁÎÉÅ; +ÎÁÐÒÉÍÅÒ: "éÚÒÁÉÌØ é îå Í£ÒÔ×ÏÅ_ÍÏÒÅ", "ëÁ×ËÁÚ É ÎÅ Þ£ÒÎÏÅ_ÍÏÒÅ". +

+ +
+
+ + +
+
+ +
+"""] + +for i, (count, tag, links) in enumerate(all_tags): + new_text.append("""\ +
%s (%d)
+""" % (encode_tag(tag), tag, count)) + + first = all_tags[0][1] + if i == 0: + prev = None + else: + prev = all_tags[i-1][1] + if i >= len(all_tags)-1: + next = None + else: + next = all_tags[i+1][1] + last = all_tags[-1][1] + + tag_text = ["""\ +## THIS FILE IS AUTOMATICALLY GENERATED. DO NOT EDIT. +#encoding koi8-r +#extends phd_site +#implements respond +#attr $Title = "Oleg Broytman's blog: tag %s" +#attr $Description = "Broytman Russian Blog Tag %s Index Document" +""" % (tag, tag)] + + tag_text.append("""\ +#attr $First = "%s" +""" % first) + + if prev: + tag_text.append("""\ +#attr $Prev = "%s" +""" % prev) + + if next: + tag_text.append("""\ +#attr $Next = "%s" +""" % next) + + tag_text.append("""\ +#attr $Last = "%s" +""" % last) + + tag_text.append("""\ +#attr $Copyright = 2006 +## +#def body_html +

%s

+ + +#end def +$phd_site.respond(self) +""") + write_if_changed(os.path.join(blog_root, "tags", + tag.replace(' ', '_') + ".tmpl"), + ''.join(tag_text)) + +new_text.append("""\ +
+#end def +$phd_site.respond(self) +""") +write_if_changed(os.path.join(blog_root, "tags", "index.tmpl"), ''.join(new_text)) + + +class HTMLDone(Exception): pass + + +class FirstPHTMLParser(_HTMLParser): + def __init__(self): + _HTMLParser.__init__(self) + self.first_p = None + + def start_p(self, attrs): + self.accumulator = '

' + + def end_p(self): + self.first_p = self.accumulator + '

' + raise HTMLDone() + +def get_first_p(body): + parser = FirstPHTMLParser() + + try: + parser.feed(body) + except HTMLDone: + pass + + try: + parser.close() + except HTMLDone: + pass + + return parser.first_p + + +class AbsURLHTMLParser(_HTMLParser): + def __init__(self, base): + _HTMLParser.__init__(self) + self.base = base + + def start_a(self, attrs): + self.accumulator += '