X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;f=reindex_blog.py;h=a4667b01c56edd9901d9fda68f383c80b3b0d647;hb=a063dfdd704ebdfefb7bac1d5ab4ee4bca0763df;hp=67461c0a89d5ee65ad3f5d47856d6b9900939e6f;hpb=e0b73283707ea4d35a2c511435f1e2dbd5c747f7;p=phdru.name%2Fphdru.name.git diff --git a/reindex_blog.py b/reindex_blog.py index 67461c0..a4667b0 100755 --- a/reindex_blog.py +++ b/reindex_blog.py @@ -7,8 +7,12 @@ __date__ = "$Date$"[7:-2] __author__ = "Oleg BroytMann " __copyright__ = "Copyright (C) 2006 PhiloSoft Design" + import sys, os -from glob import glob + +blog_data_root = sys.argv[1] +blog_root = sys.argv[2] +blog_filename = os.path.join(blog_data_root, "blog_dict.pickle") try: import cPickle as pickle @@ -17,55 +21,71 @@ except ImportError: from Cheetah.Template import Template -blog_filename = sys.argv[1] + +# Load old blog + try: blog_file = open(blog_filename, "rb") except IOError: - blog = {} + old_blog = {} else: - blog = pickle.load(blog_file) + old_blog = pickle.load(blog_file) blog_file.close() + +# blog is a dictionary mapping +# (year, month, day) => [list of (file, title, lead, tags)] + +blog = {} years = {} -months = sys.argv[2:] -isdir = os.path.isdir - -if not months: - for year in os.listdir(os.curdir): - if isdir(year): - years[year] = {} - for month in os.listdir(year): - m = os.path.join(year, month) - if isdir(m): - months.append(m) - -days = [] -for month in months: - year, m = month.split(os.sep) - if year not in years: - years[year] = {} - years[year][m] = days_of_month = [] - for day in os.listdir(month): - d = os.path.join(month, day) - if isdir(d): - days.append(d) - days_of_month.append(d) - -for day in days: - for tmpl in glob(os.path.join(day, "*.tmpl")): - template = Template(file=tmpl) + +# Walk the directory recursively +for dirpath, dirs, files in os.walk(blog_root): + d = os.path.basename(dirpath) + if not d.startswith("20") and not d.isdigit(): + continue + for file in files: + if not file.endswith(".tmpl"): + continue + fullpath = os.path.join(dirpath, file) + template = Template(file=fullpath) title_parts = template.Title.split() title = ' '.join(title_parts[6:]) lead = getattr(template, "Lead", None) + tags = template.Tag + if isinstance(tags, basestring): + tags = (tags,) + if title: - day_parts = day.split(os.sep) - blog[tuple(day_parts)] = (title, os.path.basename(tmpl), lead) + key = year, month, day = tuple(dirpath[len(blog_root):].split(os.sep)[1:]) + if key in blog: + days = blog[key] + else: + days = blog[key] = [] + days.append((file, title, lead, tags)) -blog_file = open(blog_filename, "wb") -pickle.dump(blog, blog_file, pickle.HIGHEST_PROTOCOL) -blog_file.close() + if year in years: + months = years[year] + else: + months = years[year] = {} + + if month in months: + days = months[month] + else: + days = months[month] = [] + if day not in days: days.append(day) + + +# Need to save the blog? +if blog <> old_blog: + blog_file = open(blog_filename, "wb") + pickle.dump(blog, blog_file, pickle.HIGHEST_PROTOCOL) + blog_file.close() + + +# Localized month names import locale locale.setlocale(locale.LC_ALL, '') @@ -82,38 +102,40 @@ months_names_ru0 = ['', " "ÉÀÌØ", "Á×ÇÕÓÔ", "ÓÅÎÔÑÂÒØ", "ÏËÔÑÂÒØ", "ÎÏÑÂÒØ", "ÄÅËÁÂÒØ" ] +from news import write_if_changed -def write_template(year, month, titles): - index_name = os.path.join(year, month, "index.tmpl") - try: - index_tmpl = open(index_name, 'r') - old_text = index_tmpl.read() - index_tmpl.close() - except IOError: - old_text = None - new_text = [] - show_year = not year - show_month = not month +def write_template(level, year, month, day, titles, tags=None): + path = [blog_root] + if level >= 1: + path.append(year) + if level >= 2: + path.append(month) + if level == 3: + path.append(day) + path.append("index.tmpl") + index_name = os.path.join(*path) - new_text.append("""\ + new_text = ["""\ ## THIS FILE IS AUTOMATICALLY GENERATED. DO NOT EDIT. #extends phd_pp_ru #implements respond -""") +"""] - if show_year: + if level == 0: new_text.append("""\ #attr $Title = "Oleg BroytMann's blog" #attr $Description = "BroytMann Russian Blog Index Document" #attr $Copyright = %(cyear)s +#attr $alternates = (("application/atom+xml", "News [Atom 1.0]", "atom_10.xml"), + ("application/rss+xml", "News [RSS 2.0]", "rss_20.xml") +) ## #def body_html

öÕÒÎÁÌ

""" % {"cyear": year or 2005}) - elif show_month: - + elif level == 1: new_text.append("""\ #attr $Title = "Oleg BroytMann's blog: %(year)s" #attr $Description = "BroytMann Russian Blog %(year)s Index Document" @@ -123,9 +145,8 @@ def write_template(year, month, titles):

öÕÒÎÁÌ: %(year)s

""" % {"year": year, "cyear": year or 2005}) - else: - - month = int(month) + elif level == 2: + imonth = int(month) new_text.append("""\ #attr $Title = "Oleg BroytMann's blog: %(month_abbr_en)s %(year)s" #attr $Description = "BroytMann Russian Blog %(month_name_en)s %(year)s Index Document" @@ -135,27 +156,55 @@ def write_template(year, month, titles):

öÕÒÎÁÌ: %(month_name_ru0)s %(year)s

""" % { "year": year, "cyear": year or 2005, - "month_abbr_en": months_abbrs_en[month], "month_name_en": months_names_en[month], - "month_name_ru0": months_names_ru0[month], + "month_abbr_en": months_abbrs_en[imonth], "month_name_en": months_names_en[imonth], + "month_name_ru0": months_names_ru0[imonth], + }) + + elif level == 3: + iday = int(day) + imonth = int(month) + + new_text.append("""\ +#attr $Next = "%s" +""" % titles[0][3]) + + + if len(titles) == 1: + new_text.append("""\ +#attr $refresh = "0; URL=%s" +""" % titles[0][3]) + + new_text.append("""\ +#attr $Title = "Oleg BroytMann's blog: %(day)d %(month_abbr_en)s %(year)s" +#attr $Description = "BroytMann Russian Blog %(day)d %(month_name_en)s %(year)s Index Document" +#attr $Copyright = %(cyear)s +## +#def body_html +

öÕÒÎÁÌ: %(day)d %(month_name_ru)s %(year)s

+""" % { + "year": year, "cyear": year or 2005, + "month_abbr_en": months_abbrs_en[imonth], "month_name_en": months_names_en[imonth], + "month_name_ru": months_names_ru[imonth], + "day": iday }) save_titles = titles[:] titles.reverse() save_day = None - for key, tmpl, title, lead in titles: - year, month, day = key + for year, month, day, file, title, lead in titles: href = [] - if show_year: + if level == 0: href.append(year) - if show_month: + if level <= 1: href.append(month) - href.append(day) - href.append(tmpl) + if level <= 2: + href.append(day) + href.append(file) href = '/'.join(href) if day[0] == '0': day = day[1:] if save_day <> day: - if show_year: + if level == 0: new_text.append('\n

%s %s %s

' % (day, months_names_ru[int(month)], year)) else: new_text.append('\n

%s %s

' % (day, months_names_ru[int(month)])) @@ -170,17 +219,37 @@ def write_template(year, month, titles):

''' % (lead, href, title)) - if show_year: + if level == 0: + new_text.append(""" +
+ +

îÏ×ÏÓÔÅ×ÁÑ ÌÅÎÔÁ × ÆÏÒÍÁÔÁÈ +Atom 1.0 RSS 2.0 . +

+""") + years = {} - for key, tmpl, title, lead in save_titles: - year, month, day = key + for year, month, day, file, title, lead in save_titles: years[year] = True - first_year = True new_text.append(''' -
+

ôÅÇÉ: +''') + first_tag = True + for count, tag, links in all_tags: + if first_tag: + first_tag = False + else: + new_text.append(' - ') + new_text.append("""%s (%d)""" % (tag, tag, count)) + new_text.append(''' +

+''') -

+ new_text.append(''' +

ðÏ ÇÏÄÁÍ: ''') + first_year = True for year in sorted(years.keys()): if first_year: first_year = False @@ -191,57 +260,169 @@ def write_template(year, month, titles):

''') + new_text.append(""" +
+

öö +""") + new_text.append("""\ #end def $phd_pp_ru.respond(self) """) - new_text = ''.join(new_text) - if old_text <> new_text: - print "Writing", index_name - index_tmpl = open(index_name, 'w') - index_tmpl.write(new_text) - index_tmpl.close() + write_if_changed(index_name, ''.join(new_text)) -def translate(tmpl): - if tmpl == "index.tmpl": tmpl = '' - if tmpl.endswith(".tmpl"): tmpl = tmpl[:-len("tmpl")] + "html" - return tmpl - - -all_years = {} +all_tags = {} all_titles = [] -for key in sorted(blog.keys()): - year, month, day = key - if year in all_years: - year_d = all_years[year] - else: - all_years[year] = year_d = {} - if month in year_d: - days_of_month = year_d[month] - else: - year_d[month] = days_of_month = [] - d = os.path.join(year, month, day) - days_of_month.append(d) - title, tmpl, lead = blog[key] - all_titles.append((key, translate(tmpl), title, lead)) -all_titles = all_titles[-20:] - +all_titles_tags = [] for year in sorted(years.keys()): year_titles = [] - months = all_years[year] + months = years[year] for month in sorted(months.keys()): month_titles = [] for day in sorted(months[month]): - day_parts = day.split(os.sep) - key = tuple(day_parts) + day_titles = [] + key = year, month, day if key in blog: - title, tmpl, lead = blog[key] - tmpl = translate(tmpl) - year_titles.append((key, tmpl, title, lead)) - month_titles.append((key, tmpl, title, lead)) - write_template(year, month, month_titles) - write_template(year, '', year_titles) -write_template('', '', all_titles) + for file, title, lead, tags in blog[key]: + if file.endswith(".tmpl"): file = file[:-len("tmpl")] + "html" + value = (year, month, day, file, title, lead) + all_titles_tags.append((year, month, day, file, title, lead, tags)) + all_titles.append(value) + year_titles.append(value) + month_titles.append(value) + day_titles.append(value) + for tag in tags: + if tag in all_tags: + tag_links = all_tags[tag] + else: + tag_links = all_tags[tag] = [] + tag_links.append(value) + write_template(3, year, month, day, day_titles) + write_template(2, year, month, day, month_titles) + write_template(1, year, month, day, year_titles) + +def by_count_rev_tag_link(t1, t2): + """Sort all_tags by count in descending order, + and by tags and links in ascending order + """ + r = cmp(t1[0], t2[0]) + if r: + return -r + return cmp((t1[1], t1[2]), (t2[1], t2[2])) + +all_tags = [(len(links), tag, links) for (tag, links) in all_tags.items()] +all_tags.sort(by_count_rev_tag_link) + +write_template(0, year, month, day, all_titles[-20:], all_tags) + +new_text = ["""\ +## THIS FILE IS AUTOMATICALLY GENERATED. DO NOT EDIT. +#extends phd_pp_ru +#implements respond +#attr $Title = "Oleg BroytMann's blog: tags" +#attr $Description = "BroytMann Russian Blog Tags Index Document" +#attr $Copyright = 2006 +## +#def body_html +

ôÅÇÉ

+ +

+

+"""] + +for count, tag, links in all_tags: + new_text.append("""\ +
%s (%d)
+""" % (tag, tag, count)) + + tag_text = ["""\ +## THIS FILE IS AUTOMATICALLY GENERATED. DO NOT EDIT. +#extends phd_pp_ru +#implements respond +#attr $Title = "Oleg BroytMann's blog: tag %s" +#attr $Description = "BroytMann Russian Blog Tag %s Index Document" +#attr $Copyright = 2006 +## +#def body_html +

%s

+ +

+

+

+#end def +$phd_pp_ru.respond(self) +""") + write_if_changed(os.path.join(blog_root, "tags", tag+".tmpl"), ''.join(tag_text)) + +new_text.append("""\ +
+

+#end def +$phd_pp_ru.respond(self) +""") +write_if_changed(os.path.join(blog_root, "tags", "index.tmpl"), ''.join(new_text)) + + +from atom_10 import atom_10 +from rss_20 import rss_20 +from news import NewsItem + +baseURL = "http://phd.pp.ru/" + blog_root + +items = [] +for item in tuple(reversed(all_titles_tags))[:10]: + year, month, day, file, title, lead, tags = item + if lead: + lead = lead + ' ' + else: + lead = '' + item = NewsItem( + "%s-%s-%s" % (year, month, day), + "%s%s" % (lead, title), + "%s/%s/%s/%s" % (year, month, day, file) + ) + items.append(item) + item.baseURL = baseURL + item.categoryList = tags + +namespace = { + "title": "Oleg Broytmann's blog", + "baseURL": baseURL, + "indexFile": "", + "description": "", + "lang": "ru", + "author": "Oleg Broytmann", + "email": "phd@phd.pp.ru", + "generator": os.path.basename(sys.argv[0]), + "posts": items, +} + +# For english dates +locale.setlocale(locale.LC_TIME, 'C') + +atom_tmpl = str(atom_10(searchList=[namespace])) +write_if_changed(os.path.join(blog_root, "atom_10.xml"), atom_tmpl) +rss_tmpl = str(rss_20(searchList=[namespace])) +write_if_changed(os.path.join(blog_root, "rss_20.xml"), rss_tmpl)