import os, re, time, urllib
+from HTMLParser import HTMLParseError
from Cheetah.Template import Template
+from m_lib.net.www.html import HTMLParser as _HTMLParser
+
url_re = r"(((https?|ftp|gopher|telnet)://|(mailto|file|news|about|ed2k|irc|sip|magnet):)[^' \t<>\"]+|(www|web|w3)[A-Za-z0-9_-]*\.[A-Za-z0-9._-]+\.[^' \t<>\"]+)[A-Za-z0-9/]"
url = match.group(0)
return '<a href="%s">%s</a>' % (url, url)
+
full_dirs = len(os.getcwd().split('/')) + 1
class phd_pp(Template):
def body(self):
+ # Render the page body from whichever source the template defines
+ # (HTML, plain text, or reStructuredText); a later source wins if
+ # several are present. NOTE(review): 'body' stays unbound if none
+ # of the three attributes exists - presumably every template
+ # defines at least one; confirm against the templates.
if hasattr(self, "body_html"):
- return self.body_html()
+ body = self.body_html()
if hasattr(self, "body_text"):
- return self.text2html()
+ body = self.text2html()
if hasattr(self, "body_rst"):
- return self.rst2html()
+ body = self.rst2html()
+ # Cache the rendered body on the instance so callers (e.g. the
+ # full-text feed generation below) can reuse it without re-rendering.
+ self.Body = body
+ return body
def text2html(self):
body = re.sub(url_re, _url2href, self.body_text())
def rst2html(self):
from docutils.core import publish_parts
- from locale import getpreferredencoding
- encoding = getpreferredencoding()
+ from m_lib.defenc import default_encoding as encoding
parts = publish_parts(self.body_rst(), writer_name="html")
parts = [part for part in (title, subtitle, body) if part]
return "\n\n".join(parts)
+ def get_first_p(self):
+ # Extract the first <p>...</p> paragraph of the rendered body,
+ # used as the excerpt in the titles-only/short news feeds.
+ parser = HTMLParser()
+
+ # HTMLHeadDone is raised by the parser itself as soon as the first
+ # paragraph is complete (deliberate early exit); HTMLParseError
+ # covers malformed markup. Both are best-effort - swallow them.
+ try:
+ parser.feed(self.body())
+ except (HTMLParseError, HTMLHeadDone):
+ pass
+
+ try:
+ parser.close()
+ except (HTMLParseError, HTMLHeadDone):
+ pass
+
+ # None if the body contained no <p> element at all.
+ return parser.first_p
+
def img_thumbnail_800_1024(self, img_name):
return """\
<img src="%(img_name)s-thumbnail.jpg" alt="%(img_name)s-thumbnail.jpg" /><br />
def quote_string(s, to_encoding="utf-8", ext_safe=''):
return urllib.quote(unicode(s, "koi8-r").encode(to_encoding), '/' + ext_safe)
+
+
+# Control-flow exception: raised by HTMLParser once the first paragraph
+# has been captured, so the (potentially long) body is not parsed further.
+class HTMLHeadDone(Exception): pass
+
+# Minimal HTML parser that captures the first <p>...</p> of a document
+# into self.first_p (None if the document has no paragraph).
+# NOTE(review): relies on the m_lib base class appending character data
+# to self.accumulator between start_p and end_p - confirm in m_lib.
+class HTMLParser(_HTMLParser):
+ def __init__(self, charset=None):
+ # charset is accepted for interface compatibility; unused here.
+ _HTMLParser.__init__(self)
+ self.first_p = None
+
+ def start_p(self, attrs):
+ # (Re)start accumulation at every opening <p>; attrs are ignored.
+ self.accumulator = '<p>'
+
+ def end_p(self):
+ # First closing </p>: save the paragraph and abort further parsing.
+ self.first_p = self.accumulator + '</p>'
+ raise HTMLHeadDone()
blog = {}
years = {}
+# excerpts and bodies are dictionaries mapping file => excerpt/body
+
+excerpts = {}
+bodies = {}
+
# Walk the directory recursively
for dirpath, dirs, files in os.walk(blog_root):
d = os.path.basename(dirpath)
if day not in days: days.append(day)
+ file = file[:-len("tmpl")] + "html"
+ key = (year, month, day, file)
+ excerpts[key] = template.get_first_p()
+ bodies[key] = template.Body
# Need to save the blog?
if blog <> old_blog:
#attr $Title = "Oleg Broytman's blog"
#attr $Description = "Broytman Russian Blog Index Document"
#attr $Copyright = %(cyear)s
-#attr $alternates = (("News [Atom 1.0]", "application/atom+xml", "atom_10.xml"),
- ("News [RSS 2.0]", "application/rss+xml", "rss_20.xml")
+#attr $alternates = (("Новости [Atom 1.0] только заголовки", "application/atom+xml", "atom_10_titles.xml"),
+ ("Новости [Atom 1.0]", "application/atom+xml", "atom_10.xml"),
+ ("Новости [Atom 1.0] полные тексты", "application/atom+xml", "atom_10_full.xml"),
+ ("Новости [RSS 2.0] только заголовки", "application/rss+xml", "rss_20_titles.xml"),
+ ("Новости [RSS 2.0]", "application/rss+xml", "rss_20.xml"),
+ ("Новости [RSS 2.0] полные тексты", "application/rss+xml", "rss_20_full.xml"),
)
##
#def body_html
<hr>
<p class="head">Новостевая лента в форматах
-<A HREF="atom_10.xml">Atom 1.0 <img src="../../Graphics/atom_10.jpg" border=0></A>
-и <A HREF="rss_20.xml">RSS 2.0 <img src="../../Graphics/rss_20.jpg" border=0></A>.
+<img src="../../Graphics/atom_10.jpg" border=0>
+<A HREF="atom_10_titles.xml">Atom 1.0 только заголовки</A> /
+<A HREF="atom_10.xml">Atom 1.0</A> /
+<A HREF="atom_10_full.xml">Atom 1.0 полные тексты</A>
+и <img src="../../Graphics/rss_20.jpg" border=0>
+<A HREF="rss_20_titles.xml">RSS 2.0 только заголовки</A> /
+<A HREF="rss_20.xml">RSS 2.0</A> /
+<A HREF="rss_20_full.xml">RSS 2.0 полные тексты</A>.
</p>
""")
items.append(item)
item.baseURL = baseURL
item.categoryList = tags
+ item.excerpt = excerpts[(year, month, day, file)]
+ item.body = bodies[(year, month, day, file)]
namespace = {
"title": "Oleg Broytman's blog",
write_if_changed(os.path.join(blog_root, "atom_10.xml"), atom_tmpl)
rss_tmpl = str(rss_20(searchList=[namespace]))
write_if_changed(os.path.join(blog_root, "rss_20.xml"), rss_tmpl)
+
+for item in items:
+ item.excerpt = None
+
+atom_tmpl = str(atom_10(searchList=[namespace]))
+write_if_changed(os.path.join(blog_root, "atom_10_titles.xml"), atom_tmpl)
+rss_tmpl = str(rss_20(searchList=[namespace]))
+write_if_changed(os.path.join(blog_root, "rss_20_titles.xml"), rss_tmpl)
+
+for item in items:
+ item.content = item.body
+
+atom_tmpl = str(atom_10(searchList=[namespace]))
+write_if_changed(os.path.join(blog_root, "atom_10_full.xml"), atom_tmpl)
+rss_tmpl = str(rss_20(searchList=[namespace]))
+write_if_changed(os.path.join(blog_root, "rss_20_full.xml"), rss_tmpl)