]> git.phdru.name Git - phdru.name/phdru.name.git/blob - phd_pp.py
Moved html parsing from phd_pp.py to reindex_blog.py.
[phdru.name/phdru.name.git] / phd_pp.py
1 import os, re, time, urllib
2 from Cheetah.Template import Template
3
4
# Pattern for things that look like URLs in plain text.  Two alternatives:
# an explicit scheme ("http://...", "mailto:...", ...) or a bare host name
# starting with www/web/w3.  The trailing character class makes a match end
# on a letter, digit or slash, so closing punctuation is left outside it.
url_re = (
   r"("
   r"((https?|ftp|gopher|telnet)://|(mailto|file|news|about|ed2k|irc|sip|magnet):)"
   r"[^' \t<>\"]+"
   r"|(www|web|w3)[A-Za-z0-9_-]*\.[A-Za-z0-9._-]+\.[^' \t<>\"]+"
   r")"
   r"[A-Za-z0-9/]"
)
6
7 def _url2href(match):
8    url = match.group(0)
9    return '<a href="%s">%s</a>' % (url, url)
10
11
# Number of leading '/'-separated components to drop from a template's
# absolute directory: every component of the current working directory
# (separator count + 1) plus one more directory level below it.
full_dirs = os.getcwd().count('/') + 2
13
class phd_pp(Template):
   """Cheetah ``Template`` subclass with helpers shared by the site's pages.

   Provides the page's location (``self.root``, ``self.path``), a copyright
   year formatter, body rendering from HTML / plain text / reStructuredText
   sources, an image-thumbnail snippet and builders for search and
   encyclopedia URLs.
   """

   def __init__(self, *args, **kw):
      """Initialise the template and compute ``self.root`` and ``self.path``.

      ``self.root`` is a relative prefix with one "../" per directory level
      that remains after the first ``full_dirs`` components of the
      template's directory are dropped.  ``self.path`` is the "/"-rooted
      path of the page, with ".tmpl" in the file name replaced by ".html".
      """
      if not hasattr(self, "_fileBaseName"):
         # NOTE(review): _CHEETAH_src is presumably set by Cheetah on
         # compiled template classes -- confirm.
         self._fileDirName, self._fileBaseName = os.path.split(os.path.abspath(self._CHEETAH_src))
      Template.__init__(self, *args, **kw)
      directories = self._fileDirName.split('/')[full_dirs:] # remove directories up to "./files"
      # "../" * 0 is already '', so the top level needs no special case.
      self.root = "../" * len(directories)
      path = '/'.join(directories) + '/' + \
         self._fileBaseName.replace(".tmpl", ".html")
      if not path.startswith('/'):
         path = '/' + path
      self.path = path

   def copyright(self, start_year):
      """Return the copyright year(s) for a page first published in *start_year*.

      Returns the current year (the integer from ``time.localtime()``) when
      *start_year* is the current year or later, "START, NOW" when the two
      years are consecutive, and "START-NOW" otherwise.
      """
      this_year = time.localtime()[0]
      if start_year >= this_year:
         return this_year
      if start_year == this_year - 1:
         return "%s, %s" % (start_year, this_year)
      return "%s-%s" % (start_year, this_year)


   def body(self):
      """Return the page body as HTML from whichever source is defined.

      The template is expected to define one of ``body_rst``, ``body_text``
      or ``body_html``.  When several are defined, reStructuredText wins
      over plain text, which wins over ready-made HTML.

      Raises AttributeError when none of the three is defined.
      """
      if hasattr(self, "body_rst"):
         return self.rst2html()
      if hasattr(self, "body_text"):
         return self.text2html()
      if hasattr(self, "body_html"):
         return self.body_html()
      raise AttributeError(
         "%s defines none of body_html, body_text or body_rst"
         % self.__class__.__name__)

   def text2html(self):
      """Render ``self.body_text()`` as HTML paragraphs.

      Text matching ``url_re`` is wrapped in ``<a>`` elements.  Chunks
      separated by a blank line become paragraphs; every chunk after the
      first opens with ``<p class="head">``.  Inside a chunk, a newline
      followed by three spaces starts a new plain ``<p>``.  An ``<h1>``
      holding ``self.Title`` is prepended when the title is non-empty.
      """
      body = re.sub(url_re, _url2href, self.body_text())

      paragraphs = body.split("\n\n")

      new_paras = []
      for p in paragraphs:
         # A line indented by three spaces starts a new paragraph.
         parts = p.split("\n   ")
         parts[0] = parts[0].strip()
         new_paras.append('\n</p>\n<p>\n'.join(parts))

      if self.Title:
         title = "<h1>%s</h1>\n\n" % self.Title
      else:
         title = ''

      body = '\n</p>\n\n<p class="head">\n'.join(new_paras)
      return "%s<p>%s</p>" % (title, body)

   def rst2html(self):
      """Render ``self.body_rst()`` to HTML with docutils.

      The document title (falling back to ``self.Title``) becomes an
      ``<h1>``, the subtitle an ``<h2>``; the non-empty pieces among title,
      subtitle and body are joined with blank lines.  Each piece produced by
      docutils is encoded with ``m_lib.defenc.default_encoding``.
      """
      # Imported here so docutils and m_lib are needed only for reST pages.
      from docutils.core import publish_parts
      from m_lib.defenc import default_encoding as encoding

      parts = publish_parts(self.body_rst(), writer_name="html")

      title = parts["title"].encode(encoding) or self.Title
      if title:
         title = "<h1>%s</h1>" % title

      subtitle = parts["subtitle"].encode(encoding)
      if subtitle:
         subtitle = "<h2>%s</h2>" % subtitle

      body = parts["body"].encode(encoding)
      parts = [part for part in (title, subtitle, body) if part]
      return "\n\n".join(parts)


   def img_thumbnail_800_1024(self, img_name):
      """Return HTML showing a thumbnail with links to two larger versions.

      All file names are derived from *img_name*: "-thumbnail.jpg" for the
      inline image, "-800x600.jpg" and "-1024x800.jpg" for the links.
      """
      return """\
<img src="%(img_name)s-thumbnail.jpg" alt="%(img_name)s-thumbnail.jpg" /><br />
<a href="%(img_name)s-800x600.jpg">800x600</a>, <a href="%(img_name)s-1024x800.jpg">1024x800</a>""" % {"img_name": img_name}

   def wikipedia(self, query):
      """Return the English Wikipedia URL for *query*.

      Spaces become underscores; commas are left unquoted.
      """
      return "http://en.wikipedia.org/wiki/%s" % quote_string(query.replace(' ', '_'), ext_safe=',')

   def wikipedia_ru(self, query):
      """Return the Russian Wikipedia URL for *query*.

      Spaces become underscores; commas are left unquoted.
      """
      return "http://ru.wikipedia.org/wiki/%s" % quote_string(query.replace(' ', '_'), ext_safe=',')

   def nigma(self, query):
      """Return a nigma.ru search URL for *query*."""
      return "http://www.nigma.ru/index.php?s=%s" % quote_string(query)

   # Default search engine used by templates that call ``search``.
   search = nigma

   def yandex(self, query):
      """Return a Yandex search URL for *query*, quoted as cp1251 bytes."""
      return "http://www.yandex.ru/yandsearch?text=%s&rpt=rad" % quote_string(query, "cp1251")

   def google(self, query):
      """Return a google.com (English interface) search URL for *query*."""
      return "http://www.google.com/search?hl=en&ie=utf-8&oe=utf-8&q=%s" % quote_string(query)

   def google_ru(self, query):
      """Return a google.ru (Russian interface) search URL for *query*."""
      return "http://www.google.ru/search?hl=ru&ie=utf-8&oe=utf-8&q=%s" % quote_string(query)
111
def quote_string(s, to_encoding="utf-8", ext_safe=''):
   """Percent-quote *s* for use in a URL.

   s -- the text to quote.  A byte string is decoded from koi8-r first; an
      already-decoded text string is used as is (previously this raised
      TypeError).
   to_encoding -- encoding of the bytes that get percent-quoted.
   ext_safe -- extra characters, besides '/', to leave unquoted.

   Returns the quoted string.
   """
   # Resolve names that differ between Python 2 and Python 3 locally, so
   # the function does not depend on which urllib layout is installed.
   try:
      from urllib import quote
   except ImportError:
      from urllib.parse import quote
   try:
      text_type = unicode
   except NameError:
      text_type = str

   if not isinstance(s, text_type):
      s = text_type(s, "koi8-r")
   return quote(s.encode(to_encoding), '/' + ext_safe)