phd_pp.py

   1 import os, re, time, urllib
   2 from HTMLParser import HTMLParseError
   3 from Cheetah.Template import Template
   4 from m_lib.net.www.html import HTMLParser as _HTMLParser
   5
   6
   7 url_re = r"(((https?|ftp|gopher|telnet)://|(mailto|file|news|about|ed2k|irc|sip|magnet):)[^' \t<>\"]+|(www|web|w3)[A-Za-z0-9_-]*\.[A-Za-z0-9._-]+\.[^' \t<>\"]+)[A-Za-z0-9/]"
   8
   9 def _url2href(match):
  10    url = match.group(0)
  11    return '<a href="%s">%s</a>' % (url, url)
  12
  13
  14 full_dirs = len(os.getcwd().split('/')) + 1
  15
  16 class phd_pp(Template):
  17    def __init__(self, *args, **kw):
  18       if not hasattr(self, "_fileBaseName"):
  19          self._fileDirName, self._fileBaseName = os.path.split(os.path.abspath(self._CHEETAH_src))
  20       Template.__init__(self, *args, **kw)
  21       directories = self._fileDirName.split('/')[full_dirs:] # remove directories up to "./files"
  22       dirs_to_root = len(directories)
  23       if dirs_to_root:
  24          root = "../"*dirs_to_root
  25       else:
  26          root = ''
  27       self.root = root
  28       path = '/'.join(directories) + '/' + \
  29          self._fileBaseName.replace(".tmpl", ".html")
  30       if path[0] <> '/': path = '/' + path
  31       self.path = path
  32
  33    def copyright(self, start_year):
  34       this_year = time.localtime()[0]
  35       if start_year >= this_year:
  36          return this_year
  37       if start_year == this_year - 1:
  38          return "%s, %s" % (start_year, this_year)
  39       return "%s-%s" % (start_year, this_year)
  40
  41    def body(self):
  42       if hasattr(self, "body_html"):
  43          body = self.body_html()
  44       if hasattr(self, "body_text"):
  45          body = self.text2html()
  46       if hasattr(self, "body_rst"):
  47          body = self.rst2html()
  48       self.Body = body
  49       return body
  50
  51    def text2html(self):
  52       body = re.sub(url_re, _url2href, self.body_text())
  53
  54       paragraphs = body.split("\n\n")
  55
  56       new_paras = []
  57       for p in paragraphs:
  58          parts = p.split("\n   ")
  59          parts[0] = parts[0].strip()
  60          new_paras.append('\n</p>\n<p>\n'.join(parts))
  61
  62       if self.Title:
  63          title = "<h1>%s</h1>\n\n" % self.Title
  64       else:
  65          title = ''
  66
  67       body = '\n</p>\n\n<p class="head">\n'.join(new_paras)
  68       return "%s<p>%s</p>" % (title, body)
  69
  70    def rst2html(self):
  71       from docutils.core import publish_parts
  72       from m_lib.defenc import default_encoding as encoding
  73
  74       parts = publish_parts(self.body_rst(), writer_name="html")
  75
  76       title = parts["title"].encode(encoding) or self.Title
  77       if title:
  78          title = "<h1>%s</h1>" % title
  79
  80       subtitle = parts["subtitle"].encode(encoding)
  81       if subtitle:
  82          subtitle = "<h2>%s</h2>" % subtitle
  83
  84       body = parts["body"].encode(encoding)
  85       parts = [part for part in (title, subtitle, body) if part]
  86       return "\n\n".join(parts)
  87
  88    def get_first_p(self):
  89       parser = HTMLParser()
  90
  91       try:
  92          parser.feed(self.body())
  93       except (HTMLParseError, HTMLHeadDone):
  94          pass
  95
  96       try:
  97          parser.close()
  98       except (HTMLParseError, HTMLHeadDone):
  99          pass
 100
 101       return parser.first_p
 102
 103    def img_thumbnail_800_1024(self, img_name):
 104       return """\
 105 <img src="%(img_name)s-thumbnail.jpg" alt="%(img_name)s-thumbnail.jpg" /><br />
 106 <a href="%(img_name)s-800x600.jpg">800x600</a>, <a href="%(img_name)s-1024x800.jpg">1024x800</a>""" % {"img_name": img_name}
 107
 108    def wikipedia(self, query):
 109       return "http://en.wikipedia.org/wiki/%s" % quote_string(query.replace(' ', '_'), ext_safe=',')
 110
 111    def wikipedia_ru(self, query):
 112       return "http://ru.wikipedia.org/wiki/%s" % quote_string(query.replace(' ', '_'), ext_safe=',')
 113
 114    def nigma(self, query):
 115        return "http://www.nigma.ru/index.php?s=%s" % quote_string(query)
 116
 117    search = nigma
 118
 119    def yandex(self, query):
 120       return "http://www.yandex.ru/yandsearch?text=%s&rpt=rad" % quote_string(query, "cp1251")
 121
 122    def google(self, query):
 123       return "http://www.google.com/search?hl=en&ie=utf-8&oe=utf-8&q=%s" % quote_string(query)
 124
 125    def google_ru(self, query):
 126       return "http://www.google.ru/search?hl=ru&ie=utf-8&oe=utf-8&q=%s" % quote_string(query)
 127
 128 def quote_string(s, to_encoding="utf-8", ext_safe=''):
 129    return urllib.quote(unicode(s, "koi8-r").encode(to_encoding), '/' + ext_safe)
 130
 131
 132 class HTMLHeadDone(Exception): pass
 133
 134 class HTMLParser(_HTMLParser):
 135    def __init__(self, charset=None):
 136       _HTMLParser.__init__(self)
 137       self.first_p = None
 138
 139    def start_p(self, attrs):
 140       self.accumulator = '<p>'
 141
 142    def end_p(self):
 143       self.first_p = self.accumulator + '</p>'
 144       raise HTMLHeadDone()