X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;f=phd_pp.py;h=95e90db415b1901106c061d90b3111c11793cfab;hb=c6eb1e00be821ab12e0edaa622e75eff1ec8abd2;hp=b3ad03aec704cbc5f27fc3f0cf9b108debc736b6;hpb=f95ee475c554f22998041186e5daf26872fa8b69;p=phdru.name%2Fphdru.name.git diff --git a/phd_pp.py b/phd_pp.py index b3ad03a..95e90db 100644 --- a/phd_pp.py +++ b/phd_pp.py @@ -1,13 +1,10 @@ import os, re, time, urllib +from HTMLParser import HTMLParseError from Cheetah.Template import Template +from m_lib.net.www.html import HTMLParser as _HTMLParser -# Copied from ZWiki - -urlchars = r'[A-Za-z0-9/:@_%~#=&\.\-\?\+\$,]+' -urlendchar = r'[A-Za-z0-9/]' -url = r'["=]?((about|file|ftp|gopher|http|https|mailto|news|telnet):%s)' % \ - (urlchars+urlendchar) +url_re = r"(((https?|ftp|gopher|telnet)://|(mailto|file|news|about|ed2k|irc|sip|magnet):)[^' \t<>\"]+|(www|web|w3)[A-Za-z0-9_-]*\.[A-Za-z0-9._-]+\.[^' \t<>\"]+)[A-Za-z0-9/]" def _url2href(match): url = match.group(0) @@ -43,14 +40,16 @@ class phd_pp(Template): def body(self): if hasattr(self, "body_html"): - return self.body_html() + body = self.body_html() if hasattr(self, "body_text"): - return self.text2html() + body = self.text2html() if hasattr(self, "body_rst"): - return self.rst2html() + body = self.rst2html() + self.Body = body + return body def text2html(self): - body = re.sub(url, _url2href, self.body_text()) + body = re.sub(url_re, _url2href, self.body_text()) paragraphs = body.split("\n\n") @@ -70,8 +69,7 @@ class phd_pp(Template): def rst2html(self): from docutils.core import publish_parts - from locale import getpreferredencoding - encoding = getpreferredencoding() + from m_lib.defenc import default_encoding as encoding parts = publish_parts(self.body_rst(), writer_name="html") @@ -87,23 +85,60 @@ class phd_pp(Template): parts = [part for part in (title, subtitle, body) if part] return "\n\n".join(parts) + def get_first_p(self): + parser = HTMLParser() + + try: + parser.feed(self.body()) + except (HTMLParseError, HTMLHeadDone): + pass + + try: + parser.close() + except (HTMLParseError, HTMLHeadDone): + pass + + return parser.first_p + def img_thumbnail_800_1024(self, img_name): return """\ %(img_name)s-thumbnail.jpg
800x600, 1024x800""" % {"img_name": img_name} - def google_ru(self, query): - return "http://www.google.ru/search?hl=ru&ie=utf-8&oe=utf-8&q=%s" % quote_string(query) + def wikipedia(self, query): + return "http://en.wikipedia.org/wiki/%s" % quote_string(query.replace(' ', '_'), ext_safe=',') + + def wikipedia_ru(self, query): + return "http://ru.wikipedia.org/wiki/%s" % quote_string(query.replace(' ', '_'), ext_safe=',') + + def nigma(self, query): + return "http://www.nigma.ru/index.php?s=%s" % quote_string(query) + + search = nigma def yandex(self, query): return "http://www.yandex.ru/yandsearch?text=%s&rpt=rad" % quote_string(query, "cp1251") - def wikipedia(self, query): - return "http://en.wikipedia.org/wiki/%s" % quote_string(query.replace(' ', '_')) + def google(self, query): + return "http://www.google.com/search?hl=en&ie=utf-8&oe=utf-8&q=%s" % quote_string(query) - def wikipedia_ru(self, query): - return "http://ru.wikipedia.org/wiki/%s" % quote_string(query.replace(' ', '_')) + def google_ru(self, query): + return "http://www.google.ru/search?hl=ru&ie=utf-8&oe=utf-8&q=%s" % quote_string(query) + +def quote_string(s, to_encoding="utf-8", ext_safe=''): + return urllib.quote(unicode(s, "koi8-r").encode(to_encoding), '/' + ext_safe) + + +class HTMLHeadDone(Exception): pass + +class HTMLParser(_HTMLParser): + def __init__(self, charset=None): + _HTMLParser.__init__(self) + self.first_p = None + def start_p(self, attrs): + self.accumulator = '

' -def quote_string(s, to_encoding="utf-8"): - return urllib.quote(unicode(s, "koi8-r").encode(to_encoding)) + def end_p(self): + self.first_p = self.accumulator + '

' + raise HTMLHeadDone()