X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;f=phd_pp.py;h=95e90db415b1901106c061d90b3111c11793cfab;hb=c6eb1e00be821ab12e0edaa622e75eff1ec8abd2;hp=b3ad03aec704cbc5f27fc3f0cf9b108debc736b6;hpb=f95ee475c554f22998041186e5daf26872fa8b69;p=phdru.name%2Fphdru.name.git

diff --git a/phd_pp.py b/phd_pp.py
index b3ad03a..95e90db 100644
--- a/phd_pp.py
+++ b/phd_pp.py
@@ -1,13 +1,10 @@
 import os, re, time, urllib
+from HTMLParser import HTMLParseError
 from Cheetah.Template import Template
+from m_lib.net.www.html import HTMLParser as _HTMLParser
 
 
-# Copied from ZWiki
-
-urlchars         = r'[A-Za-z0-9/:@_%~#=&\.\-\?\+\$,]+'
-urlendchar       = r'[A-Za-z0-9/]'
-url              = r'["=]?((about|file|ftp|gopher|http|https|mailto|news|telnet):%s)' % \
-                   (urlchars+urlendchar)
+url_re = r"(((https?|ftp|gopher|telnet)://|(mailto|file|news|about|ed2k|irc|sip|magnet):)[^' \t<>\"]+|(www|web|w3)[A-Za-z0-9_-]*\.[A-Za-z0-9._-]+\.[^' \t<>\"]+)[A-Za-z0-9/]"
 
 def _url2href(match):
    url = match.group(0)
@@ -43,14 +40,16 @@ class phd_pp(Template):
 
    def body(self):
       if hasattr(self, "body_html"):
-         return self.body_html()
+         body = self.body_html()
       if hasattr(self, "body_text"):
-         return self.text2html()
+         body = self.text2html()
       if hasattr(self, "body_rst"):
-         return self.rst2html()
+         body = self.rst2html()
+      self.Body = body
+      return body
 
    def text2html(self):
-      body = re.sub(url, _url2href, self.body_text())
+      body = re.sub(url_re, _url2href, self.body_text())
 
       paragraphs = body.split("\n\n")
 
@@ -70,8 +69,7 @@ class phd_pp(Template):
 
    def rst2html(self):
       from docutils.core import publish_parts
-      from locale import getpreferredencoding
-      encoding = getpreferredencoding()
+      from m_lib.defenc import default_encoding as encoding
 
       parts = publish_parts(self.body_rst(), writer_name="html")
 
@@ -87,23 +85,60 @@ class phd_pp(Template):
       parts = [part for part in (title, subtitle, body) if part]
       return "\n\n".join(parts)
 
+   def get_first_p(self):
+      parser = HTMLParser()
+
+      try:
+         parser.feed(self.body())
+      except (HTMLParseError, HTMLHeadDone):
+         pass
+
+      try:
+         parser.close()
+      except (HTMLParseError, HTMLHeadDone):
+         pass
+
+      return parser.first_p
+
    def img_thumbnail_800_1024(self, img_name):
       return """\
 <img src="%(img_name)s-thumbnail.jpg" alt="%(img_name)s-thumbnail.jpg" /><br />
 <a href="%(img_name)s-800x600.jpg">800x600</a>, <a href="%(img_name)s-1024x800.jpg">1024x800</a>""" % {"img_name": img_name}
 
-   def google_ru(self, query):
-      return "http://www.google.ru/search?hl=ru&ie=utf-8&oe=utf-8&q=%s" % quote_string(query)
+   def wikipedia(self, query):
+      return "http://en.wikipedia.org/wiki/%s" % quote_string(query.replace(' ', '_'), ext_safe=',')
+
+   def wikipedia_ru(self, query):
+      return "http://ru.wikipedia.org/wiki/%s" % quote_string(query.replace(' ', '_'), ext_safe=',')
+
+   def nigma(self, query):
+       return "http://www.nigma.ru/index.php?s=%s" % quote_string(query)
+
+   search = nigma
 
    def yandex(self, query):
       return "http://www.yandex.ru/yandsearch?text=%s&rpt=rad" % quote_string(query, "cp1251")
 
-   def wikipedia(self, query):
-      return "http://en.wikipedia.org/wiki/%s" % quote_string(query.replace(' ', '_'))
+   def google(self, query):
+      return "http://www.google.com/search?hl=en&ie=utf-8&oe=utf-8&q=%s" % quote_string(query)
 
-   def wikipedia_ru(self, query):
-      return "http://ru.wikipedia.org/wiki/%s" % quote_string(query.replace(' ', '_'))
+   def google_ru(self, query):
+      return "http://www.google.ru/search?hl=ru&ie=utf-8&oe=utf-8&q=%s" % quote_string(query)
+
+def quote_string(s, to_encoding="utf-8", ext_safe=''):
+   return urllib.quote(unicode(s, "koi8-r").encode(to_encoding), '/' + ext_safe)
+
+
+class HTMLHeadDone(Exception): pass
+
+class HTMLParser(_HTMLParser):
+   def __init__(self, charset=None):
+      _HTMLParser.__init__(self)
+      self.first_p = None
 
+   def start_p(self, attrs):
+      self.accumulator = '<p>'
 
-def quote_string(s, to_encoding="utf-8"):
-   return urllib.quote(unicode(s, "koi8-r").encode(to_encoding))
+   def end_p(self):
+      self.first_p = self.accumulator + '</p>'
+      raise HTMLHeadDone()