Put the first paragraph or the full body into XML feeds.

[phdru.name/phdru.name.git] / phd_pp.py
diff --git a/phd_pp.py b/phd_pp.py

index d731d38a270fe9d13d8c8e8337aefb6d5ed78e3a..95e90db415b1901106c061d90b3111c11793cfab 100644 (file)
--- a/phd_pp.py
+++ b/phd_pp.py
@@ -1,5 +1,8 @@
  import os, re, time, urllib
+from HTMLParser import HTMLParseError
  from Cheetah.Template import Template
+from m_lib.net.www.html import HTMLParser as _HTMLParser
+
  
  url_re = r"(((https?|ftp|gopher|telnet)://|(mailto|file|news|about|ed2k|irc|sip|magnet):)[^' \t<>\"]+|(www|web|w3)[A-Za-z0-9_-]*\.[A-Za-z0-9._-]+\.[^' \t<>\"]+)[A-Za-z0-9/]"
  
@@ -7,6 +10,7 @@ def _url2href(match):
     url = match.group(0)
     return '<a href="%s">%s</a>' % (url, url)
  
+
  full_dirs = len(os.getcwd().split('/')) + 1
  
  class phd_pp(Template):
@@ -36,11 +40,13 @@ class phd_pp(Template):
  
     def body(self):
        if hasattr(self, "body_html"):
-         return self.body_html()
+         body = self.body_html()
        if hasattr(self, "body_text"):
-         return self.text2html()
+         body = self.text2html()
        if hasattr(self, "body_rst"):
-         return self.rst2html()
+         body = self.rst2html()
+      self.Body = body
+      return body
  
     def text2html(self):
        body = re.sub(url_re, _url2href, self.body_text())
@@ -63,8 +69,7 @@ class phd_pp(Template):
  
     def rst2html(self):
        from docutils.core import publish_parts
-      from locale import getpreferredencoding
-      encoding = getpreferredencoding()
+      from m_lib.defenc import default_encoding as encoding
  
        parts = publish_parts(self.body_rst(), writer_name="html")
  
@@ -80,6 +85,21 @@ class phd_pp(Template):
        parts = [part for part in (title, subtitle, body) if part]
        return "\n\n".join(parts)
  
+   def get_first_p(self):
+      parser = HTMLParser()
+
+      try:
+         parser.feed(self.body())
+      except (HTMLParseError, HTMLHeadDone):
+         pass
+
+      try:
+         parser.close()
+      except (HTMLParseError, HTMLHeadDone):
+         pass
+
+      return parser.first_p
+
     def img_thumbnail_800_1024(self, img_name):
        return """\
  <img src="%(img_name)s-thumbnail.jpg" alt="%(img_name)s-thumbnail.jpg" /><br />
@@ -107,3 +127,18 @@ class phd_pp(Template):
  
  def quote_string(s, to_encoding="utf-8", ext_safe=''):
     return urllib.quote(unicode(s, "koi8-r").encode(to_encoding), '/' + ext_safe)
+
+
+class HTMLHeadDone(Exception): pass
+
+class HTMLParser(_HTMLParser):
+   def __init__(self, charset=None):
+      _HTMLParser.__init__(self)
+      self.first_p = None
+
+   def start_p(self, attrs):
+      self.accumulator = '<p>'
+
+   def end_p(self):
+      self.first_p = self.accumulator + '</p>'
+      raise HTMLHeadDone()