Moved HTML parsing from phd_pp.py to reindex_blog.py.

[phdru.name/phdru.name.git] / phd_pp.py
diff --git a/phd_pp.py b/phd_pp.py

index 95e90db415b1901106c061d90b3111c11793cfab..f9c9a67dca9712272ba8141bf9e7e9d57ea43690 100644 (file)
--- a/phd_pp.py
+++ b/phd_pp.py
@@ -1,7 +1,5 @@
  import os, re, time, urllib
-from HTMLParser import HTMLParseError
  from Cheetah.Template import Template
-from m_lib.net.www.html import HTMLParser as _HTMLParser
  
  
  url_re = r"(((https?|ftp|gopher|telnet)://|(mailto|file|news|about|ed2k|irc|sip|magnet):)[^' \t<>\"]+|(www|web|w3)[A-Za-z0-9_-]*\.[A-Za-z0-9._-]+\.[^' \t<>\"]+)[A-Za-z0-9/]"
@@ -38,6 +36,7 @@ class phd_pp(Template):
           return "%s, %s" % (start_year, this_year)
        return "%s-%s" % (start_year, this_year)
  
+
     def body(self):
        if hasattr(self, "body_html"):
           body = self.body_html()
@@ -45,7 +44,6 @@ class phd_pp(Template):
           body = self.text2html()
        if hasattr(self, "body_rst"):
           body = self.rst2html()
-      self.Body = body
        return body
  
     def text2html(self):
@@ -85,20 +83,6 @@ class phd_pp(Template):
        parts = [part for part in (title, subtitle, body) if part]
        return "\n\n".join(parts)
  
-   def get_first_p(self):
-      parser = HTMLParser()
-
-      try:
-         parser.feed(self.body())
-      except (HTMLParseError, HTMLHeadDone):
-         pass
-
-      try:
-         parser.close()
-      except (HTMLParseError, HTMLHeadDone):
-         pass
-
-      return parser.first_p
  
     def img_thumbnail_800_1024(self, img_name):
        return """\
@@ -127,18 +111,3 @@ class phd_pp(Template):
  
  def quote_string(s, to_encoding="utf-8", ext_safe=''):
     return urllib.quote(unicode(s, "koi8-r").encode(to_encoding), '/' + ext_safe)
-
-
-class HTMLHeadDone(Exception): pass
-
-class HTMLParser(_HTMLParser):
-   def __init__(self, charset=None):
-      _HTMLParser.__init__(self)
-      self.first_p = None
-
-   def start_p(self, attrs):
-      self.accumulator = '<p>'
-
-   def end_p(self):
-      self.first_p = self.accumulator + '</p>'
-      raise HTMLHeadDone()