+from HTMLParser import HTMLParseError
+import cgi
+from urlparse import urljoin
+from m_lib.net.www.html import HTMLParser as _HTMLParser
+
class HTMLDone(Exception):
    """Control-flow signal raised by a parser handler to abort parsing early."""
+
+
class FirstPHTMLParser(_HTMLParser):
    """Parser that records the first paragraph whose closing tag it handles.

    The result is exposed as ``first_p``; it stays ``None`` until ``end_p``
    has run.  ``self.accumulator`` is read here but only ever reset by this
    class -- presumably the m_lib base parser fills it with parsed content
    and dispatches ``start_p``/``end_p`` for <p> tags; confirm against m_lib.
    """

    def __init__(self):
        # Explicit base-class call, exactly as the original does it.
        _HTMLParser.__init__(self)
        # Filled in by end_p().
        self.first_p = None

    def start_p(self, attrs):
        """Restart the accumulator with a bare ``<p>``; ``attrs`` are dropped."""
        self.accumulator = '<p>'

    def end_p(self):
        """Store the accumulated paragraph and abort parsing.

        Raises:
            HTMLDone: always, so the caller can stop after one paragraph.
        """
        paragraph = '%s</p>' % (self.accumulator,)
        self.first_p = paragraph
        raise HTMLDone()
+
def get_first_p(body):
    """Return the ``first_p`` captured by ``FirstPHTMLParser`` for ``body``.

    ``body`` is fed to a fresh parser, which is then closed.  Both steps are
    attempted independently, and ``HTMLParseError`` / ``HTMLDone`` raised by
    either one is ignored.  The result is ``None`` when the parser never
    recorded a paragraph.
    """
    extractor = FirstPHTMLParser()
    ignored = (HTMLParseError, HTMLDone)

    # feed() and close() each get their own try so that a failure (or the
    # deliberate HTMLDone) in the first step does not skip the second.
    for step in (lambda: extractor.feed(body), lambda: extractor.close()):
        try:
            step()
        except ignored:
            pass

    return extractor.first_p
+
+
class AbsURLHTMLParser(_HTMLParser):
    """HTML parser that re-emits <a> and <img> tags with absolute URLs.

    ``href`` on <a> and ``src`` on <img> are resolved against ``base`` with
    ``urljoin``; every other attribute is written back HTML-escaped.  Output
    is appended to ``self.accumulator``, which this class never creates
    itself -- presumably the m_lib base parser initialises it and dispatches
    the ``start_*``/``end_*`` handlers; confirm against m_lib.
    """

    def __init__(self, base):
        """Remember ``base``, the URL that relative links are joined to."""
        _HTMLParser.__init__(self)
        self.base = base

    def _write_abs_tag(self, tag, url_attr, attrs):
        """Append ``<tag ...>`` to the accumulator, making ``url_attr`` absolute.

        Args:
            tag: tag name to emit (``'a'`` or ``'img'``).
            url_attr: name of the attribute holding the URL to resolve.
            attrs: iterable of ``(name, value)`` pairs for the tag.
        """
        pieces = ['<' + tag]
        for attrname, value in attrs:
            if value is None:
                # The stdlib HTMLParser reports valueless attributes
                # (e.g. <img ismap>) as None -- assuming the m_lib base
                # passes attrs through unchanged.  cgi.escape(None) would
                # raise AttributeError, so emit the bare name instead.
                pieces.append(' %s' % attrname)
                continue
            value = cgi.escape(value, True)
            if isinstance(value, unicode):
                # Characters with no koi8-r equivalent become numeric
                # character references instead of raising
                # UnicodeEncodeError.  Escaping ran first, so the '&' of
                # those references is not escaped again.
                value = value.encode('koi8-r', 'xmlcharrefreplace')
            if attrname == url_attr:
                value = urljoin(self.base, value)
            pieces.append(' %s="%s"' % (attrname, value))
        pieces.append('>')
        self.accumulator += ''.join(pieces)

    def start_a(self, attrs):
        """Emit the opening <a> tag with ``href`` resolved against the base."""
        self._write_abs_tag('a', 'href', attrs)

    def end_a(self):
        """Emit the closing </a> tag."""
        self.accumulator += '</a>'

    def start_img(self, attrs):
        """Emit the <img> tag with ``src`` resolved against the base.

        Values go through the same unicode -> koi8-r encoding as in
        ``start_a``, so a unicode ``src`` no longer turns the accumulator
        into a unicode string.
        """
        self._write_abs_tag('img', 'src', attrs)

    def end_img(self):
        """Emit nothing for a closing </img>."""
        pass
+
def absolute_urls(body, base):
    """Return what ``AbsURLHTMLParser(base)`` accumulates for ``body``.

    ``body`` is fed to a fresh parser, which is then closed.  Each step is
    attempted on its own, and an ``HTMLParseError`` from either is ignored,
    so whatever was accumulated before a parse failure is still returned.
    """
    rewriter = AbsURLHTMLParser(base)

    # Separate try per call: a parse error in feed() must not skip close().
    for action, args in ((rewriter.feed, (body,)), (rewriter.close, ())):
        try:
            action(*args)
        except HTMLParseError:
            pass

    return rewriter.accumulator
+
+