m_lib/net/www/xml.py

   1 """XML parsers"""
   2
   3
   4 import re, xmllib
   5 illegal = re.compile('[^\t\r\n -\377]') # illegal chars in content
   6 xmllib.illegal = illegal # allow cyrillic characters in XML
   7
   8
   9 def join_xml_attrs(attrs):
  10    attr_list = ['']
  11    for attrname, value in attrs.items():
  12       attr_list.append('%s="%s"' % (attrname, string.strip(value)))
  13
  14    return string.join(attr_list, " ")
  15
  16
  17 class XMLParser(xmllib.XMLParser):
  18    def __init__(self):
  19       xmllib.XMLParser.__init__(self)
  20       self.accumulator = ""
  21
  22
  23    def handle_data(self, data):
  24       if data:
  25          self.accumulator = "%s%s" % (self.accumulator, data)
  26
  27    def handle_comment(self, data):
  28       if data:
  29          self.accumulator = "%s<!--%s-->" % (self.accumulator, data)
  30
  31
  32    # Pass other tags unmodified
  33    def unknown_starttag(self, tag, attrs):
  34       self.accumulator = "%s<%s%s>" % (self.accumulator, tag, join_xml_attrs(attrs))
  35
  36    def unknown_endtag(self, tag):
  37       self.accumulator = "%s</%s>" % (self.accumulator, tag)
  38
  39
  40 class XMLFilter(XMLParser):
  41    def handle_comment(self, data):
  42       pass
  43
  44    # Filter out all tags
  45    def unknown_starttag(self, tag, attrs):
  46       pass
  47
  48    def unknown_endtag(self, tag):
  49       pass
  50
  51 def filter_xml(str, filter=None):
  52     "Process XML using some XML parser/filter"
  53
  54     if filter is None:
  55        filter = XMLFilter()
  56
  57     filter.feed(str)
  58     return filter.accumulator