# Pattern for characters regarded as illegal in content: it matches any
# character that is NOT tab, CR, LF, or in the range space..\377.
5 illegal = re.compile('[^\t\r\n -\377]') # illegal chars in content
# Rebinds the `illegal` attribute on the xmllib module itself, so this is a
# module-wide override rather than something local to the classes below.
# NOTE(review): xmllib is a Python 2-only module (removed in Python 3).
6 xmllib.illegal = illegal # allow cyrillic characters in XML
# Serialise a mapping of attribute names to values as `name="value"` items
# joined by single spaces, and return the resulting string.
# NOTE(review): the statement that creates `attr_list` is not visible in this
# excerpt; whether it starts empty or with a seed element (which would give
# the result a leading space) must be confirmed against the full file.
9 def join_xml_attrs(attrs):
# Iteration order is whatever attrs.items() yields.
11 for attrname, value in attrs.items():
# Surrounding whitespace is stripped from each value; the value is otherwise
# interpolated as-is (no escaping of quotes, '&' or '<' is done here).
12 attr_list.append('%s="%s"' % (attrname, string.strip(value)))
# string.strip / string.join are the Python 2 `string` module functions.
14 return string.join(attr_list, " ")
# xmllib.XMLParser subclass whose handlers re-serialise what they receive
# (character data, comments, start tags and end tags) by appending it to the
# string held in self.accumulator.
# NOTE(review): the `def __init__` header and the statement that first
# assigns self.accumulator are not visible in this excerpt, and the handlers
# for data and comments may be guarded by conditions that are also missing --
# confirm against the full file.
17 class XMLParser(xmllib.XMLParser):
# Delegates to the base-class initialiser.
19 xmllib.XMLParser.__init__(self)
# Character data is appended to the accumulator verbatim.
23 def handle_data(self, data):
25 self.accumulator = "%s%s" % (self.accumulator, data)
# Comments are appended wrapped in <!-- ... --> delimiters.
27 def handle_comment(self, data):
29 self.accumulator = "%s<!--%s-->" % (self.accumulator, data)
32 # Pass other tags unmodified
# The start tag is rebuilt as "<" + tag + join_xml_attrs(attrs) + ">". The
# format string puts no separator between the tag name and the attribute
# text, so any space between them has to come from join_xml_attrs' result.
33 def unknown_starttag(self, tag, attrs):
34 self.accumulator = "%s<%s%s>" % (self.accumulator, tag, join_xml_attrs(attrs))
# The end tag is rebuilt as "</" + tag + ">".
36 def unknown_endtag(self, tag):
37 self.accumulator = "%s</%s>" % (self.accumulator, tag)
# XMLParser subclass that overrides the comment, start-tag and end-tag
# handlers; handle_data is not overridden here and is inherited.
# NOTE(review): the bodies of the three overrides are not visible in this
# excerpt. Presumably they discard the markup so that only character data
# reaches the accumulator -- confirm against the full file.
40 class XMLFilter(XMLParser):
41 def handle_comment(self, data):
45 def unknown_starttag(self, tag, attrs):
48 def unknown_endtag(self, tag):
# Entry point: returns the `accumulator` attribute of `filter` as the result.
# The parameter names `str` and `filter` shadow the Python builtins of the
# same names inside this function.
# NOTE(review): the statements between the docstring and the return are not
# visible in this excerpt. Presumably they pick a default parser/filter when
# `filter` is None and feed `str` through it -- confirm against the full file.
51 def filter_xml(str, filter=None):
52 "Process XML using some XML parser/filter"
58 return filter.accumulator