X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;f=Robots%2Fparse_html.py;h=57ad6a0cf69c89247281700915ced9854a3ef8ba;hb=d655a6006176ecd02b8bbf6fe942a8f518f30e13;hp=2207ea202755b72ec40844e9a095f4f893ebdc18;hpb=9edef5a570ea28f7c06d8d92fdd70afe0ea86809;p=bookmarks_db.git diff --git a/Robots/parse_html.py b/Robots/parse_html.py index 2207ea2..57ad6a0 100755 --- a/Robots/parse_html.py +++ b/Robots/parse_html.py @@ -2,7 +2,7 @@ """ HTML Parser - Written by BroytMann. Copyright (C) 1997-2005 PhiloSoft Design + Written by BroytMann. Copyright (C) 1997-2007 PhiloSoft Design """ @@ -71,6 +71,24 @@ class HTMLParser(_HTMLParser): self.title = self.accumulator + def do_link(self, attrs): + has_icon = False + href = None + + for attrname, value in attrs: + if value: + value = value.strip().lower() + if (attrname == 'rel') and (value in ('icon', 'shortcut icon')): + has_icon = True + elif attrname == 'href': + href = value + + if has_icon: + self.icon = href + else: + self.icon = None + + import re entity_re = re.compile("(&#[0-9]+;)") @@ -135,3 +153,4 @@ if __name__ == '__main__': print parser.charset print parser.title print parser.refresh + print parser.icon