X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;f=Robots%2Fparse_html.py;h=57ad6a0cf69c89247281700915ced9854a3ef8ba;hb=d655a6006176ecd02b8bbf6fe942a8f518f30e13;hp=350ad789b5fa34592db37ce6e097de98e8b8dd6a;hpb=62977582003da9f434cd4fd377837935b6f0520c;p=bookmarks_db.git diff --git a/Robots/parse_html.py b/Robots/parse_html.py index 350ad78..57ad6a0 100755 --- a/Robots/parse_html.py +++ b/Robots/parse_html.py @@ -1,8 +1,8 @@ -#! /usr/local/bin/python -O +#! /usr/bin/env python """ HTML Parser - Written by BroytMann. Copyright (C) 1997-2005 PhiloSoft Design + Written by BroytMann. Copyright (C) 1997-2007 PhiloSoft Design """ @@ -71,6 +71,24 @@ class HTMLParser(_HTMLParser): self.title = self.accumulator + def do_link(self, attrs): + has_icon = False + href = None + + for attrname, value in attrs: + if value: + value = value.strip().lower() + if (attrname == 'rel') and (value in ('icon', 'shortcut icon')): + has_icon = True + elif attrname == 'href': + href = value + + if has_icon: + self.icon = href + else: + self.icon = None + + import re entity_re = re.compile("(&#[0-9]+;)") @@ -135,3 +153,4 @@ if __name__ == '__main__': print parser.charset print parser.title print parser.refresh + print parser.icon