git-svn-id: file:///home/phd/archive/SVN/bookmarks_db/trunk@366
fdd5c36f-1aea-0310-aeeb-
c58d7e2b6c23
Fixed a bug in handling place: URIs (do not append '//').
Fixed a bug in handling place: URIs (do not append '//').
+ Extract html redirect even if there is no title.
+
WHAT'S NEW in version 4.5.1 (2011-12-28).
WHAT'S NEW in version 4.5.1 (2011-12-28).
# Lookup TITLE in the root
title = root.title
# Lookup TITLE in the root
title = root.title
- if title is None:
- return None
-
- if title.string:
- title = title.string
- else:
- parts = []
- for part in title:
- if not isinstance(part, basestring):
- part = unicode(part)
- parts.append(part.strip())
- title = ''.join(parts)
+ if title is not None:
+ if title.string:
+ title = title.string
+ else:
+ parts = []
+ for part in title:
+ if not isinstance(part, basestring):
+ part = unicode(part)
+ parts.append(part.strip())
+ title = ''.join(parts)
meta = head.find(_find_contenttype, recursive=False)
if meta:
meta = head.find(_find_contenttype, recursive=False)
if meta:
else:
meta_charset = False
else:
meta_charset = False
- if _charset or meta_charset:
+ if title and (_charset or meta_charset):
title = title.encode(_charset or meta_charset)
meta = head.find(_find_refresh, recursive=False)
title = title.encode(_charset or meta_charset)
meta = head.find(_find_refresh, recursive=False)
+ if (title is None) and (refresh is None) and (icon is None):
+ return None
return HTMLParser(_charset, meta_charset, title, refresh, icon)
def _find_contenttype(Tag):
return HTMLParser(_charset, meta_charset, title, refresh, icon)
def _find_contenttype(Tag):
title = html_tree.findtext('head/title')
if title is None:
title = html_tree.findtext('title')
title = html_tree.findtext('head/title')
if title is None:
title = html_tree.findtext('title')
- if title is None:
- return None
meta = html_tree.findall('head/meta')
for m in meta:
meta = html_tree.findall('head/meta')
for m in meta:
else:
meta_charset = False
else:
meta_charset = False
- if charset or meta_charset:
+ if title and (charset or meta_charset):
title = title.encode(charset or meta_charset)
for m in meta:
title = title.encode(charset or meta_charset)
for m in meta:
+ if (title is None) and (refresh is None) and (icon is None):
+ return None
return HTMLParser(charset, meta_charset, title, refresh, icon)
return HTMLParser(charset, meta_charset, title, refresh, icon)
- if title is None:
- return None
-
for node in head.childNodes:
if node.name == 'meta' and \
('http-equiv' in node.attributes) and \
for node in head.childNodes:
if node.name == 'meta' and \
('http-equiv' in node.attributes) and \
if not charset:
charset = parser.tokenizer.stream.charEncoding[0]
if not charset:
charset = parser.tokenizer.stream.charEncoding[0]
- if charset or meta_charset:
+ if title and (charset or meta_charset):
title = title.encode(charset or meta_charset)
for node in head.childNodes:
title = title.encode(charset or meta_charset)
for node in head.childNodes:
- if title is None:
- return None
-
+ if (title is None) and (refresh is None) and (icon is None):
+ return None
return HTMLParser(charset, meta_charset, title, refresh, icon)
return HTMLParser(charset, meta_charset, title, refresh, icon)
except (HTMLParseError, HTMLHeadDone):
pass
except (HTMLParseError, HTMLHeadDone):
pass
- if parser.title is None:
+ if (parser.title is None) and (parser.refresh is None) and (parser.icon is None):
title = html_tree.findtext('head/title')
if title is None:
title = html_tree.findtext('title')
title = html_tree.findtext('head/title')
if title is None:
title = html_tree.findtext('title')
- if title is None:
- return None
meta = html_tree.findall('head/meta')
for m in meta:
meta = html_tree.findall('head/meta')
for m in meta:
else:
meta_charset = False
else:
meta_charset = False
- if charset or meta_charset:
+ if title and (charset or meta_charset):
title = title.encode(charset or meta_charset)
for m in meta:
title = title.encode(charset or meta_charset)
for m in meta:
+ if (title is None) and (refresh is None) and (icon is None):
+ return None
return HTMLParser(charset, meta_charset, title, refresh, icon)
return HTMLParser(charset, meta_charset, title, refresh, icon)