"""
Simple robot with socket's timeout
- Written by Broytman. Copyright (C) 2000-2010 PhiloSoft Design
+ Written by Broytman. Copyright (C) 2000-2011 PhiloSoft Design
"""
import socket
socket.setdefaulttimeout(900)
-from bkmk_rsimple import robot_simple, get_error
+from .bkmk_rsimple import robot_simple, get_error
class robot_simple_tos(robot_simple):
-#! /usr/bin/env python
"""
HTML Parsers wrapper
parsers = []
try:
- import parse_html_beautifulsoup
- parse_html_beautifulsoup.DEFAULT_CHARSET = DEFAULT_CHARSET
+ from . import beautifulsoup
except ImportError:
pass
else:
- parsers.append(parse_html_beautifulsoup.parse_html)
+ beautifulsoup.DEFAULT_CHARSET = DEFAULT_CHARSET
+ parsers.append(beautifulsoup.parse_html)
try:
- from parse_html_lxml import parse_html
+ from .lxml import parse_html
except ImportError:
pass
else:
parsers.append(parse_html)
try:
- from parse_html_htmlparser import parse_html
+ from .htmlparser import parse_html
except ImportError:
pass
else:
parsers.append(parse_html)
try:
- import parse_html_html5
+ from . import html5
except ImportError:
pass
else:
- parsers.append(parse_html_html5.parse_html)
+ parsers.append(html5.parse_html)
# ElementTidy often segfaults
#try:
-# import parse_html_etreetidy
+# from . import etreetidy
#except ImportError:
# pass
#else:
-# parsers.append(parse_html_etreetidy.parse_html)
+# parsers.append(etreetidy.parse_html)
import re
from htmlentitydefs import name2codepoint
for c in charsets:
try:
parser = p(filename, c, log)
- break
except UnicodeEncodeError:
pass
+ else:
+ break
if parser:
break
else:
return parser
-if __name__ == '__main__':
+def test():
import sys
l = len(sys.argv)
"""
HTML Parser using BeautifulSoup
- Written by Broytman. Copyright (C) 2007-2010 PhiloSoft Design
+ Written by Broytman. Copyright (C) 2007-2011 PhiloSoft Design
"""
import re
from sgmllib import SGMLParser, SGMLParseError
from BeautifulSoup import BeautifulSoup, CData
-from parse_html_util import HTMLParser
+from .util import HTMLParser
# http://groups.google.com/group/beautifulsoup/browse_thread/thread/69093cb0d3a3cf63
"""
HTML Parser using ElementTree+TidyLib.
- Written by Broytman. Copyright (C) 2010 PhiloSoft Design
+ Written by Broytman. Copyright (C) 2010, 2011 PhiloSoft Design
"""
from elementtidy import TidyHTMLTreeBuilder
-from parse_html_util import HTMLParser
+from .util import HTMLParser
def parse_html(filename, charset=None, log=None):
"""
HTML Parser using html5.
- Written by Broytman. Copyright (C) 2010 PhiloSoft Design
+ Written by Broytman. Copyright (C) 2010, 2011 PhiloSoft Design
"""
from html5lib import HTMLParser as HTML5Parser
-from parse_html_util import HTMLParser
+from .util import HTMLParser
def parse_html(filename, charset=None, log=None):
"""
HTML Parser
- Written by Broytman. Copyright (C) 1997-2010 PhiloSoft Design
+ Written by Broytman. Copyright (C) 1997-2011 PhiloSoft Design
"""
from HTMLParser import HTMLParseError
"""
HTML Parser using lxml.html.
- Written by Broytman. Copyright (C) 2010 PhiloSoft Design
+ Written by Broytman. Copyright (C) 2010, 2011 PhiloSoft Design
"""
from lxml.html import parse
-from parse_html_util import HTMLParser
+from .util import HTMLParser
def parse_html(filename, charset=None, log=None):