git.phdru.name Git - bookmarks_db.git/commitdiff
Add robot based on urllib2
author    Oleg Broytman <phd@phdru.name>
Sun, 6 Jul 2014 01:22:31 +0000 (05:22 +0400)
committer Oleg Broytman <phd@phdru.name>
Sun, 6 Jul 2014 01:25:10 +0000 (05:25 +0400)
Robots/bkmk_rurllib2.py [new file with mode: 0644]
doc/ChangeLog
doc/TODO

diff --git a/Robots/bkmk_rurllib2.py b/Robots/bkmk_rurllib2.py
new file mode 100644 (file)
index 0000000..b15a7b0
--- /dev/null
+++ b/Robots/bkmk_rurllib2.py
@@ -0,0 +1,72 @@
+"""Robot based on urllib2
+
+This file is a part of Bookmarks database and Internet robot.
+
+"""
+
+__author__ = "Oleg Broytman <phd@phdru.name>"
+__copyright__ = "Copyright (C) 2014 PhiloSoft Design"
+__license__ = "GNU GPL"
+
+__all__ = ['robot_urllib2']
+
+
+import sys
+import httplib
+import urllib2
+from Robots.bkmk_robot_base import robot_base
+
+
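+# Build the opener by hand instead of using urllib2.build_opener():
+# HTTPRedirectHandler is deliberately left out, so 3xx responses raise
+# HTTPError and get() below can report the redirect instead of following it.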
+opener = urllib2.OpenerDirector()
+default_classes = [urllib2.HTTPHandler, urllib2.HTTPDefaultErrorHandler,
+                   urllib2.FTPHandler, urllib2.HTTPErrorProcessor]
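+# httplib.HTTPS exists only when Python was built with SSL support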
+if hasattr(httplib, 'HTTPS'):
+    default_classes.insert(0, urllib2.HTTPSHandler)
+for klass in default_classes:
+    opener.add_handler(klass())
+
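+# Install as the global opener used by urllib2.urlopen() in get()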
+urllib2.install_opener(opener)
+
+
+# Fake headers to pretend this is a real browser
+_user_agent = "Mozilla/5.0 (X11; Linux i686; rv:30.0) Gecko/20100101 Firefox/30.0"
+_x_user_agent = "bookmarks_db (Python %d.%d.%d; urllib2)" % (
+    sys.version_info[0], sys.version_info[1], sys.version_info[2])
+
+
+class robot_urllib2(robot_base):
+    def get(self, bookmark, url, accept_charset=False):
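+        # Every branch returns a 5-tuple:
+        # (error, redirect_code, redirect_to, headers, content)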
+        request = urllib2.Request(url)
+        request.add_header('Accept', '*/*')
+        if accept_charset and bookmark.charset:
+            request.add_header('Accept-Charset', bookmark.charset)
+        request.add_header('Accept-Language', 'ru,en')
+        request.add_header('Cache-Control', 'max-age=300')
+        request.add_header('Connection', 'close')
+        request.add_header('Referer', url)
+        request.add_header('User-agent', _user_agent)
+        request.add_header('X-User-Agent', _x_user_agent)
+
+        try:
+            response = urllib2.urlopen(request)
+        except urllib2.HTTPError, e:
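+            # Redirects surface here because no HTTPRedirectHandler is installed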
+            if e.code in (301, 302, 303, 307):
+                return None, e.code, e.hdrs['Location'], None, None
+            else:
+                return "HTTP Error %s: %s" % (e.code, e.msg), None, None, None, None
+        except urllib2.URLError, e:
+            return "URL Error: %s" % e.reason, None, None, None, None
+        else:
+            return None, None, None, response.info(), response.read()
+
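+    # Stub: FTP welcome messages are not handled yet (see doc/TODO)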
+    def get_ftp_welcome(self):
+        return ''
diff --git a/doc/ChangeLog b/doc/ChangeLog
index cc441e75c78f0bdcacdca8616d00e696aa942271..96273797dc26721196f8bc2da57bd58a2b39afd5 100644 (file)
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -17,6 +17,8 @@ WHAT'S NEW in version 4.6.0 (2014-06-??)
 
    Pass subproc parameter to the subprocess to allow different robots.
 
+   Add a new robot based on urllib2.
+
 
 WHAT'S NEW in version 4.5.6 (2014-01-14)
 
diff --git a/doc/TODO b/doc/TODO
index 234dc2bb4b2a90dcfb0903b7c34135d61e47631e..f98d0769a9116d382c103c62e6340ea8ce50012a 100644 (file)
--- a/doc/TODO
+++ b/doc/TODO
@@ -1,4 +1,4 @@
-A new robot based on urllib2.
+Robot based on urllib2: handle timeout and ftp.
 
 A new robot based on PycURL.