git.phdru.name Git - bookmarks_db.git/commitdiff
Add robot based on urllib2
author    Oleg Broytman <phd@phdru.name>
Sun, 6 Jul 2014 01:22:31 +0000 (05:22 +0400)
committer Oleg Broytman <phd@phdru.name>
Sun, 6 Jul 2014 01:25:10 +0000 (05:25 +0400)
Robots/bkmk_rurllib2.py [new file with mode: 0644]
doc/ChangeLog
doc/TODO

diff --git a/Robots/bkmk_rurllib2.py b/Robots/bkmk_rurllib2.py
new file mode 100644 (file)
index 0000000..b15a7b0
--- /dev/null
+++ b/Robots/bkmk_rurllib2.py
@@ -0,0 +1,72 @@
+"""Robot based on urllib2
+
+This file is a part of Bookmarks database and Internet robot.
+
+"""
+
+__author__ = "Oleg Broytman <phd@phdru.name>"
+__copyright__ = "Copyright (C) 2014 PhiloSoft Design"
+__license__ = "GNU GPL"
+
+__all__ = ['robot_urllib2']
+
+
+import sys
+import httplib
+import urllib2
+from Robots.bkmk_robot_base import robot_base
+
+
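+# Build the opener by hand instead of using urllib2.build_opener():
+# HTTPRedirectHandler is deliberately left out, so 3xx responses raise
+# HTTPError and get() below can report the redirect instead of following it.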
+opener = urllib2.OpenerDirector()
+default_classes = [urllib2.HTTPHandler, urllib2.HTTPDefaultErrorHandler,
+                   urllib2.FTPHandler, urllib2.HTTPErrorProcessor]
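+# httplib.HTTPS exists only when Python was built with SSL support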
+if hasattr(httplib, 'HTTPS'):
+    default_classes.insert(0, urllib2.HTTPSHandler)
+for klass in default_classes:
+    opener.add_handler(klass())
+
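+# Install as the global opener used by urllib2.urlopen() in get()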
+urllib2.install_opener(opener)
+
+
+# Fake headers to pretend this is a real browser
+_user_agent = "Mozilla/5.0 (X11; Linux i686; rv:30.0) Gecko/20100101 Firefox/30.0"
+_x_user_agent = "bookmarks_db (Python %d.%d.%d; urllib2)" % (
+    sys.version_info[0], sys.version_info[1], sys.version_info[2])
+
+
+class robot_urllib2(robot_base):
+    def get(self, bookmark, url, accept_charset=False):
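+        # Every branch returns a 5-tuple:
+        # (error, redirect_code, redirect_to, headers, content)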
+        request = urllib2.Request(url)
+        request.add_header('Accept', '*/*')
+        if accept_charset and bookmark.charset:
+            request.add_header('Accept-Charset', bookmark.charset)
+        request.add_header('Accept-Language', 'ru,en')
+        request.add_header('Cache-Control', 'max-age=300')
+        request.add_header('Connection', 'close')
+        request.add_header('Referer', url)
+        request.add_header('User-agent', _user_agent)
+        request.add_header('X-User-Agent', _x_user_agent)
+
+        try:
+            response = urllib2.urlopen(request)
+        except urllib2.HTTPError, e:
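+            # Redirects surface here because no HTTPRedirectHandler is installed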
+            if e.code in (301, 302, 303, 307):
+                return None, e.code, e.hdrs['Location'], None, None
+            else:
+                return "HTTP Error %s: %s" % (e.code, e.msg), None, None, None, None
+        except urllib2.URLError, e:
+            return "URL Error: %s" % e.reason, None, None, None, None
+        else:
+            return None, None, None, response.info(), response.read()
+
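+    # Stub: FTP welcome messages are not handled yet (see doc/TODO)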
+    def get_ftp_welcome(self):
+        return ''
diff --git a/doc/ChangeLog b/doc/ChangeLog
index cc441e75c78f0bdcacdca8616d00e696aa942271..96273797dc26721196f8bc2da57bd58a2b39afd5 100644 (file)
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -17,6 +17,8 @@ WHAT'S NEW in version 4.6.0 (2014-06-??)
 
    Pass subproc parameter to the subprocess to allow different robots.
 
+   Add a new robot based on urllib2.
+
 
 WHAT'S NEW in version 4.5.6 (2014-01-14)
 
diff --git a/doc/TODO b/doc/TODO
index 234dc2bb4b2a90dcfb0903b7c34135d61e47631e..f98d0769a9116d382c103c62e6340ea8ce50012a 100644 (file)
--- a/doc/TODO
+++ b/doc/TODO
@@ -1,4 +1,4 @@
-A new robot based on urllib2.
+Robot based on urllib2: handle timeout and ftp.
 
 A new robot based on PycURL.