git.phdru.name Git - bookmarks_db.git/commitdiff
Return redirect code/destination URL
author Oleg Broytman <phd@phdru.name>
Fri, 4 Jul 2014 21:04:27 +0000 (01:04 +0400)
committer Oleg Broytman <phd@phdru.name>
Fri, 4 Jul 2014 21:04:27 +0000 (01:04 +0400)
Return redirect code and destination URL instead of raising
RedirectException.

Robots/bkmk_robot_base.py
Robots/bkmk_rurllib.py

index dce593310c1a1461749f189ca49dbd3b269f06cb..80079b4d9006a45793d212b898b319c004a3a5c9 100644 (file)
@@ -23,17 +23,13 @@ from bkmk_objects import Robot
 from parse_html import parse_html
 
 
-class RedirectException(Exception):
-   reloc_dict = {
-      301: "perm.",
-      302: "temp2.",
-      303: "temp3.",
-      307: "temp7.",
-      "html": "html"
-   }
-   def __init__(self, errcode, newurl):
-      Exception.__init__(self, "(%s) to %s" % (self.reloc_dict[errcode], newurl))
-      self.url = newurl
+reloc_dict = {
+  301: "perm.",
+  302: "temp2.",
+  303: "temp3.",
+  307: "temp7.",
+  "html": "html"
+}
 
 
 def get_error(msg):
@@ -61,12 +57,16 @@ class robot_base(Robot):
          url_path, url_tag  = urllib.splittag(url_path)
 
          url = "%s://%s%s" % (url_type, url_host, url_path)
-         error, headers, content = self.get(bookmark, url, True)
+         error, redirect_code, redirect_to, headers, content = self.get(bookmark, url, True)
 
          if error:
              bookmark.error = error
              return 1
 
+         if redirect_code:
+             self.set_redirect(bookmark, redirect_code, redirect_to)
+             return 1
+
          size = 0
          last_modified = None
 
@@ -142,10 +142,11 @@ class robot_base(Robot):
                      try:
                         _icon_url = icon_url
                         for i in range(8):
-                           try:
-                              error, icon_headers, icon_data = self.get(bookmark, _icon_url)
-                           except RedirectException, e:
-                              _icon_url = e.url
+                           error, icon_redirect_code, icon_redirect_to, \
+                              icon_headers, icon_data = \
+                                 self.get(bookmark, _icon_url)
+                           if icon_redirect_code:
+                              _icon_url = icon_redirect_to
                               self.log("   redirect to : %s" % _icon_url)
                            else:
                               if icon_data is None:
@@ -183,13 +184,13 @@ class robot_base(Robot):
                      try:
                         timeout = float(refresh.split(';')[0])
                      except (IndexError, ValueError):
-                        raise RedirectException("html", "Bad redirect to %s (%s)" % (url, refresh))
+                        self.set_redirect(bookmark, "html", "Bad redirect to %s (%s)" % (url, refresh))
                      else:
                         try:
                            timeout = int(refresh.split(';')[0])
                         except ValueError:
                            pass # float timeout
-                        raise RedirectException("html", "%s (%s sec)" % (url, timeout))
+                        self.set_redirect(bookmark, "html", "%s (%s sec)" % (url, timeout))
 
             except KeyError, key:
                self.log("   no header: %s" % key)
@@ -198,10 +199,6 @@ class robot_base(Robot):
          bookmark.error = "Unexpected EOF (FTP server closed connection)"
          self.log('   EOF: %s' % bookmark.error)
 
-      except RedirectException, msg:
-         bookmark.moved = str(msg)
-         self.log('   Moved: %s' % bookmark.moved)
-
       except KeyboardInterrupt:
          self.log("Keyboard interrupt (^C)")
          return 0
@@ -218,6 +215,10 @@ class robot_base(Robot):
       # Tested
       return 1
 
+   def set_redirect(self, bookmark, errcode, newurl):
+        bookmark.moved = "(%s) to %s" % (reloc_dict[errcode], newurl)
+        self.log('   Moved: %s' % bookmark.moved)
+
    def finish_check_url(self, bookmark):
       start = self.start
       bookmark.last_tested = str(start)
index d9908b83292ab92fcfd107710446555cd3aab712..6c10148af5688b26fc4eba60c7f34247a8d581bc 100644 (file)
@@ -13,9 +13,15 @@ __all__ = ['robot_urllib']
 
 import sys, os
 import time, urllib
-from Robots.bkmk_robot_base import robot_base, RedirectException, get_error
+from Robots.bkmk_robot_base import robot_base, get_error
 
 
+class RedirectException(Exception):
+   def __init__(self, errcode, newurl):
+      Exception.__init__(self)
+      self.errcode = errcode
+      self.newurl = newurl
+
 class MyURLopener(urllib.URLopener):
    # Error 301 -- relocated (permanently)
    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None): 
@@ -91,7 +97,10 @@ class robot_urllib(robot_base):
          content = infile.read()
          infile.close()
 
-         return None, headers, content
+         return None, None, None, headers, content
+
+      except RedirectException, e:
+         return None, e.errcode, e.newurl, None, None
 
       except IOError, msg:
          if (msg[0] == "http error") and (msg[1] == -1):
@@ -102,7 +111,7 @@ class robot_urllib(robot_base):
             error = get_error(msg)
             self.log('   Error: %s' % error)
 
-         return error, None, None
+         return error, None, None, None, None
 
    def get_ftp_welcome(self):
       global ftpcache_key