X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;f=Robots%2Fbkmk_rurllib.py;h=057c018e510c9fc6ff4de4e992e6cbd765a0ce63;hb=7462ff4d330df6386557b5265e454ffa0e6b7bb9;hp=067799c737271b5ceeb373eeb321bc12eaf1e7ec;hpb=2b3829aef193cb1951989a8cf97a96dcbfc084a1;p=bookmarks_db.git

diff --git a/Robots/bkmk_rurllib.py b/Robots/bkmk_rurllib.py
index 067799c..057c018 100644
--- a/Robots/bkmk_rurllib.py
+++ b/Robots/bkmk_rurllib.py
@@ -5,16 +5,15 @@ This file is a part of Bookmarks database and Internet robot.
 """
 
 __author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2000-2023 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2000-2024 PhiloSoft Design"
 __license__ = "GNU GPL"
 
 __all__ = ['robot_urllib']
 
 
-import sys
 import urllib
 
-from Robots.bkmk_robot_base import robot_base, get_error
+from Robots.bkmk_robot_base import robot_base, request_headers, get_error
 
 
 class RedirectException(Exception):
@@ -56,24 +55,19 @@ class MyURLopener(urllib.URLopener):
         raise IOError(('http error', errcode, errmsg, headers))
 
 
-urllib._urlopener = MyURLopener()
+def add_headers(opener):
+    try:
+        _user_agent = request_headers.pop('User-Agent')
+    except KeyError:
+        pass
+    else:
+        opener.addheaders[0] = ('User-Agent', _user_agent)
+    for h, v in request_headers.items():
+        opener.addheader(h, v)
 
-# Fake headers to pretend this is a real browser
-_user_agent = "Mozilla/5.0 (X11; U; Linux 2.6 i686; en)"
-" Gecko/20001221 Firefox/2.0.0"
-urllib._urlopener.addheaders[0] = ('User-Agent', _user_agent)
-_x_user_agent = "bookmarks_db (Python %d.%d.%d; urllib/%s)" % (
-    sys.version_info[0], sys.version_info[1],
-    sys.version_info[2], urllib.__version__
-)
-urllib._urlopener.addheader('X-User-Agent', _x_user_agent)
-urllib._urlopener.addheader('Referer', '')
-
-urllib._urlopener.addheader('Accept', '*/*')
-urllib._urlopener.addheader('Accept-Language', 'ru,en')
-urllib._urlopener.addheader('Cache-Control', 'max-age=300')
-urllib._urlopener.addheader('Connection', 'close')
+urllib._urlopener = opener = MyURLopener()
+add_headers(opener)
 
 
 urllib_ftpwrapper = urllib.ftpwrapper
 ftpcache_key = None
@@ -93,16 +87,16 @@ class robot_urllib(robot_base):
     def get(self, bookmark, url, accept_charset=False):
         try:
             # Set fake referer to the base URL
-            urllib._urlopener.addheaders[2] = ('Referer', url)
+            opener.addheaders[2] = ('Referer', url)
 
             if accept_charset and bookmark.charset:
-                urllib._urlopener.addheader('Accept-Charset', bookmark.charset)
+                opener.addheader('Accept-Charset', bookmark.charset)
             try:
                 fname, headers = urllib.urlretrieve(url)
             finally:
                 if accept_charset and bookmark.charset:
                     # Remove Accept-Charset
-                    del urllib._urlopener.addheaders[-1]
+                    del opener.addheaders[-1]
 
             infile = open(fname, 'rt')
             content = infile.read()
@@ -127,7 +121,7 @@ class robot_urllib(robot_base):
 
     def get_ftp_welcome(self):
         global ftpcache_key
-        _welcome = urllib._urlopener.ftpcache[ftpcache_key].ftp.welcome
+        _welcome = opener.ftpcache[ftpcache_key].ftp.welcome
         # I am assuming there are no duplicate ftp URLs in db.
         # If there are - ftpcache_key in next line is invalid.
         ftpcache_key = None
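
As a reading aid (not part of the commit), here is a minimal, standalone sketch of what the new add_headers() helper is expected to do: a 'User-Agent' taken from request_headers replaces the opener's default entry at addheaders[0], and every remaining header is appended via addheader(). FakeOpener and the sample request_headers dict below are illustrative stand-ins for urllib.URLopener and for the request_headers mapping imported from Robots.bkmk_robot_base; the real robot applies the helper to the module-wide urllib._urlopener, exactly as the '+' lines above show.

    # Illustrative stand-ins; the real code uses urllib.URLopener and
    # the request_headers mapping from Robots.bkmk_robot_base.
    request_headers = {
        'User-Agent': 'bookmarks_db',
        'Accept': '*/*',
        'Accept-Language': 'ru,en',
    }


    class FakeOpener(object):
        def __init__(self):
            # urllib.URLopener pre-populates addheaders with a default User-Agent
            self.addheaders = [('User-Agent', 'Python-urllib/1.17')]

        def addheader(self, *args):
            # Mimics urllib.URLopener.addheader(): appends the args tuple
            self.addheaders.append(args)


    def add_headers(opener):
        # Same logic as the '+' lines above: User-Agent overwrites slot 0,
        # all other request headers are appended.
        try:
            _user_agent = request_headers.pop('User-Agent')
        except KeyError:
            pass
        else:
            opener.addheaders[0] = ('User-Agent', _user_agent)
        for h, v in request_headers.items():
            opener.addheader(h, v)


    opener = FakeOpener()
    add_headers(opener)
    print(opener.addheaders)
    # e.g. [('User-Agent', 'bookmarks_db'), ('Accept', '*/*'), ('Accept-Language', 'ru,en')]

The net effect of the change is that this robot no longer hard-codes its fake browser headers: they come from the request_headers mapping in bkmk_robot_base, presumably so that all robot implementations send the same set of headers.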