from parse_html import parse_html
+# Fake headers to pretend this is a real browser
+_user_agent = "Mozilla/5.0 (X11; U; Linux 2.6 i686; en)"
+" Gecko/20001221 Firefox/2.0.0"
+_x_user_agent = "bookmarks_db (Python %d.%d.%d)" % sys.version_info[:3]
+
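+# Shared by the urllib, urllib2 and Python 3 robot modules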
+request_headers = {
+ 'Accept': '*/*',
+ 'Accept-Language': 'ru,en',
+ 'Cache-Control': 'max-age=300',
+ 'Connection': 'close',
+ 'Referer': '/',
+ 'User-Agent': _user_agent,
+ 'X-User-Agent': _x_user_agent,
+}
+
+
reloc_dict = {
301: "perm1.",
302: "temp2.",
"""
__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2000-2023 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2000-2024 PhiloSoft Design"
__license__ = "GNU GPL"
__all__ = ['robot_urllib']
-import sys
import urllib
-from Robots.bkmk_robot_base import robot_base, get_error
+from Robots.bkmk_robot_base import robot_base, request_headers, get_error
class RedirectException(Exception):
raise IOError(('http error', errcode, errmsg, headers))
-urllib._urlopener = MyURLopener()
+def add_headers(opener):
+    # Install the shared request headers on a urllib opener.
+    # Work on a copy so the module-level request_headers dict stays
+    # intact for other robot modules and for repeated calls.
+    headers = request_headers.copy()
+    user_agent = headers.pop('User-Agent', None)
+    if user_agent is not None:
+        # Replace the default "Python-urllib/x.y" User-Agent entry
+        opener.addheaders[0] = ('User-Agent', user_agent)
+    for h, v in sorted(headers.items()):
+        opener.addheader(h, v)
-# Fake headers to pretend this is a real browser
-_user_agent = "Mozilla/5.0 (X11; U; Linux 2.6 i686; en)"
-" Gecko/20001221 Firefox/2.0.0"
-urllib._urlopener.addheaders[0] = ('User-Agent', _user_agent)
-_x_user_agent = "bookmarks_db (Python %d.%d.%d; urllib/%s)" % (
- sys.version_info[0], sys.version_info[1],
- sys.version_info[2], urllib.__version__
-)
-urllib._urlopener.addheader('X-User-Agent', _x_user_agent)
-urllib._urlopener.addheader('Referer', '')
-
-urllib._urlopener.addheader('Accept', '*/*')
-urllib._urlopener.addheader('Accept-Language', 'ru,en')
-urllib._urlopener.addheader('Cache-Control', 'max-age=300')
-urllib._urlopener.addheader('Connection', 'close')
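+# Install the opener globally so urllib.urlretrieve() uses it,
+# then give it the shared request headers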
+urllib._urlopener = opener = MyURLopener()
+add_headers(opener)
urllib_ftpwrapper = urllib.ftpwrapper
ftpcache_key = None
def get(self, bookmark, url, accept_charset=False):
try:
# Set fake referer to the base URL
- urllib._urlopener.addheaders[2] = ('Referer', url)
+            # Referer's position in addheaders is not fixed, replace by name
+            for i, (h, v) in enumerate(opener.addheaders):
+                if h == 'Referer':
+                    opener.addheaders[i] = ('Referer', url)
+                    break
if accept_charset and bookmark.charset:
- urllib._urlopener.addheader('Accept-Charset', bookmark.charset)
+ opener.addheader('Accept-Charset', bookmark.charset)
try:
fname, headers = urllib.urlretrieve(url)
finally:
if accept_charset and bookmark.charset:
# Remove Accept-Charset
- del urllib._urlopener.addheaders[-1]
+ del opener.addheaders[-1]
infile = open(fname, 'rt')
content = infile.read()
def get_ftp_welcome(self):
global ftpcache_key
- _welcome = urllib._urlopener.ftpcache[ftpcache_key].ftp.welcome
+ _welcome = opener.ftpcache[ftpcache_key].ftp.welcome
# I am assuming there are no duplicate ftp URLs in db.
# If there are - ftpcache_key in next line is invalid.
ftpcache_key = None
"""
__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2014-2023 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2014-2024 PhiloSoft Design"
__license__ = "GNU GPL"
__all__ = ['robot_urllib2']
import sys
import httplib
import urllib2
-from Robots.bkmk_robot_base import robot_base, get_error
+from Robots.bkmk_robot_base import robot_base, request_headers, get_error
_fw = None
urllib2.install_opener(opener)
-# Fake headers to pretend this is a real browser
-_user_agent = "Mozilla/5.0 (X11; Linux i686; rv:30.0)"
-" Gecko/20100101 Firefox/30.0"
-_x_user_agent = "bookmarks_db (Python %d.%d.%d; urllib2)" % (
- sys.version_info[0], sys.version_info[1], sys.version_info[2])
-
-
class robot_urllib2(robot_base):
def get(self, bookmark, url, accept_charset=False):
request = urllib2.Request(url)
- request.add_header('Accept', '*/*')
+        for h, v in request_headers.items():
+            request.add_header(h, v)
+        # Fake referer: point it at the requested URL itself
+        request.add_header('Referer', url)
if accept_charset and bookmark.charset:
request.add_header('Accept-Charset', bookmark.charset)
- request.add_header('Accept-Language', 'ru,en')
- request.add_header('Cache-Control', 'max-age=300')
- request.add_header('Connection', 'close')
- request.add_header('Referer', url)
- request.add_header('User-agent', _user_agent)
- request.add_header('X-User-Agent', _x_user_agent)
global _fw
_fw = None
"""
__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2000-2023 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2000-2024 PhiloSoft Design"
__license__ = "GNU GPL"
__all__ = ['robot_urllib_py3']
import http.client
import socket
import sys
+import urllib
import urllib.request
from Robots.bkmk_robot_base import robot_base, get_error
+# Monkey-patch urllib so that Robots.bkmk_rurllib (Python 2 urllib API)
+# can be imported here just to reuse its add_headers()
+urllib.URLopener = urllib.request.URLopener
+urllib.ftpwrapper = urllib.request.ftpwrapper
+from Robots.bkmk_rurllib import add_headers # noqa: E402 import not at top
+
class RedirectException(Exception):
def __init__(self, errcode, newurl):
urllib.request._opener = opener = MyURLopener()
-
-# Fake headers to pretend this is a real browser
-_user_agent = "Mozilla/5.0 (X11; U; Linux 2.6 i686; en)"
-" Gecko/20001221 Firefox/2.0.0"
-opener.addheaders[0] = ('User-Agent', _user_agent)
-_x_user_agent = "bookmarks_db (Python %d.%d.%d; urllib/%s)" % (
- sys.version_info[0], sys.version_info[1],
- sys.version_info[2], urllib.request.__version__
-)
-opener.addheader('X-User-Agent', _x_user_agent)
-opener.addheader('Referer', '')
-
-opener.addheader('Accept', '*/*')
-opener.addheader('Accept-Language', 'ru,en')
-opener.addheader('Cache-Control', 'max-age=300')
-opener.addheader('Connection', 'close')
-
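+# Reuse add_headers() from the Python 2 robot to set the shared headers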
+add_headers(opener)
urllib_ftpwrapper = urllib.request.ftpwrapper
ftpcache_key = None