From: Oleg Broytman Date: Sun, 30 Dec 2007 20:31:36 +0000 (+0000) Subject: Better fake headers. X-Git-Tag: v4.5.3~225 X-Git-Url: https://git.phdru.name/?a=commitdiff_plain;h=2c2e369574d8be5edffe54d3b9758bab0cc7f46b;p=bookmarks_db.git Better fake headers. git-svn-id: file:///home/phd/archive/SVN/bookmarks_db/trunk@151 fdd5c36f-1aea-0310-aeeb-c58d7e2b6c23 --- diff --git a/Robots/bkmk_rsimple.py b/Robots/bkmk_rsimple.py index 2591588..399cf06 100644 --- a/Robots/bkmk_rsimple.py +++ b/Robots/bkmk_rsimple.py @@ -4,14 +4,17 @@ Written by Oleg BroytMann. Copyright (C) 2000-2007 PhiloSoft Design. """ - import sys, os import time, urllib from base64 import b64encode from urlparse import urljoin + from m_lib.net.www.util import parse_time from m_lib.md5wrapper import md5wrapper +from bkmk_objects import Robot +from parse_html import parse_html + class RedirectException(Exception): reloc_dict = { @@ -46,18 +49,17 @@ class MyURLopener(urllib.URLopener): urllib._urlopener = MyURLopener() # Fake headers to pretend this is a real browser -_version = "Links (2.1; Linux 2.6 i686; 80x25)" +_version = "Mozilla/5.0 (X11; U; Linux 2.6 i686; en) Gecko/20001221 Firefox/2.0.0" urllib._urlopener.addheaders[0] = ('User-Agent', _version) _version = "bookmarks_db (Python %d.%d.%d; urllib/%s)" % ( sys.version_info[0], sys.version_info[1], sys.version_info[2], urllib.__version__) urllib._urlopener.addheader('X-User-Agent', _version) +urllib._urlopener.addheader('Referer', '') urllib._urlopener.addheader('Connection', 'close') -urllib._urlopener.addheader('Content-Length', '0') urllib._urlopener.addheader('Accept', '*/*') urllib._urlopener.addheader('Accept-Language', 'ru,en') urllib._urlopener.addheader('Cache-Control', 'max-age=300') -urllib._urlopener.addheader('Referer', 'http://www.yahoo.com/') def get_error(msg): @@ -90,9 +92,6 @@ def get_welcome(): return _welcome -from bkmk_objects import Robot -from parse_html import parse_html - class robot_simple(Robot): def check_url(self, bookmark): if not self.tempfname: @@ -107,6 +106,7 @@ class robot_simple(Robot): url_host, url_path = urllib.splithost(url_rest) url_path, url_tag = urllib.splittag(url_path) + urllib._urlopener.addheaders[2] = ('Referer', "%s://%s%s" % (url_type, url_host, url_path)) if bookmark.charset: urllib._urlopener.addheader('Accept-Charset', bookmark.charset) fname, headers = urllib.urlretrieve("%s://%s%s" % (url_type, url_host, url_path), self.tempfname) if bookmark.charset: del urllib._urlopener.addheaders[-1]