_set_subproc = True
class robot_forking(Robot):
- subproc = 'urllib2' # Default subprocess
+ subproc = 'urllib2' # Default subprocess
def check_url(self, bookmark):
global _set_subproc
os.environ['BKMK_ROBOT'] = subproc
if not check_subp:
- restart_subp(self.log) # Not restart, just start afresh
+ restart_subp(self.log) # Not restart, just start afresh
try:
save_parent = bookmark.parent
bookmark.parent = None
subp_pipe.write_record(pickle.dumps(bookmark))
- if check_subp.waitForPendingChar(60): # wait a minute
+ if check_subp.waitForPendingChar(60): # wait a minute
new_b = pickle.loads(subp_pipe.read_record())
for attr in (
"error", "no_error",
return 1
def stop(self):
- stop_subp(None) # Stop subprocess; do not log restarting
+ stop_subp(None) # Stop subprocess; do not log restarting
"""
__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 1999-2017 PhiloSoft Design"
+__copyright__ = "Copyright (C) 1999-2023 PhiloSoft Design"
__license__ = "GNU GPL"
__all__ = []
import sys, os
lib_dir = os.path.normpath(os.path.join(os.path.dirname(sys.argv[0]), os.pardir))
-sys.path.append(lib_dir) # for bkmk_objects.py
+sys.path.append(lib_dir) # for bkmk_objects.py
try:
import cPickle
bookmark.last_modified = last_modified
md5 = md5wrapper()
- if url_type == "ftp": # Pass welcome message through MD5
+ if url_type == "ftp": # Pass welcome message through MD5
md5.update(self.get_ftp_welcome())
md5.update(content)
try:
timeout = int(refresh.split(';')[0])
except ValueError:
- pass # float timeout
+ pass # float timeout
self.set_redirect(bookmark, "html", "%s (%s sec)" % (url, timeout))
except KeyError as key:
fname, headers = urllib.urlretrieve(url)
finally:
if accept_charset and bookmark.charset:
- del urllib._urlopener.addheaders[-1] # Remove Accept-Charset
+ del urllib._urlopener.addheaders[-1] # Remove Accept-Charset
infile = open(fname, 'rb')
content = infile.read()
root_folder.comment = string.join(header[7:], '')[4:]
save_level = 0
- got_folder = 1 # Start as if we already have one folder
+ got_folder = 1 # Start as if we already have one folder
for record in bookmarks_db:
level = int(record["Level"])
raise ValueError("new level (%d) too big; must be %d - %d" % (level, save_level-1, save_level+1))
save_level = level
- got_folder = record.has_key("Folder") # Test here to save got_folder for next loop
+ got_folder = record.has_key("Folder") # Test here to save got_folder for next loop
if record.has_key("URL"):
comment = record["Comment"].replace("\\n", '\n')
def strftime(s):
try:
return time.strftime("%a %d %b %Y %T", time.localtime(int(s)))
- except (TypeError, ValueError): # s is None or is already formatted
+ except (TypeError, ValueError): # s is None or is already formatted
return s
"""
__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2002-2017 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2002-2023 PhiloSoft Design"
__license__ = "GNU GPL"
import sys, os, time
global robot
robot = robot(None)
- if robot.check_url(bookmark): # get real title and last modified date
- if title: # forced title
+ if robot.check_url(bookmark): # get real title and last modified date
+ if title: # forced title
bookmark.name = title
elif hasattr(bookmark, "real_title"):
bookmark.name = bookmark.real_title
filename = args[0]
else:
- filename = 'bookmarks.html' # good name both for DOS (bookmark.htm) and UNIX
+ filename = 'bookmarks.html' # good name both for DOS (bookmark.htm) and UNIX
if report_stats:
if os.name == 'dos' or os.name == 'nt':
dos_add = 1
else:
- dos_add = 0 # UNIX' and Mac's len() counts CR or LF correct
+ dos_add = 0 # UNIX' and Mac's len() counts CR or LF correct
infile = open(filename, 'r')
self.last_modified = last_modified
def walk_depth(self, walker, level=0):
- if hasattr(self, "header"): # root folder
+ if hasattr(self, "header"): # root folder
prune = 0
walker.root_folder(self)
else:
self.log = log
def stop(self):
- pass # Nothing to do on cleanup
+ pass # Nothing to do on cleanup
# Helper class to make inverse links (nodes linked to their parent)
def start_folder(self, f, level):
f.parent = self.parent_stack[-1]
- self.parent_stack.append(f) # Push the folder onto the stack of parents
+ self.parent_stack.append(f) # Push the folder onto the stack of parents
def end_folder(self, f, level):
del self.parent_stack[-1] # Pop off the stack
folder = Folder(add_date, last_modified=last_modified)
self.current_object = folder
self.current_folder.append(folder)
- self.folder_stack.append(folder) # push new folder
+ self.folder_stack.append(folder) # push new folder
self.current_folder = folder
self.objects += 1
debug("End folder")
debug("Folder stack: %s" % dump_names(self.folder_stack))
if self.folder_stack:
- del self.folder_stack[-1] # pop last folder
+ del self.folder_stack[-1] # pop last folder
if self.folder_stack:
self.current_folder = self.folder_stack[-1]
else:
__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2002-2017 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2002-2023 PhiloSoft Design"
__license__ = "GNU GPL"
pass
if report_stats:
- print("Broytman check_title, Copyright (C) 2002-2017 PhiloSoft Design")
+ print("Broytman check_title, Copyright (C) 2002-2023 PhiloSoft Design")
if args:
sys.stderr.write("check_title: too many arguments\n")
if object.isBookmark:
if hasattr(object, "moved") or hasattr(object, "error") \
- or object.href.startswith('place:'): # Firefox SmartBookmarks
+ or object.href.startswith('place:'): # Firefox SmartBookmarks
continue
if hasattr(object, "real_title") and (object.real_title is not None):
if object.isBookmark:
href = object.href
- if (href.startswith('place:') # Firefox SmartBookmarks
- or '%s' in href): # Bookmark with keyword
+ if (href.startswith('place:') # Firefox SmartBookmarks
+ or '%s' in href): # Bookmark with keyword
log("Skipped %s" % href)
continue
try:
size = size + int(object.size)
except (AttributeError, TypeError, ValueError):
- pass # Some object does not have a size :(
+ pass # Some object does not have a size :(
else:
log("Interrupted by user (^C)")
break
output_filename = writer.filename
transl = 0
- transl_name = "" # dictionary translation; default is no translation
+ transl_name = "" # dictionary translation; default is no translation
for _opt, _arg in optlist:
if _opt == '-s':
else:
raise ValueError("transl (%d) must be 1 or 2" % transl)
- del transl_db # Save few bytes of memory
+ del transl_db # Save few bytes of memory
from bkmk_objects import Walker
class Transl(Walker):
"""
__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 1997-2017 PhiloSoft Design"
+__copyright__ = "Copyright (C) 1997-2023 PhiloSoft Design"
__license__ = "GNU GPL"
__all__ = ['parse_html', 'parse_filename', 'universal_charset']
import codecs
universal_charset = "utf-8"
-DEFAULT_CHARSET = "cp1251" # Stupid default for Russian Cyrillic
+DEFAULT_CHARSET = "cp1251" # Stupid default for Russian Cyrillic
parsers = []
try:
part = unichr(int(part[2:-1])).encode(charset)
except UnicodeEncodeError:
- pass # Leave the entity as is
+ pass # Leave the entity as is
output.append(part)
return ''.join(output)
if charset:
try:
- codecs.lookup(charset) # In case of unknown charset...
+ codecs.lookup(charset) # In case of unknown charset...
except (ValueError, LookupError):
charset = None # ...try charset from HTML
from BeautifulSoup import BeautifulSoup, CData
from .bkmk_ph_util import HTMLParser
-DEFAULT_CHARSET = "cp1251" # Stupid default for Russian Cyrillic
+DEFAULT_CHARSET = "cp1251" # Stupid default for Russian Cyrillic
# http://groups.google.com/group/beautifulsoup/browse_thread/thread/69093cb0d3a3cf63
class BadDeclParser(BeautifulSoup):
return None
_charset = root.originalEncoding
- if _charset in ("ISO-8859-2", "windows-1252", "MacCyrillic"): # Replace default
+ if _charset in ("ISO-8859-2", "windows-1252", "MacCyrillic"): # Replace default
_charset = DEFAULT_CHARSET
root = _parse_html(html_text, _charset)
if root is None:
head = html.head
if head is None:
- head = html # Some sites put TITLE in HTML without HEAD
+ head = html # Some sites put TITLE in HTML without HEAD
title = head.title
if (title is None) and (html is not head):
__charset = meta_content.lower().split('charset=')[1].split(';')[0]
else:
__charset = False
- except IndexError: # No charset in the META Content-Type
+ except IndexError: # No charset in the META Content-Type
meta_charset = False
else:
meta_charset = _charset == __charset
"""
__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2017 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2017-2023 PhiloSoft Design"
__license__ = "GNU GPL"
__all__ = ['parse_html']
from .bkmk_ph_util import HTMLParser
universal_charset = "utf-8"
-DEFAULT_CHARSET = "cp1251" # Stupid default for Russian Cyrillic
+DEFAULT_CHARSET = "cp1251" # Stupid default for Russian Cyrillic
def _parse_html(html_text, charset):
try:
head = html.head
if head is None:
- head = html # Some sites put TITLE in HTML without HEAD
+ head = html # Some sites put TITLE in HTML without HEAD
title = head.title
if (title is None) and (html is not head):
__charset = meta_content.lower().split('charset=')[1].split(';')[0]
else:
__charset = False
- except IndexError: # No charset in the META Content-Type
+ except IndexError: # No charset in the META Content-Type
meta_charset = False
else:
meta_charset = _charset = __charset
"""
__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 1997-2017 PhiloSoft Design"
+__copyright__ = "Copyright (C) 1997-2023 PhiloSoft Design"
__license__ = "GNU GPL"
__all__ = ['parse_html']
self.accumulator = ''
def end_title(self):
- if not self.title: # use only the first title
+ if not self.title: # use only the first title
self.title = self.accumulator
def do_link(self, attrs):
__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2003-2017 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2003-2023 PhiloSoft Design"
__license__ = "GNU GPL"
pass
if report_stats:
- print("Broytman set-title-list, Copyright (C) 2003-2017 PhiloSoft Design")
+ print("Broytman set-title-list, Copyright (C) 2003-2023 PhiloSoft Design")
if len(args) != 1:
sys.stderr.write("Usage: set-title-list [-s] title_list_file\n")
title_list_file = open(args[0], 'r')
for line in title_list_file:
- line = line[:-1] # strip trailing newline
+ line = line[:-1] # strip trailing newline
if URL is None:
URL = line
elif title is None:
title = line
- elif line: # the third line in every 3 lines must be empty
+ elif line: # the third line in every 3 lines must be empty
raise ValueError("line is not empty for URL `%s', title `%s': line `%s'" % (URL, title, line))
- else: # We've got 3 lines - add new entry to the mapping
+ else: # We've got 3 lines - add new entry to the mapping
if titles_dict.has_key(URL):
if title != titles_dict[URL]:
raise ValueError("titles are not identical for URL `%s': `%s' != `%s'" % (URL, title, titles_dict[URL]))
"""
__author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2000-2017 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2000-2023 PhiloSoft Design"
__license__ = "GNU GPL"
import sys
make_linear(root_folder)
linear = root_folder.linear
- del linear[0] # exclude root folder from sorting
+ del linear[0] # exclude root folder from sorting
by = SortBy(sort_by)
linear.sort(by)