--- /dev/null
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+\f
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+\f
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+\f
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+\f
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+\f
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) 19yy <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) 19yy name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
--- /dev/null
+
+
# Release version embedded in the names of the generated archives.
VERSION=3.3.1
# Example scripts that examples_distr packs into a separate archive
# and then removes from the working directory.
EXAMPLE_SHELL=ns-unescape mz-unescape bkmk2db koi2win.db \
	bkmk-koi bkmk-win bkmk-chk bkmk-set bkmk-sort hotexplode.pl \
	check_title.py set-real_title.py bkmk-add.py bkmk-add bkmk-rsync


.PHONY: all
all:
	@echo "Nothing to be done for \`all'"


# Pack the whole bookmarks_db directory into a dated archive in the parent
# directory.  Declared phony so that a stray file named "distr" cannot
# make the target look up to date.
.PHONY: distr
distr: clean examples_distr
	cd .. && tar cf - bookmarks_db | gzip -9 > bookmarks_db-$(VERSION)-`date +'%Y%m%d'`.tar.gz


# Pack the example scripts into their own dated archive in the parent
# directory, then delete them from the working directory.
.PHONY: examples_distr
examples_distr:
	tar cf - $(EXAMPLE_SHELL) | gzip -9 > ../bookmarks_sh-$(VERSION)-`date +'%Y%m%d'`.tar.gz
	rm $(EXAMPLE_SHELL)


# Remove compiled Python byte-code files below the current directory.
.PHONY: clean
clean:
	find . -name '*.py[co]' -print | xargs rm -f
--- /dev/null
+Bookmarks database and Internet robot.
+
+Author: Oleg Broytmann <phd@phd.pp.ru>
+
+Copyright (C) 1997-2002 PhiloSoft Design.
+
+License: GPL. For detailed terms see COPYING.
+
+Documentation is in directory doc.
--- /dev/null
+"""
+ Forking robot
+
+ Written by BroytMann, Mar 2000 - Jun 2002. Copyright (C) 2000-2002 PhiloSoft Design
+"""
+
+
+try:
+ import cPickle
+ pickle = cPickle
+except ImportError:
+ import pickle
+
+import sys, os
+from subproc import Subprocess, RecordFile
+
+
# Handle of the helper subprocess and the record-oriented pipe wrapped
# around it.  Both are None while no subprocess is active.
check_subp = None
subp_pipe = None


def stop_subp(log):
    """Drop the current helper subprocess, if there is one.

    log -- callable taking one message string, or a false value to stay
           silent.

    The module-level handles are reset to None instead of being deleted.
    Deleting them would unbind the global names, so the next truth test of
    check_subp (here or in a caller) would raise NameError.  Rebinding to
    None releases the references just the same.
    """
    global check_subp, subp_pipe
    if check_subp:
        if log:
            log(" restarting hanging subprocess")
        check_subp = None
        subp_pipe = None
+
def restart_subp(log):
    """Stop the current helper subprocess (if any) and start a new one.

    log -- passed through to stop_subp(); a callable taking one message
           string, or a false value.

    The helper script is looked up as Robots/bkmk_rforking_sub.py next to
    the running program (sys.argv[0]).  os.path.join is used so that an
    empty directory part gives a relative path rather than the absolute
    path "/Robots/...".
    """
    global check_subp, subp_pipe
    stop_subp(log)

    helper_script = os.path.join(os.path.dirname(sys.argv[0]),
                                 "Robots", "bkmk_rforking_sub.py")
    check_subp = Subprocess(helper_script)
    subp_pipe = RecordFile(check_subp)
+
+
+from bkmk_objects import Robot
+
class robot_forking(Robot):
    """Robot that hands every URL check over to a helper subprocess.

    The bookmark is pickled, written to the subprocess as one record, and
    the checked copy is read back; selected result attributes are then
    copied onto the caller's bookmark object.
    """

    def check_url(self, bookmark, url_type, url_rest):
        """Check one bookmark through the helper subprocess.

        bookmark -- object to check; it is updated in place.
        url_type, url_rest -- not used here; the subprocess works from the
                              pickled bookmark itself.

        Returns 1 when the bookmark was processed (including the timeout
        case, which is recorded in bookmark.error) and 0 when interrupted
        by KeyboardInterrupt.
        """
        if not check_subp:
            restart_subp(self.log)  # Not restart, just start afresh

        try:
            save_parent = bookmark.parent
            # The parent link is cleared while the bookmark is pickled
            # (presumably to keep the rest of the tree out of the pickle --
            # TODO confirm) and is put back no matter how the exchange ends.
            bookmark.parent = None
            try:
                bookmark.tempfname = self.tempfname
                subp_pipe.write_record(pickle.dumps(bookmark))

                if check_subp.waitForPendingChar(900):  # wait 15 minutes
                    # The record comes from our own helper process, so it
                    # is unpickled without further validation.
                    new_b = pickle.loads(subp_pipe.read_record())
                    for attr in ("error", "no_error",
                                 "moved", "size", "md5", "real_title",
                                 "last_tested", "last_modified", "test_time"):
                        if hasattr(new_b, attr):
                            setattr(bookmark, attr, getattr(new_b, attr))
                else:
                    bookmark.error = "Subprocess connection timed out"
                    restart_subp(self.log)
            finally:
                # Without this an interrupt would leave the bookmark
                # detached from its parent.
                bookmark.parent = save_parent

        except KeyboardInterrupt:
            return 0

        # Tested
        return 1

    def stop(self):
        """Shut the helper subprocess down without logging a restart."""
        stop_subp(None)  # Stop subprocess; do not log restarting
--- /dev/null
+#! /usr/local/bin/python -O
+"""
+ Check URL - subprocess
+
+ Written by BroytMann, Mar 1999 - Aug 2002. Copyright (C) 1999-2002 PhiloSoft Design
+"""
+
+
+import sys, os, urllib
+
+lib_dir = os.path.normpath(os.path.dirname(sys.argv[0]) + os.sep + os.pardir)
+sys.path.append(lib_dir) # for bkmk_objects.py
+
+try:
+ import cPickle
+ pickle = cPickle
+except ImportError:
+ import pickle
+
+from subproc import RecordFile
+import bkmk_rsimple
+
+
def run():
    """Serve bookmark checks over stdin/stdout until the input ends.

    Each request is one record on stdin holding a pickled bookmark.  The
    bookmark is checked with robot_simple and written back, pickled, as
    one record on stdout.  Progress is logged to "check2.log".

    The loop has no exit condition of its own; it ends when reading or
    processing a record raises.  The log is closed on the way out, which
    the original unreachable close() after the loop never did.
    """
    bkmk_in = RecordFile(sys.stdin)
    bkmk_out = RecordFile(sys.stdout)

    from m_lib.flog import openlog
    log = openlog("check2.log")
    from bkmk_rsimple import robot_simple
    robot = robot_simple(None, log)

    try:
        while 1:
            # Records are produced by the parent robot process and are
            # unpickled without further validation.
            bookmark = pickle.loads(bkmk_in.read_record())
            log(bookmark.href)
            url_type, url_rest = urllib.splittype(bookmark.href)
            robot.check_url(bookmark, url_type, url_rest)
            bkmk_out.write_record(pickle.dumps(bookmark))
            log.outfile.flush()
    finally:
        log.close()


if __name__ == '__main__':
    run()
--- /dev/null
+"""
+ Simple, straightforward robot; guaranteed to have problems with timeouts :)
+
+ Written by BroytMann, Mar 2000 - Aug 2002. Copyright (C) 2000-2002 PhiloSoft Design
+"""
+
+
class RedirectException(Exception):
    """Signals that a checked URL points somewhere else.

    The message has the form "(<kind>) to <newurl>", where <kind> is
    looked up in reloc_dict.
    """

    # Short labels for the known kinds of redirect: the two HTTP status
    # codes handled by the opener and the "html" (META refresh) case.
    reloc_dict = {
        301: "perm.",
        302: "temp.",
        "html": "html",
    }

    def __init__(self, errcode, newurl):
        """Build the message from a redirect kind and its target.

        errcode -- key of reloc_dict; any other value is shown as is
                   rather than failing with KeyError and hiding the
                   redirect.
        newurl -- target of the redirect, inserted verbatim.
        """
        kind = self.reloc_dict.get(errcode, errcode)
        Exception.__init__(self, "(%s) to %s" % (kind, newurl))
+
+
+import string, os
+import time, urllib
+from m_lib.www.util import parse_time
+from m_lib.md5wrapper import md5wrapper
+
+
class MyURLopener(urllib.URLopener):
    """URL opener that reports redirects and authentication demands.

    Redirects are raised as RedirectException; a 401 answer is raised as
    IOError.
    """

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily): raise RedirectException.

        The target is taken from the 'location' header, else from 'uri',
        else it is reported as "Nowhere".
        """
        for header_name in ('location', 'uri'):
            if headers.has_key(header_name):
                target = headers[header_name]
                break
        else:
            target = "Nowhere"
        raise RedirectException(errcode, target)

    # Error 301 -- also relocated (permanently)
    http_error_301 = http_error_302

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 401 -- authentication required: raise IOError."""
        raise IOError('http error', errcode, "Authentication required ", headers)


# Make the module-level urllib functions use this opener.
urllib._urlopener = MyURLopener()

# Some sites allow only Mozilla-compatible browsers; way to stop robots?
server_version = "Mozilla/3.0 (compatible; Python-urllib/%s)" % urllib.__version__
urllib._urlopener.addheaders[0] = ('User-agent', server_version)
+
+
def get_error(msg):
    """Turn an error value into a one-line string.

    msg -- a string, which is returned unchanged; an exception, whose
           args are formatted; or any other iterable of items.

    For non-strings every item is converted with str(), embedded newlines
    are replaced with the two characters backslash-n, the item is wrapped
    in single quotes, and the items are joined with spaces inside
    parentheses.
    """
    if isinstance(msg, str):
        return msg

    # Exceptions are formatted from their argument tuple; iterating the
    # exception object itself is not supported by every Python version.
    if isinstance(msg, Exception):
        items = msg.args
    else:
        items = msg

    quoted = []
    for item in items:
        quoted.append("'%s'" % "\\n".join(str(item).split("\n")))
    return "(%s)" % " ".join(quoted)
+
+
# Keep a handle on the stock wrapper class before it is replaced below.
urllib_ftpwrapper = urllib.ftpwrapper
# Key of the most recently created FTP wrapper; None when there is none.
ftpcache_key = None


class myftpwrapper(urllib_ftpwrapper):
    """FTP wrapper that records the key of the connection it was made for.

    get_welcome() uses the recorded key to find the wrapper in the
    opener's ftpcache.
    """

    def __init__(self, user, passwd, host, port, dirs):
        """Initialise the stock wrapper, then remember its key."""
        global ftpcache_key
        urllib_ftpwrapper.__init__(self, user, passwd, host, port, dirs)
        ftpcache_key = (user, host, port, '/'.join(dirs))


# From now on urllib creates myftpwrapper instances.
urllib.ftpwrapper = myftpwrapper
+
def get_welcome():
    """Return the welcome message of the last FTP connection.

    The wrapper is looked up in the opener's ftpcache under the recorded
    ftpcache_key, which is then reset to None.
    """
    global ftpcache_key
    wrapper = urllib._urlopener.ftpcache[ftpcache_key]
    greeting = wrapper.ftp.welcome
    # I am assuming there are no duplicate ftp URLs in db.
    # If there are - ftpcache_key used in the lookup above is invalid.
    ftpcache_key = None
    return greeting
+
+
+from bkmk_objects import Robot
+from parse_html import parse_html
+
+class robot_simple(Robot):
+ def check_url(self, bookmark, url_type, url_rest):
+ if not self.tempfname:
+ self.tempfname = bookmark.tempfname
+
+ try:
+ try:
+ self.start = int(time.time())
+ url_host, url_path = urllib.splithost(url_rest)
+ url_path, url_tag = urllib.splittag(url_path)
+
+ fname, headers = urllib.urlretrieve("%s://%s%s" % (url_type, url_host, url_path), self.tempfname)
+
+ size = 0
+ last_modified = None
+
+ if headers:
+ try:
+ size = headers["Content-Length"]
+ except KeyError:
+ pass
+
+ try:
+ last_modified = headers["Last-Modified"]
+ except KeyError:
+ pass
+
+ if last_modified:
+ last_modified = parse_time(last_modified)
+
+ if last_modified:
+ last_modified = str(int(last_modified))
+ else:
+ last_modified = bookmark.last_visit
+
+ bookmark.size = size
+ bookmark.last_modified = last_modified
+
+ md5 = md5wrapper()
+ if urllib._urlopener.type == "ftp": # Pass welcome message through MD5
+ md5.update(get_welcome())
+
+ md5.md5file(self.tempfname)
+ bookmark.md5 = str(md5)
+
+ if headers:
+ try:
+ content_type = headers["Content-Type"]
+ try:
+ content_type, charset = content_type.split(';')
+ content_type = content_type.strip()
+ charset = charset.split('=')[1].strip()
+ if self.log: self.log(" HTTP charset : %s" % charset)
+ except (ValueError, IndexError):
+ charset = None
+ if self.log: self.log(" no charset in Content-Type header")
+ if content_type == "text/html":
+ parser = parse_html(fname, charset, self.log)
+ title = parser.title.replace('\r', '').replace('\n', ' ').strip()
+ bookmark.real_title = parser.unescape(title)
+ if self.log: self.log(" final title : %s" % bookmark.real_title)
+ if parser.refresh:
+ refresh = parser.refresh
+ try:
+ timeout = int(refresh.split(';')[0])
+ except (IndexError, ValueError):
+ timeout = "ERROR"
+ try:
+ url = refresh.split('=', 1)[1]
+ except IndexError:
+ url = "self"
+ raise RedirectException("html", "%s (%d sec)" % (url, timeout))
+ except KeyError:
+ pass
+
+ except IOError, msg:
+ if (msg[0] == "http error") and (msg[1] == -1):
+ bookmark.no_error = "The server did not return any header - it is not an error, actually"
+ else:
+ bookmark.error = get_error(msg)
+
+ except EOFError:
+ bookmark.error = "Unexpected EOF (FTP server closed connection)"
+
+ except RedirectException, msg:
+ bookmark.moved = str(msg)
+
+ except KeyboardInterrupt:
+ return 0
+
+ finally:
+ self.finish_check_url(bookmark)
+
+ # Tested
+ return 1
+
+
+ def finish_check_url(self, bookmark):
+ # Calculate these attributes even in case of an error
+ if os.path.exists(self.tempfname):
+ size = str(os.stat(self.tempfname).st_size)
+ if size[-1] == 'L':
+ size = size[:-1]
+ bookmark.size = size
+
+ start = self.start
+ bookmark.last_tested = str(start)
+
+ now = int(time.time())
+ bookmark.test_time = str(now - start)
--- /dev/null
+#! /usr/local/bin/python -O
+"""
+ Simple robot with timeoutsocket
+
+ Written by BroytMann, Sep 2000. Copyright (C) 2000 PhiloSoft Design
+"""
+
+
+import socket, timeoutsocket
+timeoutsocket.setDefaultSocketTimeout(900)
+
+from bkmk_rsimple import robot_simple, get_error
+
+class robot_simple_tos(robot_simple):
+ def check_url(self, bookmark, url_type, url_rest):
+ try:
+ return robot_simple.check_url(self, bookmark, url_type, url_rest)
+
+ except (socket.error, timeoutsocket.Timeout), msg:
+ bookmark.error = get_error(msg)
+
+ self.finish_check_url(bookmark)
+
+ return 1
--- /dev/null
+#! /usr/local/bin/python -O
+"""
+ HTML Parser
+
+ Written by BroytMann, Jun 2002 - Aug 2002. Copyright (C) 1997-2002 PhiloSoft Design
+"""
+
+
+import sys
+current_charset = sys.getdefaultencoding()
+DEFAULT_CHARSET = "windows-1251"
+
+
+from HTMLParser import HTMLParseError
+from m_lib.www.html import HTMLParser as _HTMLParser
+
+
class HTMLHeadDone(Exception):
    """Raised when the end of the document head is reached."""
+
+
class HTMLParser(_HTMLParser):
    """Head-only HTML parser that collects title, charset and refresh.

    Attributes:
      charset      -- charset given by the caller or found in a META tag.
      meta_charset -- 1 if charset was taken from a META tag, else 0.
      title        -- text of the first TITLE element ('' if none).
      refresh      -- content of a META refresh tag ('' if none).
    """

    def __init__(self, charset=None):
        """charset -- charset already known to the caller, if any."""
        _HTMLParser.__init__(self)
        self.charset = charset
        self.meta_charset = 0
        self.title = ''
        self.refresh = ''

    def end_head(self):
        """Stop parsing at the end of the head by raising HTMLHeadDone."""
        raise HTMLHeadDone()

    def do_meta(self, attrs):
        """Pick the charset and refresh target out of a META tag.

        attrs -- sequence of (name, value) pairs.
        """
        http_equiv = ""
        content = ""

        for attrname, value in attrs:
            if not value:
                # Attribute without a value: nothing to strip or record.
                continue
            value = value.strip()
            if attrname == 'http-equiv':
                http_equiv = value.lower()
            elif attrname == 'content':
                content = value

        if (not self.charset) and (http_equiv == "content-type"):
            try:
                # extract charset from "text/html; foo; charset=UTF-8; bar;"
                charset = content.lower().split('charset=')[1].split(';')[0].strip()
            except IndexError:
                charset = ''
            # Whitespace around the name would break the codec lookup, and
            # an empty name is not a charset at all.
            if charset:
                self.charset = charset
                self.meta_charset = 1

        if http_equiv == "refresh":
            self.refresh = content

    def start_title(self, attrs):
        """Start collecting title text (presumably the base class appends
        character data to self.accumulator -- TODO confirm)."""
        self.accumulator = ''

    def end_title(self):
        """Store the collected text as the title."""
        if not self.title:  # use only the first title
            self.title = self.accumulator
+
+
def parse_html(filename, charset=None, log=None):
    """Parse the head of an HTML file and return the parser object.

    filename -- path of the HTML file.
    charset  -- charset already known for the document, or None.
    log      -- optional callable taking one message string.

    Feeding stops at the first HTMLParseError or at the end of the head
    (HTMLHeadDone).  If no charset is known and the title contains
    characters outside the 32..127 range, DEFAULT_CHARSET is assumed.  If
    the resulting charset differs from the current default encoding, the
    title is converted to that encoding; an unknown charset is logged and
    the title is left as it is.

    Returns the parser, whose charset, meta_charset, title and refresh
    attributes hold the results.
    """
    parser = HTMLParser(charset)

    infile = open(filename, 'r')
    try:
        for line in infile:
            try:
                parser.feed(line)
            except (HTMLParseError, HTMLHeadDone):
                break
    finally:
        # Close the file even if feed() fails with an unexpected error.
        infile.close()

    try:
        parser.close()
    except (HTMLParseError, HTMLHeadDone):
        pass

    if not parser.charset:
        is_ascii = 1
        for c in parser.title:
            if not (32 <= ord(c) <= 127):  # non-ASCII character
                is_ascii = 0
                break
        if not is_ascii:
            parser.charset = DEFAULT_CHARSET

    if parser.charset and (parser.charset != current_charset):
        try:
            if parser.meta_charset:
                if log: log(" META charset : %s" % parser.charset)
            else:
                if log: log(" charset : %s" % parser.charset)
            if log: log(" title : %s" % parser.title)
            parser.title = unicode(parser.title, parser.charset, "replace").encode(current_charset, "replace")
            if log: log(" current charset: %s" % current_charset)
            if log: log(" converted title: %s" % parser.title)
        except LookupError:
            if log: log(" unknown charset: `%s' or `%s'" % (parser.charset, current_charset))

    return parser
+
+
# Command-line use: parse the file named by the first argument and print
# the detected charset, title and refresh target, one per line.
if __name__ == '__main__':
    import sys
    result = parse_html(sys.argv[1])
    for field in (result.charset, result.title, result.refresh):
        print(field)
--- /dev/null
+"""
+ Bookmarks storage module - FLAD (Flat ASCII Database)
+ special version for compatibility with old (version 1) bkmk2db
+
+ Written by BroytMann, Feb 2000 - Mar 2000. Copyright (C) 2000 PhiloSoft Design
+"""
+
+
+import string
+from m_lib.flad import fladm
+from bkmk_objects import Folder, Bookmark, Ruler, Walker
+
+
class storage_flad(Walker):
    """Store bookmarks in, and load them from, a FLAD text file.

    The records go to self.filename.  The root folder's header, name and
    comment are kept in a separate file called "header".  Every record
    carries a Level field: a folder is written with the level passed in,
    and bookmarks and rulers with that level plus one.
    """

    filename = "bookmarks_db.flad"

    def __init__(self):
        self.first_object = 1

    def root_folder(self, f):
        """Write the root folder's header, name and comment to "header"."""
        header = f.header.replace(".", ".\n")
        header = header.replace("<", "\n<", 3)[1:]
        header_file = open("header", 'w')
        try:
            header_file.write(header + "\n")
            header_file.write('<H1>%s</H1>\n\n' % f.name)
            if f.comment:
                header_file.write('<DD>%s\n' % f.comment)
        finally:
            # Do not leak the handle if a write fails.
            header_file.close()

    def start_folder(self, f, level):
        """Write the record of folder f."""
        self.outfile.write("""
Level: %d
Folder: %s
AddDate: %s
Comment: %s
""" % (level, f.name, f.add_date, f.comment))

    def bookmark(self, b, level):
        """Write the record of bookmark b, one level below its folder."""
        self.outfile.write("""
Level: %d
Title: %s
URL: %s
AddDate: %s
LastVisit: %s
LastModified: %s
Comment: %s
""" % (level+1, b.name, b.href, b.add_date, b.last_visit, b.last_modified, b.comment))

    def ruler(self, r, level):
        """Write the record of a ruler, one level below its folder."""
        self.outfile.write("\nLevel: %s\nRuler: YES\n" % (level+1))

    def store(self, root_folder):
        """Write the whole tree under root_folder to self.filename."""
        self.outfile = open(self.filename, 'w')
        try:
            root_folder.walk_depth(self)
        finally:
            # Close the output even if the walk fails half way.
            self.outfile.close()

    def unindent(self, old_level, new_level):
        """Pop one folder per level from old_level down to new_level.

        self.current_folder becomes the new top of the folder stack, or
        None if the stack is empty.
        """
        while old_level > new_level:
            old_level = old_level - 1
            del self.folder_stack[-1]

        if self.folder_stack:
            self.current_folder = self.folder_stack[-1]
        else:
            self.current_folder = None

    def load(self):
        """Read self.filename and "header"; return the root Folder.

        Raises ValueError when the Level values of consecutive records are
        inconsistent, and KeyError when a record is neither a bookmark, a
        folder nor a ruler.
        """
        bookmarks_db = fladm.load_from_file(self.filename, fladm.check_record, ["Level"])

        root_folder = Folder()
        self.folder_stack = [root_folder]
        self.current_folder = root_folder

        header_file = open("header", 'r')
        try:
            header = header_file.read()
        finally:
            header_file.close()

        # Undo the layout written by root_folder(): the first five lines
        # are the header, the sixth is <H1>name</H1>, and the lines from
        # the eighth on are the comment, prefixed with <DD>.
        header = header.split("\n")
        root_folder.header = ''.join(header[:5])
        root_folder.name = header[5][4:-5]
        root_folder.comment = ''.join(header[7:])[4:]

        save_level = 0
        got_folder = 1  # Start as if we already have one folder

        for record in bookmarks_db:
            level = int(record["Level"])

            if level == save_level:
                pass
            elif level == save_level + 1:
                # Going one level deeper is only valid right after a folder.
                if not got_folder:
                    raise ValueError("indent without folder")
            elif level <= save_level - 1:
                self.unindent(save_level, level)
            else:
                raise ValueError("new level (%d) too big; must be %d - %d" % (level, save_level-1, save_level+1))

            save_level = level
            got_folder = record.has_key("Folder")  # Test here to save got_folder for next loop

            if record.has_key("URL"):
                bookmark = Bookmark(record["URL"], record["AddDate"], record["LastVisit"], record["LastModified"], record["Comment"])
                bookmark.name = record["Title"]
                self.current_folder.append(bookmark)

            elif record.has_key("Folder"):
                folder = Folder(record["AddDate"], record["Comment"])
                folder.name = record["Folder"]
                self.current_folder.append(folder)
                self.folder_stack.append(folder)
                self.current_folder = folder

            elif record.has_key("Ruler"):
                self.current_folder.append(Ruler())

            else:
                raise KeyError("neither \"URL\" nor \"Folder\" nor \"Ruler\" in record " + str(record))

        if save_level >= 0:
            self.unindent(save_level, 0)
        else:
            raise ValueError("new level (%d) too little - must be >= 0" % save_level)

        return root_folder
--- /dev/null
+"""
+ Bookmarks storage manager - pickle; certainly the most simple and elegant :)
+
+ Written by BroytMann, Feb 2000 - Mar 2000. Copyright (C) 2000 PhiloSoft Design
+"""
+
+
+try:
+ import cPickle
+ pickle = cPickle
+
+except ImportError:
+ import pickle
+
+
+class storage_pickle:
+ filename = "bookmarks_db.pickle"
+
+
+ def store(self, root_folder):
+ outfile = open(self.filename, 'wb')
+ pickle.dump(root_folder, outfile, 1)
+ outfile.close()
+
+
+ def load(self):
+ infile = open(self.filename, 'rb')
+ root_folder = pickle.load(infile)
+ infile.close()
+
+ return root_folder
--- /dev/null
+"""
+ Dump bookmarks db to a more readable FLAD after check_urls
+
+ Written by BroytMann, Apr 2000 - Jun 2002. Copyright (C) 2000-2002 PhiloSoft Design
+"""
+
+
+import time
+from bkmk_objects import Writer
+
+
+def strftime(s):
+ try:
+ return time.strftime("%a %d %b %Y %T", time.localtime(int(s)))
+ except ValueError: # s is already formatted
+ return s
+
+
+class writer_flad(Writer):
+ filename = "bookmarks_db.flad"
+
+ def __init__(self, outfile, prune=None):
+ Writer.__init__(self, outfile, prune)
+ self.first_object = 1
+
+
+ def start_folder(self, f, level):
+ self.outfile.write("""
+Level: %d
+Folder: %s
+AddDate: %s
+Comment: %s
+""" % (level, f.name, strftime(f.add_date), f.comment))
+
+
+ def bookmark(self, b, level):
+ self.outfile.write("""
+Level: %d
+Title: %s
+URL: %s
+AddDate: %s
+LastVisit: %s
+LastModified: %s
+Comment: %s""" % (level+1, b.name, b.href, strftime(b.add_date), strftime(b.last_visit), strftime(b.last_modified), b.comment))
+
+ for attr_name, attr_out in (("error", "Error"), ("no_error", "NoError"),
+ ("moved", "Moved"), ("size", "Size"), ("md5", "Md5"),
+ ("real_title", "RealTitle"), ("test_time", "TestTime")):
+ if hasattr(b, attr_name):
+ self.outfile.write("\n%s: %s" % (attr_out, getattr(b, attr_name)))
+
+ if hasattr(b, "last_tested"):
+ self.outfile.write("\n%s: %s" % ("LastTested", strftime(getattr(b, "last_tested"))))
+
+ self.outfile.write("\n")
+
+ def ruler(self, r, level):
+ self.outfile.write("\nLevel: %s\nRuler: YES\n" % (level+1))
--- /dev/null
+"""
+ Dump bookmarks db to a more readable FLAD after check_urls
+
+ Written by BroytMann, Oct 2000 - Jun 2002. Copyright (C) 2000-2002 PhiloSoft Design
+"""
+
+
+import time
+from bkmk_objects import Writer
+
+
+def strftime(s):
+ return time.strftime("%a %d %b %Y %T", time.localtime(int(s)))
+
+
+class writer_flad_err(Writer):
+ filename = "bookmarks_db.errors"
+
+ def bookmark(self, b, level):
+ if not hasattr(b, "error"):
+ return
+
+ self.outfile.write("""
+Level: %d
+Title: %s
+URL: %s
+AddDate: %s
+LastVisit: %s
+LastModified: %s
+Comment: %s""" % (1, b.name, b.href, strftime(b.add_date), strftime(b.last_visit), strftime(b.last_modified), b.comment))
+
+ for attr_name, attr_out in (("error", "Error"), ("no_error", "NoError"),
+ ("moved", "Moved"), ("size", "Size"), ("md5", "Md5"),
+ ("real_title", "RealTitle"), ("test_time", "TestTime")):
+ if hasattr(b, attr_name):
+ self.outfile.write("\n%s: %s" % (attr_out, getattr(b, attr_name)))
+
+ if hasattr(b, "last_tested"):
+ self.outfile.write("\n%s: %s" % ("LastTested", strftime(getattr(b, "last_tested"))))
+
+ self.outfile.write("\n")
--- /dev/null
+#! /usr/local/bin/python -O
+"""
+ Convert a bkmk database back to bookmarks.html
+
+ Written by BroytMann, Mar 2000. Copyright (C) 2000 PhiloSoft Design
+"""
+
+
+import string
+
+
+def dump_comment(comment):
+ return string.replace(comment, "<BR>", "<BR>\n")
+
+
+# One indentation step (four spaces); writer_html repeats it once per nesting level
+ind_s = " "*4
+
+from bkmk_objects import Writer
+
+class writer_html(Writer):
+ filename = "bookmarks.html"
+
+ def _folder(self, f, level):
+ if f.comment: self.outfile.write('<DD>%s\n' % dump_comment(f.comment))
+ self.outfile.write(ind_s*level + "<DL><p>\n")
+
+ def root_folder(self, f):
+ header = string.replace(f.header, ".", ".\n")
+ header = string.replace(header, "<T", "\n<T", 3)
+ self.outfile.write("<!DOCTYPE NETSCAPE-Bookmark-file-1>\n")
+ self.outfile.write(header + "\n")
+ self.outfile.write('<H1>%s</H1>\n\n' % f.name)
+ self._folder(f, 0)
+
+ def start_folder(self, f, level):
+ self.outfile.write(ind_s*level + '<DT><H3 ADD_DATE="%s">%s</H3>\n' % (f.add_date, f.name))
+ self._folder(f, level)
+
+ def end_folder(self, f, level):
+ self.outfile.write(ind_s*level + "</DL><p>\n")
+
+ def bookmark(self, b, level):
+ self.outfile.write(ind_s*(level+1) + '<DT><A HREF="%s" ADD_DATE="%s" LAST_VISIT="%s" LAST_MODIFIED="%s">%s</A>\n' % (b.href, b.add_date, b.last_visit, b.last_modified, b.name))
+ if b.comment: self.outfile.write('<DD>%s\n' % dump_comment(b.comment))
+
+ def ruler(self, r, level):
+ self.outfile.write(ind_s*(level+1) + "<HR>\n")
--- /dev/null
+#! /usr/local/bin/python -O
+"""
+ Dump a bkmk database to a text file
+
+ Written by BroytMann, Mar 2000. Copyright (C) 2000 PhiloSoft Design
+"""
+
+
+# One indentation step (four spaces); writer_txt repeats it once per nesting level
+ind_s = " "*4
+
+
+from bkmk_objects import Writer
+
+class writer_txt(Writer):
+ filename = "dump.txt"
+
+ def root_folder(self, f):
+ self.outfile.write("Folder: %s\n" % f.name)
+
+ def start_folder(self, f, level):
+ self.outfile.write(ind_s*level + "Folder: %s\n" % f.name)
+
+ def end_folder(self, f, level):
+ self.outfile.write(ind_s*level + "Folder end: %s\n" % f.name)
+
+ def bookmark(self, b, level):
+ self.outfile.write(ind_s*(level+1) + "Bookmark: %s\n" % b.name)
+
+ def ruler(self, r, level):
+ self.outfile.write(ind_s*(level+1) + "-----\n")
--- /dev/null
+#! /bin/sh
+
+PATH=$HOME/lib/bookmarks_db:$PATH
+
+bkmk2db || exit 1
+
+bkmk-add.py "$@" && \
+db2bkmk.py && \
+exec touch bookmarks_db.pickle # to make it more recent
--- /dev/null
+#! /usr/local/bin/python -O
+"""
+ Add a bookmark to the database.
+
+ Written by BroytMann, Aug 2002. Copyright (C) 2002 PhiloSoft Design
+"""
+
+
+import sys, os, time, urllib
+from bkmk_objects import Bookmark
+from Robots.bkmk_rsimple import robot_simple
+
+import tempfile
+# Scratch file name (relative to the current directory) handed to the robot
+# in run(), which removes the file when it is done
+tempfname = "bkmk-add" + tempfile.gettempprefix() + "tmp"
+
+
+def run():
+ from getopt import getopt
+ optlist, args = getopt(sys.argv[1:], "t:")
+
+ report_stats = 1
+ title = ''
+
+ for _opt, _arg in optlist:
+ if _opt == '-s':
+ report_stats = 0
+ elif _opt == '-t':
+ title = _arg
+ try:
+ del _opt, _arg
+ except NameError:
+ pass
+
+ if len(args) <> 1:
+ sys.stderr.write("bkmk-add: too many or too few arguments\n")
+ sys.stderr.write("Usage: bkmk-add [-s] [-t title] url\n")
+ sys.exit(1)
+
+ from storage import storage, import_storage
+ storage = storage()
+
+ if report_stats:
+ sys.stdout.write("Loading %s: " % storage.filename)
+ sys.stdout.flush()
+
+ root_folder = storage.load()
+
+ if report_stats:
+ print "Ok"
+
+ href = args[0]
+ now = int(time.time())
+ bookmark = Bookmark(href, str(now), '0', '0')
+ bookmark.name = ''
+
+ robot = robot_simple(tempfname, None)
+ url_type, url_rest = urllib.splittype(href)
+
+ if robot.check_url(bookmark, url_type, url_rest): # get real title and last modified date
+ if title: # forced title
+ bookmark.name = title
+ elif hasattr(bookmark, "real_title"):
+ bookmark.name = bookmark.real_title
+ if report_stats:
+ sys.stdout.write("Adding %s with title '%s'\n" % (href, bookmark.name))
+ root_folder.append(bookmark)
+
+ if report_stats:
+ sys.stdout.write("Storing %s: " % storage.filename)
+ sys.stdout.flush()
+
+ storage.store(root_folder)
+
+ if report_stats:
+ print "Ok"
+
+
+ try:
+ os.unlink(tempfname)
+ except os.error:
+ pass
+
+
+if __name__ == '__main__':
+ run()
--- /dev/null
+#! /bin/sh
+
+PATH=$HOME/lib/bookmarks_db:$PATH
+
+bkmk2db || exit 1
+
+check_urls.py || exit 1
+check_urls.py -e || exit 1
+
+# Report results
+BKMK_WRITER=flad db2bkmk.py || exit 1
+check_dups.py -s -l bookmarks.err || exit 1
+
+# Write results to output bookmarks files
+db2bkmk.py || exit 1
+diff $HOME/.netscape/bookmarks.html bookmarks.html > diff.out
--- /dev/null
+#! /bin/sh
+# Process bookmarks.html and produce private and public variants of it:
+# as is, and converted with the koi2win.db table passed to db2bkmk.py -t.
+
+PATH=$HOME/lib/bookmarks_db:$PATH
+
+# Locate the input file.
+# NOTE(review): BKMK is assigned here but not referenced again in this script.
+if [ -f ./bookmarks.html ]; then
+ BKMK=
+elif [ -f $HOME/.netscape/bookmarks.html ]; then
+ BKMK=$HOME/.netscape/bookmarks.html
+else
+ echo Cannot find bookmarks.html, aborted
+ exit 1
+fi
+
+
+ns-unescape || exit 1
+bkmk2db || exit 1
+#check_db.py -l bookmarks.err || exit 1
+
+#cp $HOME/Internet/WWW/header header
+# -p names the folder left out of the public variants, -o the output file
+db2bkmk.py || exit 1
+db2bkmk.py -p "Private links" -o public-koi.tmp || exit 1
+
+db2bkmk.py -t $HOME/lib/bookmarks_db/koi2win.db -o private-win.tmp || exit 1
+db2bkmk.py -t $HOME/lib/bookmarks_db/koi2win.db -p "Private links" -o public-win.tmp || exit 1
+
+
+# replace FILE OLD NEW
+# Substitute NEW for the first match of OLD on every line of FILE, in place.
+# "^" is the sed delimiter because the arguments are URLs containing "/".
+# FILE is replaced only when sed succeeded, so a failure cannot truncate it.
+replace() {
+ sed "s^$2^$3^" "$1" > _tmp.$$ && mv _tmp.$$ "$1" || { rm -f _tmp.$$; return 1; }
+}
+
+# Point the "win" variants at the win.* hosts instead of the koi.* ones
+replace private-win.tmp http://koi.aha.ru/ http://win.aha.ru/
+replace public-win.tmp http://koi.aha.ru/ http://win.aha.ru/
+replace private-win.tmp http://koi.postman.ru/ http://win.postman.ru/
+replace public-win.tmp http://koi.postman.ru/ http://win.postman.ru/
+
+towin < private-win.tmp > bookmark.ht0
+towin < public-win.tmp > bookmark.ht1
+
+# Record what changed relative to the live file; diff's exit status is ignored
+diff $HOME/.netscape/bookmarks.html bookmarks.html > diff.out
+
+# With -c install the regenerated file as the live bookmarks file
+if [ "$1" = "-c" ]; then
+ echo -n "Copying to home: "
+ cp -p bookmarks.html $HOME/.netscape/bookmarks.html && echo "Ok"
+fi
--- /dev/null
+#! /bin/sh
+
+remote_server=sun
+
+rsync -avz "$HOME/.netscape/bookmarks.html" "$remote_server:.netscape" # && \
+
+#rsync -avz "$HOME/Internet/WWW/phd.pp.ru/Bookmarks/bkmk-koi.html" \
+# "$HOME/Internet/WWW/phd.pp.ru/Bookmarks/bkmk-win.html" \
+# "$remote_server:Internet/WWW/phd.pp.ru/Bookmarks"
--- /dev/null
+#! /bin/sh
+
+umask 077
+
+cd $HOME/work && \
+bkmk-koi -c || exit 1
+
+mv public-koi.tmp bkmk-koi.html && \
+mv public-win.tmp bkmk-win.html && \
+chmod a+r bkmk-koi.html bkmk-win.html bookmark.ht1 || exit 1
+
+echo "Splitting bookmarks..."
+cd $HOME/Internet/WWW/phd.pp.ru/Bookmarks && \
+rm -rf split.koi split.win || exit 1
+
+$HOME/lib/bookmarks_db/hotexplode.pl -o split.koi $HOME/work/bookmarks.html && \
+cp -p ../../nopasswd-index.html split.koi/privatelinks/index.html && \
+$HOME/lib/bookmarks_db/hotexplode.pl -o split.win $HOME/work/private-win.tmp && \
+cp -p ../../nopasswd-index.html split.win/privatelinks/index.html || exit 1
+
+cp -p $HOME/work/bkmk-koi.html $HOME/work/bkmk-win.html . && \
+chmod -R a+rX . || exit 1
+
+#/usr/local/htdig/bkmk.sh
+
+cd $HOME/work && \
+exec rm -f header bookmarks.err bookmarks_db.pickle bookmarks_db.flad private-win.tmp
+
+#zip -9 bkmk bkmk-koi.html bkmk-win.html && \
+#rm -f bkmk-win.html && \
+#cp -p bookmark.ht1 bkmk-win.html || exit 1
--- /dev/null
+#! /bin/sh
+
+PATH=$HOME/lib/bookmarks_db:$PATH
+
+bkmk2db || exit 1
+
+BKMK_WRITER=flad sort_db.py -a && \
+BKMK_WRITER=flad sort_db.py -v && \
+BKMK_WRITER=flad sort_db.py -m && \
+BKMK_WRITER=flad sort_db.py -z && \
+BKMK_WRITER=flad sort_db.py -t || exit 1
--- /dev/null
+#! /bin/sh
+# Counterpart of the koi script for a bookmark.htm input: convert it with
+# fromwin into bookmarks.html, then produce the private and public variants.
+
+PATH=$HOME/lib/bookmarks_db:$PATH
+
+if [ ! -f ./bookmark.htm ]; then
+ echo Cannot find bookmark.htm, aborted
+ exit 1
+fi
+
+
+fromwin < bookmark.htm > bookmarks.html || exit 1
+
+ns-unescape || exit 1
+bkmk2db || exit 1
+#check_db.py -l bookmarks.err || exit 1
+
+#cp $HOME/Internet/WWW/header header
+# -p names the folder left out of the public variants, -o the output file
+db2bkmk.py -o private-win.tmp || exit 1
+db2bkmk.py -p "Private links" -o public-win.tmp || exit 1
+
+# -r is passed together with the koi2win.db table
+# (presumably applies the table in reverse - confirm)
+db2bkmk.py -t $HOME/lib/bookmarks_db/koi2win.db -r || exit 1
+db2bkmk.py -t $HOME/lib/bookmarks_db/koi2win.db -r -p "Private links" -o public-koi.tmp || exit 1
+
+towin < bookmarks.html > bookmark.ht0
+towin < public-win.tmp > bookmark.ht1
+
+
+# replace FILE OLD NEW
+# Substitute NEW for the first match of OLD on every line of FILE, in place.
+# "^" is the sed delimiter because the arguments are URLs containing "/".
+# FILE is replaced only when sed succeeded, so a failure cannot truncate it.
+replace() {
+ sed "s^$2^$3^" "$1" > _tmp.$$ && mv _tmp.$$ "$1" || { rm -f _tmp.$$; return 1; }
+}
+
+# Point the "koi" variants at the koi.* hosts instead of the win.* ones
+replace bookmarks.html http://win.aha.ru/ http://koi.aha.ru/
+replace public-koi.tmp http://win.aha.ru/ http://koi.aha.ru/
+replace bookmarks.html http://win.postman.ru/ http://koi.postman.ru/
+replace public-koi.tmp http://win.postman.ru/ http://koi.postman.ru/
+
+# With -c install the regenerated file as the live bookmarks file
+if [ "$1" = "-c" ]; then
+ echo -n "Copying to home: "
+ cp -p bookmarks.html $HOME/.netscape/bookmarks.html && echo "Ok"
+fi
--- /dev/null
+#! /bin/sh
+# Run bkmk2db.py only when needed: when no database file exists yet, or
+# when ./bookmarks.html is newer than the existing database file.
+
+PATH=$HOME/lib/bookmarks_db:$PATH
+
+# No database at all: convert ./bookmarks.html if present (BKMK is empty, so
+# bkmk2db.py gets no argument), otherwise the file from ~/.netscape
+if [ ! -f ./bookmarks_db.pickle -a ! -f ./bookmarks_db.flad ]; then
+ if [ -f ./bookmarks.html ]; then
+ BKMK=
+ elif [ -f $HOME/.netscape/bookmarks.html ]; then
+ BKMK=$HOME/.netscape/bookmarks.html
+ else
+ echo Cannot find bookmarks.html, aborted
+ exit 1
+ fi
+
+ bkmk2db.py $BKMK || exit 1
+
+# Pickle database exists but ./bookmarks.html is newer
+elif [ -f ./bookmarks_db.pickle -a -f ./bookmarks.html -a ./bookmarks.html -nt ./bookmarks_db.pickle ]; then
+ bkmk2db.py || exit 1
+
+# FLAD database exists but ./bookmarks.html is newer
+elif [ -f ./bookmarks_db.flad -a -f ./bookmarks.html -a ./bookmarks.html -nt ./bookmarks_db.flad ]; then
+ bkmk2db.py || exit 1
+
+fi
#! /usr/local/bin/python -O
"""
- Convert Netscape Navigator's bookmarks.html to FLAD database
+ Convert Netscape Navigator's or Mozilla's bookmarks.html to a database
- Written by BroytMann, Jun 1997 - Mar 1999. Copyright (C) 1997-1999 PhiloSoft Design
+ Written by BroytMann, Jun 1997 - Aug 2002. Copyright (C) 1997-2002 PhiloSoft Design
"""
-import sys, os, stat, string
-from getopt import getopt
-import bkmk_parser
-from formatter import AbstractFormatter, NullWriter
+import sys, os
+from getopt import getopt
+from bkmk_parser import BkmkParser
def run():
- optlist, args = getopt(sys.argv[1:], "gits")
+ optlist, args = getopt(sys.argv[1:], "is")
- show_pbar = 1
- to_text = 0
- to_gf = 0
+ show_pbar = not __debug__
report_stats = 1
for _opt, _arg in optlist:
- if _opt == '-g':
- to_gf = 1
if _opt == '-i':
show_pbar = 0
- if _opt == '-t':
- to_text = 1
if _opt == '-s':
report_stats = 0
try:
if args:
if len(args) > 1:
sys.stderr.write("bkmk2db: too many arguments\n")
+ sys.stderr.write("Usage: bkmk2db [-is] bookmarks.html\n")
sys.exit(1)
filename = args[0]
else:
filename = 'bookmarks.html' # good name both for DOS (bookmark.htm) and UNIX
+
+ if report_stats:
+ from storage import storage_name
+ sys.stdout.write("Converting %s to %s: " % (filename, storage_name))
+ sys.stdout.flush()
+
if show_pbar:
show_pbar = sys.stderr.isatty()
if show_pbar:
try:
- from tty_pbar import ttyProgressBar
+ from m_lib.pbar.tty_pbar import ttyProgressBar
except ImportError:
show_pbar = 0
if show_pbar:
try:
- size = os.stat(filename)[stat.ST_SIZE]
+ size = os.stat(filename).st_size
except:
print filename, ": no such file"
sys.exit(1)
- fmt = AbstractFormatter(NullWriter())
- if to_text:
- parser = bkmk_parser.Bookmarks2Text(fmt)
- elif to_gf:
- parser = bkmk_parser.Bookmarks2Gadfly(fmt)
- else:
- parser = bkmk_parser.Bookmarks2Flad(fmt)
-
-
- if report_stats:
- str = "Converting " + filename + " to "
- if to_text:
- str = "text"
- elif to_gf:
- str = "GadFly database"
- else:
- str = "FLAD database"
-
- sys.stdout.write("Converting %s to %s: " % (filename, str))
- sys.stdout.flush()
-
if show_pbar:
pbar = ttyProgressBar(0, size)
lng = 0
else:
dos_add = 0 # UNIX' and Mac's len() counts CR or LF correct
- try:
- f = open(filename, 'r')
- except IOError, msg:
- print filename, ":", msg
- sys.exit(1)
- header = open("header", 'w')
- line_no = 0
+ infile = open(filename, 'r')
+ parser = BkmkParser()
- while 1:
- line = f.readline()
- if not line:
- break
+ line_no = 0
+ lng = 0
+ ok = 1
+ for line in infile:
if show_pbar:
lng = lng + len(line) + dos_add
pbar.display(lng)
- line = string.strip(line)
+ line = line.strip()
line_no = line_no + 1
try:
parser.feed(line)
+ except:
+ ok = 0
+ break
- if parser.outfile: # Write header until HTML parser start writing outfile
- if header:
- header.close()
- header = None
- else:
- header.write(line + '\n')
+ try:
+ parser.close()
+ except:
+ ok = 0
- except:
- break # I need total number of lines; interpreter will print traceback on exit
+ infile.close()
if show_pbar:
del pbar
if report_stats:
print "Ok"
print line_no, "lines proceed"
- print parser.urls_no, "urls found"
- print parser.record_no, "records created"
+ print parser.urls, "urls found"
+ print parser.objects, "objects created"
- parser.close()
- f.close()
+ if ok:
+ from storage import storage
+ storage = storage()
+ storage.store(parser.root_folder)
+
+ else:
+ import traceback
+ traceback.print_exc()
+ sys.exit(1)
if __name__ == '__main__':
--- /dev/null
+"""
+ Objects to represent bookmarks.html structure
+
+ Written by BroytMann, Mar 2000 - Jul 2002. Copyright (C) 2000-2002 PhiloSoft Design
+"""
+
+
+from UserList import UserList
+
+class Folder(UserList):
+ """
+ A bookmarks folder: a list whose items are the folders, bookmarks and
+ rulers it contains.
+
+ walk_depth treats a folder that has a "header" attribute as the root folder.
+ """
+ # Type flags; walk_depth tests them on every item to pick the walker method
+ isFolder = 1
+ isBookmark = 0
+
+ def __init__(self, add_date = None, comment = ''):
+ UserList.__init__(self)
+ self.comment = comment
+ self.add_date = add_date
+
+
+ def walk_depth(self, walker, level=0):
+ # Depth-first traversal: report this folder to `walker`, then every item.
+ # Subfolders are walked recursively with level+1; bookmarks and rulers
+ # are reported with the level of the folder that contains them.
+ if hasattr(self, "header"): # root folder
+ prune = 0
+ walker.root_folder(self)
+ else:
+ # The walker decides whether this folder is left out of the walk
+ prune = walker.prune_folder(self)
+ if not prune:
+ walker.start_folder(self, level)
+
+ if not prune:
+ for object in self.data:
+ if object.isFolder:
+ object.walk_depth(walker, level+1)
+ elif object.isBookmark:
+ walker.bookmark(object, level)
+ else:
+ # Neither flag set: the item is a ruler
+ walker.ruler(object, level)
+
+ walker.end_folder(self, level)
+
+
+class Bookmark:
+ isFolder = 0
+ isBookmark = 1
+
+ def __init__(self, href, add_date, last_visit, last_modified, comment = ''):
+ self.comment = comment
+ self.href = href
+ self.add_date = add_date
+ self.last_visit = last_visit
+ self.last_modified = last_modified
+
+
+class Ruler:
+ isFolder = 0
+ isBookmark = 0
+
+
+class Walker:
+ """
+ Interface class. Any instance that will be passed to Folder.walk_depth
+ may be derived from this class. It is not mandatory - unlike Java
+ Python does not require interface classes; but it is convenient to have
+ some methods predefined to no-op, in case you do not want to
+ provide end_folder etc.
+ """
+
+ def root_folder(self, r):
+ pass
+
+ def start_folder(self, f, level):
+ pass
+
+ def end_folder(self, f, level):
+ pass
+
+ def bookmark(self, b, level):
+ pass
+
+ def ruler(self, r, level):
+ pass
+
+ def prune_folder(self, folder):
+ return 0
+
+
+class Writer(Walker):
+ def __init__(self, outfile, prune=None):
+ self.outfile = outfile
+ self.prune = prune
+
+ def prune_folder(self, folder):
+ return self.prune == folder.name
+
+
+class Robot:
+ def __init__(self, tempfname, log):
+ self.tempfname = tempfname
+ self.log = log
+
+ def stop(self):
+ pass # Nothing to do on cleanup
+
+
+# Helper class to make inverese links (nodes linked to their parent)
+class InverseLinker(Walker):
+ def root_folder(self, r):
+ self.parent_stack = [r]
+
+ def start_folder(self, f, level):
+ f.parent = self.parent_stack[-1]
+ self.parent_stack.append(f) # Push the folder onto the stack of parents
+
+ def end_folder(self, f, level):
+ del self.parent_stack[-1] # Pop off the stack
+
+ def bookmark(self, b, level):
+ b.parent = self.parent_stack[-1]
+
+ def ruler(self, r, level):
+ r.parent = self.parent_stack[-1]
+
+
+# Helper class to make linear represenatation of the tree
+class Linear(Walker):
+ def root_folder(self, r):
+ r.linear = [r]
+ self.linear = r.linear
+
+ def add_object(self, object):
+ self.linear.append(object)
+
+ def start_folder(self, f, level):
+ self.add_object(f)
+
+ def bookmark(self, b, level):
+ self.add_object(b)
+
+ def ruler(self, r, level):
+ self.add_object(r)
+
+
+# Helper - make linked linear represenatation of the tree, suitable to be stored in sequential storage
+def make_linear(root_folder):
+ linker = InverseLinker()
+ root_folder.walk_depth(linker)
+
+ linear = Linear()
+ root_folder.walk_depth(linear)
+
+
+# Helper, opposite of make_linear - make a tree from the linked linear representation
+def make_tree(linear):
+ root_folder = linear[0]
+ del linear[0]
+
+ for object in linear:
+ object.parent.append(object)
+
+ return root_folder
"""
- Bookmarks parsers
+ Parser for Netscape Navigator's bookmarks.html
- Written by BroytMann, Mar 1997 - Feb 2000. Copyright (C) 1997-2000 PhiloSoft Design
+ Written by BroytMann, Jun 1997 - Jun 2002. Copyright (C) 1997-2002 PhiloSoft Design
"""
-import os, string, shutil
-from htmllib import HTMLParser
+import string
+from m_lib.www.html import HTMLParser
+from bkmk_objects import Folder, Bookmark, Ruler
-class BookmarksParser(HTMLParser): # Parser for Navigator's bookmarks (abstract class)
- def __init__(self, formatter, verbose=0):
- HTMLParser.__init__(self, formatter, verbose)
- self.urls_no = 0 # cross-reference counter
- self.record_no = 1 # record counter
- self.outfile = None # output file
- self.level = 0 # Indentation level
- self.flag_out = 0 # Is it time to flush?
- self.saved_data = ''
- self.saved_anchor = None
- self.saved_folder = None
- self.saved_ruler = None
+if __debug__:
+ def debug(note):
+ print note
+ def dump_names(folder_stack):
+ l = []
+ for object in folder_stack:
+ if object.isFolder:
+ l.append(object.name)
+ return "'" + string.join(l, "' '") + "'"
- def flush(self):
- if not self.outfile:
- return
-
- record_flushed = 0
+else:
+ def debug(note):
+ pass
+ dump_names = debug
- if self.saved_anchor:
- name, href, add_date, last_visit, last_modified, comment = self.saved_anchor
- self.saved_anchor = (name, href, add_date, last_visit, last_modified, comment + self.saved_data)
- self.flush_anchor()
- self.saved_data = ''
- record_flushed = 1
- self.saved_anchor = None
- if self.saved_folder:
- name, add_date, comment = self.saved_folder
- self.saved_folder = (name, add_date, comment + self.saved_data)
- self.flush_folder()
- self.saved_data = ''
- record_flushed = 1
- self.saved_folder = None
+class BkmkParser(HTMLParser):
+ def __init__(self):
+ HTMLParser.__init__(self)
- if self.saved_ruler:
- self.flush_ruler()
- record_flushed = 1
- self.saved_ruler = None
+ self.urls = 0
+ self.objects = 0
- if record_flushed:
- self.record_no = self.record_no + 1
-
- if self.saved_data <> '': # This may occur after ampersand
- self.flag_out = 0
+ self.charset = ""
+ self.recode = None
+ def handle_data(self, data):
+ # Append a chunk of character data to self.accumulator; when a charset
+ # was found by do_meta the chunk is decoded from that charset first.
+ if data:
+ if self.charset:
+ # NOTE(review): encode() without an argument uses the interpreter's
+ # default encoding - confirm that this is intended for non-ASCII text.
+ data = unicode(data, self.charset).encode()
+ self.accumulator = "%s%s" % (self.accumulator, data)
- def close(self):
- HTMLParser.close(self)
+ # Mozilla - get charset
+ def do_meta(self, attrs):
+ http_equiv = ""
+ content = ""
- if self.outfile:
- self.outfile.close()
-
- if self.level <> 0:
- print "Bad HTML: <DL> and </DL> mismatch; level=%d" % self.level
+ for attrname, value in attrs:
+ value = string.strip(value)
+ if attrname == 'http-equiv':
+ http_equiv = value.lower()
+ elif attrname == 'content':
+ content = value
+ if http_equiv == "content-type":
+ try:
+ # extract charset from "text/html; charset=UTF-8"
+ self.charset = content.split('=')[1]
+ except IndexError:
+ pass
- def handle_data(self, data):
- if not self.outfile:
- return
- if data and (data[0] == '&'): # Ampersand parsed by SGMLlib
- self.flag_out = 0
+ def start_title(self, attrs):
+ self.accumulator = "%s<TITLE>" % self.accumulator
- if self.flag_out == 2: # Process comment after <DD> or <HR>
- if self.saved_anchor:
- name, href, add_date, last_visit, last_modified, comment = self.saved_anchor
- self.saved_anchor = (name, href, add_date, last_visit, last_modified, comment + data)
- data = '' # Used
+ def end_title(self):
+ self.accumulator = "%s</TITLE>" % self.accumulator
- if self.saved_folder:
- name, add_date, comment = self.saved_folder
- self.saved_folder = (name, add_date, comment + data)
- data = '' # Used
- self.flag_out = 0
+ # Start root folder
+ def start_h1(self, attrs):
+ root_folder = Folder()
+ self.current_object = root_folder
+ self.root_folder = root_folder
+ self.current_folder = root_folder
+ self.folder_stack = [root_folder]
- if self.flag_out == 1:
- self.flush()
+ self.root_folder.header = self.accumulator
+ self.accumulator = ''
- if data and (data[0] <> '&') and (self.flag_out == 0):
- self.flag_out = 1 # Set flag (to flush data on next call)
+ def end_h1(self):
+ accumulator = self.accumulator
+ self.accumulator = ''
- if data:
- self.saved_data = self.saved_data + data
+ debug("Root folder name: `%s'" % accumulator)
+ self.root_folder.name = accumulator
- def anchor_bgn(self, href, add_date, last_visit, last_modified):
- self.flush()
- self.anchor = (href, add_date, last_visit, last_modified)
+ # Start next folder
+ def start_h3(self, attrs):
+ for attrname, value in attrs:
+ value = string.strip(value)
+ if attrname == 'add_date':
+ add_date = value
+ debug("New folder...")
+ folder = Folder(add_date)
+ self.current_object = folder
+ self.current_folder.append(folder)
+ self.folder_stack.append(folder) # push new folder
+ self.current_folder = folder
+ self.objects = self.objects + 1
- def anchor_end(self):
- if self.anchor:
- href, add_date, last_visit, last_modified = self.anchor
- self.anchor = None
- self.urls_no = self.urls_no + 1
+ def end_h3(self):
+ accumulator = self.accumulator
+ self.accumulator = ''
- self.saved_anchor = (self.saved_data, href, add_date, last_visit, last_modified, '')
- self.saved_data = '' # Used
+ debug("Folder name: `%s'" % accumulator)
+ self.current_folder.name = accumulator
+ # Start bookmark
def start_a(self, attrs):
- href = ''
- add_date = ''
- last_visit = ''
- last_modified = ''
-
for attrname, value in attrs:
value = string.strip(value)
if attrname == 'href':
if attrname == 'last_modified':
last_modified = value
- self.anchor_bgn(href, add_date, last_visit, last_modified)
+ debug("Bookmark points to: `%s'" % href)
+ bookmark = Bookmark(href, add_date, last_visit, last_modified)
+ self.current_object = bookmark
+ self.current_folder.append(bookmark)
+ self.urls = self.urls + 1
+ self.objects = self.objects + 1
+ def end_a(self):
+ accumulator = self.accumulator
+ self.accumulator = ''
- def start_h3(self, attrs): # Navigator marks folders with <H3> tags
- self.flush()
- add_date = ''
+ debug("Bookmark name: `%s'" % accumulator)
+ bookmark = self.current_folder[-1]
+ bookmark.name = accumulator
- for attrname, value in attrs:
- value = string.strip(value)
- if attrname == 'add_date':
- add_date = value
- self.saved_folder = ('', add_date, '')
- self.flag_out = 0
+ def flush(self):
+ accumulator = self.accumulator
+ if accumulator:
+ self.accumulator = ''
- def end_h3(self): # End of folder
- name, add_date, comment = self.saved_folder
- self.saved_folder = (name + self.saved_data, add_date, comment)
- self.saved_data = '' # Used
+ current_object = self.current_object
+ current_object.comment = current_object.comment + accumulator
+ debug("Comment: `%s'" % current_object.comment)
def start_dl(self, attrs):
self.flush()
- if not self.outfile: # We are starting output after 1st <DL> tag to skip header
- self.open_outfile()
-
- self.level = self.level + 1
+ do_dt = start_dl
+ # End of folder
def end_dl(self):
self.flush()
- self.level = self.level - 1
-
-
- def do_dd(self, attrs):
- if self.outfile:
- self.flag_out = 2 # Set flag to signal "comment starting"
-
-
- def do_br(self, attrs):
- if self.outfile:
- self.saved_data = self.saved_data + "<BR>" # Add <BR>...
- self.flag_out = 0 # ...and next line of comment to saved comment
-
-
- def do_hr(self, attrs):
- if self.outfile:
- self.flush()
- self.saved_ruler = 1
-
-
- def handle_charref(self, name):
- if self.outfile:
- self.flag_out = 0
- self.saved_data = "%s&%c" % (self.saved_data, chr(name))
-
-
- def handle_entityref(self, name):
- if self.outfile:
- self.flag_out = 0
- if self.entitydefs.has_key(name): # If it is one of the standard SGML entities - close it with semicolon
- x = ';'
+ debug("End folder")
+ debug("Folder stack: %s" % dump_names(self.folder_stack))
+ if self.folder_stack:
+ del self.folder_stack[-1] # pop last folder
+ if self.folder_stack:
+ self.current_folder = self.folder_stack[-1]
else:
- x = ''
- self.saved_data = "%s&%s%s" % (self.saved_data, name, x)
-
-
- def open_outfile(self):
- self.outfile = open("bookmarks.tmp", 'w')
+ debug("FOLDER STACK is EMPTY!!! (1)")
+ else:
+ debug("FOLDER STACK is EMPTY!!! (2)")
+ self.current_object = None
-class Bookmarks2Text(BookmarksParser):
- def flush_anchor(self):
- self.outfile.write(" "*(self.level-1) + str(self.saved_anchor) + '\n')
-
+ def close(self):
+     """Close the underlying HTMLParser and check the folder stack.
+
+     Raises ValueError if self.folder_stack is not empty once the
+     parser has been closed.
+     """
+     HTMLParser.close(self)
+     leftover = self.folder_stack
+     if not leftover:
+         return
+     raise ValueError("wrong folder stack: %s" % leftover)
- def flush_folder(self):
- self.outfile.write(" "*(self.level-1) + str(self.saved_folder) + '\n')
+ def do_dd(self, attrs):
+     """Accept the tag and do nothing; attrs is ignored."""
- def flush_ruler(self):
- self.outfile.write(" "*(self.level-1) + "----------\n")
+ do_p = do_dd
- def __del__(self):
- shutil.copy("bookmarks.tmp", "bookmarks.txt")
- os.unlink("bookmarks.tmp")
+ # Start ruler
+ def do_hr(self, attrs):
+     """Flush pending text, then append a new Ruler to the current folder.
+
+     The object counter is incremented and no object is current
+     afterwards; attrs is ignored.
+     """
+     self.flush()
+     debug("Ruler")
+     ruler = Ruler()
+     self.current_folder.append(ruler)
+     self.objects += 1
+     self.current_object = None
-class Bookmarks2Flad(BookmarksParser):
- def __init__(self, formatter, verbose=0):
- BookmarksParser.__init__(self, formatter, verbose)
- self.flush_record = 0
+ # BR in comment
+ def do_br(self, attrs):
+     """Append the literal text <BR> to the accumulator; attrs is ignored."""
+     text_so_far = self.accumulator
+     self.accumulator = "%s<BR>" % text_so_far
- def flush(self):
- if not self.outfile:
- return
+ # Allow < in the text
+ def unknown_starttag(self, tag, attrs):
+     """Append an unrecognised start tag to the accumulator as <tag>.
+
+     Only the tag name is kept; attrs is ignored.
+     """
+     pieces = (self.accumulator, tag)
+     self.accumulator = "%s<%s>" % pieces
- record_flushed = 0
- if self.saved_anchor or self.saved_folder or self.saved_ruler or self.saved_data:
- if self.flush_record:
- self.outfile.write('\n')
- else:
- self.flush_record = 1
-
- BookmarksParser.flush(self)
-
-
- def flush_anchor(self):
- name, href, add_date, last_visit, last_modified, comment = self.saved_anchor
- self.outfile.write("""Level: %d
-Title: %s
-URL: %s
-AddDate: %s
-LastVisit: %s
-LastModified: %s
-Comment: %s
-""" % (self.level, name, href, add_date, last_visit, last_modified, comment))
-
- def flush_folder(self):
- name, add_date, comment = self.saved_folder
- self.outfile.write("""Level: %d
-Folder: %s
-AddDate: %s
-Comment: %s
-""" % (self.level, name, add_date, comment))
-
- def flush_ruler(self):
- self.outfile.write("Level: %s\nRuler: YES\n" % self.level)
-
-
- def __del__(self):
- shutil.copy("bookmarks.tmp", "bookmarks.db")
- os.unlink("bookmarks.tmp")
-
-
-class Bookmarks2Gadfly(BookmarksParser):
- def open_outfile(self):
- import gadfly
- connection = gadfly.gadfly()
- connection.startup("bookmarks", ".")
- self.connection = connection
-
- cursor = connection.cursor()
- cursor.execute("""create table bookmarks (
- rec_no integer,
- level integer,
- title varchar,
- DATA varchar,
- add_date integer,
- last_visit integer,
- last_modified integer,
- comment varchar
- )""")
- self.outfile = cursor
-
- self.template = """insert into bookmarks
- (rec_no, level, title, DATA, add_date, last_visit, last_modified, comment)
- values (?, ?, ?, ?, ?, ?, ?, ?)"""
-
-
- def __del__(self):
- self.connection.commit()
-
-
- def flush_anchor(self):
- name, href, add_date, last_visit, last_modified, comment = self.saved_anchor
- self.outfile.execute(self.template,
- (self.record_no, self.level, name, href,
- add_date, last_visit, last_modified, comment)
- )
-
- def flush_folder(self):
- name, add_date, comment = self.saved_folder
- self.outfile.execute(self.template,
- (self.record_no, self.level, name, "Folder",
- add_date, '', '', comment)
- )
-
- def flush_ruler(self):
- self.outfile.execute(self.template,
- (self.record_no, self.level, '', "Ruler",
- '', '', '', '')
- )
+ # Do not allow unknown end tags
+ def unknown_endtag(self, tag):
+     """Reject any end tag the parser has no handler for.
+
+     Raises NotImplementedError naming the offending tag.
+     """
+     raise NotImplementedError("Unknown end tag `%s'" % tag)
+++ /dev/null
-#! /usr/local/bin/python -O
-"""
- Test FLAD database for: duplicate URLs, too big indent, incorrect record
- format, spare keys.
-
- Written by BroytMann, Jun 1997 - Feb 2000. Copyright (C) 1997-2000 PhiloSoft Design
-"""
-
-import sys, string
-from getopt import getopt
-from copy import _copy_dict
-
-import fladm
-
-
-def error(err_str):
- global errors_found, report_stats
- if errors_found == 0:
- if report_stats:
- print "errors found"
-
- errors_found = errors_found + 1
- sys.stderr.write("%s\n" % err_str)
-
- if logfile:
- logfile.write("%s\n" % err_str)
-
-
-def check_key(record_no, record, key, allow_empty=1):
- if not record.has_key(key):
- error("Expected `%s' in record %d -- %s" % (key, record_no, str(record)))
- return
-
- if not allow_empty and not record[key]:
- error("Empty key `%s' in record %d -- %s" % (key, record_no, str(record)))
-
- del record[key]
-
-def check_date(record_no, record, key):
- if not record.has_key(key):
- error("Expected `%s' in record %d -- %s" % (key, record_no, str(record)))
- else:
- try:
- _date = string.atoi(record[key])
- except string.atoi_error:
- error("Bad `%s' format in record %d -- %s" % (key, record_no, str(record)))
-
- del record[key]
-
-def check_empty(record_no, record):
- if record <> {}:
- error("Spare keys in record %d -- %s" % (record_no, str(record)))
-
-def check_url(record_no, record):
- # I am not testing here check_url("Level") because it is impossible
- # to come here without "Level" key - fladm.check_record has to reject
- # entire database if there is record without this "must key".
- # If someone adds record without "Level" manually - it is serious error
- # and the following line raise exception.
- del record["Level"]
-
- check_key(record_no, record, "Title")
- check_key(record_no, record, "URL")
- check_key(record_no, record, "Comment")
-
- check_date(record_no, record, "AddDate")
- check_date(record_no, record, "LastVisit")
- check_date(record_no, record, "LastModified")
-
- check_empty(record_no, record)
-
-def check_folder(record_no, record):
- # Read comment above - in the beginning of check_url()
- del record["Level"]
-
- check_key(record_no, record, "Folder")
- check_key(record_no, record, "Comment")
-
- check_date(record_no, record, "AddDate")
- check_empty(record_no, record)
-
-def check_ruler(record_no, record):
- # Read comment above - in the beginning of check_url()
- del record["Level"]
-
- if not record.has_key("Ruler"):
- error("No `Ruler' in record %d -- %s" % (record_no, str(record)))
- else:
- if record["Ruler"] <> "YES": # Impossible: ruler saying it is not ruler
- error("Ruler saying it is not ruler in record %d -- %s" % (record_no, str(record)))
- del record["Ruler"]
-
- check_empty(record_no, record)
-
-
-def run():
- optlist, args = getopt(sys.argv[1:], "l:s")
-
- global errors_found, report_stats, logfile
- report_stats = 1
-
- logfile = None
- logfname = None
-
- for _opt, _arg in optlist:
- if _opt == '-l':
- logfname = _arg
- if _opt == '-s':
- report_stats = 0
- try:
- del _opt, _arg
- except NameError:
- pass
-
- if len(args) > 1:
- sys.stderr.write("check_db: too many arguments\n")
- sys.exit(1)
-
-
- if logfname:
- logfile = open(logfname, 'w')
-
- if report_stats:
- sys.stdout.write("Loading: ")
- sys.stdout.flush()
-
- bookmarks_db = fladm.load_from_file("bookmarks.db", fladm.check_record, ["Level"])
-
- if report_stats:
- print "Ok"
- sys.stdout.write("Testing: ")
- sys.stdout.flush()
-
- record_no = 0
- save_level = 1
- got_folder = 1 # Start as if we already have one folder
- errors_found = 0
-
- URL_d = {} # Create hash table full of URLs
-
- for record in bookmarks_db:
- record_no = record_no + 1
- level = string.atoi(record["Level"])
-
- if record.has_key("URL"):
- if URL_d.has_key(record["URL"]):
- error("Duplicate URL (rec. %d, 1st at rec. %d): %s" % (record_no, URL_d[record["URL"]], str(record["URL"])))
- else:
- URL_d[record["URL"]] = record_no
-
- check_url(record_no, _copy_dict(record))
-
- elif record.has_key("Folder"):
- check_folder(record_no, _copy_dict(record))
-
- elif record.has_key("Ruler"):
- check_ruler(record_no, _copy_dict(record))
-
- else:
- raise KeyError, "neither \"URL\" nor \"Folder\" nor \"Ruler\" in record " + str(record)
-
- if got_folder:
- if (level > save_level + 1):
- error("Indent %d too big (want %d at rec. %d), record: %s" % (level, save_level, record_no, str(record)))
- else:
- if (level > save_level):
- error("Indent %d without folder (rec. %d), record: %s" % (level, record_no, str(record)))
-
- save_level = level
- got_folder = record.has_key("Folder") # Test here to save got_folder for next loop
-
- # End of loop
-
- if logfname:
- logfile.close()
-
- if report_stats:
- print record_no, "records tested"
- if errors_found == 0:
- print "Ok (no errors found)"
- else:
- print "%d errors found" % errors_found
-
-
-if __name__ == '__main__':
- run()
--- /dev/null
+#! /usr/local/bin/python -O
+"""
+ Check duplicate URLs in the bookmarks database
+
+ Written by BroytMann, Jun 2000 - Aug 2002. Copyright (C) 2000-2002 PhiloSoft Design
+"""
+
+
+import sys
+
+
+log_file = None
+
+def report_dup(href, object_no):
+    """Report a duplicate URL on stdout and, if a log file is set, in the log.
+
+    href is the duplicated URL; object_no is the number of the object
+    where that URL was first seen.
+    """
+    message = "Duplicate URL: %s (first at rec. %d)" % (href, object_no)
+    print message
+
+    if not log_file:
+        return
+    log_file.write(message + "\n")
+
+
+
+def run():
+    """Command-line entry point: report bookmarks that repeat an earlier URL.
+
+    Options read from sys.argv:
+      -s          do not print the banner, progress and statistics
+                  (duplicates themselves are still reported);
+      -l logfile  also write every duplicate report to `logfile'.
+
+    Positional arguments are not accepted: a usage message is written to
+    stderr and the program exits with status 1.
+    """
+    global log_file
+
+    from getopt import getopt
+    optlist, args = getopt(sys.argv[1:], "sl:")
+
+    report_stats = 1
+    log_filename = None
+
+    for _opt, _arg in optlist:
+        if _opt == '-s':
+            report_stats = 0
+        if _opt == '-l':
+            log_filename = _arg
+    try:
+        del _opt, _arg
+    except NameError:
+        pass
+
+    if report_stats:
+        print "BroytMann check_dups, Copyright (C) 2000 PhiloSoft Design"
+
+    if args:
+        # Name this program (check_dups) in the diagnostics
+        sys.stderr.write("check_dups: too many arguments\n")
+        sys.stderr.write("Usage: check_dups [-s] [-l logfile]\n")
+        sys.exit(1)
+
+    if log_filename:
+        log_file = open(log_filename, 'w')
+
+    from storage import storage
+    storage = storage()
+
+    if report_stats:
+        sys.stdout.write("Loading %s: " % storage.filename)
+        sys.stdout.flush()
+
+    root_folder = storage.load()
+    from bkmk_objects import make_linear
+    make_linear(root_folder)
+    objects = len(root_folder.linear)
+
+    if report_stats:
+        print "Ok"
+
+
+    # Maps each href to the number of the first object that carries it
+    dup_dict = {}
+
+    for object_no in range(objects):
+        object = root_folder.linear[object_no]
+
+        if object.isBookmark:
+            href = object.href
+            if dup_dict.has_key(href):
+                report_dup(href, dup_dict[href])
+            else:
+                dup_dict[href] = object_no
+
+
+    if log_filename:
+        log_file.close()
+
+    if report_stats:
+        print "Ok"
+        print objects, "objects passed"
+
+
+if __name__ == '__main__':
+ run()
+++ /dev/null
-#! /usr/local/bin/python -O
-"""
- Test FLAD database for old records
-
- Written by BroytMann, Feb 2000. Copyright (C) 2000 PhiloSoft Design
-"""
-
-
-import fladm
-from time import time
-
-now = time()
-thrashold = 2*24*3600 # 2 days
-
-
-def run():
- bookmarks_db = fladm.load_from_file("bookmarks.db", fladm.check_record, ["Level"])
-
- for record in bookmarks_db:
- if record.has_key("URL"):
- add_date = int(record["AddDate"])
- if now - add_date < thrashold:
- print "New URL:", record["URL"]
-
-
-if __name__ == '__main__':
- run()
+++ /dev/null
-#! /usr/local/bin/python -O
-"""
- Test FLAD database for old records
-
- Written by BroytMann, Feb 2000. Copyright (C) 2000 PhiloSoft Design
-"""
-
-
-import fladm
-from time import time
-
-now = time()
-thrashold = 2*30*24*3600 # 2 months
-
-
-def run():
- bookmarks_db = fladm.load_from_file("bookmarks.db", fladm.check_record, ["Level"])
-
- for record in bookmarks_db:
- if record.has_key("URL"):
- last_visit = int(record["LastVisit"])
- if now - last_visit > thrashold:
- print "Old URL:", record["URL"]
-
-
-if __name__ == '__main__':
- run()
--- /dev/null
+#! /usr/local/bin/python -O
+"""
+ Check and show URLs in the bookmarks database where name <> real title
+
+ Written by BroytMann, Jul 2002 - Aug 2002. Copyright (C) 2002 PhiloSoft Design
+"""
+
+
+import sys
+
+
+def run():
+    """Command-line entry point: list bookmarks whose name differs from the real title.
+
+    For every bookmark that has neither a "moved" nor an "error"
+    attribute, print its href, its name and its real_title when the two
+    differ, or "NO REAL TITLE!!!" when there is no real_title attribute.
+
+    The only option is -s, which suppresses the banner, the loading
+    progress and the final statistics. Positional arguments are rejected
+    with a usage message and exit status 1.
+    """
+    from getopt import getopt
+    optlist, args = getopt(sys.argv[1:], "s")
+
+    report_stats = 1
+    for option, _value in optlist:
+        if option == '-s':
+            report_stats = 0
+
+    if report_stats:
+        print "BroytMann check_title, Copyright (C) 2002 PhiloSoft Design"
+
+    if args:
+        sys.stderr.write("check_title: too many arguments\n")
+        sys.stderr.write("Usage: check_title [-s]\n")
+        sys.exit(1)
+
+    from storage import storage
+    storage = storage()
+
+    if report_stats:
+        sys.stdout.write("Loading %s: " % storage.filename)
+        sys.stdout.flush()
+
+    root_folder = storage.load()
+    from bkmk_objects import make_linear
+    make_linear(root_folder)
+    objects = len(root_folder.linear)
+
+    if report_stats:
+        print "Ok"
+
+
+    for object_no in range(objects):
+        bookmark = root_folder.linear[object_no]
+
+        if not bookmark.isBookmark:
+            continue
+        if hasattr(bookmark, "moved") or hasattr(bookmark, "error"):
+            continue
+
+        # Choose the third line of the report, or skip when the titles agree
+        if hasattr(bookmark, "real_title"):
+            if bookmark.name == bookmark.real_title:
+                continue
+            title_line = bookmark.real_title
+        else:
+            title_line = "NO REAL TITLE!!!"
+
+        print bookmark.href
+        print bookmark.name
+        print title_line
+        print
+
+
+    if report_stats:
+        print objects, "objects passed"
+
+
+if __name__ == '__main__':
+ run()
+++ /dev/null
-#! /usr/local/bin/python -O
-"""
- Check URL - subprocess
-
- Written by BroytMann, Mar 1999 - Feb 2000. Copyright (C) 1999-2000 PhiloSoft Design
-"""
-
-
-import sys, os, stat, string, time
-import urllib, www_util
-
-import cPickle
-pickle = cPickle
-from subproc import RecordFile
-
-from md5wrapper import md5wrapper
-
-
-ftpcache_key = None
-def myftpwrapper(user, passwd, host, port, dirs):
- global ftpcache_key
- ftpcache_key = (user, host, port, string.joinfields(dirs, '/'))
- return _ftpwrapper(user, passwd, host, port, dirs)
-
-_ftpwrapper = urllib.ftpwrapper
-urllib.ftpwrapper = myftpwrapper
-
-def get_welcome():
- global ftpcache_key
- _welcome = urllib._urlopener.ftpcache[ftpcache_key].ftp.welcome
- ftpcache_key = None # I am assuming there are no duplicate ftp URLs in db.
- # If there are - ftpcache_key in prev line is invalid.
- return _welcome
-
-
-class RedirectException(Exception):
- reloc_dict = {
- 301: "perm",
- 302: "temp"
- }
- def __init__(self, errcode, newurl):
- Exception.__init__(self, "(%s.) to %s" % (self.reloc_dict[errcode], newurl))
-
-
-class MyURLopener(urllib.URLopener):
- # Error 302 -- relocated (temporarily)
- def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
- if headers.has_key('location'):
- newurl = headers['location']
- elif headers.has_key('uri'):
- newurl = headers['uri']
- else:
- newurl = "Nowhere"
- raise RedirectException(errcode, newurl)
-
- # Error 301 -- also relocated (permanently)
- http_error_301 = http_error_302
-
- # Error 401 -- authentication required
- def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
- raise IOError, ('http error', errcode, "Authentication required ", headers)
-
-
-def get_error(msg):
- if type(msg) == type(""):
- return msg
-
- else:
- s = []
- for i in msg:
- s.append("'%s'" % string.join(string.split(str(i), "\n"), "\\n"))
- return "(%s)" % string.join(s)
-
-def check_url(record):
- try:
- now = str(int(time.time()))
- url_type, url_rest = urllib.splittype(record["URL"])
- url_host, url_path = urllib.splithost(url_rest)
- url_path, url_tag = urllib.splittag(url_path)
-
- tempfname = record["TEMPFILE"]
- del record["TEMPFILE"]
-
- fname, headers = urllib.urlretrieve("%s://%s%s" % (url_type, url_host, url_path), tempfname)
-
- last_modified = None
- record["Size"] = str(os.stat(tempfname)[stat.ST_SIZE])
-
- if headers:
- try:
- last_modified = headers["Last-Modified"]
- except KeyError:
- last_modified = None
-
- if last_modified:
- last_modified = www_util.parse_time(last_modified)
-
- if last_modified:
- last_modified = str(int(last_modified))
- else:
- last_modified = record["LastVisit"]
-
- record["LastModified"] = last_modified
-
- md5 = md5wrapper()
- if url_type == "ftp": # Pass welcome message through MD5
- md5.update(get_welcome())
-
- md5.md5file(tempfname)
- record["MD5"] = str(md5)
-
- except IOError, msg:
- if (msg[0] == "http error") and (msg[1] == -1):
- record["NoError"] = "The server did not return any header - it is not an error, actually"
- else:
- record["Error"] = get_error(msg)
-
- except EOFError:
- record["Error"] = "Unexpected EOF (FTP server closed connection)"
-
- except RedirectException, msg:
- record["Moved"] = str(msg)
-
- # Mark this even in case of error
- record["LastTested"] = now
-
-
-def run():
- urllib._urlopener = MyURLopener()
-
- # Some sites allow only Mozilla-compatible browsers; way to stop robots?
- server_version = "Mozilla/3.0 (compatible; Python-urllib/%s)" % urllib.__version__
- urllib._urlopener.addheaders[0] = ('User-agent', server_version)
-
- rec_in = RecordFile(sys.stdin)
- rec_out = RecordFile(sys.stdout)
-
- while 1:
- record = pickle.loads(rec_in.read_record())
- check_url(record)
- rec_out.write_record(pickle.dumps(record))
-
-
-if __name__ == '__main__':
- run()
--- /dev/null
+#! /usr/local/bin/python -O
+"""
+ Robot interface - check URLs from bookmarks database
+
+ Written by BroytMann, Mar 2000 - Aug 2002. Copyright (C) 2000-2002 PhiloSoft Design
+"""
+
+
+import sys, os, urllib, tempfile
+tempfname = "check_urls" + tempfile.gettempprefix() + "tmp"
+
+
+def run():
+ """Command-line entry point: check the URLs of the bookmarks with the robot.
+
+ Options read from sys.argv: -i turns the progress bar off, -s turns
+ the banner and statistics off, -e switches to rechecking errors.
+ Positional arguments are rejected with a usage message and exit
+ status 1. The bookmarks are written back with storage.store().
+ """
+ from getopt import getopt
+ optlist, args = getopt(sys.argv[1:], "ise")
+
+ show_pbar = 1
+ report_stats = 1
+ only_errors = 0
+
+ for _opt, _arg in optlist:
+ if _opt == '-i':
+ show_pbar = 0
+ if _opt == '-s':
+ report_stats = 0
+ if _opt == '-e':
+ only_errors = 1
+ try:
+ del _opt, _arg
+ except NameError:
+ pass
+
+ if report_stats:
+ print "BroytMann check_urls, Copyright (C) 1997-2002 PhiloSoft Design"
+
+ if args:
+ sys.stderr.write("check_urls: too many arguments\n")
+ sys.stderr.write("Usage: check_urls [-ise]\n")
+ sys.exit(1)
+
+ # The progress bar is shown only when stderr is a terminal...
+ if show_pbar:
+ show_pbar = sys.stderr.isatty()
+
+ # ...and only when the progress bar module can be imported
+ if show_pbar:
+ try:
+ from m_lib.pbar.tty_pbar import ttyProgressBar
+ except ImportError:
+ show_pbar = 0
+
+ # The log is obtained with openlog() when rechecking errors
+ # and with makelog() on a normal start
+ from m_lib.flog import makelog, openlog
+ if only_errors:
+ log = openlog("check.log")
+ log("chk_urls restarted for errors")
+ if report_stats:
+ print "chk_urls restarted for errors"
+ else:
+ log = makelog("check.log")
+ log("check_urls started")
+ if report_stats:
+ print " check_urls: normal start"
+
+ from storage import storage
+ storage = storage()
+
+ from robots import robot
+ robot = robot(tempfname, log)
+
+ if report_stats:
+ sys.stdout.write("Loading %s: " % storage.filename)
+ sys.stdout.flush()
+
+ root_folder = storage.load()
+ from bkmk_objects import make_linear
+ make_linear(root_folder)
+ objects = len(root_folder.linear)
+
+ if report_stats:
+ print "Ok"
+
+ if report_stats:
+ if only_errors:
+ s = "Rechecking errors: "
+ else:
+ s = "Checking: "
+ sys.stdout.write(s)
+ sys.stdout.flush()
+
+ if show_pbar:
+ pbar = ttyProgressBar(0, objects)
+
+ urls_no = 0
+ object_count = 0
+ size = 0
+
+ # Maps an already checked href to the index of its object in root_folder.linear
+ checked = {}
+ # Result of the last robot.check_url(); a false value is logged as a user interrupt
+ rcode = 1
+
+ for object_no in range(objects):
+ if show_pbar:
+ pbar.display(object_no+1)
+
+ object = root_folder.linear[object_no]
+ object_count = object_count + 1
+
+ if object.isBookmark:
+ # NOTE(review): indentation is not visible in this listing; presumably
+ # in -e mode the old "error" attribute is removed before the recheck and
+ # bookmarks without one are skipped - confirm against the real file
+ if only_errors:
+ if hasattr(object, "error"):
+ delattr(object, "error")
+ else:
+ continue
+
+ # A URL seen before is not checked again: the listed attributes
+ # are copied from the object recorded in `checked'
+ if checked.has_key(object.href):
+ log("Already checked %s" % object.href)
+ old_object = root_folder.linear[checked[object.href]]
+ for attr_name in ("last_visit", "last_modified",
+ "error", "no_error", "moved", "size", "md5", "real_title",
+ "last_tested", "test_time"):
+ if hasattr(old_object, attr_name):
+ setattr(object, attr_name, getattr(old_object, attr_name))
+ else:
+ url_type, url_rest = urllib.splittype(object.href)
+ log("Checking %s:%s" % (url_type, url_rest))
+ rcode = robot.check_url(object, url_type, url_rest)
+
+ if rcode:
+ checked[object.href] = object_no
+ urls_no = urls_no + 1
+ try:
+ size = size + int(object.size)
+ except (AttributeError, TypeError, ValueError):
+ pass # Some object does not have a size :(
+ else:
+ log("Interrupted by user (^C)")
+ break
+ robot.stop()
+
+ if show_pbar:
+ del pbar
+
+ if report_stats:
+ print "Ok"
+ print object_count, "objects passed"
+ print urls_no, "URLs checked"
+ print size, "bytes eaten"
+
+ # Save the bookmarks together with whatever results were collected
+ storage.store(root_folder)
+
+ if rcode:
+ log("check_urls finished ok")
+ log.close()
+
+ # Remove the temporary file; a failure to unlink it is ignored
+ try:
+ os.unlink(tempfname)
+ except os.error:
+ pass
+
+
+if __name__ == '__main__':
+ run()
+++ /dev/null
-#! /usr/local/bin/python -O
-"""
- For every URL in the FLAD database get info from the Net
- and store info in check.db
-
- Version 2.0
- Written by BroytMann, Aug 1997 - Mar 1999. Copyright (C) 1997-1999 PhiloSoft Design
-"""
-
-
-import sys, os, stat, string, time
-from getopt import getopt
-
-import urllib, tempfile
-from copy import _copy_dict
-
-import cPickle
-pickle = cPickle
-
-import fladm, fladc, shutil
-from flog import makelog, openlog
-
-
-os.environ["PATH"] = ".:" + os.environ["PATH"]
-from subproc import Subprocess, RecordFile
-
-
-def set_checkpoint(rec_no):
- cpfile = open("check.dat", 'w')
- cpfile.write("# chk_urls checkpoint file\n")
- cpfile.write("Size: %d\n" % db_stat[stat.ST_SIZE])
- cpfile.write("MTime: %d\n" % db_stat[stat.ST_MTIME])
- cpfile.write("Record: %d" % rec_no)
- cpfile.close()
-
-def get_checkpoint():
- try:
- cpfile = fladc.load_file("check.dat")
- if (string.atoi(cpfile["Size"]) <> db_stat[stat.ST_SIZE]) or \
- (string.atoi(cpfile["MTime"]) <> db_stat[stat.ST_MTIME]):
- return -3
-
- return string.atoi(cpfile["Record"])
-
- except IOError: # No such file
- return -1
-
- except KeyError: # No such key in checkpoint file
- return -2
-
- except string.atoi_error: # Wrong numeric format
- return -2
-
- return 0
-
-def start(db_name, report_stats):
- start_recno = get_checkpoint()
- if start_recno < 0:
- if start_recno == -1:
- log = makelog("check.log")
- log("chk_urls started")
- if report_stats:
- print " chk_urls: normal start"
-
- elif start_recno == -2:
- log = openlog("check.log")
- log("chk_urls started")
- log(" invalid checkpoint file, checkpoint ignored")
- if report_stats:
- print " chk_urls: invalid checkpoint file, checkpoint ignored"
-
- elif start_recno == -3:
- log = makelog("check.log")
- log("chk_urls started")
- log(" bookmarks.db changed, checkpoint ignored")
- if report_stats:
- print " chk_urls: bookmarks.db changed, checkpoint ignored"
-
- else:
- raise RuntimeError, "wrong get_checkpoint() return: `%s'" % str(start_recno)
-
- start_recno = 0
-
- elif start_recno == 0:
- raise RuntimeError, "wrong get_checkpoint() return: `%s'" % str(start_recno)
-
- else: # start_recno > 0
- if os.path.exists("check.db"):
- if not os.path.exists("check.old"):
- shutil.copy("check.db", "check.old")
- db_name = "check.db"
-
- log = openlog("check.log")
- log("chk_urls started")
- log(" found valid checkpoint file, continue")
- if report_stats:
- print " chk_urls: found valid checkpoint file, continue"
-
- else:
- log = makelog("check.log")
- log("chk_urls started")
- log(" valid checkpoint, but no check.db file, restarting")
- if report_stats:
- print " chk_urls: valid checkpoint, but no check.db file, restarting"
- start_recno = 0
-
- return start_recno, db_name, log
-
-
-tempfname = "check_urls" + tempfile.gettempprefix() + ".tmp"
-
-
-check_subp = None
-subp_pipe = None
-
-def restart_subp(log, report_stats):
- global check_subp, subp_pipe
- if check_subp:
- log(" restarting hanging subprocess")
- if report_stats:
- print " chk_urls: restarting hanging subprocess"
- del check_subp
- del subp_pipe
-
- check_subp = Subprocess("check_url_sub.py")
- subp_pipe = RecordFile(check_subp)
-
-
-def check_url(record, log, report_stats):
- try:
- record["TEMPFILE"] = tempfname
- subp_pipe.write_record(pickle.dumps(record))
-
- if check_subp.waitForPendingChar(900): # wait 15 minutes
- rec = pickle.loads(subp_pipe.read_record())
- del record["TEMPFILE"]
- for key in rec.keys():
- record[key] = rec[key]
- else:
- restart_subp(log, report_stats)
- del record["TEMPFILE"]
- record["Error"] = "Subprocess connection timed out"
-
- except KeyboardInterrupt:
- return 0
-
- return 1
-
-
-def run():
- optlist, args = getopt(sys.argv[1:], "ise")
-
- show_pbar = 1
- report_stats = 1
- only_errors = 0
- db_name = "bookmarks.db"
-
- for _opt, _arg in optlist:
- if _opt == '-i':
- show_pbar = 0
- if _opt == '-s':
- report_stats = 0
- if _opt == '-e':
- only_errors = 1
- try:
- del _opt, _arg
- except NameError:
- pass
-
- if report_stats:
- print "BroytMann check_urls, Copyright (C) 1997-1999 PhiloSoft Design"
-
- if args:
- if len(args) > 1:
- sys.stderr.write("chk_urls: too many arguments\n")
- sys.exit(1)
- else:
- db_name = args[0]
-
- if show_pbar:
- show_pbar = sys.stderr.isatty()
-
- if show_pbar:
- try:
- from tty_pbar import ttyProgressBar
- except ImportError:
- show_pbar = 0
-
- global db_stat, log
- db_stat = os.stat(db_name)
-
- if only_errors:
- start_recno = 0
- db_name = "check.db"
- log = openlog("check.log")
- log("chk_urls restarted for errors")
- else:
- start_recno, db_name, log = start(db_name, report_stats)
-
- if report_stats:
- sys.stdout.write("Loading %s: " % db_name)
- sys.stdout.flush()
-
- bookmarks_db = fladm.load_from_file(db_name, fladm.check_record, ["Level"])
- bookmarks_dbstore = bookmarks_db
-
- if only_errors:
- bookmarks_db = filter(lambda r: r.has_key("Error") and r["Error"][:5] <> "Moved", bookmarks_db)
-
- if report_stats:
- print "Ok"
-
- db_len = len(bookmarks_db)
- if db_len == 0:
- print "Database empty"
- sys.exit(0)
-
- if start_recno >= db_len:
- _s = "start_recno (%d) >= db_len (%d), restarting" % (start_recno, db_len)
- log(" " + _s)
- if report_stats:
- print " chk_urls: " + _s
- del _s
- start_recno = 0
-
- if report_stats:
- if only_errors:
- s = "Rechecking errors: "
- else:
- s = "Checking: "
- sys.stdout.write(s)
- sys.stdout.flush()
-
- if show_pbar:
- save_stats = report_stats
- report_stats = 0
- pbar = ttyProgressBar(0, db_len)
-
- urls_no = 0
- record_count = 0
- start_time = time.time()
-
- rcode = 1
- restart_subp(log, report_stats) # Not restart, just start afresh
- checked_dict = {} # Dictionary of checked URLs, mapped to records number
-
- for record_no in range(start_recno, db_len):
- if show_pbar:
- pbar.display(record_no+1)
-
- record = bookmarks_db[record_no]
- record_count = record_count + 1
-
- if only_errors:
- del record["Error"]
-
- if record.has_key("URL"):
- url = record["URL"]
- if checked_dict.has_key(url):
- log("Already checked %s" % url)
- level = record["Level"]
- comment = record["Comment"]
- bookmarks_db[record_no] = _copy_dict(bookmarks_db[checked_dict[url]])
- bookmarks_db[record_no]["Level"] = level
- bookmarks_db[record_no]["Comment"] = comment
- else:
- log("Checking %s" % url)
- rcode = check_url(record, log, report_stats)
- if rcode:
- current_time = time.time()
- if current_time - start_time >= 300: # Save checkpoint and database every 5 min
- bookmarks_dbstore.store_to_file("check.db")
- set_checkpoint(record_no)
- log.flush()
- start_time = current_time
- urls_no = urls_no + 1
- checked_dict[url] = record_no
- else:
- log("Interrupted by user (^C)")
- break
-
- if show_pbar:
- del pbar
- report_stats = save_stats
-
- if report_stats:
- print "Ok"
- print record_count, "records checked"
- print urls_no, "URLs checked"
-
- bookmarks_dbstore.store_to_file("check.db")
-
- if rcode:
- log("chk_urls finished ok")
- log.close()
-
- urllib.urlcleanup()
- if os.path.exists(tempfname):
- os.unlink(tempfname)
-
- if rcode:
- if os.path.exists("check.dat"):
- os.unlink("check.dat")
- else:
- set_checkpoint(record_no)
- sys.exit(1)
-
-
-if __name__ == '__main__':
- run()
+++ /dev/null
-#! /usr/local/bin/python -O
-"""
- For every URL in the FLAD database get info from the Net
- and store info in check.db
-
- Written by BroytMann, Aug-Oct 1997. Copyright (C) 1997 PhiloSoft Design
-"""
-
-import sys, os, string, stat, shutil, time
-from getopt import getopt
-import tempfile
-
-import urllib
-from urllib import URLopener, splittype
-
-from md5wrapper import md5wrapper
-from flog import makelog, openlog
-import fladm, fladc, www_util
-
-
-# Shortcut for basic usage
-_urlopener = None
-
-def urlopen(url):
- global _urlopener
- if not _urlopener:
- _urlopener = URLopener()
- return _urlopener.open(url)
-
-def urlretrieve(url, filename=None):
- global _urlopener
- if not _urlopener:
- _urlopener = URLopener()
- if filename:
- return _urlopener.retrieve(url, filename)
- else:
- return _urlopener.retrieve(url)
-
-def urlcleanup():
- if _urlopener:
- _urlopener.cleanup()
-
-
-_key = None
-
-def myftpwrapper(user, passwd, host, port, dirs):
- global _key
- _key = (user, host, port, string.joinfields(dirs, '/'))
- return _ftpwrapper(user, passwd, host, port, dirs)
-
-_ftpwrapper = urllib.ftpwrapper
-urllib.ftpwrapper = myftpwrapper
-
-def get_welcome():
- global _key
- _welcome = _urlopener.ftpcache[_key].ftp.welcome
- _key = None # I am assuming there are no duplicate ftp URLs in db. If there are - _key in prev line is invalid
- return _welcome
-
-
-def set_checkpoint(rec_no):
- cpfile = open("check.dat", 'w')
- cpfile.write("# chk_urls checkpoint file\n")
- cpfile.write("Size: %d\n" % db_stat[stat.ST_SIZE])
- cpfile.write("MTime: %d\n" % db_stat[stat.ST_MTIME])
- cpfile.write("Record: %d" % rec_no)
- cpfile.close()
-
-def get_checkpoint():
- try:
- cpfile = fladc.load_file("check.dat")
- if (string.atoi(cpfile["Size"]) <> db_stat[stat.ST_SIZE]) or \
- (string.atoi(cpfile["MTime"]) <> db_stat[stat.ST_MTIME]):
- return -3
-
- return string.atoi(cpfile["Record"])
-
- except IOError: # No such file
- return -1
-
- except KeyError: # No such key in checkpoint file
- return -2
-
- except string.atoi_error: # Wrong numeric format
- return -2
-
- return 0
-
-
-tempfname = tempfile.gettempprefix() + "check.tmp"
-
-
-def get_error(msg):
- if type(msg) == type(""):
- return msg
-
- else:
- s = ""
- for i in msg:
- if s <> "":
- s = s + ", "
- x = string.join(string.split(str(i), "\n"), "\\n")
- s = s + "'%s'" % x
- return "(" + s + ")"
-
-def check_url(record, url_type, url_rest):
-
- now = str(int(time.time()))
-
- try:
- fname, headers = urlretrieve(url_type + ':' + url_rest, tempfname)
-
- last_modified = None
-
- record["Size"] = str(os.stat(tempfname)[stat.ST_SIZE])
-
- if headers:
- try:
- last_modified = headers["Last-Modified"]
- except KeyError:
- last_modified = None
-
- if last_modified:
- last_modified = www_util.parse_time(last_modified)
-
- if last_modified:
- last_modified = str(int(last_modified))
- else:
- last_modified = record["LastVisit"]
-
- record["LastModified"] = last_modified
-
- md5 = md5wrapper()
- if url_type == "ftp": # Pass welcome message through MD5
- md5.update(get_welcome())
-
- md5.md5file(tempfname)
- record["MD5"] = str(md5)
-
- except IOError, msg:
- record["Error"] = get_error(msg)
-
- except EOFError:
- record["Error"] = "Unexpected EOF (FTP server closed connection)"
-
- except KeyboardInterrupt:
- return 0
-
- # Mark this even in case of error
- record["LastTested"] = now
-
- return 1
-
-
-def run(): # Entry point: check every URL record, saving check.db and a checkpoint every 5 minutes, and resume from a valid checkpoint
- optlist, args = getopt(sys.argv[1:], "is")
-
- show_pbar = 1
- report_stats = 1
- db_name = "bookmarks.db"
-
- for _opt, _arg in optlist:
- if _opt == '-i':
- show_pbar = 0
- if _opt == '-s':
- report_stats = 0
- try:
- del _opt, _arg
- except NameError: # optlist was empty, so the loop variables were never bound
- pass
-
- if report_stats:
- print "BroytMann chk_urls, Copyright (C) 1997-1998 PhiloSoft Design"
-
- if args:
- sys.stderr.write("chk_urls: too many arguments\n")
- sys.exit(1)
-
- if show_pbar:
- show_pbar = sys.stderr.isatty()
-
- if show_pbar:
- try:
- from tty_pbar import ttyProgressBar
- except ImportError: # Progress bar module is optional
- show_pbar = 0
-
- global db_stat, log
- db_stat = os.stat("bookmarks.db") # Read by get_checkpoint() to detect a changed database
-
- start_recno = get_checkpoint()
- if start_recno < 0:
- if start_recno == -1:
- log = makelog("check.log")
- log("chk_urls started")
- if report_stats:
- print " chk_urls: normal start"
-
- elif start_recno == -2:
- log = openlog("check.log")
- log("chk_urls started")
- log(" invalid checkpoint file, checkpoint ignored")
- if report_stats:
- print " chk_urls: invalid checkpoint file, checkpoint ignored"
-
- elif start_recno == -3:
- log = makelog("check.log")
- log("chk_urls started")
- log(" bookmarks.db changed, checkpoint ignored")
- if report_stats:
- print " chk_urls: bookmarks.db changed, checkpoint ignored"
-
- else:
- raise RuntimeError, "wrong get_checkpoint() return: `%s'" % str(start_recno)
-
- start_recno = 0 # No usable checkpoint: start from the first record
-
- elif start_recno == 0: # NOTE(review): a checkpoint saved at record 0 (e.g. ^C on the first record) would also land here - confirm
- raise RuntimeError, "wrong get_checkpoint() return: `%s'" % str(start_recno)
-
- else: # start_recno > 0
- if os.path.exists("check.db"):
- if not os.path.exists("check.old"):
- shutil.copy("check.db", "check.old")
- db_name = "check.db" # Resume from the partially checked database rather than bookmarks.db
-
- log = openlog("check.log")
- log("chk_urls started")
- log(" found valid checkpoint file, continue")
- if report_stats:
- print " chk_urls: found valid checkpoint file, continue"
-
- else:
- log = makelog("check.log")
- log("chk_urls started")
- log(" valid checkpoint, but no check.db file, restarting")
- if report_stats:
- print " chk_urls: valid checkpoint, but no check.db file, restarting"
- start_recno = 0
-
- if report_stats:
- sys.stdout.write("Loading %s: " % db_name)
- sys.stdout.flush()
-
- bookmarks_db = fladm.load_from_file(db_name, fladm.check_record, ["Level"])
- db_len = len(bookmarks_db)
-
- if report_stats:
- print "Ok"
-
- if start_recno >= db_len:
- _s = "start_recno (%d) >= db_len (%d), restarting" % (start_recno, db_len)
- log(" " + _s)
- if report_stats:
- print " chk_urls: " + _s
- del _s
- start_recno = 0
-
- if report_stats:
- sys.stdout.write("Checking: ")
- sys.stdout.flush()
-
- if show_pbar:
- pbar = ttyProgressBar(0, db_len)
-
- urls_no = 0
- record_count = 0
- start_time = time.time()
-
- rcode = 1 # Stays 1 unless check_url() reports a user interrupt
- for record_no in range(start_recno, db_len):
- if show_pbar:
- pbar.display(record_no+1)
-
- record = bookmarks_db[record_no]
- record_count = record_count + 1
-
- if record.has_key("URL"): # Records without a URL key are counted but not checked
- url_type, url_rest = splittype(record["URL"])
- log("Checking %s:%s" % (url_type, url_rest))
- rcode = check_url(record, url_type, url_rest)
- if rcode:
- current_time = time.time()
- if current_time - start_time >= 300: # Save checkpoint and database every 5 min
- bookmarks_db.store_to_file("check.db")
- set_checkpoint(record_no)
- log.flush()
- start_time = current_time
- urls_no = urls_no + 1
- else:
- log("Interrupted by user (^C)")
- break
-
- if show_pbar:
- del pbar
-
- if report_stats:
- print "Ok"
- print record_count, "records checked"
- print urls_no, "URLs checked"
-
- bookmarks_db.store_to_file("check.db") # Results are saved even after an interrupt
-
- if rcode:
- log("chk_urls finished ok")
- log.close()
-
- urlcleanup()
- if os.path.exists(tempfname):
- os.unlink(tempfname)
-
- if rcode:
- if os.path.exists("check.dat"): # Finished normally: the checkpoint is no longer needed
- os.unlink("check.dat")
- else:
- set_checkpoint(record_no) # Interrupted: remember where to resume
- sys.exit(1)
-
-
-if __name__ == '__main__':
- run()
--- /dev/null
+#! /usr/local/bin/python -O
+"""
+ Convert a bkmk database to a different storage.
+
+ Written by BroytMann, Apr 2000 - Aug 2002. Copyright (C) 2000-2002 PhiloSoft Design
+"""
+
+
+import sys
+
+
+def run():
+ from getopt import getopt
+ optlist, args = getopt(sys.argv[1:], "s")
+
+ report_stats = 1
+
+ for _opt, _arg in optlist:
+ if _opt == '-s':
+ report_stats = 0
+ try:
+ del _opt, _arg
+ except NameError:
+ pass
+
+ if len(args) <> 1:
+ sys.stderr.write("convert_st: too many or too few arguments\n")
+ sys.stderr.write("Usage: convert_st [-s] new_storage\n")
+ sys.exit(1)
+
+ from storage import storage, import_storage
+ storage = storage()
+
+ new_storage = import_storage(args[0])
+ new_storage = new_storage()
+
+ if report_stats:
+ sys.stdout.write("Loading %s: " % storage.filename)
+ sys.stdout.flush()
+
+ root_folder = storage.load()
+
+ if report_stats:
+ print "Ok"
+ sys.stdout.write("Converting to %s: " % new_storage.filename)
+ sys.stdout.flush()
+
+ new_storage.store(root_folder)
+
+ if report_stats:
+ print "Ok"
+
+
+if __name__ == '__main__':
+ run()
+++ /dev/null
-#! /usr/local/bin/python -O
-"""
- Test FLAD database for old records
-
- Written by BroytMann, Feb 2000. Copyright (C) 2000 PhiloSoft Design
-"""
-
-
-import fladm
-
-
-def run(): # Copy every record of bookmarks.db that has an "Error" key into errors.db
- bookmarks_db = fladm.load_from_file("bookmarks.db", fladm.check_record, ["Level"])
- errors = fladm.Flad_WithMustKeys(fladm.check_record, ["Level"]) # Empty container built with the same checker and key list as the loaded database
-
- for record in bookmarks_db:
- if record.has_key("Error"):
- errors.append(record)
-
- errors.store_to_file("errors.db")
-
-
-if __name__ == '__main__':
- run()
#! /usr/local/bin/python -O
"""
- Convert FLAD database back to bookmarks.html suitable for Netscape Navigator
+ Convert a bkmk database back to bookmarks.html (or other format defined by writer)
- Written by BroytMann, Jun 1997 - Mar 1999. Copyright (C) 1997-1999 PhiloSoft Design
+ Written by BroytMann, Mar 2000 - Aug 2002. Copyright (C) 2000-2002 PhiloSoft Design
"""
-import sys, os, string, shutil
-from getopt import getopt
-import fladm
-
-
-def write(str): # Write to private.html always, and to public.html only while outside the private folder
- if private_level == 0: # Put in public all except private folder
- public_html.write(str)
- private_html.write(str)
-
-
-def unindent(old_level, new_level): # Emit one closing </DL><p> per level while going from old_level down to new_level
- while old_level > new_level:
- old_level = old_level - 1
- write(" "*old_level + "</DL><p>\n")
-
-
-def gen_html(bookmarks_db, show_pbar, report_stats): # Write bookmarks_db as HTML to public.html and private.html, each started from a copy of the "header" file
- global pbar, record_no, urls_no, public_html, private_html, private_level
-
- shutil.copy("header", "public.html")
- shutil.copy("header", "private.html")
-
- public_html = open("public.html", 'a') # Append after the copied header
- private_html = open("private.html", 'a')
-
- record_no = 0
- urls_no = 0
-
- save_level = 0
- got_folder = 1 # Start as if we already have one folder
- private_level = 0 # 0 means "not inside the private folder"
-
- for record in bookmarks_db:
- record_no = record_no + 1
-
- if show_pbar:
- pbar.display(record_no)
-
- level = string.atoi(record["Level"])
-
- if level == save_level:
- pass
- elif level == save_level + 1:
- if got_folder:
- write(" "*(level - 1) + "<DL><p>\n")
- else:
- raise ValueError, "indent without folder"
- elif level <= save_level - 1: # Going back up may close several levels at once
- unindent(save_level, level)
- else:
- raise ValueError, "new level (%d) too big; must be %d - %d" % (level, save_level-1, save_level+1)
-
- save_level = level
- got_folder = record.has_key("Folder") # Test here to save got_folder for next loop
-
- if private_level == save_level:
- private_level = 0 # We've returned to saved private level - private folder is over
-
- if record.has_key("URL"):
- write(" "*level + '<DT><A HREF="%s" ADD_DATE="%s" LAST_VISIT="%s" LAST_MODIFIED="%s">%s</A>\n' % (record["URL"], record["AddDate"], record["LastVisit"], record["LastModified"], record["Title"]))
- urls_no = urls_no + 1
-
- elif record.has_key("Folder"):
- # Dirty hacks here
- if (record["Folder"] == "Private links") and (private_level == 0):
- private_level = save_level # We found private folder - save its level
-
- if record["Folder"] == "All the rest - Unclassified":
- write(" "*level + '<DT><H3 NEWITEMHEADER ADD_DATE="%s">%s</H3>\n' % (record["AddDate"], record["Folder"]))
- else:
- write(" "*level + '<DT><H3 ADD_DATE="%s">%s</H3>\n' % (record["AddDate"], record["Folder"]))
-
- elif record.has_key("Ruler"):
- write(" "*level + "<HR>\n")
-
- else:
- raise KeyError, "neither \"URL\" nor \"Folder\" nor \"Ruler\" in record " + str(record)
-
- if record.has_key("Comment") and (record["Comment"] <> ''):
- write("<DD>%s\n" % string.join(string.split(record["Comment"], "<BR>"), "<BR>\n")) # Put a line break in the output after every <BR> of the comment
-
-
- if save_level >= 0:
- unindent(save_level, 0) # Close every list still open after the last record
- else:
- raise ValueError, "new level (%d) too little - must be >= 0" % save_level
-
- public_html.close()
- private_html.close()
-
- if show_pbar:
- del pbar
-
- if report_stats:
- print "Ok"
-
-
-def translate(bookmarks_db, transldb_name, transl, show_pbar, report_stats): # Rename the existing output to .<transl>, rewrite URLs through the dictionary, regenerate, and rename the result to .<3 - transl>
- global pbar, record_no, urls_no, public_html, private_html, private_level
-
- new_ext = str(transl)
- os.rename("public.html", "public." + new_ext) # Keep the untranslated files under the numeric extension
- os.rename("private.html", "private." + new_ext)
-
- transl_d = {}
- transl_db = fladm.load_from_file(transldb_name, fladm.check_record, ["URL1", "URL2"], [""])
- # This prevents any other key to appear in transl.db ^
-
- # Generate translation dictionary (hash table)
- if transl == 1:
- for record in transl_db:
- transl_d[record["URL1"]] = record["URL2"]
- elif transl == 2:
- for record in transl_db:
- transl_d[record["URL2"]] = record["URL1"]
- else:
- raise ValueError, "transl (%d) must be 1 or 2" % transl
-
- del transl_db # Save few bytes of memory
- transl_k = transl_d.keys()
-
- # Translate URLs
- for record in bookmarks_db:
- if record.has_key("URL") and (record["URL"] in transl_k):
- record["URL"] = transl_d[record["URL"]] # Records are modified in place
-
- gen_html(bookmarks_db, show_pbar, report_stats)
-
- new_ext = str(3 - transl) # Translate 1 to 2, or 2 to 1
- os.rename("public.html", "public." + new_ext)
- os.rename("private.html", "private." + new_ext)
+import sys
def run():
- global pbar, record_no, urls_no, public_html, private_html, private_level
-
- optlist, args = getopt(sys.argv[1:], "ist:r")
+ from getopt import getopt
+ optlist, args = getopt(sys.argv[1:], "sp:o:t:r")
- show_pbar = 1
report_stats = 1
+ prune = None
+
+ from writers import writer
+ output_filename = writer.filename
- transldb_name = "" # dictionary translation; default is no translation
transl = 0
+ transl_name = "" # dictionary translation; default is no translation
for _opt, _arg in optlist:
- if _opt == '-i':
- show_pbar = 0
if _opt == '-s':
report_stats = 0
+ if _opt == '-p':
+ prune = _arg
+ if _opt == '-o':
+ output_filename = _arg
if _opt == '-t':
- transldb_name = _arg
transl = 1
+ transl_name = _arg
if _opt == '-r':
transl = 2
try:
if args:
sys.stderr.write("db2bkmk: too many arguments\n")
+ sys.stderr.write("Usage: db2bkmk [-s] [-p prune_folder] [-o filename] [-t trans] [-r]\n")
sys.exit(1)
- if show_pbar:
- show_pbar = sys.stderr.isatty()
-
- if show_pbar:
- try:
- from tty_pbar import ttyProgressBar
- except ImportError:
- show_pbar = 0
+ from storage import storage
+ storage = storage()
if report_stats:
- sys.stdout.write("Loading: ")
+ sys.stdout.write("Loading %s: " % storage.filename)
sys.stdout.flush()
- bookmarks_db = fladm.load_from_file("bookmarks.db", fladm.check_record, ["Level"])
+ root_folder = storage.load()
if report_stats:
print "Ok"
- sys.stdout.write("Converting FLAD database to bookmarks.html: ")
+ sys.stdout.write("Writing %s: " % output_filename)
sys.stdout.flush()
- if show_pbar:
- pbar = ttyProgressBar(0, len(bookmarks_db))
-
- gen_html(bookmarks_db, show_pbar, report_stats)
if transl:
- if report_stats:
- sys.stdout.write("Translating: ")
- sys.stdout.flush()
+ new_ext = str(transl)
+ transl_d = {}
+
+ from m_lib.flad import fladm
+ transl_db = fladm.load_from_file(transl_name, fladm.check_record, ["URL1", "URL2"], [""])
+ # This prevents any other key to appear in transl_db ^
+
+ # Generate translation dictionary (hash table)
+ if transl == 1:
+ for record in transl_db:
+ transl_d[record["URL1"]] = record["URL2"]
+ elif transl == 2:
+ for record in transl_db:
+ transl_d[record["URL2"]] = record["URL1"]
+ else:
+ raise ValueError, "transl (%d) must be 1 or 2" % transl
- if report_stats and show_pbar: # Display bar only without "-i";
- # with "-s" skip it (one bar already
- # displayed, and it is enough)
- pbar = ttyProgressBar(0, len(bookmarks_db))
+ del transl_db # Save few bytes of memory
- else:
- show_pbar = 0
+ from bkmk_objects import Walker
+ class Transl(Walker):
+ def __init__(self, transl_d):
+ self.transl_d = transl_d
+
+ def bookmark(self, b, level):
+ href = b.href
+ transl_d = self.transl_d
- translate(bookmarks_db, transldb_name, transl, show_pbar, report_stats)
+ if transl_d.has_key(href):
+ b.href = transl_d[href]
+ root_folder.walk_depth(Transl(transl_d))
+
+
+ outfile = open(output_filename, 'w')
+ root_folder.walk_depth(writer(outfile, prune))
+ outfile.close()
if report_stats:
- print record_no, "records proceed"
- print urls_no, "urls created"
+ print "Ok"
if __name__ == '__main__':
--- /dev/null
+
+ Bookmarks Database and Internet Robot
+
+WHAT IS IT
+ This is a set of classes, libraries, programs and plugins I use to
+manipulate my bookmarks.html. I like Netscape Navigator, but I need more
+features, so I write and maintain these programs for my needs. I need to
+extend Navigator's "What's new" feature (Navigator 4 calls it "Update
+bookmarks").
+
+
+WHAT'S NEW in version 3.3.1
+ New shell scripts in the example area.
+
+
+WHAT'S NEW in version 3.3.0
+ Requires Python 2.2.
+ HTML parser. If the protocol is HTTP, and there is Content-Type header, and
+content type is text/html, the object is parsed to extract its title; if the
+Content-Type header has charset, or if the HTML has <META> with charset, the
+title is converted from the given charset to the default charset. The object is
+also parsed to extract <META> tag with redirect.
+
+
+WHAT'S NEW in version 3.0
+ Complete rewrite from scratch. Created mechanism for pluggable storage
+managers, writers (DB dumpers/exporters) and robots.
+
+
+WHERE TO GET
+ Master site: http://phd.pp.ru/Software/Python/#bookmarks_db
+
+ Faster mirrors: http://phd.by.ru/Software/Python/#bookmarks_db
+ http://phd2.chat.ru/Software/Python/#bookmarks_db
+
+
+AUTHOR
+ Oleg Broytmann <phd@phd.pp.ru>
+
+COPYRIGHT
+ Copyright (C) 1997-2002 PhiloSoft Design
+
+LICENSE
+ GPL
+
+STATUS
+ Storage managers: pickle, FLAD (Flat ASCII Database).
+ Writers: HTML, text, FLAD (full database or only errors).
+ Robots (URL checker): simple, simple+timeoutsocket, forking.
+
+TODO
+ Parse downloaded file and get some additional information out of headers
+ and parsed data - title, for example. Or redirects using <META HTTP-Equiv>.
+ (Partially done - now extracting title).
+
+ Documentation.
+
+ Merge "writers" to storage managers.
+ New storage managers: shelve, SQL, ZODB, MetaKit.
+ Robots (URL checkers): threading, asyncore-based.
+ Aliases in bookmarks.html.
+
+ Configuration file for configuring defaults - global defaults for the system
+ and local defaults for subsystems.
+
+ Ruleset-based mechanisms to filter out what types of URLs to check: checking
+ based on URL schema, host, port, path, filename, extension, etc.
+
+ Detailed reports on robot run - what's old, what's new, what was moved,
+ errors, etc.
+ WWW-interface to the report.
+
+ Bigger database. Multiuser database. Robot should operate on a part of
+ the DB.
+ WWW-interface to the database. User will import/export/edit bookmarks,
+ schedule robot run, etc.
--- /dev/null
+
+ Bookmarks Database and Internet Robot
+
+ Here is a set of classes, libraries, programs and plugins I use to
+manipulate my bookmarks.html. I like Netscape Navigator, but I need more
+features, so I write and maintain these programs for my needs. I need to
+extend Navigator's "What's new" feature (Navigator 4 named it "Update
+bookmarks").
+
+ These programs are intended to run as follows.
+1. bkmk2db converts bookmarks.html to bookmarks.db.
+2. check_urls (Internet robot) runs against bookmarks.db, checks every URL and
+ saves results in check.db.
+3. db2bkmk converts bookmarks.db back to bookmarks.html.
+ Then I use this bookmarks file and...
+4. bkmk2db converts bookmarks.html to bookmarks.db.
+5. check_urls (Internet robot) runs against bookmarks.db, checks every URL and
+ saves results in check.db (old file copied to check.old).
+6. (A yet unnamed program) will compare check.old with check.db and generate
+detailed report. For example:
+ this URL is unchanged
+ this URL is changed
+ this URL is unavailable due to: host not found...
+
+AUTHOR
+ Oleg Broytmann <phd@phd.pp.ru>
+
+COPYRIGHT and LEGAL ISSUES
+ Copyright (C) 1997-2002 PhiloSoft Design
+All sources protected by GNU GPL. Programs are provided "as-is", without
+any kind of warranty. All usual blah-blah-blah.
+
+ #include <disclaimer>
+
+LICENSE
+ GPL
+
+------------------------------ environ ------------------------------
+
+ These programs use the following environment variables:
+
+BKMK_STORAGE - use this storage plugin; default is pickle storage.
+BKMK_WRITER - use this writer plugin; default is HTML writer.
+BKMK_ROBOT - use this robot plugin; default is forking robot.
+
+
+------------------------------ bkmk2db ------------------------------
+ NAME
+ bkmk2db.py - script to convert bookmarks.html to a database.
+
+ SYNOPSIS
+ bkmk2db.py [-is] [/path/to/bookmarks.html]
+
+ DESCRIPTION
+ bkmk2db.py splits given file (or ./bookmarks.html) into a database
+ (using storage plugin).
+
+ Options:
+ -i
+ Inhibit progress bar. Default is to display progress bar if
+ stderr.isatty()
+
+ -s
+ Suppress output of statistics at the end of the program. Default
+ is to write how many lines the program read and how many URLs
+ parsed. Also suppress some messages during run.
+
+ BUGS
+ Aliases are not supported (yet).
+
+
+------------------------------ db2bkmk ------------------------------
+ NAME
+ db2bkmk.py - script to reconstruct bookmarks.html back from a
+ database.
+
+ SYNOPSIS
+ db2bkmk.py [-s] [-p prune] [-o output_file] [-t dict.db [-r]]
+
+ DESCRIPTION
+ db2bkmk.py reads the database and writes it out through the writer
+ plugin (bookmarks.html by default; see the -o option).
+
+ Options:
+ -s
+ Suppress output of statistics at the end of the program. Default is
+ to write how many records the program processed and how many URLs
+ it created. Also suppress some messages during the run.
+
+ -p prune
+ Prune the bookmarks tree when a folder with this name is encountered.
+
+ -o output_file
+ Put output into different file.
+
+ -t dict.db
+ For most tasks, if someone needs to process bookmarks.db in a
+ regular way (for example, replace all "gopher://gopher." with
+ "http://www."), it is easy to write a special program that processes
+ every DB record. But there are cases when someone needs to process
+ bookmarks.db in a non-regular way: one URL must be changed
+ in one way, another URL in a second way, etc. The -t option allows
+ the use of an external dictionary for such translation. The dictionary
+ itself is a FLAD database, where every record has two keys - URL1 and
+ URL2. With the -t option in effect, db2bkmk generates a translated
+ version of bookmarks.html, where every URL1 is replaced with the
+ corresponding URL2 from the translation dictionary. (See koi2win.db
+ for an example of a translation dictionary.)
+
+ -r
+ Reverse the effect of -t option - translate from URL2 to URL1.
+
+
+------------------------------ check_urls -----------------------------
+ NAME
+ check_urls.py - Internet robot
+
+ SYNOPSIS
+ check_urls.py [-ise]
+
+ DESCRIPTION
+ check_urls.py runs a robot plugin against every URL. An additional
+ Error field appears in records that could not be checked for some
+ reason; the reason is the content of the Error field.
+
+ Options:
+ -i
+ Inhibit progress bar. Default is to display progress bar if
+ stderr.isatty()
+
+ -s
+ Suppress output of statistics at the end of the program. Default is
+ to write how many records the program processed and how many URLs
+ it checked. Also suppress some messages during the run.
+
+ -e
+ Check only those URLs that have an "error" mark in the DB.
+
+ BUGS
+ Ugly mechanism to catch welcome message from FTP server (from urllib).
+
+
+------------------------------ convert_st -----------------------------
+ NAME
+ convert_st.py - convert between storages.
+
+ SYNOPSIS
+ convert_st.py [-s] new_format.
+
+ DESCRIPTION
+ convert_st.py converts the database from one format to another.
+
+ Options:
+ -s
+ Suppress output of statistics at the end of the program. Default is
+ to write how many records the program processed and how many URLs
+ it checked. Also suppress some messages during the run.
+
+
+------------------------------ sort_db -----------------------------
+ NAME
+ sort_db.py - sort DB.
+
+ SYNOPSIS
+ sort_db.py [-savmr]
+
+ DESCRIPTION
+ sort_db.py sorts the database according to one of the time
+ fields and dumps a sorted list of bookmarks.
+
+ Options:
+ -s
+ Suppress output of statistics at the end of the program. Default is
+ to write how many records the program processed and how many URLs
+ it checked. Also suppress some messages during the run.
+
+ -a
+ Sort by add_date.
+
+ -v
+ Sort by last_visit.
+
+ -m
+ Sort by last_modified.
+
+ -r
+ Reverse sort.
+
+
+------------------------------ check_dups -----------------------------
+ NAME
+ check_dups.py - check duplicated URLs in the DB.
+
+ SYNOPSIS
+ check_dups.py [-s] [-l logfile]
+
+ DESCRIPTION
+ check_dups.py prints out a list of duplicated URLs (if any).
+
+ Options:
+ -s
+ Suppress output of statistics at the end of the program. Default is
+ to write how many records the program processed and how many URLs
+ it checked. Also suppress some messages during the run.
+
+ -l logfile
+ Save the list of dups in the logfile.
+
+
+------------------------------ bkmk-add -----------------------------
+ NAME
+ bkmk-add - add a bookmark to the DB.
+
+ SYNOPSIS
+ bkmk-add [-s] [-t title] url
+
+ DESCRIPTION
+ bkmk-add adds a bookmark to the DB.
+
+ Options:
+ -s
+ Suppress output of statistics at the end of the program. Default is
+ to write how many records the program processed and how many URLs
+ it checked. Also suppress some messages during the run.
+
+ -t title
+ Force title of the bookmark.
--- /dev/null
+ Parse downloaded file and get some additional information out of headers
+ and parsed data - title, for example. Or redirects using <META HTTP-Equiv>.
+ (Partially done - now extracting title).
+
+ Documentation.
+
+ Merge "writers" to storage managers.
+ New storage managers: shelve, SQL, ZODB, MetaKit.
+ Robots (URL checkers): threading, asyncore-based.
+ Aliases in bookmarks.html.
+
+ Configuration file for configuring defaults - global defaults for the system
+ and local defaults for subsystems.
+
+ Ruleset-based mechanisms to filter out what types of URLs to check: checking
+ based on URL schema, host, port, path, filename, extension, etc.
+
+ Detailed reports on robot run - what's old, what's new, what was moved,
+ errors, etc.
+ WWW-interface to the report.
+
+ Bigger database. Multiuser database. Robot should operate on a part of
+ the DB.
+ WWW-interface to the database. User will import/export/edit bookmarks,
+ schedule robot run, etc.
--- /dev/null
+#!/usr/bin/perl
+
+# hotexplode -- a program for "exploding" a xmosaic hotlist or Netscape
+# bookmark file into a hierarchical multi-page structure.
+# acb 60 Chs 3162
+
+# revision history:
+# v1.0: 1-3-1996: initial version
+
+$date = `date`; # output of date(1), interpolated into $header below
+
+# customise below
+
+# header: some arbitrary HTML text which is appended below the title and
+# above the hotlist data
+
+$header = <<FOO;
+<hr width="50%">
+<blockquote>
+This hotlist was generated with
+<a href="http://www.zikzak.net/~acb/hacks/hotexplode.html">hotexplode</a>
+on $date.
+<p>
+<b>WARNING:</b> The inclusion of a link to a page on
+this hotlist is not an indication of the maintainer's
+approval of or agreement with its content.
+</blockquote>
+<hr width="50%">
+<blockquote>
+Please <b>DO NOT</b> bookmark this page. Bookmark
+<a href="http://phd.pp.ru/Bookmarks/">main page</A> instead.
+Any other page in the hierarchy may disappear at any time.
+</blockquote>
+FOO
+
+$footer = <<FOO;
+<hr>
+FOO
+
+# which directory shall contain the hotlist?
+
+$outdir = "hotlist";
+
+
+# end of customisable portion
+
+require "getopts.pl";
+
+&Getopts("o:t:v"); # -o outdir, -t title, -v verbose; read below as $opt_o, $opt_t, $opt_v
+
+$outdir = $opt_o if $opt_o;
+
+
+# seek forward to the title
+while (<>) {
+ if (/<TITLE>([^\<\>]*)<\/TITLE>/) {
+ $title = $1;
+ last;
+ }
+}
+
+$title = $opt_t if $opt_t; # -t overrides the title found in the input
+
+# seek forward to the start of the list
+
+
+while (<>) { # the first <UL> or <DL> seen decides which parser handles the rest of the input
+ if(/<UL>/) { warn "Detected xmosaic hotlist format\n" if $opt_v;
+ &parse_mosaic_hotlist($outdir, $title); last; }
+ if(/<DL>/) { warn "Detected Netscape bookmark format\n" if $opt_v;
+ &parse_netscape_bookmarks($outdir, $title); last; }
+}
+
+# parse an xmosaic hotlist
+# exit when we meet a </UL>
+# arguments: pathname of directory in which output is to be placed,
+# title
+
+sub parse_mosaic_hotlist {
+    # Convert one <UL> level of an xmosaic hotlist, read from <>, into
+    # $prefix/index.html.  Reading stops at the matching </UL>; every
+    # nested list is written recursively into its own subdirectory.
+    #
+    # arguments: $prefix - directory that receives index.html
+    #            $title  - page title (the parent title plus ":subtitle")
+    #
+    # we write the file at the very end, because (I think) filehandles do
+    # not have local scope, and this is recursive
+    local($prefix, $title) = @_;
+    local($result) = "<HTML><HEAD><TITLE>$title </TITLE></HEAD>\
+<BODY>\n<CENTER><H1>$title </H1></CENTER>\n $header \n<hr>\n<ul>";
+
+    warn "Creating $prefix...\n" if $opt_v;
+
+    # create the directory, if needed
+    mkdir($prefix, 0755) unless -d $prefix;
+
+    while (<>) {
+        last if (/<\/UL>/);
+
+        if(/<LI> *<A HREF=\"([^\"]*)\"[^\>]*>([^\<]*)<\/A>/) {
+            #
+            # A URL
+            #
+            local($url,$name) = ($1, $2);
+            $result = $result."<li><a href=\"$url\">$name </a>\n";
+            next;
+        }
+        if(/<LI> (.*)$/) {
+            #
+            # we've got a live one here...
+            #
+            # the directory name is the subtitle reduced to lower-case
+            # letters and digits
+            local($subtitle)=local($filename)=$1;
+            $filename =~ tr/0-9A-Za-z//cd;
+            $filename =~ tr/A-Z/a-z/;
+            <>; # eat the "<UL>" line.
+            $result .= "<li><b><a href=\"${filename}/index.html\">${subtitle}</a></b>\n";
+            &parse_mosaic_hotlist("${prefix}/${filename}", "${title}:${subtitle}");
+            next;
+        }
+
+    }
+
+    $result = $result . $footer . "</body></html>";
+    # write it to a file; report a failure instead of silently losing
+    # the page (for example when the mkdir above did not succeed)
+    unless (open(FILE, ">${prefix}/index.html")) {
+        warn "hotexplode: cannot write ${prefix}/index.html: $!\n";
+        return;
+    }
+    print FILE $result;
+    close(FILE);
+}
+
+# parse a Netscape bookmarks list
+# exit when we meet a </DL>
+# arguments: pathname of directory in which output is to be placed,
+# subtitle
+
+sub parse_netscape_bookmarks {
+    # Convert one <DL> level of a Netscape bookmarks file, read from <>,
+    # into $prefix/index.html.  Reading stops at the matching </DL>;
+    # every folder (<DT><H3>) is written recursively into its own
+    # subdirectory.
+    #
+    # arguments: $prefix - directory that receives index.html
+    #            $title  - page title (the parent title plus ":subtitle")
+    #
+    # we write the file at the very end, because (I think) filehandles do
+    # not have local scope, and this is recursive
+    local($prefix, $title) = @_;
+    local($result) = "<HTML><HEAD><TITLE>$title </TITLE></HEAD>\
+<BODY>\n<CENTER><H1>$title </H1></CENTER>\n $header \n<hr>\n<dl>";
+
+    warn "Creating $prefix...\n" if $opt_v;
+
+    # create the directory, if needed
+    mkdir($prefix, 0755) unless -d $prefix;
+
+    while (<>) {
+        last if (/<\/DL>/);
+        if (/<DT><H3[^\>]*>([^\<]*)<\/H3>/) {
+            #
+            # a nested list
+            #
+            # the directory name is the subtitle reduced to lower-case
+            # letters and digits
+            local($subtitle)=$1;
+            local($filename)=$1;
+            $filename =~ tr/0-9A-Za-z//cd;
+            $filename =~ tr/A-Z/a-z/;
+            # parse the description here: everything up to the folder's
+            # own <DL> line is copied after the folder link
+            local($desc)="";
+            while(<>) {
+                last if (/<DL>/);
+                $desc = $desc . $_;
+            }
+            $result = $result . "<dt><b><a href=\"${filename}/index.html\">${subtitle}</a></b>\n";
+            unless("$desc" eq "") { $result = $result . $desc; }
+            &parse_netscape_bookmarks("${prefix}/${filename}",
+                                      "${title}:${subtitle}");
+            next;
+        }
+        if (/<DT><A HREF=\"([^\"]*)\"[^\>]*>([^\<]*)<\/A>/) {
+            #
+            # A URL
+            #
+            local($url, $name) = ($1, $2);
+            $result = $result."<dt><a href=\"$url\">$name </a>\n";
+            next;
+        }
+        # anything else (descriptions, rulers, ...) is passed through as is
+        $result = $result . $_;
+    }
+    $result = $result . $footer . "</body></html>";
+    # write it to a file; report a failure instead of silently losing
+    # the page (for example when the mkdir above did not succeed)
+    unless (open(FILE, ">${prefix}/index.html")) {
+        warn "hotexplode: cannot write ${prefix}/index.html: $!\n";
+        return;
+    }
+    print FILE $result;
+    close(FILE);
+}
+
+
-URL1: http://www.xland.ru:8088/tel_koi/owa/tel.intro
-URL2: http://www.xland.ru:8088/tel_win/owa/tel.intro
-
-URL1: http://meteo.infospace.ru/koi/moscow/html/r_index.htm
-URL2: http://meteo.infospace.ru/win/moscow/html/r_index.htm
-
-URL1: http://meteo.infospace.ru/koi/wcond/html/r_index.ssi
-URL2: http://meteo.infospace.ru/win/wcond/html/r_index.ssi
-
-URL1: http://koi.dzik.aha.ru/
-URL2: http://www.dzik.aha.ru/
-
-URL1: http://www-psb.ad-sbras.nsc.ru/kruglk.htm
-URL2: http://www-psb.ad-sbras.nsc.ru/kruglw.htm
--- /dev/null
+#! /bin/sh
+
+dest_dir=$HOME/.mozilla/phd/`ls -1 $HOME/.mozilla/phd`
+sed -e 's/</</g' -e 's/>/>/g' -e 's/&/\&/g' \
+ $dest_dir/bookmarks.html >_tmp.$$ && \
+exec mv _tmp.$$ $dest_dir/bookmarks.html
--- /dev/null
+#! /bin/sh
+
+sed -e 's/</</g' -e 's/>/>/g' -e 's/&/\&/g' \
+ $HOME/.netscape/bookmarks.html >_tmp.$$ && \
+exec mv _tmp.$$ $HOME/.netscape/bookmarks.html
+++ /dev/null
-
- BOOKMARKS database and internet robot
-
- Here is a set of classes, libraries and programs I use to manipulate my
-bookmarks.html. I like Netscape Navigator, but I need more features, so I am
-writing these programs for my needs. I need to extend Navigator's "What's new"
-feature (Navigator 4 named it "Update bookmarks").
-
- These programs are intended to run as follows.
-1. bkmk2db converts bookmarks.html to bookmarks.db.
-2. chk_urls (Internet robot) runs against bookmarks.db, checks every URL and
- saves results in check.db.
-3. db2bkmk converts bookmarks.db back to bookmarks.html.
- Then I use this bookmarks file and...
-4. bkmk2db converts bookmarks.html to bookmarks.db.
-5. chk_urls (Internet robot) runs against bookmarks.db, checks every URL and
- saves results in check.db (old file copied to check.old).
-6. (An yet unnamed program) will compare check.old with check.db and generate
-detailed report. For example:
- this URL is unchanged
- this URL is changed
- this URL is unavailable due to: host not found...
-
- Bookmarks database programs are almost debugged. What need to be done is
-support for aliases. Second version of the internet robot is finished.
-
- Although not required, these programs work fine with tty_pbar.py (my little
-module for creating text-mode progress bars).
-
-COPYRIGHT and LEGAL ISSUES
- All programs copyrighted by Oleg Broytmann and PhiloSoft Design. All
-sources protected by GNU GPL. Programs are provided "as-is", without any kind
-of warranty. All usual blah-blah-blah.
-
- #include <disclaimer>
-
-
------------------------------- bkmk2db ------------------------------
- NAME
- bkmk2db.py - script to convert bookmarks.html to FLAD database.
-
- SYNOPSIS
- bkmk2db.py [-its] [/path/to/bookmarks.html]
-
- DESCRIPTION
- bkmk2db.py splits given file (or ./bookmarks.html) into FLAD database
- bookmarks.db in current directory.
-
- Options:
- -i
- Inhibit progress bar. Default is to display progress bar if
- stderr.isatty()
-
- -t
- Convert to text file (for debugging). Default is to convert to
- FLAD.
-
- -s
- Suppress output of statistics at the end of the program. Default
- is to write how many lines the program read and how many URLs
- parsed. Also suppress some messages during run.
-
- BUGS
- The program starts working by writing lines to header file until
- BookmarksParser initializes its own output file (this occur when
- parser encountered 1st <DL> tag). It is misdesign.
-
- Empty comments (no text after <DD>) are not marked specially in
- database, so db2bkmk.py will not reconstruct it. I don't need empty
- <DD>s, so I consider it as feature, not a real bug.
-
- Aliases are not supported (yet).
-
-
------------------------------- db2bkmk ------------------------------
- NAME
- db2bkmk.py - script to reconstruct bookmarks.html back from FLAD
- database.
-
- SYNOPSIS
- db2bkmk.py [-is] [-t dict.db [-r]]
-
- DESCRIPTION
- db2bkmk.py reads bookmarks.db and creates two HTML files -
- public.html and private.html. The latter is just full
- bookmarks.html, while the former file hides private folder.
-
- Options:
- -i
- Inhibit progress bar. Default is to display progress bar if
- stderr.isatty()
-
- -s
- Suppress output of statistics at the end of the program. Default is
- to write how many records the program proceed and how many URLs
- created. Also suppress some messages during run.
-
- -t dict.db
- For most tasks, if someone needs to process bookmarks.db in a
- regular way (for example, replace all "gopher://gopher." with
- "http://www."), it is easy to write a special program that processes
- every DB record. For some tasks it is even simpler and faster to
- write sed/awk scripts. But there are cases when someone needs to
- process bookmarks.db in a non-regular way: one URL must be changed
- in one way, another URL in a second way, etc. The -t option allows
- the use of an external dictionary for such translation. The dictionary
- itself is again a FLAD database, where every record has two keys -
- URL1 and URL2. With the -t option in effect, db2bkmk generates
- {private,public}.html, renames them to {private,public}.1, and
- then translates the entire bookmarks.db again, generating
- {private,public}.2 (4 files in total), where every URL1 is replaced
- with URL2 from the dictionary. (See koi2win.db for an example of a
- translation dictionary.)
-
- -r
- Reverse the effect of -t option - translate from URL2 to URL1.
-
- BUGS
- There are three hacks under line marked with "Dirty hacks here":
- 1. if record["Folder"] == "Private links":
- This is to hide passwords from my bookmarks file.
-
- 2. if record["Folder"] == "All the rest - Unclassified":
- outfile.write(" "*level + "<DT><H3 NEWITEMHEADER ...")
- First, I compare the folder name with a fixed string. This is a real
- string from my bookmarks.html. Anyone who wants to use the program
- should change at least the strings "Private links" and "All the
- rest - Unclassified". Second, I use the netscapism "NEWITEMHEADER".
- Yes, I wrote these programs for Navigator's bookmarks.html, but I
- still would not like to use too many netscapisms here.
-
-
------------------------------- check_db ------------------------------
- NAME
- check_db.py - script to test generated FLAD database.
-
- SYNOPSIS
- check_db.py [-s] [-l logfile.err]
-
- DESCRIPTION
- check_db.py reads bookmarks.db and tests for various conditions and
- possible errors. Current tests are for duplicated URLs and too big
- an indent. "Indent without folder" or "Indent too big" may occur if
- someone edits bookmarks.db manually, inserting a record with an
- incorrect (higher) level (lower-level indents are ok). Every record is
- tested for correct format (that there are no spare keys and that date
- formats are correct).
-
- Options:
- -l logfile.err
- Put error log into log file (errors are printed to stderr
- anyway).
-
- -s
- Suppress information messages while running (errors are printed
- anyway).
-
-
------------------------------- chk_urls -----------------------------
- NAME
- chk_urls.py - Internet robot
-
- SYNOPSIS
- chk_urls.py [-is]
-
- DESCRIPTION
- chk_urls.py runs against bookmarks.db, checking every URL and storing
- the results in check.db. check.db is a FLAD database almost identical to
- bookmarks.db, with modified LastVisit/LastModified fields. An additional
- field, Error, appears in records that could not be checked for some
- reason; the reason is the content of the Error field.
- After every 100 URLs chk_urls creates a checkpoint file check.dat (in
- set_checkpoint()). The file is a FLAD suitable to pass to
- fladc.load_file() (in get_checkpoint()). If interrupted by ^C, killed
- or crashed, chk_urls can be restarted, and the checkpoint file helps to
- restart from the interrupted state. The checkpoint stores the size and
- mtime of bookmarks.db (to note if the file changed while chk_urls was
- interrupted) and the last checked record. If chk_urls cannot find the
- checkpoint file, or bookmarks.db has changed, chk_urls will restart from
- the beginning. If there is a valid checkpoint and size/mtime are ok,
- chk_urls will start from the interrupted record.
-
- Options:
- -i
- Inhibit progress bar. Default is to display progress bar if
- stderr.isatty()
-
- -s
- Suppress output of statistics at the end of the program. Default is
- to write how many records the program processed and how many URLs
- it checked. Also suppress some messages during the run.
-
- BUGS
- Ugly mechanism to catch welcome message from FTP server (from urllib).
-
-
------------------------------- check_urls2 --------------------------
- NAME
- check_urls2.py - Internet robot
-
- SYNOPSIS
- check_urls2.py [-is]
-
- DESCRIPTION
- check_urls2 is just a second version of chk_urls.py. It forks off a child
- process and the child checks URLs. The parent monitors the child and kills
- it if there is no answer within 15 minutes.
--- /dev/null
+"""
+ Thin wrapper for module Robots. Provides "default" robot
+"""
+
+
+from os import environ
+robot_name = environ.get("BKMK_ROBOT", "forking")
+
+def import_robot(robot_name):
+    """
+    Import Robots.bkmk_r<robot_name> and return its robot_<robot_name> attribute.
+
+    This module takes robot_name from the BKMK_ROBOT environment variable, so
+    the name is resolved with __import__() and getattr() instead of being
+    pasted into an exec statement: it is only ever treated as a module and
+    attribute name, never as code.
+
+    Raises ImportError if the module cannot be imported and AttributeError
+    if the module does not define robot_<robot_name>.
+    """
+    module_name = "bkmk_r%s" % robot_name
+    # A non-empty fromlist makes __import__() return the submodule itself
+    # rather than the top-level Robots package.
+    module = __import__("Robots.%s" % module_name, globals(), locals(),
+                        [module_name])
+    return getattr(module, "robot_%s" % robot_name)
+
+robot = import_robot(robot_name)
--- /dev/null
+#! /usr/local/bin/python -O
+"""
+ Run through the bookmarks database and set name to real title
+
+ Written by BroytMann, Jul 2002 - Aug 2002. Copyright (C) 2002-2002 PhiloSoft Design
+"""
+
+
+import sys
+
+
+def run():
+ # Command-line entry point: rename every bookmark to its real_title
+ # (falling back to its href when real_title is empty) and store the
+ # result if anything changed.
+ # NOTE(review): indentation was lost in this copy of the file; the
+ # comments below describe the nesting implied by the syntax - confirm
+ # against the original source.
+ from getopt import getopt
+ optlist, args = getopt(sys.argv[1:], "s")
+
+ # -s turns off the banner, progress messages and final statistics.
+ report_stats = 1
+
+ for _opt, _arg in optlist:
+ if _opt == '-s':
+ report_stats = 0
+ # The loop variables do not exist when optlist is empty, hence the
+ # NameError guard around the cleanup.
+ try:
+ del _opt, _arg
+ except NameError:
+ pass
+
+ if report_stats:
+ print "BroytMann check_title, Copyright (C) 2002 PhiloSoft Design"
+
+ # Positional arguments are not accepted: print usage and exit with 1.
+ if args:
+ sys.stderr.write("set-real_title: too many arguments\n")
+ sys.stderr.write("Usage: set-real_title [-s]\n")
+ sys.exit(1)
+
+ # The imported name is rebound to an instance of itself.
+ from storage import storage
+ storage = storage()
+
+ if report_stats:
+ sys.stdout.write("Loading %s: " % storage.filename)
+ sys.stdout.flush()
+
+ root_folder = storage.load()
+ from bkmk_objects import make_linear
+ # root_folder.linear is read right after this call - presumably
+ # make_linear() creates it; verify in bkmk_objects.
+ make_linear(root_folder)
+ objects = len(root_folder.linear)
+
+ if report_stats:
+ print "Ok"
+
+
+ # Number of bookmarks whose name was replaced.
+ changed = 0
+ for object_no in range(objects):
+ object = root_folder.linear[object_no]
+
+ if object.isBookmark:
+ # Bookmarks without a real_title attribute are left untouched.
+ if not hasattr(object, "real_title"):
+ continue
+
+ real_title = object.real_title
+ # An empty real_title is replaced by the bookmark's href.
+ if not real_title:
+ real_title = object.href
+ if object.name <> real_title:
+ object.name = real_title
+ changed += 1
+
+
+ if changed and report_stats:
+ sys.stdout.write("Saving %s: " % storage.filename)
+ sys.stdout.flush()
+
+ if not changed and report_stats:
+ sys.stdout.write("No need to save data\n")
+ sys.stdout.flush()
+
+ # Store only when at least one name was changed.
+ if changed:
+ storage.store(root_folder)
+
+ if changed and report_stats:
+ print "Ok"
+ print objects, "objects passed"
+ print changed, "objects changed"
+
+
+if __name__ == '__main__':
+ run()
--- /dev/null
+#! /usr/local/bin/python -O
+"""
+ Sort bookmarks DB according to a rule:
+ -a - by AddDate
+ -v - by LastVisit
+ -m - by LastModified
+ -z - by Size
+ -t - by LastTested
+ default is -m
+ -r - reverse the sort order
+
+ Written by BroytMann, Apr 2000. Copyright (C) 2000 PhiloSoft Design
+"""
+
+
+import sys
+
+
+class SortBy:
+    """
+    Comparison callable that orders objects by one integer-valued attribute.
+
+    The attribute is converted with int(). Objects on which the attribute is
+    missing, None or not convertible to an integer sort after all objects
+    that have a usable value, and compare equal to each other so that the
+    comparison stays consistent in both directions.
+    """
+
+    def __init__(self, sort_by):
+        # Name of the attribute to compare, e.g. "last_modified".
+        self.sort_by = sort_by
+
+    def _key(self, obj):
+        """Return the attribute of obj as an int, or None if unusable."""
+        try:
+            return int(getattr(obj, self.sort_by))
+        except (TypeError, ValueError, AttributeError):
+            # TypeError: value such as None; ValueError: non-numeric string;
+            # AttributeError: the object has no such attribute.
+            return None
+
+    def __call__(self, o1, o2):
+        """Return -1, 0 or 1, like cmp(), for use with list.sort()."""
+        attr1 = self._key(o1)
+        attr2 = self._key(o2)
+
+        if attr1 is None:
+            if attr2 is None:
+                return 0  # neither is sortable: keep them together
+            return 1  # unsortable objects go to the end
+        if attr2 is None:
+            return -1
+
+        # Same result as cmp(attr1, attr2) for integers.
+        return (attr1 > attr2) - (attr1 < attr2)
+
+
+def walk_linear(linear, walker):
+    """
+    Pass every bookmark in a flat sequence of objects to a walker.
+
+    Objects whose isBookmark flag is false are skipped; every other object
+    is handed to walker.bookmark() with 0 as the second argument.
+    """
+    for entry in linear:
+        if not entry.isBookmark:
+            continue
+        walker.bookmark(entry, 0)
+
+
+def run():
+ # Command-line entry point: load the bookmarks, sort the bookmark
+ # objects by the chosen attribute and write them to
+ # "<writer.filename>-sorted_by-<attribute>" (plus "-reverse" with -r).
+ # NOTE(review): indentation was lost in this copy of the file; the
+ # comments below describe the nesting implied by the syntax - confirm
+ # against the original source.
+ from getopt import getopt
+ optlist, args = getopt(sys.argv[1:], "avmztrs")
+
+ # Defaults: sort by last_modified, ascending, with progress messages.
+ sort_by = "last_modified"
+ reverse = 0
+ report_stats = 1
+
+ # When several sort options are given, the last one wins.
+ for _opt, _arg in optlist:
+ if _opt == '-a':
+ sort_by = "add_date"
+ elif _opt == '-v':
+ sort_by = "last_visit"
+ elif _opt == '-m':
+ sort_by = "last_modified"
+ elif _opt == '-z':
+ sort_by = "size"
+ elif _opt == '-t':
+ sort_by = "last_tested"
+ elif _opt == '-r':
+ reverse = 1
+ elif _opt == '-s':
+ report_stats = 0
+ # The loop variables do not exist when optlist is empty, hence the
+ # NameError guard around the cleanup.
+ try:
+ del _opt, _arg
+ except NameError:
+ pass
+
+ # The imported name is rebound to an instance of itself.
+ from storage import storage
+ storage = storage()
+
+ if report_stats:
+ sys.stdout.write("Loading %s: " % storage.filename)
+ sys.stdout.flush()
+
+ root_folder = storage.load()
+
+ if report_stats:
+ print "Ok"
+ sys.stdout.write("Sorting (by %s): " % sort_by)
+ sys.stdout.flush()
+
+ from bkmk_objects import make_linear
+ # root_folder.linear is read right after this call - presumably
+ # make_linear() creates it; verify in bkmk_objects.
+ make_linear(root_folder)
+
+ # linear is the same list object as root_folder.linear, so the
+ # deletion and the in-place sort below modify root_folder.linear too.
+ linear = root_folder.linear
+ del linear[0] # exclude root folder from sorting
+
+ # SortBy instances are comparison callables for list.sort().
+ by = SortBy(sort_by)
+ linear.sort(by)
+
+ from writers import writer
+ output_filename = "%s-sorted_by-%s" % (writer.filename, sort_by)
+
+ # Reverse order is produced by reversing the already sorted list.
+ if reverse:
+ linear.reverse()
+ output_filename = output_filename + "-reverse"
+
+ if report_stats:
+ print "done"
+ sys.stdout.write("Writing %s: " % output_filename)
+ sys.stdout.flush()
+
+ # The writer class name is rebound to an instance bound to outfile.
+ # NOTE(review): outfile is not closed if writing raises.
+ outfile = open(output_filename, 'w')
+ writer = writer(outfile)
+ writer.root_folder(root_folder)
+ walk_linear(linear, writer)
+ outfile.close()
+
+ if report_stats:
+ print "Ok"
+
+
+if __name__ == '__main__':
+ run()
--- /dev/null
+"""
+ Thin wrapper for module Storage. Provides "default" storage
+"""
+
+
+from os import environ
+storage_name = environ.get("BKMK_STORAGE", "pickle")
+
+def import_storage(storage_name):
+    """
+    Import Storage.bkmk_st<storage_name> and return its
+    storage_<storage_name> attribute.
+
+    This module takes storage_name from the BKMK_STORAGE environment
+    variable, so the name is resolved with __import__() and getattr()
+    instead of being pasted into an exec statement: it is only ever treated
+    as a module and attribute name, never as code.
+
+    Raises ImportError if the module cannot be imported and AttributeError
+    if the module does not define storage_<storage_name>.
+    """
+    module_name = "bkmk_st%s" % storage_name
+    # A non-empty fromlist makes __import__() return the submodule itself
+    # rather than the top-level Storage package.
+    module = __import__("Storage.%s" % module_name, globals(), locals(),
+                        [module_name])
+    return getattr(module, "storage_%s" % storage_name)
+
+storage = import_storage(storage_name)
--- /dev/null
+"""
+ Thin wrapper for module Writers. Provides "default" writer
+"""
+
+
+from os import environ
+writer_name = environ.get("BKMK_WRITER", "html")
+
+def import_writer(writer_name):
+    """
+    Import Writers.bkmk_w<writer_name> and return its writer_<writer_name>
+    attribute.
+
+    This module takes writer_name from the BKMK_WRITER environment variable,
+    so the name is resolved with __import__() and getattr() instead of being
+    pasted into an exec statement: it is only ever treated as a module and
+    attribute name, never as code.
+
+    Raises ImportError if the module cannot be imported and AttributeError
+    if the module does not define writer_<writer_name>.
+    """
+    module_name = "bkmk_w%s" % writer_name
+    # A non-empty fromlist makes __import__() return the submodule itself
+    # rather than the top-level Writers package.
+    module = __import__("Writers.%s" % module_name, globals(), locals(),
+                        [module_name])
+    return getattr(module, "writer_%s" % writer_name)
+
+writer = import_writer(writer_name)