From 8d512450564d557d464af87861b0dd252ac67cea Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Thu, 8 Aug 2024 07:31:23 +0300 Subject: [PATCH] Feat(Robots): Remove urllib-based robots --- Robots/bkmk_rforking.py | 103 ----- Robots/bkmk_rforking_sub.py | 51 --- Robots/bkmk_rurllib.py | 139 ------- Robots/bkmk_rurllib2.py | 86 ----- Robots/bkmk_rurllib_py3.py | 165 -------- doc/ANNOUNCE | 31 +- doc/ChangeLog | 6 + robots.py | 4 +- setup.py | 2 +- subproc.py | 735 ------------------------------------ 10 files changed, 11 insertions(+), 1311 deletions(-) delete mode 100644 Robots/bkmk_rforking.py delete mode 100755 Robots/bkmk_rforking_sub.py delete mode 100644 Robots/bkmk_rurllib.py delete mode 100644 Robots/bkmk_rurllib2.py delete mode 100644 Robots/bkmk_rurllib_py3.py delete mode 100755 subproc.py diff --git a/Robots/bkmk_rforking.py b/Robots/bkmk_rforking.py deleted file mode 100644 index ad5b09c..0000000 --- a/Robots/bkmk_rforking.py +++ /dev/null @@ -1,103 +0,0 @@ -"""Forking robot - -This file is a part of Bookmarks database and Internet robot. -""" - -__author__ = "Oleg Broytman " -__copyright__ = "Copyright (C) 2000-2023 PhiloSoft Design" -__license__ = "GNU GPL" - -__all__ = ['robot_forking'] - - -import os -import sys - -try: - import cPickle as pickle -except ImportError: - import pickle - -from subproc import Subprocess, RecordFile -from bkmk_objects import Robot, copy_bkmk - - -# This is to catch 'close failed: [Errno 9] Bad file descriptor' message -# from os.close() in Subprocess.die() and errors from the subprocess. -sys.stderr = open("err.log", 'at') - -check_subp = None -subp_pipe = None - - -def stop_subp(log): - global check_subp, subp_pipe - if check_subp: - if log: log(" restarting hanging subprocess") - del check_subp - del subp_pipe - - -def restart_subp(log): - global check_subp, subp_pipe - stop_subp(log) - - check_subp = Subprocess("%s/Robots/bkmk_rforking_sub.py" - % os.path.dirname(sys.argv[0]), - control_stderr=True) - subp_pipe = RecordFile(check_subp) - - -_set_subproc = True - - -class robot_forking(Robot): - subproc = 'urllib' # Default subprocess - - def check_bookmark(self, bookmark): - global _set_subproc - if _set_subproc: - _set_subproc = False - - subproc = self.subproc - subproc_attrs = [] - for attr in dir(self): - if attr.startswith('subproc_'): - subproc_attrs.append( - (attr[len('subproc_'):], getattr(self, attr)) - ) - if subproc_attrs: - subproc += ':' + ':'.join( - ['='.join((k, v)) for k, v in subproc_attrs] - ) - os.environ['BKMK_ROBOT'] = subproc - - if not check_subp: - restart_subp(self.log) # Not restart, just start afresh - - try: - save_parent = bookmark.parent - bookmark.parent = None - subp_pipe.write_record(pickle.dumps(bookmark)) - - if check_subp.waitForPendingChar(60): # wait a minute - new_b = pickle.loads(subp_pipe.read_record()) - copy_bkmk(new_b, bookmark) - else: - bookmark.error = "Subprocess connection timed out" - restart_subp(self.log) - - bookmark.parent = save_parent - - while True: - error = check_subp.readPendingErrLine() - if not error: - break - sys.stderr.write("(subp) %s" % error) - sys.stderr.flush() - - except KeyboardInterrupt: - return - - def stop(self): - stop_subp(None) # Stop subprocess; do not log restarting diff --git a/Robots/bkmk_rforking_sub.py b/Robots/bkmk_rforking_sub.py deleted file mode 100755 index 76d070d..0000000 --- a/Robots/bkmk_rforking_sub.py +++ /dev/null @@ -1,51 +0,0 @@ -#! 
/usr/bin/env python3 -"""Subprocess for the forking robot - check URL using bkmk_rurlib robot - -This file is a part of Bookmarks database and Internet robot. - -""" - -__author__ = "Oleg Broytman " -__copyright__ = "Copyright (C) 1999-2023 PhiloSoft Design" -__license__ = "GNU GPL" - -__all__ = [] - - -import os -import sys - -try: - import cPickle - pickle = cPickle -except ImportError: - import pickle - -lib_dir = os.path.normpath(os.path.dirname(os.path.dirname(sys.argv[0]))) -sys.path.append(lib_dir) # for bkmk_objects.py - -from subproc import RecordFile # noqa: E402 import not at top of file - - -def run(): - bkmk_in = RecordFile(getattr(sys.stdin, 'buffer', None) or sys.stdin) - bkmk_out = RecordFile(getattr(sys.stdout, 'buffer', None) or sys.stdout) - - from m_lib.flog import openlog - log = openlog("check2.log") - log.outfile.reconfigure(encoding='utf-8') - from robots import robot - robot = robot(log) - - while 1: - bookmark = pickle.loads(bkmk_in.read_record()) - log(bookmark.href) - robot.check_bookmark(bookmark) - bkmk_out.write_record(pickle.dumps(bookmark)) - log.outfile.flush() - - log.close() - - -if __name__ == '__main__': - run() diff --git a/Robots/bkmk_rurllib.py b/Robots/bkmk_rurllib.py deleted file mode 100644 index 2731ad5..0000000 --- a/Robots/bkmk_rurllib.py +++ /dev/null @@ -1,139 +0,0 @@ -"""Simple, strightforward robot based on urllib - -This file is a part of Bookmarks database and Internet robot. - -""" - -__author__ = "Oleg Broytman " -__copyright__ = "Copyright (C) 2000-2024 PhiloSoft Design" -__license__ = "GNU GPL" - -__all__ = ['robot_urllib'] - - -import urllib - -from Robots.bkmk_robot_base import robot_base, request_headers, get_error - - -class RedirectException(Exception): - def __init__(self, errcode, newurl): - Exception.__init__(self) - self.errcode = errcode - self.newurl = newurl - - -class MyURLopener(urllib.URLopener): - # Error 301 -- relocated (permanently) - def http_error_301(self, url, fp, errcode, errmsg, headers, data=None): - if 'location' in headers: - newurl = headers['location'] - elif 'uri' in headers: - newurl = headers['uri'] - else: - newurl = "Nowhere" - raise RedirectException(errcode, newurl) - - # Error 302 -- relocated (temporarily) - http_error_302 = http_error_301 - # Error 303 -- relocated (see other) - http_error_303 = http_error_301 - # Error 307 -- relocated (temporarily) - http_error_307 = http_error_301 - # Error 308 -- relocated (permanently) - http_error_308 = http_error_301 - - # Error 401 -- authentication required - def http_error_401(self, url, fp, errcode, errmsg, headers, data=None): - raise IOError( - ('http error', errcode, "Authentication required ", headers)) - - def http_error_default(self, url, fp, errcode, errmsg, headers): - if fp: - fp.read() - fp.close() - raise IOError(('http error', errcode, errmsg, headers)) - - -def add_headers(opener): - try: - _user_agent = request_headers.pop('User-Agent') - except KeyError: - pass - else: - opener.addheaders[0] = ('User-Agent', _user_agent) - for h, v in request_headers.items(): - opener.addheader(h, v) - - -urllib._urlopener = opener = MyURLopener() -add_headers(opener) - -urllib_ftpwrapper = urllib.ftpwrapper -ftpcache_key = None - - -class myftpwrapper(urllib_ftpwrapper): - def __init__(self, user, passwd, host, port, dirs): - urllib_ftpwrapper.__init__(self, user, passwd, host, port, dirs) - global ftpcache_key - ftpcache_key = (user, host, port, '/'.join(dirs)) - - -urllib.ftpwrapper = myftpwrapper - - -class robot_urllib(robot_base): - def 
version_str(self): - return 'urllib' - - def get(self, bookmark, url, accept_charset=False, use_proxy=False): - if use_proxy: - opener.proxies = {'http': self.proxy, 'https': self.proxy} - else: - opener.proxies = {} - try: - # Set fake referer to the base URL - opener.addheaders[2] = ('Referer', url) - - if accept_charset and bookmark.charset: - opener.addheader('Accept-Charset', bookmark.charset) - try: - fname, headers = urllib.urlretrieve(url) - finally: - if accept_charset and bookmark.charset: - # Remove Accept-Charset - del opener.addheaders[-1] - - infile = open(fname, 'rt') - content = infile.read() - infile.close() - - return None, None, None, headers, content - - except RedirectException as e: - return None, e.errcode, e.newurl, None, None - - except IOError as e: - if (e[0] == "http error") and (e[1] == -1): - error = None - bookmark.no_error = "The server did not return any header - " - "it is not an error, actually" - self.log(' no headers: %s' % bookmark.no_error) - else: - error = get_error(e) - self.log(' Error: %s' % error) - - return error, None, None, None, None - - def get_ftp_welcome(self): - global ftpcache_key - _welcome = opener.ftpcache[ftpcache_key].ftp.welcome - # I am assuming there are no duplicate ftp URLs in db. - # If there are - ftpcache_key in next line is invalid. - ftpcache_key = None - return _welcome - - def finish_check_url(self, bookmark): - robot_base.finish_check_url(self, bookmark) - urllib.urlcleanup() diff --git a/Robots/bkmk_rurllib2.py b/Robots/bkmk_rurllib2.py deleted file mode 100644 index 13d2c3b..0000000 --- a/Robots/bkmk_rurllib2.py +++ /dev/null @@ -1,86 +0,0 @@ -"""Robot based on urllib2 - -This file is a part of Bookmarks database and Internet robot. - -""" - -__author__ = "Oleg Broytman " -__copyright__ = "Copyright (C) 2014-2024 PhiloSoft Design" -__license__ = "GNU GPL" - -__all__ = ['robot_urllib2'] - - -import httplib -import urllib2 -from Robots.bkmk_robot_base import robot_base, request_headers, get_error - - -_fw = None - - -class FTPHandler(urllib2.FTPHandler): - def connect_ftp(self, user, passwd, host, port, dirs, timeout): - global _fw - _fw = urllib2.FTPHandler.connect_ftp(self, user, passwd, host, port, - dirs, timeout) - return _fw - - -opener = urllib2.OpenerDirector() -default_classes = [urllib2.HTTPHandler, urllib2.HTTPDefaultErrorHandler, - FTPHandler, urllib2.HTTPErrorProcessor] -if hasattr(httplib, 'HTTPS'): - default_classes.insert(0, urllib2.HTTPSHandler) -for klass in default_classes: - opener.add_handler(klass()) - -urllib2.install_opener(opener) - - -class robot_urllib2(robot_base): - def version_str(self): - return 'urllib2' - - def get(self, bookmark, url, accept_charset=False, use_proxy=False): - request = urllib2.Request(url) - for h, v in request_headers.items(): - request.add_header(h, v) - if accept_charset and bookmark.charset: - request.add_header('Accept-Charset', bookmark.charset) - - global _fw - _fw = None - - try: - response = urllib2.urlopen(request) - - except urllib2.HTTPError as e: - if e.code in (301, 302, 303, 307, 308): - return None, e.code, e.hdrs['Location'], None, None - else: - self.log(' HTTP Error %s: %s' % (e.code, e.msg)) - return ("HTTP Error %s: %s" % (e.code, e.msg), - e.code, None, None, None) - - except urllib2.URLError as e: - self.log(' URL Error: %s' % e.reason) - return "URL Error: %s" % e.reason, None, None, None, None - - except httplib.HTTPException as e: - error = get_error(e) - self.log(' HTTP Exception: %s' % error) - return "HTTP Exception: %s" % error, None, 
None, None, None - - except IOError as e: - error = get_error(e) - self.log(' I/O Error: %s' % error) - return "I/O Error: %s" % error, None, None, None, None - - else: - return None, None, None, response.info(), response.read() - - def get_ftp_welcome(self): - if _fw is None: - return '' - return _fw.ftp.welcome diff --git a/Robots/bkmk_rurllib_py3.py b/Robots/bkmk_rurllib_py3.py deleted file mode 100644 index 7947046..0000000 --- a/Robots/bkmk_rurllib_py3.py +++ /dev/null @@ -1,165 +0,0 @@ -"""Simple, strightforward robot based on urllib - -This file is a part of Bookmarks database and Internet robot. - -""" - -__author__ = "Oleg Broytman " -__copyright__ = "Copyright (C) 2000-2024 PhiloSoft Design" -__license__ = "GNU GPL" - -__all__ = ['robot_urllib_py3'] - - -import http.client -import socket -import sys -import urllib -import urllib.request - -from Robots.bkmk_robot_base import robot_base, get_error - -# Fake to import 'add_headers' -urllib.URLopener = urllib.request.URLopener -urllib.ftpwrapper = urllib.request.ftpwrapper -from Robots.bkmk_rurllib import add_headers # noqa: E402 import not at top - - -class RedirectException(Exception): - def __init__(self, errcode, newurl): - Exception.__init__(self) - self.errcode = errcode - self.newurl = newurl - - -class MyURLopener(urllib.request.URLopener): - # Error 301 -- relocated (permanently) - def http_error_301(self, url, fp, errcode, errmsg, headers, data=None): - if 'location' in headers: - newurl = headers['location'] - elif 'uri' in headers: - newurl = headers['uri'] - else: - newurl = "Nowhere" - raise RedirectException(errcode, newurl) - - # Error 302 -- relocated (temporarily) - http_error_302 = http_error_301 - # Error 303 -- relocated (see other) - http_error_303 = http_error_301 - # Error 307 -- relocated (temporarily) - http_error_307 = http_error_301 - # Error 308 -- relocated (permanently) - http_error_308 = http_error_301 - - # Error 401 -- authentication required - def http_error_401(self, url, fp, errcode, errmsg, headers, data=None): - raise IOError( - ('http error', errcode, "Authentication required ", headers)) - - def http_error_default(self, url, fp, errcode, errmsg, headers): - if fp: - fp.read() - fp.close() - raise IOError(('http error', errcode, errmsg, headers)) - - def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT): - return urllib.request.URLopener.open(self, fullurl, data) - - -urllib.request._opener = opener = MyURLopener() -add_headers(opener) - -urllib_ftpwrapper = urllib.request.ftpwrapper -ftpcache_key = None - - -class myftpwrapper(urllib_ftpwrapper): - def __init__(self, user, passwd, host, port, dirs): - urllib_ftpwrapper.__init__(self, user, passwd, host, port, dirs) - global ftpcache_key - ftpcache_key = (user, host, port, '/'.join(dirs)) - - -urllib.request.ftpwrapper = myftpwrapper - - -class robot_urllib_py3(robot_base): - def version_str(self): - return 'urllib' - - def get(self, bookmark, url, accept_charset=False, use_proxy=False): - if use_proxy: - opener.proxies = {'http': self.proxy, 'https': self.proxy} - else: - opener.proxies = {} - try: - # Set fake referer to the base URL - opener.addheaders[2] = ('Referer', url) - - if accept_charset and bookmark.charset: - opener.addheader('Accept-Charset', bookmark.charset) - try: - fname, headers = urllib.request.urlretrieve(url) - finally: - if accept_charset and bookmark.charset: - # Remove Accept-Charset - del opener.addheaders[-1] - - possible_encodings = [] - for encoding in ( - bookmark.charset, - 
sys.getfilesystemencoding(), - 'utf-8', - ): - if encoding and encoding not in possible_encodings: - possible_encodings.append(encoding) - content = e = None - infile = open(fname, 'rb') - try: - content = infile.read() - except Exception: - content = None - finally: - infile.close() - - if content is None: - e = str(e) - return ( - 'ERROR: ' + e, - None, None, None, None - ) - return None, None, None, headers, content - - except RedirectException as e: - return None, e.errcode, e.newurl, None, None - - except (OSError, http.client.IncompleteRead) as e: - error = str(e) - self.log(' Error: %s' % error) - return error, None, None, None, None - - except IOError as e: - if (e[0] == "http error") and (e[1] == -1): - error = None - bookmark.no_error = "The server did not return any header - " - "it is not an error, actually" - self.log(' no headers: %s' % bookmark.no_error) - else: - error = get_error(e) - self.log(' Error: %s' % error) - - return error, None, None, None, None - - def get_ftp_welcome(self): - global ftpcache_key - _welcome = opener.ftpcache[ftpcache_key].ftp.welcome - # I am assuming there are no duplicate ftp URLs in db. - # If there are - ftpcache_key in next line is invalid. - ftpcache_key = None - return _welcome - - def finish_check_url(self, bookmark): - robot_base.finish_check_url(self, bookmark) - urllib.request.urlcleanup() - urllib.request._opener = opener diff --git a/doc/ANNOUNCE b/doc/ANNOUNCE index fe8cf61..5948b9c 100644 --- a/doc/ANNOUNCE +++ b/doc/ANNOUNCE @@ -7,36 +7,11 @@ bookmarks.html. WHAT'S NEW -Version 5.5.1 (2024-08-??) +Version 5.6.0 (2024-??-??) - Use aioftp in aiohttp robot. + Removed urllib-based robots. - Do not route ftp requests via http(s) proxy; socks5 proxies are ok. - - Install socks dependency for python-requests. - - Default list of robots is now curl,requests,aiohttp,forking; - aiohttp doesn't properly work with SOCKS5 proxies. - -Version 5.5.0 (2024-08-06) - - Robot based on aiohttp. - - Default list of robots is now aiohttp,curl,requests,forking. - -Version 5.4.1 (2024-08-04) - - Fix(bkmk_rcurl): IDNA-encode URLs. PycURL doesn't encode URLs itself - and requires URLs to be in ASCII encoding. - - Separate connection timeout for PycURL robot. - Also will be used for aiohttp. - -Version 5.4.0 (2024-08-02) - - Robot based on PycURL. - - Try robots from a list. Default list is curl,requests,forking. + Default list of robots is now curl,requests,aiohttp. WHERE TO GET diff --git a/doc/ChangeLog b/doc/ChangeLog index 47013f5..e4a5013 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -1,3 +1,9 @@ +Version 5.6.0 (2024-??-??) + + Removed urllib-based robots. + + Default list of robots is now curl,requests,aiohttp. + Version 5.5.1 (2024-08-??) Use aioftp in aiohttp robot. 
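
The default robot list mentioned in ANNOUNCE and ChangeLog above is resolved in robots.py (next hunk) from the BKMK_ROBOT environment variable. A minimal sketch of that selection logic, under the assumption that parse_params() reduces to a comma split for a plain list of robot names (the helper below is illustrative, not code from this patch):

    # Sketch: how the post-patch default "curl,requests,aiohttp" is chosen.
    # Assumes parse_params() amounts to a comma split for plain name lists.
    import os

    def selected_robot_names():
        spec = os.environ.get("BKMK_ROBOT", "curl,requests,aiohttp")
        # "urllib", "urllib2" and "forking" are no longer valid names
        # after this patch; the corresponding Robots modules were removed.
        return spec.split(",")

    # Usage: BKMK_ROBOT=curl,requests restricts checking to those two robots.
    print(selected_robot_names())
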
diff --git a/robots.py b/robots.py index d509a2f..3a9491d 100644 --- a/robots.py +++ b/robots.py @@ -16,12 +16,10 @@ from os import environ from bkmk_objects import parse_params, set_params robot_names, robot_params = parse_params( - environ.get("BKMK_ROBOT", "curl,requests,aiohttp,forking")) + environ.get("BKMK_ROBOT", "curl,requests,aiohttp")) def import_robot(robot_name): - if (robot_name == 'urllib') and (sys.version_info[0] >= 3): - robot_name = 'urllib_py3' ns = locals() exec("from Robots import bkmk_r%s" % robot_name, globals(), ns) exec("robot = bkmk_r%s.robot_%s" % (robot_name, robot_name), globals(), ns) diff --git a/setup.py b/setup.py index d604383..638e946 100755 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ from setuptools import setup setup( name='bookmarks_db', - version='5.5.0', + version='5.6.0', description='Bookmarks database and Internet robot', long_description=open('README', 'r').read(), long_description_content_type="text/plain", diff --git a/subproc.py b/subproc.py deleted file mode 100755 index 37cb253..0000000 --- a/subproc.py +++ /dev/null @@ -1,735 +0,0 @@ -#! /usr/bin/env python3 - -"""Run a subprocess and communicate with it via stdin, stdout, and stderr. - -Requires that platform supports, eg, posix-style 'os.pipe' and 'os.fork' -routines. - -Subprocess class features: - - - provides non-blocking stdin and stderr reads - - - provides subprocess stop and continue, kill-on-deletion - - - provides detection of subprocess startup failure - - - Subprocess objects have nice, informative string rep (as every good object - ought).""" - -__version__ = "Revision: 2.0 " - -# Id: subproc.py,v 1.15 1998/12/14 20:53:16 klm Exp -# Originally by ken manheimer, ken.manheimer@nist.gov, jan 1995. - -# Prior art: Initially based python code examples demonstrating usage of pipes -# and subprocesses, primarily one by jose pereira. - -# Implementation notes: -# - I'm not using the fcntl module to implement non-blocking file descriptors, -# because i don't know what all in it is portable and what is not. I'm not -# about to provide for different platform contingencies - at that extent, the -# effort would be better spent hacking 'expect' into python. -# - Todo? - Incorporate an error-output handler approach, where error output is -# checked on regular IO, when a handler is defined, and passed to the -# handler (eg for printing) immediately as it shows... -# - Detection of failed subprocess startup is a gross kludge, at present. - -# - new additions (1.3, 1.4): -# - Readbuf, taken from donn cave's iobuf addition, implements non-blocking -# reads based solely on os.read with select, while capitalizing big-time on -# multi-char read chunking. -# - Subproc deletion frees up pipe file descriptors, so they're not exhausted. -# -# ken.manheimer@nist.gov - -# This is a modified version by Oleg Broytman . -# The original version is still preserved at -# https://www.python.org/ftp/python/contrib-09-Dec-1999/System/subproc.tar.gz - -import os -import select -import signal -import sys -import time - - -class SubprocessError(Exception): - pass - - -# You may need to increase execvp_grace_seconds, if you have a large or slow -# path to search: -execvp_grace_seconds = 0.5 - - -class Subprocess: - """Run and communicate asynchronously with a subprocess. - - Provides non-blocking reads in the form of .readPendingChars and - .readPendingLine. - - .readline will block until it gets a complete line. 
- - .peekPendingChar does a non-blocking, non-consuming read for pending - output, and can be used before .readline to check non-destructively for - pending output. .waitForPendingChar(timeout, pollPause=.1) blocks until - a new character is pending, or timeout secs pass, with granularity of - pollPause seconds. - - There are corresponding read and peekPendingErrXXX routines, to read from - the subprocess stderr stream.""" - - pid = 0 - cmd = b'' - expire_noisily = 1 # Announce subproc destruction? - pipefiles = [] - readbuf = 0 # fork will assign to be a readbuf obj - errbuf = 0 # fork will assign to be a readbuf obj - - def __init__(self, cmd, control_stderr=0, expire_noisily=0, - in_fd=0, out_fd=1, err_fd=2): - """Launch a subprocess, given command string COMMAND.""" - self.cmd = cmd - self.pid = 0 - self.expire_noisily = expire_noisily - self.control_stderr = control_stderr - self.in_fd, self.out_fd, self.err_fd = in_fd, out_fd, err_fd - self.fork() - - def fork(self, cmd=None): - """Fork a subprocess with designated COMMAND (default, self.cmd).""" - if cmd: - self.cmd = cmd - cmd = self.cmd.split() - pRc, cWp = os.pipe() # parent-read-child, child-write-parent - cRp, pWc = os.pipe() # child-read-parent, parent-write-child - pRe, cWe = os.pipe() # parent-read-error, child-write-error - self.pipefiles = [pRc, cWp, cRp, pWc, pRe, cWe] - - self.pid = os.fork() - - if self.pid == 0: #### CHILD #### noqa: E262 - # Preserve handle on *parent* stderr - parentErr = os.dup(self.in_fd) - # Reopen stdin, out, err, on pipe ends: - os.dup2(cRp, self.in_fd) # cRp = sys.stdin - os.dup2(cWp, self.out_fd) # cWp = sys.stdout - if self.control_stderr: - os.dup2(cWe, self.err_fd) # cWe = sys.stderr - # Ensure (within reason) stray file descriptors are closed: - excludes = [self.in_fd, self.out_fd, self.err_fd] - for i in range(4, 100): - if i not in excludes: - try: - os.close(i) - except os.error: - pass - - try: - os.execvp(cmd[0], cmd) - os._exit(1) # Shouldn't get here - - except os.error as e: - if self.control_stderr: - os.dup2(parentErr, 2) # Reconnect to parent's stdout - sys.stderr.write("**execvp failed, '%s'**\n" % - str(e)) - os._exit(1) - os._exit(1) # Shouldn't get here. - - else: ### PARENT ### noqa: E262 - # Connect to the child's file descriptors, using our customized - # fdopen: - self.toChild = os.fdopen(pWc, 'wb') - self.toChild_fdlist = [pWc] - self.readbuf = ReadBuf(pRc) - self.errbuf = ReadBuf(pRe) - time.sleep(execvp_grace_seconds) - try: - pid, err = os.waitpid(self.pid, os.WNOHANG) - except os.error as error: - errno, msg = error - if errno == 10: - self.pid = None - raise SubprocessError("Subprocess '%s' failed." % self.cmd) - self.pid = None - raise SubprocessError( - "Subprocess failed[%d]: %s" % (errno, msg)) - if pid == self.pid: - # child exited already - self.pid = None - sig = err & 0xff - rc = (err & 0xff00) >> 8 - if sig: - raise SubprocessError( - "child killed by signal %d with a return code of %d" - % (sig, rc)) - if rc: - raise SubprocessError( - "child exited with return code %d" % rc) - # Child may have exited, but not in error, so we won't say - # anything more at this point. - - ### Write input to subprocess ### noqa: E266 - - def write(self, str): - """Write a STRING to the subprocess.""" - - if not self.pid: - raise SubprocessError("no child") # ===> - if select.select([], self.toChild_fdlist, [], 0)[1]: - self.toChild.write(str) - self.toChild.flush() - else: - # XXX Can write-buffer full be handled better?? 
- raise IOError("write to %s blocked" % self) # ===> - - def writeline(self, line=b''): - """Write STRING, with added newline termination, to subprocess.""" - self.write(line + b'\n') - - ### Get output from subprocess ### noqa: E266 - - def peekPendingChar(self): - """Return, but (effectively) do not consume a single pending output - char, or return null string if none pending.""" - - return self.readbuf.peekPendingChar() # ===> - - def peekPendingErrChar(self): - """Return, but (effectively) do not consume a single pending output - char, or return null string if none pending.""" - - return self.errbuf.peekPendingChar() # ===> - - def waitForPendingChar(self, timeout, pollPause=.1): - """Block max TIMEOUT secs until we peek a pending char, returning the - char, or '' if none encountered. - - Pause POLLPAUSE secs (default .1) between polls.""" - - accume = 0 - while 1: - nextChar = self.readbuf.peekPendingChar() - if nextChar or (accume > timeout): - return nextChar - time.sleep(pollPause) - accume = accume + pollPause - - def read(self, n=None): - """Read N chars, or all pending if no N specified.""" - if n is None: - return self.readPendingChars() - got = b'' - while n: - got0 = self.readPendingChars(n) - got = got + got0 - n = n - len(got0) - return got - - def readPendingChars(self, max=None): - """Read all currently pending subprocess output as a single string.""" - return self.readbuf.readPendingChars(max) - - def readPendingErrChars(self): - """Read all currently pending subprocess error output as a single - string.""" - if self.control_stderr: - return self.errbuf.readPendingChars() - else: - raise SubprocessError("Haven't grabbed subprocess error stream.") - - def readPendingLine(self): - """Read currently pending subprocess output, up to a complete line - (newline inclusive).""" - return self.readbuf.readPendingLine() - - def readPendingErrLine(self): - """Read currently pending subprocess error output, up to a complete - line (newline inclusive).""" - if self.control_stderr: - return self.errbuf.readPendingLine() - else: - raise SubprocessError("Haven't grabbed subprocess error stream.") - - def readline(self): - """Return next complete line of subprocess output, blocking until - then.""" - return self.readbuf.readline() - - def readlineErr(self): - """Return next complete line of subprocess error output, blocking until - then.""" - if self.control_stderr: - return self.errbuf.readline() - else: - raise SubprocessError("Haven't grabbed subprocess error stream.") - - ### Subprocess Control ### noqa: E266 - - def active(self): - """True if subprocess is alive and kicking.""" - return self.status(boolean=1) - - def status(self, boolean=0): - """Return string indicating whether process is alive or dead.""" - active = 0 - if not self.cmd: - status = 'sans command' - elif not self.pid: - status = 'sans process' - elif not self.cont(): - status = "(unresponding) '%s'" % self.cmd - else: - status = "'%s'" % self.cmd - active = 1 - if boolean: - return active - else: - return status - - def stop(self, verbose=1): - """Signal subprocess with STOP (17), returning 'stopped' if ok, or 0 - otherwise.""" - try: - os.kill(self.pid, signal.SIGSTOP) - except os.error: - if verbose: - print( - "Stop failed for '%s' - '%s'" % (self.cmd, sys.exc_value)) - return 0 - if verbose: - print("Stopped '%s'" % self.cmd) - return 'stopped' - - def cont(self, verbose=0): - """Signal subprocess with CONT (19), returning 'continued' if ok, or 0 - otherwise.""" - try: - os.kill(self.pid, signal.SIGCONT) - except 
os.error: - if verbose: - print(("Continue failed for '%s' - '%s'" % - (self.cmd, sys.exc_value))) - return 0 - if verbose: - print("Continued '%s'" % self.cmd) - return 'continued' - - def die(self): - """Send process PID signal SIG (default 9, 'kill'), returning None once - it is successfully reaped. - - SubprocessError is raised if process is not successfully killed.""" - - if not self.pid: - raise SubprocessError("No process") # ===> - elif not self.cont(): - raise SubprocessError("Can't signal subproc %s" % self) # ===> - - # Try sending first a TERM and then a KILL signal. - sigs = [('TERMinated', signal.SIGTERM), ('KILLed', signal.SIGKILL)] - for sig in sigs: - try: - os.kill(self.pid, sig[1]) - except OSError: - # keep trying - pass - # Try a couple or three times to reap the process with waitpid: - for i in range(5): - # WNOHANG == 1 on sunos, presumably same elsewhere. - if os.waitpid(self.pid, os.WNOHANG): - if self.expire_noisily: - print(("\n(%s subproc %d '%s' / %s)" % - (sig[0], self.pid, self.cmd, - hex(id(self))[2:]))) - for i in self.pipefiles: - try: - os.fdopen(i).close() - except OSError: - pass - del self.pipefiles[:] - self.pid = 0 - return None # ===> - time.sleep(.1) - # Only got here if subprocess is not gone: - raise SubprocessError( - "Failed kill of subproc %d, '%s', with signals %s" % - (self.pid, self.cmd, map(lambda x: x[0], sigs))) - - def __del__(self): - """Terminate the subprocess""" - if self.pid: - self.die() - - def __repr__(self): - status = self.status() - return '' - - -############################################################################## -##### Non-blocking read operations ##### noqa: E266 -############################################################################## - - -class ReadBuf: - """Output buffer for non-blocking reads on selectable files like pipes and - sockets. Init with a file descriptor for the file.""" - - def __init__(self, fd, maxChunkSize=1024): - """Encapsulate file descriptor FD, with optional MAX_READ_CHUNK_SIZE - (default 1024).""" - - if fd < 0: - raise ValueError - self.fd = fd - self.eof = 0 # May be set with stuff still in .buf - self.buf = b'' - self.chunkSize = maxChunkSize # Biggest read chunk, default 1024. - - def fileno(self): - return self.fd - - def peekPendingChar(self): - """Return, but don't consume, first character of unconsumed output from - file, or empty string if none.""" - - if self.buf: - return self.buf[0] # ===> - - if self.eof: - return b'' # ===> - - sel = select.select([self.fd], [], [self.fd], 0) - if sel[2]: - self.eof = 1 - if sel[0]: - self.buf = os.read(self.fd, self.chunkSize) # ===> - return self.buf[0] # Assume select don't lie. 
- else: - return b'' # ===> - - def readPendingChar(self): - """Consume first character of unconsumed output from file, or empty - string if none.""" - - if self.buf: - got, self.buf = self.buf[0], self.buf[1:] - return got # ===> - - if self.eof: - return b'' # ===> - - sel = select.select([self.fd], [], [self.fd], 0) - if sel[2]: - self.eof = 1 - if sel[0]: - return os.read(self.fd, 1) # ===> - else: - return b'' # ===> - - def readPendingChars(self, max=None): - """Consume uncomsumed output from FILE, or empty string if nothing - pending.""" - - got = b"" - if self.buf: - if max and (len(self.buf) > max): - got = self.buf[0:max] - self.buf = self.buf[max:] - else: - got, self.buf = self.buf, b'' - return got - - if self.eof: - return b'' - - sel = select.select([self.fd], [], [self.fd], 0) - if sel[2]: - self.eof = 1 - if sel[0]: - got = got + os.read(self.fd, self.chunkSize) - if max == 0: - self.buf = got - return b'' - elif max is None: - return got - elif len(got) > max: - self.buf = self.buf + got[max:] - return got[:max] - else: - return got - else: - return b'' - - def readPendingLine(self, block=0): - """Return pending output from FILE, up to first newline (inclusive). - - Does not block unless optional arg BLOCK is true. - - Note that an error will be raised if a new eof is encountered without - any newline.""" - - if self.buf: - to = self.buf.find(b'\n') - if to != -1: - got, self.buf = self.buf[:to+1], self.buf[to+1:] - return got # ===> - got, self.buf = self.buf, b'' - else: - if self.eof: - return b'' # ===> - got = b'' - - # Herein, 'got' contains the (former) contents of the buffer, and it - # doesn't include a newline. - fdlist = [self.fd] - period = block and 1.0 or 0 # don't be too busy while waiting - while 1: # (we'll only loop if block set) - sel = select.select(fdlist, [], fdlist, period) - if sel[2]: - self.eof = 1 - if sel[0]: - got = got + os.read(self.fd, self.chunkSize) - - to = got.find(b'\n') - if to != -1: - got, self.buf = got[:to+1], got[to+1:] - return got # ===> - if not block: - return got # ===> - if self.eof: - self.buf = b'' # this is how an ordinary file acts... - return got - # otherwise - no newline, blocking requested, no eof - loop. # ==^ - - def readline(self): - """Return next output line from file, blocking until it is received.""" - - return self.readPendingLine(1) # ===> - - -############################################################################# -##### Encapsulated reading and writing ##### noqa: E266 -############################################################################# -# Encapsulate messages so the end can be unambiguously identified, even -# when they contain multiple, possibly empty lines. - - -class RecordFile: - """Encapsulate stream object for record-oriented IO. - - Particularly useful when dealing with non-line oriented communications - over pipes, eg with subprocesses.""" - - # Message is written preceded by a line containing the message length. - - def __init__(self, f): - self.file = f - - def write_record(self, s): - "Write so self.read knows exactly how much to read." - f = self.__dict__['file'] - f.write(b"%d\n%s" % (len(s), s)) - if hasattr(f, 'flush'): - f.flush() - - def read_record(self): - "Read and reconstruct message as prepared by self.write." - f = self.__dict__['file'] - line = f.readline()[:-1] - if line: - try: - _l = int(line) - except ValueError: - raise IOError(("corrupt %s file structure" - % self.__class__.__name__)) - return f.read(_l) - else: - # EOF. 
- return b'' - - def __getattr__(self, attr): - """Implement characteristic file object attributes.""" - f = self.__dict__['file'] - if hasattr(f, attr): - return getattr(f, attr) - else: - raise AttributeError(attr) - - def __repr__(self): - return "<%s of %s at %s>" % (self.__class__.__name__, - self.__dict__['file'], - hex(id(self))[2:]) - - -def record_trial(s): - """Exercise encapsulated write/read with an arbitrary string. - - Raise IOError if the string gets distorted through transmission!""" - from StringIO import StringIO - sf = StringIO() - c = RecordFile(sf) - c.write(s) - c.seek(0) - r = c.read() - show = " start:\t %s\n end:\t %s\n" % (repr(s), repr(r)) - if r != s: - raise IOError("String distorted:\n%s" % show) - - -############################################################################# -##### An example subprocess interfaces ##### noqa: E266 -############################################################################# - - -class Ph: - """Convenient interface to CCSO 'ph' nameserver subprocess. - - .query('string...') method takes a query and returns a list of dicts, each - of which represents one entry.""" - - # Note that i made this a class that handles a subprocess object, rather - # than one that inherits from it. I didn't see any functional - # disadvantages, and didn't think that full support of the entire - # Subprocess functionality was in any way suitable for interaction with - # this specialized interface. ? klm 13-Jan-1995 - - def __init__(self): - try: - self.proc = Subprocess('ph', 1) - except: - raise SubprocessError('failure starting ph: %s' % # ===> - str(sys.exc_value)) - - def query(self, q): - """Send a query and return a list of dicts for responses. - - Raise a ValueError if ph responds with an error.""" - - self.clear() - - self.proc.writeline(b'query ' + q) - got = [] - it = {} - while 1: - response = self.getreply() # Should get null on new prompt. - errs = self.proc.readPendingErrChars() - if errs: - sys.stderr.write(errs) - if it: - got.append(it) - it = {} - if not response: - return got # ===> - elif isinstance(response, str): - raise ValueError("ph failed match: '%s'" % response) # ===> - for line in response: - # convert to a dict: - line = line.split(b':') - it[line.strip([0])] = (b''.join(line[1:])).strip() - - def getreply(self): - """Consume next response from ph, returning list of lines or string - err.""" - # Key on first char: (First line may lack newline.) 
- # - dash discard line - # - 'ph> ' conclusion of response - # - number error message - # - whitespace beginning of next response - - nextChar = self.proc.waitForPendingChar(60) - if not nextChar: - raise SubprocessError('ph subprocess not responding') # ===> - elif nextChar == b'-': - # dashed line - discard it, and continue reading: - self.proc.readline() - return self.getreply() # ===> - elif nextChar == b'p': - # 'ph> ' prompt - don't think we should hit this, but what the hay: - return '' # ===> - elif nextChar in b'0123456789': - # Error notice - we're currently assuming single line errors: - return self.proc.readline()[:-1] # ===> - elif nextChar in b' \t': - # Get content, up to next dashed line: - got = [] - while nextChar != b'-' and nextChar != b'': - got.append(self.proc.readline()[:-1]) - nextChar = self.proc.peekPendingChar() - return got - - def __repr__(self): - return "\n" % (self.proc.status(), - hex(id(self))[2:]) - - def clear(self): - """Clear-out initial preface or residual subproc input and output.""" - pause = .5 - maxIter = 10 # 5 seconds to clear - iterations = 0 - got = b'' - self.proc.write(b'') - while iterations < maxIter: - got = got + self.proc.readPendingChars() - # Strip out all but the last incomplete line: - got = got.split(b'\n')[-1] - if got == b'ph> ': - return # Ok. ===> - time.sleep(pause) - raise SubprocessError('ph not responding within %s secs' % - pause * maxIter) - - -############################################################################## -##### Test ##### noqa: E266 -############################################################################## - - -def test(p=0): - print("\tOpening bogus subprocess, should fail:") - try: - Subprocess('/', 1) - print("\tOops! Null-named subprocess startup *succeeded*?!?") - except SubprocessError: - print("\t...yep, it failed.") - print("\tOpening cat subprocess:") - p = Subprocess('cat', 1) # set to expire noisily... - print(p) - print('\tWrite, then read, two newline-teriminated lines, using readline:') - p.write(b'first full line written\n') - p.write(b'second.\n') - print(repr(p.readline())) - print(repr(p.readline())) - print('\tThree lines, last sans newline, read using combination:') - p.write(b'first\n') - p.write(b'second\n') - p.write(b'third, (no cr)') - print('\tFirst line via readline:') - print(repr(p.readline())) - print('\tRest via readPendingChars:') - print(p.readPendingChars()) - print("\tStopping then continuing subprocess (verbose):") - if not p.stop(1): # verbose stop - print('\t** Stop seems to have failed!') - else: - print('\tWriting line while subprocess is paused...') - p.write(b'written while subprocess paused\n') - print('\tNonblocking read of paused subprocess (should be empty):') - print(p.readPendingChars()) - print('\tContinuing subprocess (verbose):') - if not p.cont(1): # verbose continue - print( - '\t** Continue seems to have failed! Probly lost subproc...') - return p - else: - print('\tReading accumulated line, blocking read:') - print(p.readline()) - print("\tDeleting subproc, which was set to die noisily:") - del p - print("\tDone.") - return None - - -if __name__ == '__main__': - test() -- 2.39.5
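
For reference, the removed forking robot exchanged pickled bookmarks with its subprocess through the length-prefixed record framing implemented by RecordFile in the deleted subproc.py: each record is a decimal length, a newline, then exactly that many payload bytes. A self-contained sketch of that framing (stream handling is simplified and the function names are illustrative, not the removed API):

    # Length-prefixed framing as in the removed RecordFile:
    # the write side emitted b"%d\n%s" % (len(s), s).
    import io

    def write_record(stream, payload):
        stream.write(b"%d\n" % len(payload))   # length line first
        stream.write(payload)                  # then the raw payload bytes
        stream.flush()

    def read_record(stream):
        line = stream.readline().rstrip(b"\n")
        if not line:
            return b""                         # EOF, as in the removed code
        return stream.read(int(line))          # exactly "length" bytes

    # Round trip over an in-memory stream:
    buf = io.BytesIO()
    write_record(buf, b"pickled bookmark")
    buf.seek(0)
    assert read_record(buf) == b"pickled bookmark"
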