2 """Recursive FTP scanners"""
5 from __future__ import print_function
9 from .ftpparse import ftpparse
class FtpScanError(Exception):
    """Raised when an FTP scan cannot be carried out as requested.

    For example, the recursive scanner raises it when the server does not
    implement recursive listing or ignores ``-R`` in ``LIST``.
    """


# Sentinel placed in the first slot of a tree entry to flag that the second
# slot holds an error message rather than the files of a directory.
ftpscan_error_mark = object()
20 def __call__(self, line):
21 entry = ftpparse(line)
23 self.entries.append(entry)
25 def filter(self, file_type):
26 return filter(lambda e, file_type=file_type: e.file_type == file_type,
30 return self.filter('f')
def directories(self):
    """Return the directory entries, leaving out the "." and ".." names.

    The candidates come from ``self.filter('d')``. The result is whatever
    the built-in ``filter`` returns, so on Python 3 it is a single-pass
    iterator rather than a list.
    """
    def is_real_subdirectory(entry):
        # "." and ".." are skipped; every other name is kept.
        return entry.name not in (".", "..")

    subdirectories = self.filter('d')
    return filter(is_real_subdirectory, subdirectories)
36 class ReconnectingFTPCallWrapper:
37 retries = 10 # retries per function call
39 def __init__(self, wrapper, func):
40 self.wrapper = wrapper
43 def __call__(self, *params, **kw):
44 wrapper = self.wrapper
47 for retry in range(self.retries):
49 return func(*params, **kw)
53 ftp_dir = wrapper._ftp_dir
54 wrapper._tree.append((ftpscan_error_mark, "Connection reset by peer at directory `%s'. Reconnecting..." % ftp_dir))
59 ftp.connect(wrapper._ftp_server, wrapper._ftp_port)
60 ftp.login(wrapper._login, wrapper._password)
63 class ReconnectingFTPWrapper:
64 ReconnectingFTPCallWrapperClass = ReconnectingFTPCallWrapper
66 def __init__(self, ftp, ftp_server, ftp_port=0, login=None, password=None, ftp_dir='/', tree=None):
68 self._ftp_server = ftp_server
69 self._ftp_port = ftp_port
71 self._password = password
72 ftp_dir = [''] + [name for name in ftp_dir.split('/') if name] # remove double slashes //
73 self._ftp_dir = '/'.join(ftp_dir)
76 def cwd(self, new_cwd, do_ftp=True):
77 ftp_dir = self._ftp_dir.split('/')
81 ftp_dir.append(new_cwd)
82 self._ftp_dir = '/'.join(ftp_dir)
83 if do_ftp: self._wrap(self._ftp.cwd)(new_cwd)
85 def __getattr__(self, attr):
86 value = getattr(self._ftp, attr)
88 return self._wrap(value)
91 def _wrap(self, func):
92 return self.ReconnectingFTPCallWrapperClass(self, func)
95 def _traverse_ftp(ftp, tree, ftp_dir):
96 get_files = GetFiles()
99 except ftplib.all_errors as msg:
100 tree.append((ftpscan_error_mark, "Cannot list directory `%s': %s" % (ftp_dir, msg)))
102 files = get_files.files()
103 directories = get_files.directories()
105 if ftp_dir and ftp_dir[-1] == '/':
106 ftp_dir = ftp_dir[:-1] # Prevent paths to contain double slashes //
108 tree.append((ftp_dir, files))
110 for d in directories:
112 full_path = ftp_dir + '/' + name
115 except ftplib.error_perm as msg:
116 tree.append((ftpscan_error_mark, "Cannot enter directory `%s': %s" % (full_path, msg)))
117 if isinstance(ftp, ReconnectingFTPWrapper):
119 except ftplib.all_errors as msg:
120 tree.append((ftpscan_error_mark, "Cannot enter directory `%s': %s" % (full_path, msg)))
122 _traverse_ftp(ftp, tree, full_path)
126 def ftpscan1(ftp_server, ftp_port=0, login=None, password=None,
127 ftp_dir='/', passive=None, FTPClass=ftplib.FTP, reconnect=False,
128 ReconnectingFTPWrapperClass=ReconnectingFTPWrapper):
129 """Recursive FTP scan using one-by-one directory traversing. It is slow
130 but robust - it works with all but very broken FTP servers.
134 if passive is not None:
135 ftp.set_pasv(passive)
137 ftp = ReconnectingFTPWrapperClass(ftp, ftp_server, ftp_port, login, password, ftp_dir, tree)
138 ftp.connect(ftp_server, ftp_port)
139 ftp.login(login, password)
143 _traverse_ftp(ftp, tree, ftp_dir)
149 def ftpscanrecursive(ftp_server, ftp_port=0, login=None, password=None,
150 ftp_dir='/', passive=None, FTPClass=ftplib.FTP, reconnect=False):
152 Recursive FTP scan using fast LIST -R command. Not all servers supports
156 if passive is not None:
157 ftp.set_pasv(passive)
158 ftp.connect(ftp_server, ftp_port)
159 ftp.login(login, password)
165 ftp.dir("-R", lines.append)
166 except ftplib.error_perm:
167 # The server does not implement LIST -R and
168 # treats -R as a name of a directory (-:
170 raise FtpScanError("the server does not implement recursive listing")
174 current_dir = ftp_dir
179 if line[-1] == ':' and not line.startswith("-rw-"): # directory
180 tree.append((current_dir, files))
182 line = line[1:] # remove leading dot
185 current_dir = line[:-1]
188 if not line.startswith("total "):
189 entry = ftpparse(line)
191 if entry.file_type == 'f':
194 tree.append((ftpscan_error_mark, "Unrecognised line: `%s'" % line))
195 tree.append((current_dir, files))
198 raise FtpScanError("the server ignores -R in LIST")
203 def ftpscan(ftp_server, ftp_port=0, login=None, password=None,
204 ftp_dir='/', passive=None, FTPClass=ftplib.FTP):
206 return ftpscanrecursive(ftp_server, ftp_port, login, password, ftp_dir, passive, FTPClass)
209 return ftpscan1(ftp_server, ftp_port, login, password, ftp_dir, passive, FTPClass)
211 return ftpscan1(ftp_server, ftp_port, login, password, ftp_dir, passive, FTPClass, True)
213 return ftpscan1(ftp_server, ftp_port, login, password, ftp_dir, passive, FTPClass, True)
216 def test(ftp_server, func, passive=None, reconnect=False):
217 from time import time
220 tree = func(ftp_server, passive=passive, reconnect=reconnect)
223 print(stop_time - start_time)
225 logfname = "%s.list" % ftp_server
226 if sys.version_info[0] >= 3:
227 log = codecs.open(logfname, 'w', encoding='utf-8')
229 log = open(logfname, 'w')
231 for ftp_dir, files in tree:
232 if ftp_dir == ftpscan_error_mark:
233 log.write("Error:\n")
237 log.write(ftp_dir + '\n')
240 log.write(_file.name + '\n')
244 sys.stderr.write("Usage: %s [-a|-p] [hostname]\n" % sys.argv[0])
247 if __name__ == "__main__":
249 from getopt import getopt, GetoptError
252 options, arguments = getopt(sys.argv[1:], "hap",
253 ["help", "active", "passive"])
259 for option, value in options:
260 if option in ("-h", "--help"):
262 elif option in ("-a", "--active"):
264 elif option in ("-p", "--passive"):
271 ftp_server = "localhost"
275 ftp_server = arguments[0]
277 print("Scanning", ftp_server)
279 test(ftp_server, ftpscanrecursive, passive)
280 except FtpScanError as msg:
281 print("Rescanning due to the error:", msg)
283 test(ftp_server, ftpscan1, passive)
285 print("Rescanning due to the error: connection reset by peer")
286 test(ftp_server, ftpscan1, passive, True)
288 print("Rescanning due to the error: connection reset by peer")
289 test(ftp_server, ftpscan1, passive, True)