2 """Recursive FTP scanners"""
6 from m_lib.net.ftp.ftpparse import ftpparse
# Error type of this module; raised with a message when a scan strategy cannot
# be used against the server.
9 class FtpScanError(Exception): pass
# Unique sentinel: a tree entry whose first element is this object carries an
# error message, not a list of files, as its second element.
10 ftpscan_error_mark = object() # error marker
# Callback taking one listing line: the line is parsed with ftpparse() and the
# parse result is stored in self.entries.
# NOTE(review): original line 19 (between the parse and the append) is not
# visible here - presumably a guard on the parse result; confirm.
17 def __call__(self, line):
18 entry = ftpparse(line)
20 self.entries.append(entry)
# Select the collected entries whose `file_type` attribute equals the given
# code. Inside the method body `filter(...)` is the built-in, not this method.
# NOTE(review): the last argument of the call (original line 24) is not visible
# here - presumably self.entries; confirm.
22 def filter(self, file_type):
23 return filter(lambda e, file_type=file_type: e.file_type == file_type,
# NOTE(review): the `def` line for the next statement (original line 26) is not
# visible; _traverse_ftp calls get_files.files(), so this is presumably the
# body of a `files` method returning the entries of type 'f'.
27 return self.filter('f')
# Entries of type 'd', leaving out the "." and ".." names.
29 def directories(self):
30 return filter(lambda e: e.name not in (".", ".."), self.filter('d'))
# Callable proxy around a single function of an FTP connection, created with
# the owning wrapper object and the function to call.
# From the visible lines: the call is attempted inside a loop of `retries`
# iterations; the failure path appends an error marker to wrapper._tree and
# re-establishes the session with connect() + login() using the settings
# stored on the wrapper.
# NOTE(review): original lines 38, 42-43, 45, 47-49, 52-55 and 58-59 are not
# visible - which exception triggers a retry, where `ftp` and `func` are bound,
# and what happens once the retries are exhausted cannot be confirmed here.
33 class ReconnectingFTPCallWrapper:
34 retries = 10 # retries per function call
36 def __init__(self, wrapper, func):
37 self.wrapper = wrapper
40 def __call__(self, *params, **kw):
41 wrapper = self.wrapper
44 for retry in range(self.retries):
# A successful call returns immediately, ending the retry loop.
46 return func(*params, **kw)
# Record the interruption in the result tree, tagged with the directory the
# wrapper was tracking at the time.
50 ftp_dir = wrapper._ftp_dir
51 wrapper._tree.append((ftpscan_error_mark, "Connection reset by peer at directory `%s'. Reconnecting..." % ftp_dir))
# Open a new session with the server/port/credentials kept on the wrapper.
56 ftp.connect(wrapper._ftp_server, wrapper._ftp_port)
57 ftp.login(wrapper._login, wrapper._password)
# Wrapper around an FTP object that keeps the connection settings and the
# tracked current directory on the instance, and hands out retrying call
# wrappers for the FTP object's functions.
60 class ReconnectingFTPWrapper:
# Class instantiated by _wrap(); a class attribute, so it can be overridden.
61 ReconnectingFTPCallWrapperClass = ReconnectingFTPCallWrapper
# NOTE(review): original lines 64, 67 and 71 are not visible - presumably they
# store `ftp`, `login` and `tree` (other code reads _ftp, _login and _tree).
63 def __init__(self, ftp, ftp_server, ftp_port=None, login=None, password=None, ftp_dir='/', tree=None):
65 self._ftp_server = ftp_server
66 self._ftp_port = ftp_port
68 self._password = password
# Normalise the start directory: split on '/', drop empty components (so
# repeated slashes collapse) and prefix '' so a non-empty path starts with '/'.
# For ftp_dir='/' the stored value is the empty string.
69 ftp_dir = [''] + [name for name in ftp_dir.split('/') if name] # remove double slashes //
70 self._ftp_dir = '/'.join(ftp_dir)
# Change directory while keeping self._ftp_dir in step: the new component is
# appended to the tracked path and, when do_ftp is true, the real cwd call is
# sent through the retrying wrapper. With do_ftp false only the tracked path
# is updated.
# NOTE(review): original lines 75-77 are not visible - presumably special
# handling (e.g. of "..") before the append; confirm.
73 def cwd(self, new_cwd, do_ftp=True):
74 ftp_dir = self._ftp_dir.split('/')
78 ftp_dir.append(new_cwd)
79 self._ftp_dir = '/'.join(ftp_dir)
80 if do_ftp: self._wrap(self._ftp.cwd)(new_cwd)
# Attributes not found on the wrapper are looked up on the wrapped FTP object;
# the visible return hands the attribute back wrapped for retry.
# NOTE(review): original line 84 is not visible - presumably a condition that
# decides which attributes get wrapped; confirm.
82 def __getattr__(self, attr):
83 value = getattr(self._ftp, attr)
85 return self._wrap(value)
# Build a retrying call wrapper for `func`, bound to this instance.
88 def _wrap(self, func):
89 return self.ReconnectingFTPCallWrapperClass(self, func)
# Recursive directory walk. Appends to `tree` one (directory, files) tuple per
# listed directory and an (ftpscan_error_mark, message) tuple for each failure,
# then recurses into every subdirectory with its full path.
# NOTE(review): original lines 94-95, 98, 108, 110-111, 115, 118 and 120 are
# not visible - the listing call, the `name` assignment, the cwd calls and the
# try/else structure are presumed, not shown.
92 def _traverse_ftp(ftp, tree, ftp_dir):
93 get_files = GetFiles()
# Python 2 `except Class, name` syntax; any ftplib error while listing is
# recorded in the tree.
96 except ftplib.all_errors, msg:
97 tree.append((ftpscan_error_mark, "Cannot list directory `%s': %s" % (ftp_dir, msg)))
99 files = get_files.files()
100 directories = get_files.directories()
102 if ftp_dir and ftp_dir[-1] == '/':
103 ftp_dir = ftp_dir[:-1] # Prevent paths to contain double slashes //
105 tree.append((ftp_dir, files))
107 for d in directories:
109 full_path = ftp_dir + '/' + name
# Permission errors are handled separately from other ftplib errors; both
# record the same message, but only this branch has the extra step for the
# reconnecting wrapper (its body, original line 115, is not visible).
112 except ftplib.error_perm, msg:
113 tree.append((ftpscan_error_mark, "Cannot enter directory `%s': %s" % (full_path, msg)))
114 if isinstance(ftp, ReconnectingFTPWrapper):
116 except ftplib.all_errors, msg:
117 tree.append((ftpscan_error_mark, "Cannot enter directory `%s': %s" % (full_path, msg)))
119 _traverse_ftp(ftp, tree, full_path)
# Scan a server by walking its directories one at a time with _traverse_ftp.
# FTPClass and ReconnectingFTPWrapperClass are injectable so callers can supply
# their own FTP / wrapper implementations.
# NOTE(review): original lines 128-130, 133, 137-139 and 141 onward are not
# visible - creation of `ftp` and `tree`, the condition guarding the wrapper
# and the return value are not shown here.
123 def ftpscan1(ftp_server, ftp_port=None, login=None, password=None,
124 ftp_dir='/', passive=None, FTPClass=ftplib.FTP, reconnect=False,
125 ReconnectingFTPWrapperClass=ReconnectingFTPWrapper):
126 """Recursive FTP scan using one-by-one directory traversing. It is slow
127 but robust - it works with all but very broken FTP servers.
131 if passive is not None:
132 ftp.set_pasv(passive)
# Replace the plain FTP object with the reconnecting wrapper; it receives the
# connection settings and the result tree. Presumably guarded by `reconnect`
# on the line not visible above; confirm.
134 ftp = ReconnectingFTPWrapperClass(ftp, ftp_server, ftp_port, login, password, ftp_dir, tree)
135 ftp.connect(ftp_server, ftp_port)
136 ftp.login(login, password)
140 _traverse_ftp(ftp, tree, ftp_dir)
# Scan a server with a single recursive listing (`ftp.dir("-R", ...)`) and
# parse the collected lines into (directory, files) tuples appended to `tree`.
# Raises FtpScanError when the server does not honour the recursive listing.
# NOTE(review): many original lines are not visible (e.g. 148, 150-152,
# 157-161, 166, 168-170, 172-175, 178, 180-181, 183-184, 187, 189-190,
# 193-194, 196 onward) - the loop header, the branch structure and the return
# value are not shown here.
146 def ftpscanrecursive(ftp_server, ftp_port=None, login=None, password=None,
147 ftp_dir='/', passive=None, FTPClass=ftplib.FTP, reconnect=False):
149 Recursive FTP scan using fast LIST -R command. Not all servers supports
153 if passive is not None:
154 ftp.set_pasv(passive)
155 ftp.connect(ftp_server, ftp_port)
156 ftp.login(login, password)
# Every line of the recursive listing is collected through lines.append.
162 ftp.dir("-R", lines.append)
163 except ftplib.error_perm:
164 # The server does not implement LIST -R and
165 # treats -R as a name of a directory (-:
167 raise FtpScanError, "the server does not implement recursive listing"
171 current_dir = ftp_dir
# A line ending in ':' that does not start with "-rw-" is taken as the header
# of a new directory section; the files gathered so far are flushed under the
# previous directory name first.
176 if line[-1] == ':' and not line.startswith("-rw-"): # directory
177 tree.append((current_dir, files))
179 line = line[1:] # remove leading dot
# Drop the trailing ':' to obtain the directory name.
182 current_dir = line[:-1]
# Lines beginning with "total " are not passed to the parser.
185 if not line.startswith("total "):
186 entry = ftpparse(line)
188 if entry.file_type == 'f':
191 tree.append((ftpscan_error_mark, "Unrecognised line: `%s'" % line))
# Flush the files of the last directory section.
192 tree.append((current_dir, files))
195 raise FtpScanError, "the server ignores -R in LIST"
# Front end over the scanners. The visible returns call, in order:
# ftpscanrecursive, then ftpscan1, then ftpscan1 with reconnect=True (twice).
# NOTE(review): original lines 202, 204-205, 207 and 209 are not visible -
# presumably try/except clauses that make this a fallback chain; which
# exception leads to which fallback cannot be confirmed from here.
200 def ftpscan(ftp_server, ftp_port=None, login=None, password=None,
201 ftp_dir='/', passive=None, FTPClass=ftplib.FTP):
203 return ftpscanrecursive(ftp_server, ftp_port, login, password, ftp_dir, passive, FTPClass)
206 return ftpscan1(ftp_server, ftp_port, login, password, ftp_dir, passive, FTPClass)
208 return ftpscan1(ftp_server, ftp_port, login, password, ftp_dir, passive, FTPClass, True)
210 return ftpscan1(ftp_server, ftp_port, login, password, ftp_dir, passive, FTPClass, True)
# Manual test driver: runs the scanner `func` against ftp_server, prints the
# difference stop_time - start_time, and writes the resulting tree to the file
# "<ftp_server>.list" (error entries are introduced by an "Error:" line).
# Uses the Python 2 print statement.
# NOTE(review): original lines 215-216, 218-219, 221, 224, 228-230, 232-233
# and 235 are not visible - the timing assignments, the inner loop over files
# and any closing of `log` are not shown here.
213 def test(ftp_server, func, passive=None, reconnect=False):
214 from time import time
217 tree = func(ftp_server, passive=passive, reconnect=reconnect)
220 print stop_time - start_time
222 logfname = "%s.list" % ftp_server
223 log = open(logfname, 'w')
225 for ftp_dir, files in tree:
# Error entries carry the sentinel in place of a directory name.
226 if ftp_dir == ftpscan_error_mark:
227 log.write("Error:\n")
231 log.write(ftp_dir + '\n')
234 log.write(_file.name + '\n')
# NOTE(review): the `def` line owning the next statement (original line 237)
# is not visible - presumably a usage helper for the command-line script.
238 sys.stderr.write("Usage: %s [-a|-p] [hostname]\n" % sys.argv[0])
# Command-line entry point: parses -h/--help, -a/--active and -p/--passive,
# takes an optional host name (default "localhost") and scans it, falling back
# from ftpscanrecursive to ftpscan1 and then to ftpscan1 with reconnect=True.
# NOTE(review): the try/except lines around the calls and the option bodies
# are not visible - only the FtpScanError handler is shown; the exceptions
# that lead to the two "connection reset by peer" rescans cannot be confirmed.
241 if __name__ == "__main__":
243 from getopt import getopt, GetoptError
246 options, arguments = getopt(sys.argv[1:], "hap",
247 ["help", "active", "passive"])
253 for option, value in options:
254 if option in ("-h", "--help"):
256 elif option in ("-a", "--active"):
258 elif option in ("-p", "--passive"):
265 ftp_server = "localhost"
269 ftp_server = arguments[0]
271 print "Scanning", ftp_server
# First attempt: the fast recursive listing.
273 test(ftp_server, ftpscanrecursive, passive)
274 except FtpScanError, msg:
275 print "Rescanning due to the error:", msg
# Second attempt: directory-by-directory traversal.
277 test(ftp_server, ftpscan1, passive)
279 print "Rescanning due to the error: connection reset by peer"
# Last resort: traversal with reconnect enabled (fourth positional argument).
280 test(ftp_server, ftpscan1, passive, True)
282 print "Rescanning due to the error: connection reset by peer"
283 test(ftp_server, ftpscan1, passive, True)