git.phdru.name Git - extfs.d.git/blobdiff - torrent
Feat(torrent): Try harder to guess encoding
[extfs.d.git] / torrent
diff --git a/torrent b/torrent
index 67588e5ee336909c7959e8c093495b6b436fe374..a0f827ef91f7ad213d7ab70445e9ef0e54e6175a 100755 (executable)
--- a/torrent
+++ b/torrent
@@ -1,4 +1,4 @@
-#! /usr/bin/env python
+#! /usr/bin/env python3
 """Torrent Virtual FileSystem for Midnight Commander
 
 The script requires Midnight Commander 3.1+
@@ -15,7 +15,7 @@ file the command is "%cd"): cd file/torrent://; In older versions it is
 cd file#torrent, where "file" is the name of your torrent metafile.
 
 See detailed installation instructions at
-http://phdru.name/Software/mc/torrent_INSTALL.html.
+https://phdru.name/Software/mc/torrent_INSTALL.html.
 
 The VFS lists all files and directories from the torrent metafile; all files
 appear empty, of course, but the sizes are shown. Filenames are reencoded from
@@ -35,9 +35,9 @@ The filesystem is, naturally, read-only.
 
 """
 
-__version__ = "1.2.4"
+__version__ = "1.3.1"
 __author__ = "Oleg Broytman <phd@phdru.name>"
-__copyright__ = "Copyright (C) 2010-2018 PhiloSoft Design"
+__copyright__ = "Copyright (C) 2010-2023 PhiloSoft Design"
 __license__ = "GPL"
 
 
@@ -56,9 +56,12 @@ except ImportError:
 if use_locale:
     # Get the default charset.
     try:
-        lcAll = locale.getdefaultlocale()
-    except locale.Error, err:
-        print >>sys.stderr, "WARNING:", err
+        if sys.version_info[:2] < (3, 11):
+            lcAll = locale.getdefaultlocale()
+        else:  # locale.getdefaultlocale() is deprecated since Python 3.11
+            lcAll = []
+    except locale.Error as err:
+        # Warning silenced; was: print("WARNING:", err, file=sys.stderr)
         lcAll = []
 
     if len(lcAll) == 2:
@@ -66,8 +69,8 @@ if use_locale:
     else:
         try:
             default_encoding = locale.getpreferredencoding()
-        except locale.Error, err:
-            print >>sys.stderr, "WARNING:", err
+        except locale.Error as err:
+            # Warning silenced; was: print("WARNING:", err, file=sys.stderr)
             default_encoding = sys.getdefaultencoding()
 else:
     default_encoding = sys.getdefaultencoding()
@@ -91,13 +94,18 @@ This is not a program. Put the script in $HOME/[.local/share/].mc/extfs.d or
 
 locale.setlocale(locale.LC_ALL, '')
 
+PY3 = (sys.version_info[0] >= 3)
+if PY3:
+    def output(s):  # bytes in default_encoding, errors replaced
+        sys.stdout.buffer.write(s.encode(default_encoding, 'replace') + b'\n')
+else:
+    def output(s):  # Python 2: s is written to stdout as is
+        sys.stdout.write(s + '\n')
+
 
 def mctorrent_list():
     """List the entire VFS"""
 
-    if 'info' not in torrent:
-        torrent_error('Info absent')
-
     info = torrent['info']
     if 'name' not in info and 'name.utf-8' not in info:
         torrent_error('Unknown name')
@@ -122,33 +130,13 @@ def mctorrent_list():
             if 'path.utf-8' in file:
                 if name_utf8:
                     path = '/'.join([name_utf8] + file['path.utf-8'])
-                    if default_encoding != 'utf-8':
-                        path = path.decode('utf-8', 'replace').encode(
-                            default_encoding, 'replace')
                 else:
-                    _name_utf8 = name
-                    if encoding and (encoding != 'utf-8'):
-                        _name_utf8 = _name_utf8.decode(
-                            encoding, 'replace').encode('utf-8', 'replace')
-                    path = '/'.join([_name_utf8] + file['path.utf-8'])
-                    if default_encoding != 'utf-8':
-                        path = path.decode('utf-8', 'replace').encode(
-                            default_encoding, 'replace')
+                    path = '/'.join([name] + file['path.utf-8'])
             else:
                 if name_utf8:
-                    path = file['path']
-                    if encoding and (encoding != 'utf-8'):
-                        path = path.decode(encoding, 'replace').encode(
-                            'utf-8', 'replace')
                     path = '/'.join([name_utf8] + path)
-                    if default_encoding != 'utf-8':
-                        path = path.decode('utf-8', 'replace').encode(
-                            default_encoding, 'replace')
                 else:
                     path = '/'.join([name] + file['path'])
-                    if encoding and (default_encoding != encoding):
-                        path = path.decode(encoding, 'replace').encode(
-                            default_encoding, 'replace')
             length = file['length']
             paths.append((path, length))
     else:  # One-file torrent
@@ -156,12 +144,7 @@ def mctorrent_list():
             torrent_error('Unknown length')
         length = info['length']
         if name_utf8:
-            if default_encoding != 'utf-8':
-                name = name_utf8.decode('utf-8', 'replace').encode(
-                    default_encoding, 'replace')
-        elif encoding and (default_encoding != encoding):
-            name = name.decode(encoding, 'replace').encode(
-                default_encoding, 'replace')
+            name = name_utf8
         paths = [(name, length)]
 
     meta = []
@@ -169,8 +152,7 @@ def mctorrent_list():
                 'created by', 'creation date', 'encoding', \
                 'nodes', 'publisher', 'publisher-url':
         if name == 'comment' and 'comment.utf-8' in torrent:
-            data = torrent['comment.utf-8'].decode('utf-8').encode(
-                default_encoding, 'replace')
+            data = torrent['comment.utf-8']
             meta.append(('.META/' + name, len(data)))
         elif name in torrent:
             if name == 'announce-list':
@@ -204,10 +186,10 @@ def mctorrent_list():
         dt = decode_datetime(getmtime(sys.argv[2]))
 
     for name in sorted(dirs):
-        print "dr-xr-xr-x 1 user group 0 %s %s" % (dt, name)
+        output("dr-xr-xr-x 1 user group 0 %s %s" % (dt, name))
 
     for name, size in sorted(paths):
-        print "-r--r--r-- 1 user group %d %s %s" % (size, dt, name)
+        output("-r--r--r-- 1 user group %d %s %s" % (size, dt, name))
 
 
 def mctorrent_copyout():
@@ -221,8 +203,7 @@ def mctorrent_copyout():
                 'created by', 'creation date', 'encoding', \
                 'nodes', 'publisher', 'publisher-url':
         if name == 'comment' and 'comment.utf-8' in torrent:
-            data = torrent['comment.utf-8'].decode('utf-8').encode(
-                default_encoding, 'replace')
+            data = torrent['comment.utf-8']
         elif torrent_filename == '.META/' + name:
             if name in torrent:
                 if name == 'announce-list':
@@ -242,15 +223,13 @@ def mctorrent_copyout():
             break
 
     if torrent_filename in ('.META/private', '.META/piece length'):
-        if 'info' not in torrent:
-            torrent_error('Info absent')
         info = torrent['info']
         if torrent_filename == '.META/private':
             if 'private' not in info:
-                torrent_error('Info absent')
+                torrent_error('Private absent')
         if torrent_filename == '.META/piece length':
             if 'piece length' not in info:
-                torrent_error('Info absent')
+                torrent_error('Piece length absent')
         data = str(info[torrent_filename[len('.META/'):]])
 
     if not torrent_filename.startswith('.META/'):
@@ -259,14 +238,15 @@ def mctorrent_copyout():
     if data is None:
         torrent_error('Unknown file name')
     else:
-        outfile = open(real_filename, 'w')
+        outfile = open(real_filename, 'wt')
         outfile.write(data)
         outfile.close()
 
 
 def mctorrent_copyin():
     """Put a file to the VFS"""
-    sys.exit("Torrent VFS doesn't support adding files (read-only filesystem)")
+    sys.exit("Torrent VFS doesn't support adding/overwriting files "
+             "(read-only filesystem)")
 
 
 def mctorrent_rm():
@@ -289,15 +269,65 @@ def torrent_error(error_str):
     sys.exit(1)
 
 
+def _decode_value(v, encoding):
+    """Decode one bdecoded value; recurse into dicts and lists.
+
+    Values that are not dict, list or bytes are returned unchanged.
+    """
+    if isinstance(v, dict):
+        return decode_dict(v, encoding)
+    if isinstance(v, list):
+        return decode_list(v, encoding)
+    if isinstance(v, bytes):
+        return v.decode(encoding)
+    return v
+
+
+def decode_dict(d, encoding):
+    """Return a new dict with keys and nested values decoded to text.
+
+    Raises UnicodeDecodeError if a byte string is invalid in encoding.
+    """
+    return {k.decode(encoding): _decode_value(d[k], encoding)
+            for k in d}
+
+
+def decode_list(l, encoding):
+    """Return a new list with every nested byte string decoded."""
+    return [_decode_value(v, encoding) for v in l]
+
+
 def decode_torrent():
     try:
-        torrent_file = open(sys.argv[2], 'r')
+        torrent_file = open(sys.argv[2], 'rb')
         data = torrent_file.read()
         torrent_file.close()
-        return decode(data)
-    except IOError, error_str:
+        torrent = decode(data)
+    except IOError as error_str:
         torrent_error(error_str)
 
+    del torrent[b'info'][b'pieces']
+    if b'info' not in torrent:
+        torrent_error('Info absent')
+
+    if PY3:
+        codepage = torrent.get(b'codepage', None)
+        encoding = torrent.get(b'encoding', None)
+        if encoding:
+            encoding = encoding.decode('ascii')
+        elif codepage:
+            encoding = codepage.decode('ascii')
+        else:
+            for encoding in ('ascii', 'utf-8', default_encoding):
+                try:
+                    return decode_dict(torrent, encoding)
+                except UnicodeDecodeError:
+                    pass
+            torrent_error('UnicodeDecodeError')
+        return decode_dict(torrent, encoding)
+
+    return torrent
+
 
 def decode_datetime_asc(dt):
     try:
@@ -316,7 +346,7 @@ def decode_datetime(dt):
 
 
 def decode_announce_list(announce):
-    return '\n'.join(l[0] for l in announce if l)
+    return '\n'.join(a[0] for a in announce if a)
 
 
 command = sys.argv[1]
@@ -333,5 +363,5 @@ try:
     g[procname]()
 except SystemExit:
     raise
-except:
+except Exception:
     logger.exception("Error during run")