git.phdru.name Git - m_librarian.git/blobdiff - m_librarian/inp.py
Fix(inpx): Fix decode filenames to unicode in Python 3
[m_librarian.git] / m_librarian / inp.py
index a030ebc8993dfe65735d773291f085baf964a14b..d0b567af4c5f97f218b4c393e575b6d69a18ab17 100644 (file)
@@ -1,5 +1,6 @@
 
 import os
+import sys
 from zipfile import ZipFile
 from sqlobject import sqlhub
 from sqlobject.sqlbuilder import Select
@@ -14,14 +15,21 @@ EOT = chr(4)  # INP field separator
 
 def split_line(line):
     parts = line.strip().split(EOT)
-    l = len(parts)
-    if l < 11:
+    _l = len(parts)  # the field count selects the INP layout below
+    if _l < 11:
         raise ValueError('Unknown INP structure: "%s"' % line)
-    if l == 11:  # Standard structure
+    archive = None  # stays None unless the 17-field layout supplies one
+    if _l == 11:  # Standard structure
         parts.append(None)  # Emulate lang
-    else:  # New structure
+    elif _l == 15:  # New structure
         parts = parts[:12]
-    return parts
+    elif _l == 17:  # Very new structure
+        archive = parts[12]
+        language = parts[13]
+        parts = parts[:11] + [language]  # same 12-item shape as the others
+    else:  # Unknown structure: any other field count is rejected
+        raise ValueError('Unknown INP structure: "%s"' % line)
+    return archive, parts
 
 
 def import_inp_line(archive, parts):
@@ -65,30 +73,60 @@ def import_inp_line(archive, parts):
             book.addGenre(genre_row)
 
 
+if sys.version_info[0] == 2:  # sys.version is a str; version_info has ints
+    def tounicode(s):
+        return s.decode('utf-8')  # Python 2: decode the UTF-8 byte string
+else:
+    def tounicode(s):
+        return s  # other versions: value is returned unchanged
+
+
 def import_inp(archive, inp):
+    archives = set()  # per-line archives whose known files are loaded
     files = set()
     connection = sqlhub.processConnection
     for file, in connection.queryAll(connection.sqlrepr(
             Select(Book.q.file, Book.q.archive == archive))):
-        files.add(file)
+        files.add((archive, tounicode(file)))  # key is (archive, file)
     for line in inp:
-        parts = split_line(line)
+        line = line.decode('utf-8')  # decode before splitting on EOT
+        _archive, parts = split_line(line)
+        if _archive and (_archive not in archives):  # first time seen
+            archives.add(_archive)
+            for file, in connection.queryAll(connection.sqlrepr(
+                    Select(Book.q.file, Book.q.archive == _archive))):
+                files.add((_archive, tounicode(file)))
         file = parts[5]
-        if file not in files:
-            files.add(file)
-            import_inp_line(archive, parts)
+        if (_archive or archive, file) not in files:  # line's archive wins
+            files.add((_archive or archive, file))
+            import_inp_line(_archive or archive, parts)
 
 
-def import_inpx(path):
+def import_inpx(path, pbar_cb=None):  # pbar_cb: optional progress object
     inpx = ZipFile(path)
+    if pbar_cb:  # first pass: count .inp members for the progress maximum
+        inp_count = 0
+        for name in inpx.namelist():
+            ext = os.path.splitext(name)[1]
+            if ext == '.inp':
+                inp_count += 1
+        pbar_cb.set_max(inp_count)
+    inp_count = 0  # reused as the running count of processed .inp members
     for name in inpx.namelist():
         archive, ext = os.path.splitext(name)
         if ext != '.inp':
             continue
+        if pbar_cb:
+            inp_count += 1
+            pbar_cb.display(inp_count)
         inp = inpx.open(name)
         sqlhub.doInTransaction(import_inp, archive + '.zip', inp)
         inp.close()
     connection = sqlhub.processConnection
-    if connection.dbName in ('postgres', 'sqlite'):
+    if connection.dbName == 'postgres':  # VACUUM is issued per table here
         for table in Author, Book, Extension, Genre, Language:
             connection.query("VACUUM %s" % table.sqlmeta.table)
+    elif connection.dbName == 'sqlite':
+        connection.query("VACUUM")  # sqlite: one VACUUM without a table name
+    if pbar_cb:
+        pbar_cb.close()  # NOTE(review): skipped on error; inpx never closed