]> git.phdru.name Git - m_librarian.git/blob - m_librarian/inp.py
Fix(inpx): Fix decode filenames to unicode in Python 3
[m_librarian.git] / m_librarian / inp.py
1
2 import os
3 import sys
4 from zipfile import ZipFile
5 from sqlobject import sqlhub
6 from sqlobject.sqlbuilder import Select
7 from .db import Author, Book, Extension, Genre, Language, \
8     insert_name, insert_author
9
# Names exported by ``from ... import *``: only ``import_inpx`` is listed.
__all__ = ['import_inpx']
11
12
EOT = chr(4)  # Character code 4: separates the fields of an INP line


def split_line(line):
    """Split one INP record into ``(archive, parts)``.

    The line is stripped of surrounding whitespace and split on ``EOT``.
    Three field counts are recognised:

    * 11 fields -- standard structure; ``None`` is appended in place of
      the missing language, ``archive`` is ``None``.
    * 15 fields -- the first 12 fields are kept, ``archive`` is ``None``.
    * 17 fields -- ``archive`` is field 12 and the language is field 13;
      the result is the first 11 fields followed by the language.

    In every accepted case ``parts`` has exactly 12 items.

    Raises:
        ValueError: if the line has any other number of fields.
    """
    fields = line.strip().split(EOT)
    count = len(fields)
    if count == 11:
        # No language column in this layout: pad with None.
        return None, fields + [None]
    if count == 15:
        return None, fields[:12]
    if count == 17:
        return fields[12], fields[:11] + [fields[13]]
    raise ValueError('Unknown INP structure: "%s"' % line)
33
34
def import_inp_line(archive, parts):
    """Create a ``Book`` from one parsed INP record and link authors/genres.

    ``parts`` must unpack into exactly 12 values, in this order: authors,
    genres, title, series, series number, file, size, library id, deleted
    flag, extension, date, language.

    * A series number that ``int()`` rejects with ``ValueError`` becomes
      ``None``.
    * ``size`` is converted with ``int()``; a failure propagates.
    * The book is marked deleted only when the flag equals ``'1'``.
    * Authors and genres are ``':'``-separated lists; empty entries are
      skipped and repeated author entries are attached only once.
    * Each author entry is ``surname[,name[,misc_name]]``; missing pieces
      default to ``''``.

    NOTE(review): ``insert_name``/``insert_author`` come from ``.db`` and
    presumably return an existing or newly created row -- confirm there.
    """
    (author_field, genre_field, title, series, ser_no, file_name, size,
     lib_id, deleted, extension, date, language) = parts

    try:
        ser_no = int(ser_no)
    except ValueError:
        ser_no = None
    size = int(size)
    is_deleted = deleted == '1'

    extension_row = insert_name(Extension, extension)
    language_row = insert_name(Language, language)
    book = Book(
        title=title, series=series, ser_no=ser_no,
        archive=archive, file=file_name, size=size,
        lib_id=lib_id, deleted=is_deleted,
        extension=extension_row, date=date,
        language=language_row,
    )

    handled = set()
    for raw_author in author_field.split(':'):
        if not raw_author or raw_author in handled:
            continue
        handled.add(raw_author)
        # At most three comma-separated pieces; pad the absent ones.
        pieces = raw_author.split(',', 2)
        pieces.extend([''] * (3 - len(pieces)))
        surname, name, misc_name = pieces
        book.addAuthor(insert_author(surname, name, misc_name))

    for raw_genre in genre_field.split(':'):
        if raw_genre:
            book.addGenre(insert_name(Genre, raw_genre, title=raw_genre))
74
75
# File names read back from the database are passed through ``tounicode``
# before being compared with names decoded from INP lines.
#
# The interpreter's major version must be read from ``sys.version_info``:
# ``sys.version`` is a string, so ``sys.version[0] == 2`` is never true and
# the decoding variant would never be selected.
if sys.version_info[0] == 2:
    def tounicode(s):
        """Decode the UTF-8 byte string *s* to unicode (Python 2)."""
        return s.decode('utf-8')
else:
    def tounicode(s):
        """Return *s* unchanged (any Python other than 2)."""
        return s
82
83
def import_inp(archive, inp):
    """Import every record of one INP stream that is not in the database.

    ``inp`` is iterated line by line; each line is decoded as UTF-8 and
    parsed with ``split_line``. A record is identified by the pair
    ``(archive, file)``, where the archive is the one named in the line
    itself when present, otherwise the ``archive`` argument. Records whose
    pair is already known -- from the database or from an earlier line of
    this stream -- are skipped; the rest go to ``import_inp_line``.

    File names already stored for ``archive`` are loaded up front; those
    for an archive named inside a line are loaded the first time that
    archive is seen.
    """
    connection = sqlhub.processConnection
    known_files = set()
    loaded_archives = set()

    def remember_files(archive_name):
        # Record every file name the database already holds for the archive.
        query = connection.sqlrepr(
            Select(Book.q.file, Book.q.archive == archive_name))
        for (stored_file,) in connection.queryAll(query):
            known_files.add((archive_name, tounicode(stored_file)))

    remember_files(archive)
    for raw_line in inp:
        text = raw_line.decode('utf-8')
        line_archive, parts = split_line(text)
        if line_archive and line_archive not in loaded_archives:
            loaded_archives.add(line_archive)
            remember_files(line_archive)
        target_archive = line_archive or archive
        key = (target_archive, parts[5])  # parts[5] is the file name
        if key in known_files:
            continue
        known_files.add(key)
        import_inp_line(target_archive, parts)
103
104
def import_inpx(path, pbar_cb=None):
    """Import all ``.inp`` members of the INPX (zip) file at ``path``.

    Each member is imported by ``import_inp`` inside its own transaction
    (``sqlhub.doInTransaction``); the archive name passed along is the
    member name with its extension replaced by ``.zip``. Afterwards the
    database is vacuumed: per table on ``postgres``, as a whole on
    ``sqlite``.

    Args:
        path: location of the INPX file, passed to ``ZipFile``.
        pbar_cb: optional progress object. When truthy, its
            ``set_max(total)`` is called once with the number of ``.inp``
            members, ``display(i)`` before each member (``i`` counts from
            1) and ``close()`` at the very end.

    The zip file and each member stream are opened with ``with`` so they
    are closed even if an import raises.
    """
    with ZipFile(path) as inpx:
        # Scan the member list once; it drives both the total and the loop.
        inp_names = [name for name in inpx.namelist()
                     if os.path.splitext(name)[1] == '.inp']
        if pbar_cb:
            pbar_cb.set_max(len(inp_names))
        for inp_count, name in enumerate(inp_names, 1):
            if pbar_cb:
                pbar_cb.display(inp_count)
            archive = os.path.splitext(name)[0]
            with inpx.open(name) as inp:
                sqlhub.doInTransaction(import_inp, archive + '.zip', inp)
    connection = sqlhub.processConnection
    if connection.dbName == 'postgres':
        # Table names come from the project's own models, not user input.
        for table in Author, Book, Extension, Genre, Language:
            connection.query("VACUUM %s" % table.sqlmeta.table)
    elif connection.dbName == 'sqlite':
        connection.query("VACUUM")
    if pbar_cb:
        pbar_cb.close()