git.phdru.name Git - m_librarian.git/blob - m_librarian/inp.py
Fix(inpx): Fix decoding filenames to unicode
[m_librarian.git] / m_librarian / inp.py
1
2 import os
3 from zipfile import ZipFile
4 from sqlobject import sqlhub
5 from sqlobject.sqlbuilder import Select
6 from .db import Author, Book, Extension, Genre, Language, \
7     insert_name, insert_author
8
9 __all__ = ['import_inpx']
10
11
EOT = '\x04'  # INP field separator (same character as chr(4))


def split_line(line):
    """Split one INP record into an archive name and 12 book fields.

    The line is stripped of surrounding whitespace and split on ``EOT``.
    Three layouts are recognised by their field count:

    * 11 fields -- standard structure; there is no language column, so
      ``None`` is appended in its place.
    * 15 fields -- new structure; only the first 12 fields are kept.
    * 17 fields -- very new structure; field 12 is the archive name and
      field 13 is the language, which is appended to the first 11 fields.

    Returns a tuple ``(archive, parts)`` where ``archive`` is ``None``
    unless the 17-field layout supplied one, and ``parts`` is a list of
    exactly 12 items.

    Raises ``ValueError`` for any other field count.
    """
    fields = line.strip().split(EOT)
    count = len(fields)
    if count == 11:
        # Standard structure: emulate the missing language column.
        return None, fields + [None]
    if count == 15:
        # New structure: the trailing columns are not used.
        return None, fields[:12]
    if count == 17:
        # Very new structure: archive and language live past column 11.
        return fields[12], fields[:11] + [fields[13]]
    # Anything else (too short or an unrecognised width) is rejected.
    raise ValueError('Unknown INP structure: "%s"' % line)
32
33
def import_inp_line(archive, parts):
    """Create a ``Book`` row, with its authors and genres, from one record.

    ``archive`` is stored on the book as-is.  ``parts`` must unpack into
    exactly 12 items in this order: authors, genres, title, series,
    series number, file, size, library id, deleted flag, extension, date
    and language.

    The series number becomes ``None`` when it is not a valid integer;
    the size must be a valid integer (``int()`` is applied without a
    fallback).  The book counts as deleted only when the flag equals
    ``'1'``.
    """
    (author_field, genre_field, title, series, ser_no, file_name, size,
     lib_id, deleted_flag, extension, date, language) = parts

    try:
        series_number = int(ser_no)
    except ValueError:
        series_number = None
    byte_size = int(size)
    is_deleted = (deleted_flag == '1')

    # Look up (or create) the referenced rows before creating the book.
    extension_row = insert_name(Extension, extension)
    language_row = insert_name(Language, language)
    book = Book(
        title=title, series=series, ser_no=series_number,
        archive=archive, file=file_name, size=byte_size,
        lib_id=lib_id, deleted=is_deleted,
        extension=extension_row, date=date,
        language=language_row,
    )

    # Authors are ':'-separated; each is "surname,name,misc_name" where
    # the last two components are optional.
    attached = set()
    for author in author_field.split(':'):
        if not author or author in attached:
            # Skip empty entries and repeats of an author already added.
            continue
        attached.add(author)
        # Pad the missing name components with empty strings.
        pieces = author.split(',', 2) + ['', '']
        surname, name, misc_name = pieces[:3]
        book.addAuthor(insert_author(surname, name, misc_name))

    # Genres are ':'-separated; empty entries are ignored.
    for genre in genre_field.split(':'):
        if not genre:
            continue
        book.addGenre(insert_name(Genre, genre, title=genre))
73
74
def tounicode(s):
    """Return ``s`` decoded as UTF-8 if it is ``bytes``, else unchanged."""
    return s.decode('utf-8') if isinstance(s, bytes) else s
80
81
def import_inp(archive, inp):
    """Import every record of one INP file that is not stored yet.

    ``archive`` is the default archive name used for records that do not
    name their own archive.  ``inp`` is iterated line by line; each line
    is decoded from UTF-8 before it is parsed with ``split_line``.

    A record is imported only if its ``(archive, file)`` pair is neither
    already present in the ``Book`` table nor seen earlier in this call.
    """
    connection = sqlhub.processConnection
    known_files = set()     # (archive, file) pairs that must be skipped
    loaded_archives = set()  # per-line archives whose files were queried

    def remember_stored_files(archive_name):
        # Record the files the database already holds for this archive.
        query = connection.sqlrepr(
            Select(Book.q.file, Book.q.archive == archive_name))
        for (stored_file,) in connection.queryAll(query):
            known_files.add((archive_name, tounicode(stored_file)))

    remember_stored_files(archive)
    for raw_line in inp:
        line_archive, parts = split_line(raw_line.decode('utf-8'))
        if line_archive and line_archive not in loaded_archives:
            # First record naming this archive: load its stored files.
            loaded_archives.add(line_archive)
            remember_stored_files(line_archive)
        target_archive = line_archive or archive
        key = (target_archive, parts[5])  # parts[5] is the file field
        if key in known_files:
            continue
        known_files.add(key)
        import_inp_line(target_archive, parts)
101
102
def import_inpx(path, pbar_cb=None):
    """Import all ``.inp`` members of the INPX (zip) file at ``path``.

    Each ``.inp`` member is imported by ``import_inp`` inside its own
    transaction; the archive name passed along is the member name with
    its ``.inp`` extension replaced by ``.zip``.  After all members are
    processed the database is vacuumed (per table on PostgreSQL, as a
    whole on SQLite).

    ``pbar_cb``, if given, is a progress callback object: ``set_max`` is
    called with the number of ``.inp`` members, ``display`` with the
    1-based index of the member about to be imported, and ``close`` once
    everything is done.
    """
    # The context managers guarantee that the zip file and every opened
    # member are closed even if an import fails part-way.
    with ZipFile(path) as inpx:
        inp_names = [name for name in inpx.namelist()
                     if os.path.splitext(name)[1] == '.inp']
        if pbar_cb:
            pbar_cb.set_max(len(inp_names))
        for inp_count, name in enumerate(inp_names, 1):
            archive = os.path.splitext(name)[0]
            if pbar_cb:
                pbar_cb.display(inp_count)
            with inpx.open(name) as inp:
                sqlhub.doInTransaction(import_inp, archive + '.zip', inp)
    connection = sqlhub.processConnection
    if connection.dbName == 'postgres':
        for table in Author, Book, Extension, Genre, Language:
            connection.query("VACUUM %s" % table.sqlmeta.table)
    elif connection.dbName == 'sqlite':
        connection.query("VACUUM")
    if pbar_cb:
        pbar_cb.close()