]> git.phdru.name Git - m_librarian.git/blob - m_librarian/inp.py
Fix(inpx): Decode filenames to unicode
[m_librarian.git] / m_librarian / inp.py
1
2 import os
3 from zipfile import ZipFile
4 from sqlobject import sqlhub
5 from sqlobject.sqlbuilder import Select
6 from .db import Author, Book, Extension, Genre, Language, \
7     insert_name, insert_author
8
9 __all__ = ['import_inpx']
10
11
EOT = '\x04'  # INP field separator (ASCII 4, End-of-Transmission)


def split_line(line):
    """Split one INP record into ``(archive, fields)``.

    The record is stripped of surrounding whitespace and split on
    :data:`EOT`.  Three layouts are recognised by their field count:

    * 11 fields -- standard structure; it has no language field, so
      ``None`` is appended in its place;
    * 15 fields -- new structure; only the first 12 fields are kept;
    * 17 fields -- very new structure; field 12 is the archive name and
      field 13 the language, which is appended to the first 11 fields.

    :param line: one text line of an ``.inp`` file.
    :returns: a tuple ``(archive, fields)`` where ``archive`` is ``None``
        unless the record carries its own archive name, and ``fields`` is
        a list of exactly 12 items.
    :raises ValueError: if the field count matches none of the layouts.
    """
    fields = line.strip().split(EOT)
    count = len(fields)
    if count == 11:  # Standard structure: emulate the missing language
        return None, fields + [None]
    if count == 15:  # New structure
        return None, fields[:12]
    if count == 17:  # Very new structure
        return fields[12], fields[:11] + [fields[13]]
    raise ValueError('Unknown INP structure: "%s"' % line)
32
33
def import_inp_line(archive, parts):
    """Create a ``Book`` row, with its authors and genres, from one record.

    :param archive: archive name stored on the book.
    :param parts: the 12 record fields, in order: authors, genres, title,
        series, ser_no, file, size, lib_id, deleted, extension, date,
        language.
    :raises ValueError: if ``parts`` does not hold exactly 12 items or
        ``size`` is not an integer literal.

    Authors and genres are both ``:``-separated lists.  Empty items are
    skipped and repeated items are linked to the book only once.
    """
    (authors, genres, title, series, ser_no, filename, size, lib_id,
     deleted, extension, date, language) = parts
    try:
        ser_no = int(ser_no)
    except ValueError:
        # The series number is not an integer literal (e.g. empty).
        ser_no = None
    size = int(size)
    deleted = deleted == '1'
    extension_row = insert_name(Extension, extension)
    language_row = insert_name(Language, language)
    book = Book(title=title, series=series, ser_no=ser_no,
                archive=archive, file=filename, size=size,
                lib_id=lib_id, deleted=deleted,
                extension=extension_row, date=date,
                language=language_row)

    seen_authors = set()
    for author in authors.split(':'):
        if not author or author in seen_authors:
            continue
        seen_authors.add(author)
        # An author is "surname[,name[,misc_name]]"; missing trailing
        # components default to the empty string.
        surname, name, misc_name = (author.split(',', 2) + ['', ''])[:3]
        book.addAuthor(insert_author(surname, name, misc_name))

    # De-duplicate genres the same way as authors so that a repeated
    # genre code is not linked to the book twice.
    seen_genres = set()
    for genre in genres.split(':'):
        if not genre or genre in seen_genres:
            continue
        seen_genres.add(genre)
        book.addGenre(insert_name(Genre, genre, title=genre))
73
74
def _to_text(value):
    """Return *value* as text, decoding it from UTF-8 if it is bytes."""
    if isinstance(value, bytes):
        return value.decode('utf-8')
    return value


def _stored_files(connection, archive):
    """Return the ``(archive, file)`` pairs already stored for *archive*."""
    query = connection.sqlrepr(
        Select(Book.q.file, Book.q.archive == archive))
    return set((archive, _to_text(file)) for file, in
               connection.queryAll(query))


def import_inp(archive, inp):
    """Import every record of one ``.inp`` stream that is not yet stored.

    :param archive: default archive name, used for records that do not
        carry their own archive field.
    :param inp: iterable of UTF-8 encoded lines, one record per line.
    :raises ValueError: propagated from ``split_line`` for a line with an
        unrecognised structure.

    A record is identified by its ``(archive, file)`` pair; pairs already
    in the Book table, or already seen in this stream, are skipped.  File
    names read from the database are decoded only when the driver returns
    byte strings, so text results are accepted unchanged.
    """
    connection = sqlhub.processConnection
    # Archives whose stored files have already been loaded into ``files``.
    archives = set([archive])
    files = _stored_files(connection, archive)
    for line in inp:
        _archive, parts = split_line(_to_text(line))
        if _archive and _archive not in archives:
            archives.add(_archive)
            files.update(_stored_files(connection, _archive))
        book_archive = _archive or archive
        key = (book_archive, parts[5])  # parts[5] is the file name
        if key not in files:
            files.add(key)
            import_inp_line(book_archive, parts)
94
95
def import_inpx(path, pbar_cb=None):
    """Import all ``.inp`` members of an INPX (zip) file into the database.

    :param path: path of the INPX file, passed to ``ZipFile``.
    :param pbar_cb: optional progress callback object.  When given, its
        ``set_max(count)`` is called with the number of ``.inp`` members,
        ``display(i)`` before the i-th member (counting from 1) is
        imported, and ``close()`` at the end.

    Each member is imported by ``import_inp`` inside its own transaction.
    The default archive name is the member name with its extension
    replaced by ``.zip``.  After the import the tables are vacuumed on
    PostgreSQL and the whole database on SQLite.
    """
    # ``with`` closes the zip file and each member stream even when an
    # import fails.
    with ZipFile(path) as inpx:
        inp_names = [name for name in inpx.namelist()
                     if os.path.splitext(name)[1] == '.inp']
        if pbar_cb:
            pbar_cb.set_max(len(inp_names))
        for inp_count, name in enumerate(inp_names, 1):
            if pbar_cb:
                pbar_cb.display(inp_count)
            archive = os.path.splitext(name)[0]
            with inpx.open(name) as inp:
                sqlhub.doInTransaction(import_inp, archive + '.zip', inp)
    connection = sqlhub.processConnection
    if connection.dbName == 'postgres':
        for table in Author, Book, Extension, Genre, Language:
            connection.query("VACUUM %s" % table.sqlmeta.table)
    elif connection.dbName == 'sqlite':
        connection.query("VACUUM")
    if pbar_cb:
        pbar_cb.close()