]> git.phdru.name Git - m_librarian.git/blob - m_librarian/inp.py
Docs: Update TODO
[m_librarian.git] / m_librarian / inp.py
1
2 import os
3 from zipfile import ZipFile
4
5 from sqlobject import dberrors, sqlhub
6 from sqlobject.sqlbuilder import Select
7
8 from .db import Author, Book, Extension, Genre, Language, \
9     insert_name, insert_author
10
11 __all__ = ['import_inpx']
12
13
EOT = chr(4)  # INP field separator


def split_line(line):
    """Split one INP record into ``(archive, fields)``.

    The line is stripped and split on ``EOT``.  Three layouts are
    recognised by their field count:

    * 11 fields -- no language column; ``None`` is appended in its place.
    * 15 fields -- the first 12 fields are kept (the 12th is the language).
    * 17 fields -- field 12 is the archive name and field 13 the language;
      the result is the first 11 fields plus the language.

    Returns a tuple ``(archive, fields)`` where ``archive`` is ``None``
    unless the 17-field layout supplied one, and ``fields`` always has
    12 items.

    Raises ``ValueError`` for any other field count.
    """
    fields = line.strip().split(EOT)
    count = len(fields)
    if count == 11:
        return None, fields + [None]
    if count == 15:
        return None, fields[:12]
    if count == 17:
        return fields[12], fields[:11] + [fields[13]]
    raise ValueError('Unknown INP structure: "%s"' % line)
34
35
def _parse_author(author):
    """Split an INP author entry ``surname,name,misc`` into a 3-tuple.

    Components missing from the entry are returned as empty strings.
    """
    pieces = author.split(',', 2)
    pieces += [''] * (3 - len(pieces))
    return tuple(pieces)


def import_inp_line(archive, parts):
    """Create a ``Book`` row (with authors and genres) from one INP record.

    ``archive`` is stored on the book as is.  ``parts`` must contain
    exactly 12 items, in this order: authors, genres, title, series,
    series number, file, size, library id, deleted flag, extension,
    date, language.

    Authors and genres are ``:``-separated lists; empty entries are
    ignored.  A series number that is not an integer is stored as
    ``None``; a size that is not an integer raises ``ValueError``.
    """
    authors, genres, title, series, ser_no, file, size, lib_id, deleted, \
        extension, date, language = parts
    try:
        ser_no = int(ser_no)
    except ValueError:
        ser_no = None
    size = int(size)
    deleted = deleted == '1'
    extension_row = insert_name(Extension, extension)
    language_row = insert_name(Language, language)
    book = Book(title=title, series=series, ser_no=ser_no,
                archive=archive, file=file, size=size,
                lib_id=lib_id, deleted=deleted,
                extension=extension_row, date=date,
                language=language_row)

    # De-duplicate on the parsed name, not on the raw text, so that
    # "Surname,Name" and "Surname,Name," are linked only once.
    seen_authors = set()
    for author in authors.split(':'):
        if not author:
            continue
        author_key = _parse_author(author)
        if author_key in seen_authors:
            continue
        seen_authors.add(author_key)
        book.addAuthor(insert_author(*author_key))

    # Filter repeated genres here instead of relying on the database to
    # reject them: a failed statement inside a transaction can leave the
    # transaction unusable on some backends (e.g. PostgreSQL).
    seen_genres = set()
    for genre in genres.split(':'):
        if not genre or genre in seen_genres:
            continue
        seen_genres.add(genre)
        genre_row = insert_name(Genre, genre, title=genre)
        try:
            book.addGenre(genre_row)
        except dberrors.DuplicateEntryError:
            pass  # Safety net: the genre has already been added
78
79
def tounicode(s):
    """Return *s* decoded from UTF-8 if it is ``bytes``, else unchanged."""
    return s.decode('utf-8') if isinstance(s, bytes) else s
85
86
def _known_files(connection, archive):
    """Return ``(archive, file)`` keys of books already stored for *archive*."""
    query = connection.sqlrepr(
        Select(Book.q.file, Book.q.archive == archive))
    return {(archive, tounicode(file))
            for (file,) in connection.queryAll(query)}


def import_inp(archive, inp):
    """Import every record of one INP stream, skipping books already known.

    ``archive`` is the archive name used for records that do not name
    their own archive.  ``inp`` is an iterable of UTF-8 encoded ``bytes``
    lines.

    A book is identified by its ``(archive, file)`` pair.  The pairs
    already in the database are loaded once per archive, and every
    imported record is remembered, so a file repeated within the stream
    is imported only once.
    """
    connection = sqlhub.processConnection
    # Seed with the default archive so it is never queried a second time
    # when a record names it explicitly.
    archives = {archive}
    files = _known_files(connection, archive)
    for line in inp:
        line_archive, parts = split_line(line.decode('utf-8'))
        target = line_archive or archive
        if target not in archives:
            archives.add(target)
            files |= _known_files(connection, target)
        key = (target, parts[5])  # parts[5] is the file field
        if key not in files:
            files.add(key)
            import_inp_line(target, parts)
106
107
def import_inpx(path, pbar_cb=None):
    """Import all ``.inp`` members of the INPX (zip) file at *path*.

    Each member is imported in its own transaction via ``import_inp``;
    the archive name passed to it is the member name with ``.inp``
    replaced by ``.zip``.  Afterwards the database is vacuumed (per
    table on PostgreSQL, whole database on SQLite).

    ``pbar_cb``, if given, is a progress callback object: ``set_max`` is
    called with the number of ``.inp`` members, ``display`` with the
    1-based index of the member about to be imported, and ``close`` at
    the end.
    """
    # Context managers make sure the zip file and each member stream are
    # closed even when an import transaction raises.
    with ZipFile(path) as inpx:
        inp_names = [name for name in inpx.namelist()
                     if os.path.splitext(name)[1] == '.inp']
        if pbar_cb:
            pbar_cb.set_max(len(inp_names))
        for inp_count, name in enumerate(inp_names, 1):
            if pbar_cb:
                pbar_cb.display(inp_count)
            archive = os.path.splitext(name)[0]
            with inpx.open(name) as inp:
                sqlhub.doInTransaction(import_inp, archive + '.zip', inp)
    connection = sqlhub.processConnection
    if connection.dbName == 'postgres':
        for table in Author, Book, Extension, Genre, Language:
            connection.query("VACUUM %s" % table.sqlmeta.table)
    elif connection.dbName == 'sqlite':
        connection.query("VACUUM")
    if pbar_cb:
        pbar_cb.close()