]> git.phdru.name Git - m_librarian.git/blob - m_librarian/inp.py
Prevent duplicate authors
[m_librarian.git] / m_librarian / inp.py
1
# Public API of this module: only the INPX import entry point is exported.
__all__ = ['import_inpx']
3
4 import os
5 from zipfile import ZipFile
6 from sqlobject import sqlhub, SQLObjectNotFound
7 from .db import Author, Book, Extension, Genre, Language, \
8     insert_name, insert_author
9
10
EOT = chr(4)  # INP field separator


def split_line(line):
    """Split one INP record into a list of exactly 12 fields.

    The record is stripped of surrounding whitespace and split on the
    EOT separator.  A record with 11 fields gets ``None`` appended in
    place of the missing language field; a record with more than 11
    fields is truncated to its first 12.

    :param line: one line of an INP listing, as text or as bytes.
    :returns: list of 12 items (the last one may be ``None``).
    :raises ValueError: if the record has fewer than 11 fields.
    """
    # ZipFile.open() yields bytes lines on Python 3, which cannot be split
    # by a text separator.  On Python 2 ``bytes is str`` so the line is
    # left untouched there.
    # NOTE(review): assumes INP listings are UTF-8 encoded -- confirm.
    if bytes is not str and isinstance(line, bytes):
        line = line.decode('utf-8')
    parts = line.strip().split(EOT)
    count = len(parts)
    if count < 11:
        raise ValueError('Unknown INP structure: "%s"' % line)
    if count == 11:  # Standard structure
        parts.append(None)  # Emulate lang
    else:  # New structure
        parts = parts[:12]
    return parts
24
25
def _split_author(author):
    """Split an INP author entry into a (surname, name, misc_name) tuple."""
    fields = author.split(',', 2)
    # Missing trailing components are represented by empty strings.
    fields += [''] * (3 - len(fields))
    return tuple(fields)


def import_inp_line(archive, parts):
    """Create a Book row (with its authors and genres) from one INP record.

    Nothing is done if a book with the same (archive, file) pair is
    already found through ``Book.archive_file_idx``.

    :param archive: name of the archive the record belongs to.
    :param parts: the 12 fields of the record, in the order authors,
        genres, title, series, ser_no, file, size, lib_id, deleted,
        extension, date, language.
    :raises ValueError: if *parts* does not hold exactly 12 items or the
        size field is not an integer.
    """
    authors, genres, title, series, ser_no, file_name, size, lib_id, \
        deleted, extension, date, language = parts
    try:
        Book.archive_file_idx.get(archive, file_name)
    except SQLObjectNotFound:
        pass
    else:
        # The lookup succeeded: this file is already in the database.
        return
    try:
        ser_no = int(ser_no)
    except ValueError:
        # A non-numeric series number is stored as "no number".
        ser_no = None
    size = int(size)
    deleted = deleted == '1'
    extension_row = insert_name(Extension, extension)
    language_row = insert_name(Language, language)
    book = Book(title=title, series=series, ser_no=ser_no,
                archive=archive, file=file_name, size=size,
                lib_id=lib_id, deleted=deleted,
                extension=extension_row, date=date,
                language=language_row)

    # De-duplicate on the parsed triple rather than on the raw text, so
    # that e.g. "Doe,John" and "Doe,John," are linked to the book only once.
    seen_authors = set()
    for author in authors.split(':'):
        if not author:
            continue
        author_key = _split_author(author)
        if author_key in seen_authors:
            continue
        seen_authors.add(author_key)
        book.addAuthor(insert_author(*author_key))

    # Genres get the same protection as authors: a genre repeated within
    # one record is linked to the book only once.
    seen_genres = set()
    for genre in genres.split(':'):
        if not genre or genre in seen_genres:
            continue
        seen_genres.add(genre)
        book.addGenre(insert_name(Genre, genre, title=genre))
71
72
def import_inp(archive, inp):
    """Import every record of one INP listing.

    Each item yielded by *inp* is split with split_line() and the
    resulting fields are passed to import_inp_line() together with
    *archive*, one record at a time and in order.
    """
    # Lazy generator: a record is split only right before it is imported.
    records = (split_line(raw) for raw in inp)
    for fields in records:
        import_inp_line(archive, fields)
76
77
def import_inpx(path):
    """Import all ``.inp`` listings contained in the INPX file at *path*.

    Every archive member whose extension is exactly ``.inp`` is imported
    inside its own transaction via ``sqlhub.doInTransaction``; the member
    name with ``.inp`` replaced by ``.zip`` is used as the archive name.
    Afterwards VACUUM is run on each table when the connection's
    ``dbName`` is ``postgres`` or ``sqlite``.

    :param path: path to the INPX (zip) file.
    """
    # Context managers guarantee that both the INPX archive and each member
    # stream are closed, even if an import transaction raises.
    with ZipFile(path) as inpx:
        for name in inpx.namelist():
            archive, ext = os.path.splitext(name)
            if ext != '.inp':
                continue
            with inpx.open(name) as inp:
                sqlhub.doInTransaction(import_inp, archive + '.zip', inp)
    connection = sqlhub.processConnection
    if connection.dbName in ('postgres', 'sqlite'):
        for table in Author, Book, Extension, Genre, Language:
            connection.query("VACUUM %s" % table.sqlmeta.table)