X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;f=m_librarian%2Finp.py;h=76ab97be9883c4cd240ae725acc3300acf897e98;hb=ce323cc1847cdb57a6fc6df2c685e68fd76be522;hp=56f85e4d1dcdfb4561fe0ac08cc4d364c291f25a;hpb=991627dd1d4fcdecc04a091ec9c0c8b9e8e8b27c;p=m_librarian.git diff --git a/m_librarian/inp.py b/m_librarian/inp.py index 56f85e4..76ab97b 100644 --- a/m_librarian/inp.py +++ b/m_librarian/inp.py @@ -1,22 +1,23 @@ -__all__ = ['import_inpx'] - import os from zipfile import ZipFile -from sqlobject import sqlhub, SQLObjectNotFound +from sqlobject import sqlhub +from sqlobject.sqlbuilder import Select from .db import Author, Book, Extension, Genre, Language, \ insert_name, insert_author +__all__ = ['import_inpx'] + EOT = chr(4) # INP field separator def split_line(line): parts = line.strip().split(EOT) - l = len(parts) - if l < 11: + _l = len(parts) + if _l < 11: raise ValueError('Unknown INP structure: "%s"' % line) - if l == 11: # Standard structure + if _l == 11: # Standard structure parts.append(None) # Emulate lang else: # New structure parts = parts[:12] @@ -26,12 +27,6 @@ def split_line(line): def import_inp_line(archive, parts): authors, genres, title, series, ser_no, file, size, lib_id, deleted, \ extension, date, language = parts - try: - Book.archive_file_idx.get(archive, file) - except SQLObjectNotFound: - pass - else: - return try: ser_no = int(ser_no) except ValueError: @@ -45,8 +40,13 @@ def import_inp_line(archive, parts): lib_id=lib_id, deleted=deleted, extension=extension_row, date=date, language=language_row) - for author in authors.split(':'): + authors = authors.split(':') + seen_authors = set() + for author in authors: if author: + if author in seen_authors: + continue + seen_authors.add(author) alist = author.split(',', 2) surname = alist[0] if len(alist) > 1: @@ -66,20 +66,45 @@ def import_inp_line(archive, parts): def import_inp(archive, inp): + files = set() + connection = sqlhub.processConnection + for file, in connection.queryAll(connection.sqlrepr( + Select(Book.q.file, Book.q.archive == archive))): + files.add(file) for line in inp: - import_inp_line(archive, split_line(line)) + line = line.decode('utf-8') + parts = split_line(line) + file = parts[5] + if file not in files: + files.add(file) + import_inp_line(archive, parts) -def import_inpx(path): +def import_inpx(path, pbar_cb=None): inpx = ZipFile(path) + if pbar_cb: + inp_count = 0 + for name in inpx.namelist(): + ext = os.path.splitext(name)[1] + if ext == '.inp': + inp_count += 1 + pbar_cb.set_max(inp_count) + inp_count = 0 for name in inpx.namelist(): archive, ext = os.path.splitext(name) if ext != '.inp': continue + if pbar_cb: + inp_count += 1 + pbar_cb.display(inp_count) inp = inpx.open(name) sqlhub.doInTransaction(import_inp, archive + '.zip', inp) inp.close() connection = sqlhub.processConnection - if connection.dbName in ('postgres', 'sqlite'): + if connection.dbName == 'postgres': for table in Author, Book, Extension, Genre, Language: connection.query("VACUUM %s" % table.sqlmeta.table) + elif connection.dbName == 'sqlite': + connection.query("VACUUM") + if pbar_cb: + pbar_cb.close()