X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;f=m_librarian%2Finp.py;h=6d3bd7d120ce0d15e4157dbf7a41cc42bfddc3c0;hb=HEAD;hp=e16df6c5d91496d9f652e966084429737ce21a5e;hpb=51aa2c71d5bebc4be0c98f00c97d2f47fcde4d3a;p=m_librarian.git diff --git a/m_librarian/inp.py b/m_librarian/inp.py index e16df6c..6d3bd7d 100644 --- a/m_librarian/inp.py +++ b/m_librarian/inp.py @@ -1,8 +1,10 @@ import os from zipfile import ZipFile -from sqlobject import sqlhub + +from sqlobject import dberrors, sqlhub from sqlobject.sqlbuilder import Select + from .db import Author, Book, Extension, Genre, Language, \ insert_name, insert_author @@ -14,14 +16,21 @@ EOT = chr(4) # INP field separator def split_line(line): parts = line.strip().split(EOT) - l = len(parts) - if l < 11: + _l = len(parts) + if _l < 11: raise ValueError('Unknown INP structure: "%s"' % line) - if l == 11: # Standard structure + archive = None + if _l == 11: # Standard structure parts.append(None) # Emulate lang - else: # New structure + elif _l == 15: # New structure parts = parts[:12] - return parts + elif _l == 17: # Very new structure + archive = parts[12] + language = parts[13] + parts = parts[:11] + [language] + else: # New structure + raise ValueError('Unknown INP structure: "%s"' % line) + return archive, parts def import_inp_line(archive, parts): @@ -62,34 +71,65 @@ def import_inp_line(archive, parts): for genre in genres.split(':'): if genre: genre_row = insert_name(Genre, genre, title=genre) - book.addGenre(genre_row) + try: + book.addGenre(genre_row) + except dberrors.DuplicateEntryError: + pass # The genre has already been added + + +def tounicode(s): + if isinstance(s, bytes): + return s.decode('utf-8') + else: + return s def import_inp(archive, inp): + archives = set() files = set() connection = sqlhub.processConnection for file, in connection.queryAll(connection.sqlrepr( Select(Book.q.file, Book.q.archive == archive))): - files.add(file) + files.add((archive, tounicode(file))) for line in inp: line = line.decode('utf-8') - parts = split_line(line) + _archive, parts = split_line(line) + if _archive and (_archive not in archives): + archives.add(_archive) + for file, in connection.queryAll(connection.sqlrepr( + Select(Book.q.file, Book.q.archive == _archive))): + files.add((_archive, tounicode(file))) file = parts[5] - if file not in files: - files.add(file) - import_inp_line(archive, parts) + if (_archive or archive, file) not in files: + files.add((_archive or archive, file)) + import_inp_line(_archive or archive, parts) -def import_inpx(path): +def import_inpx(path, pbar_cb=None): inpx = ZipFile(path) + if pbar_cb: + inp_count = 0 + for name in inpx.namelist(): + ext = os.path.splitext(name)[1] + if ext == '.inp': + inp_count += 1 + pbar_cb.set_max(inp_count) + inp_count = 0 for name in inpx.namelist(): archive, ext = os.path.splitext(name) if ext != '.inp': continue + if pbar_cb: + inp_count += 1 + pbar_cb.display(inp_count) inp = inpx.open(name) sqlhub.doInTransaction(import_inp, archive + '.zip', inp) inp.close() connection = sqlhub.processConnection - if connection.dbName in ('postgres', 'sqlite'): + if connection.dbName == 'postgres': for table in Author, Book, Extension, Genre, Language: connection.query("VACUUM %s" % table.sqlmeta.table) + elif connection.dbName == 'sqlite': + connection.query("VACUUM") + if pbar_cb: + pbar_cb.close()