From f29d3e8f0981bcc8df87b8e661a5bf019a152fae Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Sun, 6 Mar 2016 16:43:07 +0300 Subject: [PATCH] Speed up inp import by caching a set of files in the archive --- m_librarian/db.py | 1 + m_librarian/inp.py | 20 ++++++++++++-------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/m_librarian/db.py b/m_librarian/db.py index 65c7987..99a2843 100755 --- a/m_librarian/db.py +++ b/m_librarian/db.py @@ -95,6 +95,7 @@ class Book(SQLObject): title_idx = DatabaseIndex(title) series_idx = DatabaseIndex(series) ser_no_idx = DatabaseIndex(ser_no) + archive_idx = DatabaseIndex(archive) archive_file_idx = DatabaseIndex(archive, file, unique=True) file_idx = DatabaseIndex(file) size_idx = DatabaseIndex(size) diff --git a/m_librarian/inp.py b/m_librarian/inp.py index 09cd42b..dd32c08 100644 --- a/m_librarian/inp.py +++ b/m_librarian/inp.py @@ -3,7 +3,8 @@ __all__ = ['import_inpx'] import os from zipfile import ZipFile -from sqlobject import sqlhub, SQLObjectNotFound +from sqlobject import sqlhub +from sqlobject.sqlbuilder import Select from .db import Author, Book, Extension, Genre, Language, \ insert_name, insert_author @@ -26,12 +27,6 @@ def split_line(line): def import_inp_line(archive, parts): authors, genres, title, series, ser_no, file, size, lib_id, deleted, \ extension, date, language = parts - try: - Book.archive_file_idx.get(archive, file) - except SQLObjectNotFound: - pass - else: - return try: ser_no = int(ser_no) except ValueError: @@ -71,8 +66,17 @@ def import_inp_line(archive, parts): def import_inp(archive, inp): + files = set() + connection = sqlhub.processConnection + for file, in connection.queryAll(connection.sqlrepr( + Select(Book.q.file, Book.q.archive == archive))): + files.add(file) for line in inp: - import_inp_line(archive, split_line(line)) + parts = split_line(line) + file = parts[5] + if file not in files: + files.add(file) + import_inp_line(archive, parts) def import_inpx(path): -- 2.39.2