From 24b8388fe8972e17c7982bbe0768eca2e54b8f1e Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Thu, 17 May 2018 00:10:06 +0300 Subject: [PATCH] Feat(inpx): Import new (17 fields) INP --- m_librarian/inp.py | 27 ++++++++++++++++++++------- tests/test.inpx | Bin 394 -> 686 bytes tests/test_inp.py | 4 ++-- 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/m_librarian/inp.py b/m_librarian/inp.py index 76ab97b..4b772af 100644 --- a/m_librarian/inp.py +++ b/m_librarian/inp.py @@ -17,11 +17,18 @@ def split_line(line): _l = len(parts) if _l < 11: raise ValueError('Unknown INP structure: "%s"' % line) + archive = None if _l == 11: # Standard structure parts.append(None) # Emulate lang - else: # New structure + elif _l == 15: # New structure parts = parts[:12] - return parts + elif _l == 17: # Very new structure + archive = parts[12] + language = parts[13] + parts = parts[:11] + [language] + else: # New structure + raise ValueError('Unknown INP structure: "%s"' % line) + return archive, parts def import_inp_line(archive, parts): @@ -66,18 +73,24 @@ def import_inp_line(archive, parts): def import_inp(archive, inp): + archives = set() files = set() connection = sqlhub.processConnection for file, in connection.queryAll(connection.sqlrepr( Select(Book.q.file, Book.q.archive == archive))): - files.add(file) + files.add((archive, file)) for line in inp: line = line.decode('utf-8') - parts = split_line(line) + _archive, parts = split_line(line) + if _archive and (_archive not in archives): + archives.add(_archive) + for file, in connection.queryAll(connection.sqlrepr( + Select(Book.q.file, Book.q.archive == _archive))): + files.add((_archive, file)) file = parts[5] - if file not in files: - files.add(file) - import_inp_line(archive, parts) + if (_archive or archive, file) not in files: + files.add((_archive or archive, file)) + import_inp_line(_archive or archive, parts) def import_inpx(path, pbar_cb=None): diff --git a/tests/test.inpx b/tests/test.inpx index f794270360d78167fe59d7b7b6edd261a818324a..e364a6f56d074cbfe9aeee5492cde0a7cf4d542a 100644 GIT binary patch delta 402 zcmeBTUdK8?N}7v7h9NJtT-VT4FEg(oG=!6ZIX8_pe9fFcQ4@{7)-$t+FfcK2FzjL2 z=wsmi%c6yWfuS3S`GDHWQj03VTFv2FjpzP}`UTWlTEWf0$nt`jfdNeHHu#V0+$A;fY=UaQlx5ee`xUnSpXwaNFlJh4<#!QM2$z)BMzCCN^!tl}D2vTxzYTG>y?aP%}VTUz{Ppn~_P58R8wot!*AYlirs<0ea;kBLf2$ zg9MNT0ZSS|G$c$WM=;7|AhcnI0ZC diff --git a/tests/test_inp.py b/tests/test_inp.py index 75bcd01..f51e730 100644 --- a/tests/test_inp.py +++ b/tests/test_inp.py @@ -11,5 +11,5 @@ def test_import_bad_inpx(): def test_import_inpx(): load_inpx('test.inpx') - assert Author.select().count() == 4 - assert Book.select().count() == 4 + assert Author.select().count() == 5 + assert Book.select().count() == 5 -- 2.39.2