git.phdru.name Git - m_librarian.git/commitdiff
Prevent duplicate authors
author Oleg Broytman <phd@phdru.name>
Thu, 11 Feb 2016 22:00:29 +0000 (01:00 +0300)
committer Oleg Broytman <phd@phdru.name>
Thu, 11 Feb 2016 22:00:29 +0000 (01:00 +0300)
Some INP files contain duplicate author names, like this:
Author,Name:Author,Name

m_librarian/inp.py

index 56f85e4d1dcdfb4561fe0ac08cc4d364c291f25a..09cd42be2151a5e7b11dda19673cbada15578721 100644 (file)
@@ -45,8 +45,13 @@ def import_inp_line(archive, parts):
                 lib_id=lib_id, deleted=deleted,
                 extension=extension_row, date=date,
                 language=language_row)
-    for author in authors.split(':'):
+    authors = authors.split(':')
+    seen_authors = set()
+    for author in authors:
         if author:
+            if author in seen_authors:
+                continue
+            seen_authors.add(author)
             alist = author.split(',', 2)
             surname = alist[0]
             if len(alist) > 1: