Merge branch 'with-dvcs'
[redakcja.git] / apps / catalogue / management / commands / import_wl.py
index 6836d36..4f15cef 100755 (executable)
@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 
+from collections import defaultdict
 import json
 from optparse import make_option
 import urllib2
@@ -35,9 +36,9 @@ class Command(BaseCommand):
         transaction.managed(True)
 
         if verbose:
-            print 'Reading currently managed files.'
-        slugs = {}
-        for b in Book.objects.all():
+            print 'Reading currently managed files (skipping hidden ones).'
+        slugs = defaultdict(list)
+        for b in Book.objects.exclude(slug__startswith='.').all():
             if verbose:
                 print b.slug
             text = b.materialize().encode('utf-8')
@@ -46,28 +47,49 @@ class Command(BaseCommand):
             except (ParseError, ValidationError):
                 pass
             else:
-                slugs[info.slug] = b
+                slugs[info.slug].append(b)
+
+        #~ conflicts = []
+        #~ for slug, book_list in slugs.items():
+            #~ if len(book_list) > 1:
+                #~ conflicts.append((slug, book_list))
+        #~ if conflicts:
+            #~ print self.style.ERROR("There is more than one book "
+                    #~ "with the same slug in dc:url. "
+                    #~ "Merge or hide them before proceeding.")
+            #~ for slug, book_list in sorted(conflicts):
+                #~ print slug
+                #~ print "\n".join(b.slug for b in book_list)
+                #~ print
+            #~ return
 
         book_count = 0
         commit_args = {
             "author_name": 'Platforma',
-            "description": 'Import from WL',
+            "description": 'Automatycznie zaimportowane z Wolnych Lektur',
+            "publishable": True,
         }
 
         if verbose:
             print 'Opening books list'
-        for book in json.load(urllib2.urlopen(WL_API)):
+        for book in json.load(urllib2.urlopen(WL_API))[:10]:
             book_detail = json.load(urllib2.urlopen(book['href']))
             xml_text = urllib2.urlopen(book_detail['xml']).read()
             info = BookInfo.from_string(xml_text)
-            previous_book = slugs.get(info.slug, None)
-            if previous_book:
+            previous_books = slugs.get(info.slug)
+            if previous_books:
+                if len(previous_books) > 1:
+                    print self.style.ERROR("There is more than one book "
+                        "with slug %s:"), 
+                previous_book = previous_books[0]
                 comm = previous_book.slug
             else:
+                previous_book = None
                 comm = '*'
             print book_count, info.slug , '-->', comm
             Book.import_xml_text(xml_text, title=info.title,
-                slug=info.slug, previous_book=slugs.get(info.slug, None))
+                slug=info.slug, previous_book=previous_book,
+                commit_args=commit_args)
             book_count += 1
 
         # Print results