nearly working version
[redakcja.git] / apps / catalogue / management / commands / import_wl.py
1 # -*- coding: utf-8 -*-
2
3 from collections import defaultdict
4 import json
5 from optparse import make_option
6 import urllib2
7
8 from django.core.management.base import BaseCommand
9 from django.core.management.color import color_style
10 from django.db import transaction
11 from librarian.dcparser import BookInfo
12 from librarian import ParseError, ValidationError
13
14 from catalogue.models import Book
15
16
# URL of the Wolne Lektury books API; the import command fetches it and
# parses the response as a JSON list of book entries.
WL_API = 'http://www.wolnelektury.pl/api/books/'
18
19
20 class Command(BaseCommand):
21     option_list = BaseCommand.option_list + (
22         make_option('-q', '--quiet', action='store_false', dest='verbose', default=True,
23             help='Less output'),
24     )
25     help = 'Imports XML files from WL.'
26
27     def handle(self, *args, **options):
28
29         self.style = color_style()
30
31         verbose = options.get('verbose')
32
33         # Start transaction management.
34         transaction.commit_unless_managed()
35         transaction.enter_transaction_management()
36         transaction.managed(True)
37
38         if verbose:
39             print 'Reading currently managed files (skipping hidden ones).'
40         slugs = defaultdict(list)
41         for b in Book.objects.exclude(slug__startswith='.').all():
42             if verbose:
43                 print b.slug
44             text = b.materialize().encode('utf-8')
45             try:
46                 info = BookInfo.from_string(text)
47             except (ParseError, ValidationError):
48                 pass
49             else:
50                 slugs[info.slug].append(b)
51
52         #~ conflicts = []
53         #~ for slug, book_list in slugs.items():
54             #~ if len(book_list) > 1:
55                 #~ conflicts.append((slug, book_list))
56         #~ if conflicts:
57             #~ print self.style.ERROR("There is more than one book "
58                     #~ "with the same slug in dc:url. "
59                     #~ "Merge or hide them before proceeding.")
60             #~ for slug, book_list in sorted(conflicts):
61                 #~ print slug
62                 #~ print "\n".join(b.slug for b in book_list)
63                 #~ print
64             #~ return
65
66         book_count = 0
67         commit_args = {
68             "author_name": 'Platforma',
69             "description": 'Automatycznie zaimportowane z Wolnych Lektur',
70             "publishable": True,
71         }
72
73         if verbose:
74             print 'Opening books list'
75         for book in json.load(urllib2.urlopen(WL_API))[:10]:
76             book_detail = json.load(urllib2.urlopen(book['href']))
77             xml_text = urllib2.urlopen(book_detail['xml']).read()
78             info = BookInfo.from_string(xml_text)
79             previous_books = slugs.get(info.slug)
80             if previous_books:
81                 if len(previous_books) > 1:
82                     print self.style.ERROR("There is more than one book "
83                         "with slug %s:"), 
84                 previous_book = previous_books[0]
85                 comm = previous_book.slug
86             else:
87                 previous_book = None
88                 comm = '*'
89             print book_count, info.slug , '-->', comm
90             Book.import_xml_text(xml_text, title=info.title,
91                 slug=info.slug, previous_book=previous_book,
92                 commit_args=commit_args)
93             book_count += 1
94
95         # Print results
96         print
97         print "Results:"
98         print "Imported %d books from WL:" % (
99                 book_count, )
100         print
101
102
103         transaction.commit()
104         transaction.leave_transaction_management()
105