Updating flickr html parsing, fixing #2688
[redakcja.git] / apps / catalogue / management / commands / import_wl.py
1 # -*- coding: utf-8 -*-
2
3 from collections import defaultdict
4 import json
5 from optparse import make_option
6 import urllib2
7
8 from django.core.management.base import BaseCommand
9 from django.core.management.color import color_style
10 from django.db import transaction
11 from librarian.dcparser import BookInfo
12 from librarian import ParseError, ValidationError
13
14 from catalogue.models import Book
15
16
# Books-list endpoint: fetched and JSON-decoded by the import command, which
# then follows each entry's 'href' to reach the per-book detail record.
17 WL_API = 'http://www.wolnelektury.pl/api/books/'
18
19
class Command(BaseCommand):
    """Import book XML documents listed by the WL API into the catalogue.

    For every book returned by ``WL_API`` the command downloads its XML,
    reads the slug from the document metadata and hands the text to
    ``Book.import_xml_text``.  When an already managed (non-hidden) book
    declares the same metadata slug, it is passed as ``previous_book``;
    otherwise the import is done with ``previous_book=None``.

    The whole run happens inside one manually managed transaction: it is
    committed only when every book was processed and rolled back when
    anything raises.
    """

    option_list = BaseCommand.option_list + (
        make_option('-q', '--quiet', action='store_false', dest='verbose',
            default=True, help='Less output'),
    )
    help = 'Imports XML files from WL.'

    def handle(self, *args, **options):
        """Run the import.

        Options:
            verbose -- when false (``-q``/``--quiet``), suppresses the
                informational messages; per-book progress lines and the
                final summary are always printed.

        Raises whatever the network, JSON, metadata parsing or import
        layers raise; in that case the transaction is rolled back first.
        """
        self.style = color_style()
        verbose = options.get('verbose')

        # Start transaction management.
        transaction.commit_unless_managed()
        transaction.enter_transaction_management()
        transaction.managed(True)
        try:
            slugs = self._managed_books_by_slug(verbose)
            book_count = self._import_books(slugs, verbose)

            # Print results
            print('')
            print("Results:")
            print("Imported %d books from WL:" % (book_count, ))
            print('')
        except BaseException:
            # Leaving transaction management with uncommitted changes would
            # raise its own error and mask the real one, so roll back first.
            transaction.rollback()
            raise
        else:
            transaction.commit()
        finally:
            transaction.leave_transaction_management()

    @staticmethod
    def _fetch(url):
        """Return the raw response body of `url`, always closing the response."""
        response = urllib2.urlopen(url)
        try:
            return response.read()
        finally:
            response.close()

    def _managed_books_by_slug(self, verbose):
        """Group non-hidden catalogue books by the slug found in their metadata.

        Hidden books are those whose catalogue slug starts with a dot.
        Returns a ``defaultdict(list)`` mapping metadata slug -> list of books.
        """
        if verbose:
            print('Reading currently managed files (skipping hidden ones).')
        slugs = defaultdict(list)
        for managed in Book.objects.exclude(slug__startswith='.').all():
            if verbose:
                print(managed.slug)
            text = managed.materialize().encode('utf-8')
            try:
                info = BookInfo.from_string(text)
            except (ParseError, ValidationError):
                # Deliberate best effort: a book without parseable/valid
                # metadata cannot be matched by slug, so it is left out.
                continue
            slugs[info.slug].append(managed)
        return slugs

    def _import_books(self, slugs, verbose):
        """Download and import every book listed by the API; return the count.

        `slugs` is the mapping produced by `_managed_books_by_slug`.
        """
        # NOTE(review): the exact meaning of these keys is defined by
        # Book.import_xml_text, which is not visible here.
        commit_args = {
            "author_name": 'Platforma',
            "description": 'Automatycznie zaimportowane z Wolnych Lektur',
            "publishable": True,
        }

        if verbose:
            print('Opening books list')
        book_count = 0
        for book in json.loads(self._fetch(WL_API)):
            book_detail = json.loads(self._fetch(book['href']))
            xml_text = self._fetch(book_detail['xml'])
            info = BookInfo.from_string(xml_text)
            # .get() so that a miss does not insert an empty list.
            previous_books = slugs.get(info.slug)
            if previous_books:
                if len(previous_books) > 1:
                    # Report the ambiguity, then fall back to the first match.
                    print(self.style.ERROR(
                        "There is more than one book with slug %s: %s" % (
                            info.slug,
                            ', '.join(b.slug for b in previous_books))))
                previous_book = previous_books[0]
                comm = previous_book.slug
            else:
                previous_book = None
                comm = '*'
            print('%d %s --> %s' % (book_count, info.slug, comm))
            Book.import_xml_text(xml_text, title=info.title[:255],
                slug=info.slug[:128], previous_book=previous_book,
                commit_args=commit_args)
            book_count += 1
        return book_count
91