first take on The Great Import
[redakcja.git] / apps / catalogue / management / commands / import_wl.py
1 # -*- coding: utf-8 -*-
2
3 import json
4 from optparse import make_option
5 import urllib2
6
7 from django.core.management.base import BaseCommand
8 from django.core.management.color import color_style
9 from django.db import transaction
10 from librarian.dcparser import BookInfo
11 from librarian import ParseError, ValidationError
12
13 from catalogue.models import Book
14
15
# Entry point of the Wolne Lektury books API.  Command.handle() loads it as
# JSON and iterates the result, following each item's 'href' to the book
# detail document (which in turn is read for its 'xml' URL).
WL_API = 'http://www.wolnelektury.pl/api/books/'
17
18
class Command(BaseCommand):
    """Management command importing book XML files from Wolne Lektury (WL).

    Books already present in the catalogue are first indexed by the slug
    found in their own metadata.  Every book listed by the WL API is then
    downloaded and handed to ``Book.import_xml_text``, together with the
    matching catalogue book when one exists.  The whole run happens inside
    one manually managed transaction: it is committed only when every book
    was imported, and rolled back otherwise.
    """

    option_list = BaseCommand.option_list + (
        make_option('-q', '--quiet', action='store_false', dest='verbose', default=True,
            help='Less output'),
    )
    help = 'Imports XML files from WL.'

    def handle(self, *args, **options):
        """Run the import.

        Options:
            verbose: when true (the default; ``-q`` turns it off) progress
                messages for the preparatory steps are printed.

        Raises:
            Whatever the download, parsing or import steps raise.  The
            transaction is rolled back before the error is re-raised.
        """
        self.style = color_style()

        verbose = options.get('verbose')

        # Start transaction management.
        transaction.commit_unless_managed()
        transaction.enter_transaction_management()
        transaction.managed(True)

        try:
            slugs = self._index_managed_books(verbose)
            book_count = self._import_books(slugs, verbose)
            # Commit before reporting, so a failed commit is never preceded
            # by a success message.
            transaction.commit()
        except BaseException:
            # Also covers KeyboardInterrupt: never leave a half-done import
            # pending on the connection.
            transaction.rollback()
            raise
        finally:
            transaction.leave_transaction_management()

        # Print results.  The single-argument print(...) form emits the same
        # text as a print statement on Python 2.
        print('')
        print("Results:")
        print("Imported %d books from WL:" % (book_count, ))
        print('')

    @staticmethod
    def _fetch(url):
        """Return the raw response body for ``url``, closing the connection."""
        response = urllib2.urlopen(url)
        try:
            return response.read()
        finally:
            response.close()

    def _index_managed_books(self, verbose):
        """Return a dict mapping metadata slug to catalogue ``Book``.

        Books whose materialized text cannot be parsed or validated as book
        metadata are left out of the mapping.
        """
        if verbose:
            print('Reading currently managed files.')
        slugs = {}
        for b in Book.objects.all():
            if verbose:
                print(b.slug)
            text = b.materialize().encode('utf-8')
            try:
                info = BookInfo.from_string(text)
            except (ParseError, ValidationError):
                # Deliberately best-effort: a book without usable metadata
                # simply cannot be matched against a WL slug.
                continue
            slugs[info.slug] = b
        return slugs

    def _import_books(self, slugs, verbose):
        """Download every book listed by the WL API and import it.

        ``slugs`` is the mapping built by ``_index_managed_books``; a book
        whose slug is found there is passed on as ``previous_book``.
        Returns the number of imported books.
        """
        # NOTE(review): this looks like commit metadata for the import, but
        # nothing reads it yet -- confirm whether Book.import_xml_text is
        # supposed to receive it.
        commit_args = {
            "author_name": 'Platforma',
            "description": 'Import from WL',
        }

        if verbose:
            print('Opening books list')
        book_count = 0
        for book in json.loads(self._fetch(WL_API)):
            book_detail = json.loads(self._fetch(book['href']))
            xml_text = self._fetch(book_detail['xml'])
            info = BookInfo.from_string(xml_text)
            previous_book = slugs.get(info.slug)
            # Compare against None explicitly so the result does not depend
            # on how the model defines truthiness.
            if previous_book is not None:
                comm = previous_book.slug
            else:
                comm = '*'  # no catalogue book with this slug yet
            # Printed regardless of the verbose flag, as before.
            print('%d %s --> %s' % (book_count, info.slug, comm))
            Book.import_xml_text(xml_text, title=info.title,
                slug=info.slug, previous_book=previous_book)
            book_count += 1
        return book_count
83