import edumed z pad: poprawki
[redakcja.git] / apps / catalogue / management / commands / import_pad.py
1 # -*- coding: utf-8 -*-
2 from slughifi import slughifi
3 from collections import defaultdict
4 import json
5 from optparse import make_option
6 import urllib2
7
8 from py_etherpad import EtherpadLiteClient
9 from django.core.management.base import BaseCommand
10 from django.core.management.color import color_style
11 from django.db import transaction
12 from librarian.dcparser import BookInfo
13 from librarian import ParseError, ValidationError, WLURI
14 from django.conf import settings
15 from catalogue.models import Book
16 from catalogue.management import auto_taggers
17
18
class Command(BaseCommand):
    """Management command that imports EtherPad Lite pads as books.

    Every pad id given with ``--pad`` (comma separated) or listed, one per
    line, in the ``--pad-ids`` file is fetched through ``EtherpadLiteClient``.
    The pad text is committed to the first chunk of the already managed book
    whose slug equals the slughified pad id; when no such book exists a new
    ``Book`` is created.  The text may first be run through an auto-tagger.
    """

    option_list = BaseCommand.option_list + (
        make_option('-q', '--quiet', action='store_false', dest='verbose', default=True,
            help='Less output'),
        make_option('-p', '--pad', dest='pad_id', help='Pad Id (or many id\'s, comma separated)'),
        make_option('-P', '--pad-ids', dest='pad_ids_file', help='Read Pad id\'s from file'),
        make_option('-E', '--edumed', dest="tag_edumed", default=False,
                    action='store_true', help="Perform EduMed pre-tagging"),
        make_option('-a', '--autotagger', dest="auto_tagger", default=None, help="Use auto-tagger (one of: %s)" % ', '.join(auto_taggers.keys())),
    )
    help = 'Imports Text files from EtherPad Lite.'

    def handle(self, *args, **options):
        """Import all requested pads inside a single database transaction.

        Raises ``CommandError`` when no pad id source was given or when the
        requested auto-tagger is unknown.  Any other exception rolls the
        transaction back and is re-raised.
        """
        # Local import: keeps this block independent of the module import list.
        from django.core.management.base import CommandError

        self.style = color_style()
        verbose = options.get('verbose')

        pad_ids = self._read_pad_ids(options)
        if pad_ids is None:
            raise CommandError("No pad ids given; use --pad or --pad-ids.")

        # The tagger choice does not depend on the pad, so resolve and
        # validate it once, before anything is written.
        if options.get('tag_edumed'):
            auto_tagger = 'edumed'
        else:
            auto_tagger = options.get('auto_tagger')
        if auto_tagger and auto_tagger not in auto_taggers:
            raise CommandError(
                "Unknown auto-tagger '%s' (available: %s)"
                % (auto_tagger, ', '.join(auto_taggers.keys())))

        commit_args = {
            "author_name": 'Platforma',
            "description": 'Automatycznie zaimportowane z EtherPad',
            "publishable": False,
        }

        # Start transaction management.
        transaction.commit_unless_managed()
        transaction.enter_transaction_management()
        transaction.managed(True)
        try:
            if verbose:
                print('Reading currently managed files (skipping hidden ones).')
            slugs = self._index_books_by_slug(verbose)

            if verbose:
                print('Opening Pad')
            pad = EtherpadLiteClient(settings.ETHERPAD_APIKEY, settings.ETHERPAD_URL)

            book_count = 0
            for pid in pad_ids:
                if self._import_pad(pad, pid, slugs, book_count,
                                    auto_tagger, commit_args):
                    book_count += 1

            # Print results
            print('')
            print("Results:")
            print("Imported %d books from Pad" % book_count)

            transaction.commit()
        except BaseException:
            # Discard partial work so that leaving transaction management
            # below does not happen with uncommitted changes pending.
            transaction.rollback()
            raise
        finally:
            transaction.leave_transaction_management()

    def _read_pad_ids(self, options):
        """Return the stripped, non-empty pad ids named by ``options``.

        The ``pad_ids_file`` option takes precedence over ``pad_id``.
        Returns ``None`` when neither option carries a value.
        """
        pad_ids_file = options.get('pad_ids_file')
        if pad_ids_file:
            with open(pad_ids_file) as id_file:
                raw_ids = id_file.readlines()
        elif options.get('pad_id'):
            raw_ids = options['pad_id'].split(',')
        else:
            return None
        stripped = [raw.strip() for raw in raw_ids]
        # Blank lines and stray commas would otherwise become empty pad ids.
        return [pid for pid in stripped if pid]

    def _index_books_by_slug(self, verbose):
        """Group non-hidden books by slug.

        The slug comes from the ``BookInfo`` parsed out of the materialized
        text; when that fails with ``ParseError`` or ``ValidationError`` the
        book's own slug is used instead.
        """
        slugs = defaultdict(list)
        for b in Book.objects.exclude(slug__startswith='.').all():
            if verbose:
                print(b.slug)
            text = b.materialize().encode('utf-8')
            try:
                key = BookInfo.from_string(text).url.slug
            except (ParseError, ValidationError):
                key = b.slug
            slugs[key].append(b)
        return slugs

    def _import_pad(self, pad, pid, slugs, book_count, auto_tagger, commit_args):
        """Fetch pad ``pid`` and commit its text to a book's first chunk.

        ``book_count`` is only used in the progress line.  Returns ``True``
        when the pad was imported and ``False`` when fetching it raised
        ``ValueError`` (reported as a non-existent pad).
        """
        try:
            text = pad.getText(pid)['text']
        except ValueError:
            print("pad '%s' does not exist" % pid)
            return False
        slug = slughifi(pid)
        print("Importing %s..." % pid)
        title = pid

        warning = ''
        previous_books = slugs.get(slug)
        if previous_books:
            if len(previous_books) > 1:
                # Emitted on the same line as the summary that follows.
                warning = self.style.ERROR("There is more than one book "
                    "with slug %s:" % slug) + ' '
            book = previous_books[0]
            comm = book.slug
        else:
            book = Book()
            book.slug = slug
            comm = '*'
        print("%s%d %s --> %s" % (warning, book_count, slug, comm))

        book.title = title
        book.save()

        if len(book) > 0:
            # Reuse the existing first chunk, truncating slug and title
            # to 50 and 255 characters respectively.
            chunk = book[0]
            chunk.slug = slug[:50]
            chunk.title = title[:255]
            chunk.save()
        else:
            chunk = book.add(slug, title)

        if auto_tagger:
            text = auto_taggers[auto_tagger](text)
        chunk.commit(text, **commit_args)
        return True