slug updates
[redakcja.git] / apps / catalogue / management / commands / import_pad.py
1 # -*- coding: utf-8 -*-
2 from slughifi import slughifi
3 from collections import defaultdict
4 import json
5 from optparse import make_option
6 import urllib2
7
8 from py_etherpad import EtherpadLiteClient
9 from django.core.management.base import BaseCommand
10 from django.core.management.color import color_style
11 from django.db import transaction
12 from librarian.dcparser import BookInfo
13 from librarian import ParseError, ValidationError, WLURI
14 from django.conf import settings
15 from catalogue.models import Book
16 from catalogue.management import auto_taggers
17 import re
18
19
20 class Command(BaseCommand):
21     option_list = BaseCommand.option_list + (
22         make_option('-q', '--quiet', action='store_false', dest='verbose', default=True,
23             help='Less output'),
24         make_option('-p', '--pad', dest='pad_id', help='Pad Id (or many id\'s, comma separated)'),
25         make_option('-P', '--pad-ids', dest='pad_ids_file', help='Read Pad id\'s from file'),
26         make_option('-E', '--edumed', dest="tag_edumed", default=False,
27                     action='store_true', help="Perform EduMed pre-tagging"),
28         make_option('-a', '--autotagger', dest="auto_tagger", default=None, help="Use auto-tagger (one of: %s)" % ', '.join(auto_taggers.keys())),
29         make_option('-S', '--use-pad-prefix', dest="pad_prefix", default=False, action='store_true', help="use pad name prefix in slug"),
30     )
31     help = 'Imports Text files from EtherPad Lite.'
32
33     def handle(self, *args, **options):
34
35         self.style = color_style()
36
37         verbose = options.get('verbose')
38         pad_ids_file = options.get('pad_ids_file')
39         if pad_ids_file:
40             pad_id = open(pad_ids_file).readlines()
41         else:
42             pad_id = options.get("pad_id").split(',')
43         pad_id = map(str.strip, pad_id)
44
45         # Start transaction management.
46         transaction.commit_unless_managed()
47         transaction.enter_transaction_management()
48         transaction.managed(True)
49
50         if verbose:
51             print 'Reading currently managed files (skipping hidden ones).'
52         slugs = defaultdict(list)
53         for b in Book.objects.exclude(slug__startswith='.').all():
54             if verbose:
55                 print b.slug
56             text = b.materialize().encode('utf-8')
57
58             try:
59                 info = BookInfo.from_string(text)
60                 slugs[info.url.slug].append(b)
61             except (ParseError, ValidationError):
62                 slugs[b.slug].append(b)
63
64         book_count = 0
65         commit_args = {
66             "author_name": 'Platforma',
67             "description": 'Automatycznie zaimportowane z EtherPad',
68             "publishable": False,
69         }
70
71         if verbose:
72             print 'Opening Pad'
73         pad = EtherpadLiteClient(settings.ETHERPAD_APIKEY, settings.ETHERPAD_URL)
74
75         for pid in pad_id:
76             try:
77                 text = pad.getText(pid)['text']
78             except ValueError:
79                 print "pad '%s' does not exist" % pid
80                 continue
81
82             open("/tmp/pad_%s.txt" % pid, 'w').write(text.encode('utf-8'))
83             
84             if options.get('tag_edumed'):
85                 auto_tagger = 'edumed'
86             else:
87                 auto_tagger = options.get('auto_tagger')
88             if auto_tagger:
89                 text = auto_taggers[auto_tagger](text)
90             try:
91                 info = BookInfo.from_string(text.encode('utf-8'))
92                 slug = info.url.slug
93             except (ParseError, ValidationError):
94                 slug = slughifi(pid)
95
96             print "Importing %s (slug %s)..." % (pid, slug)
97             title = pid
98
99             #            print slugs, slug
100             previous_books = slugs.get(slug)
101             if previous_books:
102                 if len(previous_books) > 1:
103                     print self.style.ERROR("There is more than one book "
104                         "with slug %s:" % slug),
105                 previous_book = previous_books[0]
106                 comm = previous_book.slug
107             else:
108                 previous_book = None
109                 comm = '*'
110             print book_count, slug, '-->', comm
111
112             # add pad prefix now.
113             if options.get('pad_prefix'):
114                 pad_prefix = re.split(r"[-_]", pid)[0]
115                 slug = pad_prefix + "-" + slug
116                 
117             if previous_book:
118                 book = previous_book
119                 book.slug = slug
120             else:
121                 book = Book()
122                 book.slug = slug
123             book.title = title
124             book.save()
125
126             if len(book) > 0:
127                 chunk = book[0]
128                 chunk.slug = slug[:50]
129                 chunk.title = title[:255]
130                 chunk.save()
131             else:
132                 chunk = book.add(slug, title)
133
134             chunk.commit(text, **commit_args)
135
136             book_count += 1
137
138         # Print results
139         print
140         print "Results:"
141         print "Imported %d books from Pad" % book_count
142
143         transaction.commit()
144         transaction.leave_transaction_management()