1 # -*- coding: utf-8 -*-
2 from slughifi import slughifi
3 from collections import defaultdict
5 from optparse import make_option
8 from py_etherpad import EtherpadLiteClient
9 from django.core.management.base import BaseCommand
10 from django.core.management.color import color_style
11 from django.db import transaction
12 from librarian.dcparser import BookInfo
13 from librarian import ParseError, ValidationError, WLURI
14 from django.conf import settings
15 from catalogue.models import Book
16 from catalogue.management import auto_taggers
20 class Command(BaseCommand):
# Django management command that pulls texts from EtherPad Lite pads and
# commits them into catalogue Book chunks (see `help` below).
# NOTE(review): this listing has lost its indentation and several original
# lines (block headers such as try/if/else/for are not visible), so the
# comments below describe only the statements that can actually be seen.
#
# Command-line options (optparse), appended to the BaseCommand defaults:
#   -q/--quiet           stores False into `verbose` (default True)
#   -p/--pad             one pad id, or several separated by commas
#   -P/--pad-ids         path of a file to read pad ids from
#   -E/--edumed          boolean flag stored as `tag_edumed`
#   -a/--autotagger      name of an entry in `auto_taggers` (default None)
#   -S/--use-pad-prefix  boolean flag stored as `pad_prefix`
21 option_list = BaseCommand.option_list + (
22 make_option('-q', '--quiet', action='store_false', dest='verbose', default=True,
24 make_option('-p', '--pad', dest='pad_id', help='Pad Id (or many id\'s, comma separated)'),
25 make_option('-P', '--pad-ids', dest='pad_ids_file', help='Read Pad id\'s from file'),
26 make_option('-E', '--edumed', dest="tag_edumed", default=False,
27 action='store_true', help="Perform EduMed pre-tagging"),
# The help text lists the available tagger names taken from auto_taggers.keys().
28 make_option('-a', '--autotagger', dest="auto_tagger", default=None, help="Use auto-tagger (one of: %s)" % ', '.join(auto_taggers.keys())),
29 make_option('-S', '--use-pad-prefix', dest="pad_prefix", default=False, action='store_true', help="use pad name prefix in slug"),
31 help = 'Imports Text files from EtherPad Lite.'
# Entry point of the command; `options` carries the parsed option values
# declared in option_list above.
33 def handle(self, *args, **options):
# Colour helper, used further down to format the duplicate-slug error.
35 self.style = color_style()
37 verbose = options.get('verbose')
38 pad_ids_file = options.get('pad_ids_file')
# The pad ids come either from the lines of the --pad-ids file or from the
# comma-separated --pad value; presumably these two assignments are the
# branches of an if/else on pad_ids_file (the headers are not visible).
# NOTE(review): the file opened here is never explicitly closed.
40 pad_id = open(pad_ids_file).readlines()
# NOTE(review): options.get("pad_id") has no default in option_list, so it is
# None when --pad is omitted and .split would fail - confirm how this is guarded.
42 pad_id = options.get("pad_id").split(',')
# Strip surrounding whitespace (including the newlines left by readlines()).
43 pad_id = map(str.strip, pad_id)
45 # Start transaction management.
# NOTE(review): commit_unless_managed / enter_transaction_management / managed
# belong to Django's legacy manual transaction API - confirm the targeted
# Django version still provides them.
46 transaction.commit_unless_managed()
47 transaction.enter_transaction_management()
48 transaction.managed(True)
51 print 'Reading currently managed files (skipping hidden ones).'
# Index of existing books: slug -> list of Book objects sharing that slug.
52 slugs = defaultdict(list)
# "Hidden" books are those whose slug starts with a dot.
53 for b in Book.objects.exclude(slug__startswith='.').all():
56 text = b.materialize().encode('utf-8')
# Prefer the slug declared in the book's own metadata ...
59 info = BookInfo.from_string(text)
60 slugs[info.url.slug].append(b)
# ... and fall back to the database slug when the metadata cannot be parsed
# or validated.
61 except (ParseError, ValidationError):
62 slugs[b.slug].append(b)
# Presumably entries of the `commit_args` dict passed to chunk.commit() below
# (the opening of the dict is not visible). The description is Polish for
# "Automatically imported from EtherPad".
66 "author_name": 'Platforma',
67 "description": 'Automatycznie zaimportowane z EtherPad',
# EtherPad Lite API client configured from the Django settings.
73 pad = EtherpadLiteClient(settings.ETHERPAD_APIKEY, settings.ETHERPAD_URL)
# `pid` is presumably the loop variable iterating over pad_id (the loop header
# is not visible). getText returns a mapping whose 'text' entry is the pad body.
77 text = pad.getText(pid)['text']
# Presumably printed from an error handler around the getText call above.
79 print "pad '%s' does not exist" % pid
# Dump the raw pad text to /tmp as UTF-8.
# NOTE(review): the file handle is not explicitly closed, and `pid` is
# interpolated into the path unchecked.
82 open("/tmp/pad_%s.txt" % pid, 'w').write(text.encode('utf-8'))
# Pick the auto-tagger: the --edumed flag selects 'edumed', otherwise the
# value of --autotagger is used (presumably an if/else; the else is not visible).
84 if options.get('tag_edumed'):
85 auto_tagger = 'edumed'
87 auto_tagger = options.get('auto_tagger')
# Run the text through the selected tagger. auto_tagger may be None (the
# option default), so this is presumably guarded by a condition not visible here.
89 text = auto_taggers[auto_tagger](text)
# Parse the book metadata from the (possibly tagged) pad text.
91 info = BookInfo.from_string(text.encode('utf-8'))
# The handler body for unparsable pads is not visible in this listing.
93 except (ParseError, ValidationError):
96 print "Importing %s (slug %s)..." % (pid, slug)
# .get() is used rather than indexing, so a missing slug does not create an
# empty entry in the defaultdict; it returns None instead.
# NOTE(review): len(None) would raise below - confirm a truthiness check on
# previous_books exists in the lines not visible here.
100 previous_books = slugs.get(slug)
102 if len(previous_books) > 1:
# Trailing comma after the print: Python 2 suppresses the newline.
103 print self.style.ERROR("There is more than one book "
104 "with slug %s:" % slug),
# The first book found for this slug is the one reported as the target.
105 previous_book = previous_books[0]
106 comm = previous_book.slug
110 print book_count, slug, '-->', comm
112 # add pad prefix now.
113 if options.get('pad_prefix'):
# The prefix is the part of the pad id before its first '-' or '_'.
# NOTE(review): no `import re` is visible among the imports shown - verify
# that `re` is imported at module level.
114 pad_prefix = re.split(r"[-_]", pid)[0]
115 slug = pad_prefix + "-" + slug
# Slug and title are truncated to 50 and 255 characters respectively
# (presumably the model field limits - confirm).
128 chunk.slug = slug[:50]
129 chunk.title = title[:255]
# Presumably the branch taken when a new chunk has to be added to the book.
132 chunk = book.add(slug, title)
# Store the pad text as a new revision of the chunk, with the commit metadata.
134 chunk.commit(text, **commit_args)
141 print "Imported %d books from Pad" % book_count
# NOTE(review): no try/finally is visible around the import, so an exception
# earlier in handle() may skip this call - confirm.
144 transaction.leave_transaction_management()