X-Git-Url: https://git.mdrn.pl/redakcja.git/blobdiff_plain/65f4793d71ca86c80a7f4f7974e8d65a3ebe6f25..df9d185f88fa914f619261ea5564003bb93a6041:/apps/catalogue/management/commands/import_pad.py diff --git a/apps/catalogue/management/commands/import_pad.py b/apps/catalogue/management/commands/import_pad.py index 205c4765..dbfe7992 100644 --- a/apps/catalogue/management/commands/import_pad.py +++ b/apps/catalogue/management/commands/import_pad.py @@ -10,10 +10,11 @@ from django.core.management.base import BaseCommand from django.core.management.color import color_style from django.db import transaction from librarian.dcparser import BookInfo -from librarian import ParseError, ValidationError +from librarian import ParseError, ValidationError, WLURI from django.conf import settings from catalogue.models import Book from catalogue.management import auto_taggers +import re class Command(BaseCommand): @@ -25,6 +26,7 @@ class Command(BaseCommand): make_option('-E', '--edumed', dest="tag_edumed", default=False, action='store_true', help="Perform EduMed pre-tagging"), make_option('-a', '--autotagger', dest="auto_tagger", default=None, help="Use auto-tagger (one of: %s)" % ', '.join(auto_taggers.keys())), + make_option('-S', '--use-pad-prefix', dest="pad_prefix", default=False, action='store_true', help="use pad name prefix in slug"), ) help = 'Imports Text files from EtherPad Lite.' @@ -52,9 +54,10 @@ class Command(BaseCommand): if verbose: print b.slug text = b.materialize().encode('utf-8') + try: info = BookInfo.from_string(text) - slugs[info.slug].append(b) + slugs[info.url.slug].append(b) except (ParseError, ValidationError): slugs[b.slug].append(b) @@ -75,9 +78,25 @@ class Command(BaseCommand): except ValueError: print "pad '%s' does not exist" % pid continue - slug = slughifi(pid) + + open("/tmp/pad_%s.txt" % pid, 'w').write(text.encode('utf-8')) + + if options.get('tag_edumed'): + auto_tagger = 'edumed' + else: + auto_tagger = options.get('auto_tagger') + if auto_tagger: + text = auto_taggers[auto_tagger](text) + try: + info = BookInfo.from_string(text.encode('utf-8')) + slug = info.url.slug + except (ParseError, ValidationError): + slug = slughifi(pid) + + print "Importing %s (slug %s)..." % (pid, slug) title = pid + # print slugs, slug previous_books = slugs.get(slug) if previous_books: if len(previous_books) > 1: @@ -90,8 +109,14 @@ class Command(BaseCommand): comm = '*' print book_count, slug, '-->', comm + # add pad prefix now. + if options.get('pad_prefix'): + pad_prefix = re.split(r"[-_]", pid)[0] + slug = pad_prefix + "-" + slug + if previous_book: book = previous_book + book.slug = slug else: book = Book() book.slug = slug @@ -106,12 +131,6 @@ class Command(BaseCommand): else: chunk = book.add(slug, title) - if options.get('tag_edumed'): - auto_tagger = 'edumed' - else: - auto_tagger = options.get('auto_tagger') - if auto_tagger: - text = auto_taggers[auto_tagger](text) chunk.commit(text, **commit_args) book_count += 1