Merge branch 'master' into edumed
[redakcja.git] / apps / catalogue / management / commands / import_pad.py
index 205c476..dbfe799 100644 (file)
@@ -10,10 +10,11 @@ from django.core.management.base import BaseCommand
 from django.core.management.color import color_style
 from django.db import transaction
 from librarian.dcparser import BookInfo
-from librarian import ParseError, ValidationError
+from librarian import ParseError, ValidationError, WLURI
 from django.conf import settings
 from catalogue.models import Book
 from catalogue.management import auto_taggers
+import re
 
 
 class Command(BaseCommand):
@@ -25,6 +26,7 @@ class Command(BaseCommand):
         make_option('-E', '--edumed', dest="tag_edumed", default=False,
                     action='store_true', help="Perform EduMed pre-tagging"),
         make_option('-a', '--autotagger', dest="auto_tagger", default=None, help="Use auto-tagger (one of: %s)" % ', '.join(auto_taggers.keys())),
+        make_option('-S', '--use-pad-prefix', dest="pad_prefix", default=False, action='store_true', help="use pad name prefix in slug"),
     )
     help = 'Imports Text files from EtherPad Lite.'
 
@@ -52,9 +54,10 @@ class Command(BaseCommand):
             if verbose:
                 print b.slug
             text = b.materialize().encode('utf-8')
+
             try:
                 info = BookInfo.from_string(text)
-                slugs[info.slug].append(b)
+                slugs[info.url.slug].append(b)
             except (ParseError, ValidationError):
                 slugs[b.slug].append(b)
 
@@ -75,9 +78,25 @@ class Command(BaseCommand):
             except ValueError:
                 print "pad '%s' does not exist" % pid
                 continue
-            slug = slughifi(pid)
+
+            open("/tmp/pad_%s.txt" % pid, 'w').write(text.encode('utf-8'))
+            
+            if options.get('tag_edumed'):
+                auto_tagger = 'edumed'
+            else:
+                auto_tagger = options.get('auto_tagger')
+            if auto_tagger:
+                text = auto_taggers[auto_tagger](text)
+            try:
+                info = BookInfo.from_string(text.encode('utf-8'))
+                slug = info.url.slug
+            except (ParseError, ValidationError):
+                slug = slughifi(pid)
+
+            print "Importing %s (slug %s)..." % (pid, slug)
             title = pid
 
+            #            print slugs, slug
             previous_books = slugs.get(slug)
             if previous_books:
                 if len(previous_books) > 1:
@@ -90,8 +109,14 @@ class Command(BaseCommand):
                 comm = '*'
             print book_count, slug, '-->', comm
 
+            # add pad prefix now.
+            if options.get('pad_prefix'):
+                pad_prefix = re.split(r"[-_]", pid)[0]
+                slug = pad_prefix + "-" + slug
+                
             if previous_book:
                 book = previous_book
+                book.slug = slug
             else:
                 book = Book()
                 book.slug = slug
@@ -106,12 +131,6 @@ class Command(BaseCommand):
             else:
                 chunk = book.add(slug, title)
 
-            if options.get('tag_edumed'):
-                auto_tagger = 'edumed'
-            else:
-                auto_tagger = options.get('auto_tagger')
-            if auto_tagger:
-                text = auto_taggers[auto_tagger](text)
             chunk.commit(text, **commit_args)
 
             book_count += 1