import edumed z pad: poprawki
author Marcin Koziej <marcin.koziej@nowoczesnapolska.org.pl>
Mon, 12 Nov 2012 14:13:35 +0000 (15:13 +0100)
committer Marcin Koziej <marcin.koziej@nowoczesnapolska.org.pl>
Mon, 12 Nov 2012 14:13:35 +0000 (15:13 +0100)
apps/catalogue/management/commands/import_pad.py
apps/catalogue/management/edumed.py

index 205c476..8032d94 100644 (file)
@@ -10,7 +10,7 @@ from django.core.management.base import BaseCommand
 from django.core.management.color import color_style
 from django.db import transaction
 from librarian.dcparser import BookInfo
-from librarian import ParseError, ValidationError
+from librarian import ParseError, ValidationError, WLURI
 from django.conf import settings
 from catalogue.models import Book
 from catalogue.management import auto_taggers
@@ -54,7 +54,7 @@ class Command(BaseCommand):
             text = b.materialize().encode('utf-8')
             try:
                 info = BookInfo.from_string(text)
-                slugs[info.slug].append(b)
+                slugs[info.url.slug].append(b)
             except (ParseError, ValidationError):
                 slugs[b.slug].append(b)
 
@@ -76,8 +76,10 @@ class Command(BaseCommand):
                 print "pad '%s' does not exist" % pid
                 continue
             slug = slughifi(pid)
+            print "Importing %s..." % pid
             title = pid
 
+            print slugs, slug
             previous_books = slugs.get(slug)
             if previous_books:
                 if len(previous_books) > 1:
index e5d5ee0..ffad2ec 100644 (file)
@@ -15,14 +15,18 @@ class Tagger:
     def line(self, position):
         return self.lines[position]
 
-    empty_line = re.compile(r"\s+")
+    ignore = [ re.compile(r"^[\[][PA][\]] - [^ ]+$") ]
+    empty_line = re.compile(r"^\s+$")
     
     def skip_empty(self, position):
         while self.line(position) == "" or \
-            self.empty_line.match(self.line(position)):
+            self.empty_line.match(self.line(position)) or \
+            filter(lambda r: r.match(self.line(position)),
+                             self.ignore[:]):
             position += 1
         return position
 
+
     def tag(self, position):
         """
 Return None -- means that we can't tag it in any way
@@ -105,7 +109,7 @@ class List(Tagger):
 
 class Paragraph(Tagger):
     remove_this = [
-        re.compile(r"[\s]*opis zawartości[\s]*", re.I),
+        re.compile(r"[\s]*opis zawarto.ci[\s]*", re.I),
         re.compile(r"^[\s]*$")
         ]
     podrozdzial = [