Fix
author Marcin Koziej <marcin.koziej@nowoczesnapolska.org.pl>
Mon, 12 Nov 2012 14:58:49 +0000 (15:58 +0100)
committer Marcin Koziej <marcin.koziej@nowoczesnapolska.org.pl>
Mon, 12 Nov 2012 14:58:49 +0000 (15:58 +0100)
apps/catalogue/management/commands/import_pad.py
apps/catalogue/management/edumed.py

index 8032d94..6e3ff4a 100644 (file)
@@ -75,11 +75,25 @@ class Command(BaseCommand):
             except ValueError:
                 print "pad '%s' does not exist" % pid
                 continue
-            slug = slughifi(pid)
-            print "Importing %s..." % pid
+
+            open("/tmp/pad_%s.txt" % pid, 'w').write(text.encode('utf-8'))
+            
+            if options.get('tag_edumed'):
+                auto_tagger = 'edumed'
+            else:
+                auto_tagger = options.get('auto_tagger')
+            if auto_tagger:
+                text = auto_taggers[auto_tagger](text)
+            try:
+                info = BookInfo.from_string(text.encode('utf-8'))
+                slug = info.url.slug
+            except (ParseError, ValidationError):
+                slug = slughifi(pid)
+
+            print "Importing %s (slug %s)..." % (pid, slug)
             title = pid
 
-            print slugs, slug
+            #            print slugs, slug
             previous_books = slugs.get(slug)
             if previous_books:
                 if len(previous_books) > 1:
@@ -94,6 +108,7 @@ class Command(BaseCommand):
 
             if previous_book:
                 book = previous_book
+                book.slug = slug
             else:
                 book = Book()
                 book.slug = slug
@@ -108,12 +123,6 @@ class Command(BaseCommand):
             else:
                 chunk = book.add(slug, title)
 
-            if options.get('tag_edumed'):
-                auto_tagger = 'edumed'
-            else:
-                auto_tagger = options.get('auto_tagger')
-            if auto_tagger:
-                text = auto_taggers[auto_tagger](text)
             chunk.commit(text, **commit_args)
 
             book_count += 1
index ffad2ec..55269d1 100644 (file)
@@ -15,7 +15,7 @@ class Tagger:
     def line(self, position):
         return self.lines[position]
 
-    ignore = [ re.compile(r"^[\[][PA][\]] - [^ ]+$") ]
+    ignore = [re.compile(r"^[\[][PA][\]] - [^ ]+$")]
     empty_line = re.compile(r"^\s+$")
     
     def skip_empty(self, position):
@@ -26,7 +26,6 @@ class Tagger:
             position += 1
         return position
 
-
     def tag(self, position):
         """
 Return None -- means that we can't tag it in any way
@@ -36,6 +35,11 @@ Return None -- means that we can't tag it in any way
     def wrap(self, tagname, content):
         return u"<%s>%s</%s>" % (tagname, content, tagname)
 
+    @staticmethod
+    def anymatches(regex):
+        return lambda x: regex.match(x)
+        
+
 
 class Section(Tagger):
     looks_like = re.compile(r"^[IVX]+[.]\s+(.*)$")
@@ -87,11 +91,14 @@ class Informacje(Tagger):
 
 
 class List(Tagger):
+    point = re.compile(r"^[\s]*([-*])")
+    
     def tag(self, pos):
         self.items = []
         while True:
             l = self.line(pos)
-            if l and l[0] in ('-', '*'):
+            m = self.point.match(l)
+            if l and m:
                 self.items.append(l[1:].strip())
                 pos += 1
             else:
@@ -100,10 +107,10 @@ class List(Tagger):
             return pos
 
     def __unicode__(self):
-        s = "<lista>\n"
+        s = '<lista typ="punkt">'
         for i in self.items:
-            s += "<punkt>%s</punkt>\n" % i
-        s += "</lista>\n"
+            s += "\n<punkt>%s</punkt>" % i
+        s += "\n</lista>\n"
         return s