X-Git-Url: https://git.mdrn.pl/redakcja.git/blobdiff_plain/65f4793d71ca86c80a7f4f7974e8d65a3ebe6f25..78e13f577ab9db6cb8cc181833e351ab792cebe9:/apps/catalogue/management/edumed.py

diff --git a/apps/catalogue/management/edumed.py b/apps/catalogue/management/edumed.py
index e5d5ee0a..55269d1e 100644
--- a/apps/catalogue/management/edumed.py
+++ b/apps/catalogue/management/edumed.py
@@ -15,11 +15,14 @@ class Tagger:
     def line(self, position):
         return self.lines[position]
 
-    empty_line = re.compile(r"\s+")
+    ignore = [re.compile(r"^[\[][PA][\]] - [^ ]+$")]
+    empty_line = re.compile(r"^\s+$")
     
     def skip_empty(self, position):
         while self.line(position) == "" or \
-            self.empty_line.match(self.line(position)):
+            self.empty_line.match(self.line(position)) or \
+            filter(lambda r: r.match(self.line(position)),
+                             self.ignore[:]):
             position += 1
         return position
 
@@ -32,6 +35,11 @@ Return None -- means that we can't tag it in any way
     def wrap(self, tagname, content):
         return u"<%s>%s</%s>" % (tagname, content, tagname)
 
+    @staticmethod
+    def anymatches(regex):
+        return lambda x: regex.match(x)
+        
+
 
 class Section(Tagger):
     looks_like = re.compile(r"^[IVX]+[.]\s+(.*)$")
@@ -83,11 +91,14 @@ class Informacje(Tagger):
 
 
 class List(Tagger):
+    point = re.compile(r"^[\s]*([-*])")
+    
     def tag(self, pos):
         self.items = []
         while True:
             l = self.line(pos)
-            if l and l[0] in ('-', '*'):
+            m = self.point.match(l)
+            if l and m:
                 self.items.append(l[1:].strip())
                 pos += 1
             else:
@@ -96,16 +107,16 @@ class List(Tagger):
             return pos
 
     def __unicode__(self):
-        s = "<lista>\n"
+        s = '<lista typ="punkt">'
         for i in self.items:
-            s += "<punkt>%s</punkt>\n" % i
-        s += "</lista>\n"
+            s += "\n<punkt>%s</punkt>" % i
+        s += "\n</lista>\n"
         return s
 
 
 class Paragraph(Tagger):
     remove_this = [
-        re.compile(r"[\s]*opis zawartoÅci[\s]*", re.I),
+        re.compile(r"[\s]*opis zawarto.ci[\s]*", re.I),
         re.compile(r"^[\s]*$")
         ]
     podrozdzial = [