Fix dictionary auto-tagging

[redakcja.git] / apps / catalogue / management / edumed.py
diff --git a/apps/catalogue/management/edumed.py b/apps/catalogue/management/edumed.py

index ffad2ec..250eccb 100644 (file)
--- a/apps/catalogue/management/edumed.py
+++ b/apps/catalogue/management/edumed.py
@@ -15,7 +15,7 @@ class Tagger:
      def line(self, position):
          return self.lines[position]
  
      def line(self, position):
          return self.lines[position]
  
-    ignore = [ re.compile(r"^[\[][PA][\]] - [^ ]+$") ]
+    ignore = [re.compile(r"^[\[][PA][\]] - [^ ]+$")]
      empty_line = re.compile(r"^\s+$")
      
      def skip_empty(self, position):
      empty_line = re.compile(r"^\s+$")
      
      def skip_empty(self, position):
@@ -26,7 +26,6 @@ class Tagger:
              position += 1
          return position
  
              position += 1
          return position
  
-
      def tag(self, position):
          """
  Return None -- means that we can't tag it in any way
      def tag(self, position):
          """
  Return None -- means that we can't tag it in any way
@@ -36,6 +35,11 @@ Return None -- means that we can't tag it in any way
      def wrap(self, tagname, content):
          return u"<%s>%s</%s>" % (tagname, content, tagname)
  
      def wrap(self, tagname, content):
          return u"<%s>%s</%s>" % (tagname, content, tagname)
  
+    @staticmethod
+    def anymatches(regex):
+        return lambda x: regex.match(x)
+        
+
  
  class Section(Tagger):
      looks_like = re.compile(r"^[IVX]+[.]\s+(.*)$")
  
  class Section(Tagger):
      looks_like = re.compile(r"^[IVX]+[.]\s+(.*)$")
@@ -87,11 +91,14 @@ class Informacje(Tagger):
  
  
  class List(Tagger):
  
  
  class List(Tagger):
+    point = re.compile(r"^[\s]*([-*])")
+    
      def tag(self, pos):
          self.items = []
          while True:
              l = self.line(pos)
      def tag(self, pos):
          self.items = []
          while True:
              l = self.line(pos)
-            if l and l[0] in ('-', '*'):
+            m = self.point.match(l)
+            if l and m:
                  self.items.append(l[1:].strip())
                  pos += 1
              else:
                  self.items.append(l[1:].strip())
                  pos += 1
              else:
@@ -100,10 +107,10 @@ class List(Tagger):
              return pos
  
      def __unicode__(self):
              return pos
  
      def __unicode__(self):
-        s = "<lista>\n"
+        s = '<lista typ="punkt">'
          for i in self.items:
          for i in self.items:
-            s += "<punkt>%s</punkt>\n" % i
-        s += "</lista>\n"
+            s += "\n<punkt>%s</punkt>" % i
+        s += "\n</lista>\n"
          return s
  
  
          return s
  
  
@@ -219,7 +226,7 @@ returns auto-tagged text
              pos += 1
              if pos >= len(lines):
                  break
              pos += 1
              if pos >= len(lines):
                  break
-
+            
      return toxml(content)
  
  dc_fixed = {
      return toxml(content)
  
  dc_fixed = {
@@ -308,6 +315,7 @@ def mark_dictionary(content):
              elif db >= 1:
                  de = i
                  content[db:de] = [Container('slowniczek', *slowniczek)]
              elif db >= 1:
                  de = i
                  content[db:de] = [Container('slowniczek', *slowniczek)]
+                break
          elif db >= 0:
              if isinstance(e, Paragraph):
                  m = is_dictentry.match(e.line)
          elif db >= 0:
              if isinstance(e, Paragraph):
                  m = is_dictentry.match(e.line)