X-Git-Url: https://git.mdrn.pl/redakcja.git/blobdiff_plain/e2d4fbf230c90dcf3e904351a70d399426478352..e71e4d92d30cc3dc88f7fe6ccb5d4b10866ed499:/apps/catalogue/management/edumed.py diff --git a/apps/catalogue/management/edumed.py b/apps/catalogue/management/edumed.py index ffad2ec1..250eccb7 100644 --- a/apps/catalogue/management/edumed.py +++ b/apps/catalogue/management/edumed.py @@ -15,7 +15,7 @@ class Tagger: def line(self, position): return self.lines[position] - ignore = [ re.compile(r"^[\[][PA][\]] - [^ ]+$") ] + ignore = [re.compile(r"^[\[][PA][\]] - [^ ]+$")] empty_line = re.compile(r"^\s+$") def skip_empty(self, position): @@ -26,7 +26,6 @@ class Tagger: position += 1 return position - def tag(self, position): """ Return None -- means that we can't tag it in any way @@ -36,6 +35,11 @@ Return None -- means that we can't tag it in any way def wrap(self, tagname, content): return u"<%s>%s</%s>" % (tagname, content, tagname) + @staticmethod + def anymatches(regex): + return lambda x: regex.match(x) + + class Section(Tagger): looks_like = re.compile(r"^[IVX]+[.]\s+(.*)$") @@ -87,11 +91,14 @@ class Informacje(Tagger): class List(Tagger): + point = re.compile(r"^[\s]*([-*])") + def tag(self, pos): self.items = [] while True: l = self.line(pos) - if l and l[0] in ('-', '*'): + m = self.point.match(l) + if l and m: self.items.append(l[1:].strip()) pos += 1 else: @@ -100,10 +107,10 @@ class List(Tagger): return pos def __unicode__(self): - s = "\n" + s = '' for i in self.items: - s += "%s\n" % i - s += "\n" + s += "\n%s" % i + s += "\n\n" return s @@ -219,7 +226,7 @@ returns auto-tagged text pos += 1 if pos >= len(lines): break - + return toxml(content) dc_fixed = { @@ -308,6 +315,7 @@ def mark_dictionary(content): elif db >= 1: de = i content[db:de] = [Container('slowniczek', *slowniczek)] + break elif db >= 0: if isinstance(e, Paragraph): m = is_dictentry.match(e.line)