X-Git-Url: https://git.mdrn.pl/redakcja.git/blobdiff_plain/65f4793d71ca86c80a7f4f7974e8d65a3ebe6f25..78e13f577ab9db6cb8cc181833e351ab792cebe9:/apps/catalogue/management/edumed.py diff --git a/apps/catalogue/management/edumed.py b/apps/catalogue/management/edumed.py index e5d5ee0a..55269d1e 100644 --- a/apps/catalogue/management/edumed.py +++ b/apps/catalogue/management/edumed.py @@ -15,11 +15,14 @@ class Tagger: def line(self, position): return self.lines[position] - empty_line = re.compile(r"\s+") + ignore = [re.compile(r"^[\[][PA][\]] - [^ ]+$")] + empty_line = re.compile(r"^\s+$") def skip_empty(self, position): while self.line(position) == "" or \ - self.empty_line.match(self.line(position)): + self.empty_line.match(self.line(position)) or \ + filter(lambda r: r.match(self.line(position)), + self.ignore[:]): position += 1 return position @@ -32,6 +35,11 @@ Return None -- means that we can't tag it in any way def wrap(self, tagname, content): return u"<%s>%s" % (tagname, content, tagname) + @staticmethod + def anymatches(regex): + return lambda x: regex.match(x) + + class Section(Tagger): looks_like = re.compile(r"^[IVX]+[.]\s+(.*)$") @@ -83,11 +91,14 @@ class Informacje(Tagger): class List(Tagger): + point = re.compile(r"^[\s]*([-*])") + def tag(self, pos): self.items = [] while True: l = self.line(pos) - if l and l[0] in ('-', '*'): + m = self.point.match(l) + if l and m: self.items.append(l[1:].strip()) pos += 1 else: @@ -96,16 +107,16 @@ class List(Tagger): return pos def __unicode__(self): - s = "\n" + s = '' for i in self.items: - s += "%s\n" % i - s += "\n" + s += "\n%s" % i + s += "\n\n" return s class Paragraph(Tagger): remove_this = [ - re.compile(r"[\s]*opis zawartości[\s]*", re.I), + re.compile(r"[\s]*opis zawarto.ci[\s]*", re.I), re.compile(r"^[\s]*$") ] podrozdzial = [