def line(self, position):
return self.lines[position]
- empty_line = re.compile(r"\s+")
+ ignore = [re.compile(r"^[\[][PA][\]] - [^ ]+$")]
+ empty_line = re.compile(r"^\s+$")
def skip_empty(self, position):
+ """Return the index of the first line at or after position that is
+ not "", does not match empty_line and matches no regex in ignore."""
while self.line(position) == "" or \
- self.empty_line.match(self.line(position)):
+ self.empty_line.match(self.line(position)) or \
+ any(r.match(self.line(position)) for r in self.ignore):
position += 1
return position
def wrap(self, tagname, content):
return u"<%s>%s</%s>" % (tagname, content, tagname)
+ @staticmethod
+ def anymatches(regex):
+ """Return a callable that applies regex.match to its argument."""
+ return regex.match
+
+
class Section(Tagger):
looks_like = re.compile(r"^[IVX]+[.]\s+(.*)$")
class List(Tagger):
+ point = re.compile(r"^[\s]*([-*])")
+
def tag(self, pos):
+ """Collect consecutive bullet lines starting at pos into self.items
+ and return the index of the first line that is not a bullet."""
self.items = []
while True:
l = self.line(pos)
- if l and l[0] in ('-', '*'):
+ m = self.point.match(l)
+ if l and m:
- self.items.append(l[1:].strip())
+ # The bullet may follow leading whitespace, so cut after the
+ # matched marker; l[1:] would leave the marker in the item text.
+ self.items.append(l[m.end():].strip())
pos += 1
else:
return pos
def __unicode__(self):
- s = "<lista>\n"
+ s = '<lista typ="punkt">'
for i in self.items:
- s += "<punkt>%s</punkt>\n" % i
- s += "</lista>\n"
+ s += "\n<punkt>%s</punkt>" % i
+ s += "\n</lista>\n"
return s
class Paragraph(Tagger):
remove_this = [
- re.compile(r"[\s]*opis zawartości[\s]*", re.I),
+ re.compile(r"[\s]*opis zawarto.ci[\s]*", re.I),
re.compile(r"^[\s]*$")
]
podrozdzial = [
pos += 1
if pos >= len(lines):
break
-
+
return toxml(content)
dc_fixed = {
elif db >= 1:
de = i
content[db:de] = [Container('slowniczek', *slowniczek)]
+ break
elif db >= 0:
if isinstance(e, Paragraph):
m = is_dictentry.match(e.line)