fnp
/
redakcja.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Fix dictionary auto-tagging
[redakcja.git]
/
apps
/
catalogue
/
management
/
edumed.py
diff --git
a/apps/catalogue/management/edumed.py
b/apps/catalogue/management/edumed.py
index
ffad2ec
..
250eccb
100644
(file)
--- a/
apps/catalogue/management/edumed.py
+++ b/
apps/catalogue/management/edumed.py
@@
-15,7
+15,7
@@
class Tagger:
def line(self, position):
return self.lines[position]
def line(self, position):
return self.lines[position]
- ignore = [
re.compile(r"^[\[][PA][\]] - [^ ]+$")
]
+ ignore = [
re.compile(r"^[\[][PA][\]] - [^ ]+$")
]
empty_line = re.compile(r"^\s+$")
def skip_empty(self, position):
empty_line = re.compile(r"^\s+$")
def skip_empty(self, position):
@@
-26,7
+26,6
@@
class Tagger:
position += 1
return position
position += 1
return position
-
def tag(self, position):
"""
Return None -- means that we can't tag it in any way
def tag(self, position):
"""
Return None -- means that we can't tag it in any way
@@
-36,6
+35,11
@@
Return None -- means that we can't tag it in any way
def wrap(self, tagname, content):
return u"<%s>%s</%s>" % (tagname, content, tagname)
def wrap(self, tagname, content):
return u"<%s>%s</%s>" % (tagname, content, tagname)
+ @staticmethod
+ def anymatches(regex):
+ return lambda x: regex.match(x)
+
+
class Section(Tagger):
looks_like = re.compile(r"^[IVX]+[.]\s+(.*)$")
class Section(Tagger):
looks_like = re.compile(r"^[IVX]+[.]\s+(.*)$")
@@
-87,11
+91,14
@@
class Informacje(Tagger):
class List(Tagger):
class List(Tagger):
+ point = re.compile(r"^[\s]*([-*])")
+
def tag(self, pos):
self.items = []
while True:
l = self.line(pos)
def tag(self, pos):
self.items = []
while True:
l = self.line(pos)
- if l and l[0] in ('-', '*'):
+ m = self.point.match(l)
+ if l and m:
self.items.append(l[1:].strip())
pos += 1
else:
self.items.append(l[1:].strip())
pos += 1
else:
@@
-100,10
+107,10
@@
class List(Tagger):
return pos
def __unicode__(self):
return pos
def __unicode__(self):
- s =
"<lista>\n"
+ s =
'<lista typ="punkt">'
for i in self.items:
for i in self.items:
- s += "
<punkt>%s</punkt>\n
" % i
- s += "</lista>\n"
+ s += "
\n<punkt>%s</punkt>
" % i
+ s += "
\n
</lista>\n"
return s
return s
@@
-219,7
+226,7
@@
returns auto-tagged text
pos += 1
if pos >= len(lines):
break
pos += 1
if pos >= len(lines):
break
-
+
return toxml(content)
dc_fixed = {
return toxml(content)
dc_fixed = {
@@
-308,6
+315,7
@@
def mark_dictionary(content):
elif db >= 1:
de = i
content[db:de] = [Container('slowniczek', *slowniczek)]
elif db >= 1:
de = i
content[db:de] = [Container('slowniczek', *slowniczek)]
+ break
elif db >= 0:
if isinstance(e, Paragraph):
m = is_dictentry.match(e.line)
elif db >= 0:
if isinstance(e, Paragraph):
m = is_dictentry.match(e.line)