except ValueError:
print "pad '%s' does not exist" % pid
continue
- slug = slughifi(pid)
- print "Importing %s..." % pid
+
+ open("/tmp/pad_%s.txt" % pid, 'w').write(text.encode('utf-8'))
+
+ if options.get('tag_edumed'):
+ auto_tagger = 'edumed'
+ else:
+ auto_tagger = options.get('auto_tagger')
+ if auto_tagger:
+ text = auto_taggers[auto_tagger](text)
+ try:
+ info = BookInfo.from_string(text.encode('utf-8'))
+ slug = info.url.slug
+ except (ParseError, ValidationError):
+ slug = slughifi(pid)
+
+ print "Importing %s (slug %s)..." % (pid, slug)
title = pid
- print slugs, slug
+ # print slugs, slug
previous_books = slugs.get(slug)
if previous_books:
if len(previous_books) > 1:
if previous_book:
book = previous_book
+ book.slug = slug
else:
book = Book()
book.slug = slug
else:
chunk = book.add(slug, title)
- if options.get('tag_edumed'):
- auto_tagger = 'edumed'
- else:
- auto_tagger = options.get('auto_tagger')
- if auto_tagger:
- text = auto_taggers[auto_tagger](text)
chunk.commit(text, **commit_args)
book_count += 1
def line(self, position):
return self.lines[position]
- ignore = [ re.compile(r"^[\[][PA][\]] - [^ ]+$") ]
+ ignore = [re.compile(r"^[\[][PA][\]] - [^ ]+$")]
empty_line = re.compile(r"^\s+$")
def skip_empty(self, position):
position += 1
return position
-
def tag(self, position):
"""
Return None -- means that we can't tag it in any way
def wrap(self, tagname, content):
return u"<%s>%s</%s>" % (tagname, content, tagname)
+ @staticmethod
+ def anymatches(regex):
+ """
+ Return a one-argument callable that applies `regex.match` to its argument.
+ """
+ # NOTE(review): despite the name, the callable tests a single value;
+ # presumably it is meant to be combined with any()/filter() -- confirm.
+ return lambda x: regex.match(x)
+
+
class Section(Tagger):
looks_like = re.compile(r"^[IVX]+[.]\s+(.*)$")
class List(Tagger):
+ point = re.compile(r"^[\s]*([-*])")
+
def tag(self, pos):
+ """
+ Collect consecutive bullet lines, starting at `pos`, into self.items.
+
+ Return the position of the first line that is not a bullet.
+ """
self.items = []
while True:
l = self.line(pos)
- if l and l[0] in ('-', '*'):
+ # Test `l` before matching, so a falsy line ends the list instead of
+ # being handed to the regex (same guard order as the old check).
+ m = l and self.point.match(l)
+ if m:
- self.items.append(l[1:].strip())
+ # `point` accepts leading whitespace, so cut after the matched
+ # bullet rather than after the first character of the line.
+ self.items.append(l[m.end():].strip())
pos += 1
else:
return pos
def __unicode__(self):
+ """
+ Render the items as a <lista typ="punkt"> element, one <punkt> per item.
+
+ Items are interpolated as-is; no XML escaping is applied here.
+ """
- s = "<lista>\n"
+ s = '<lista typ="punkt">'
for i in self.items:
- s += "<punkt>%s</punkt>\n" % i
- s += "</lista>\n"
+ s += "\n<punkt>%s</punkt>" % i
+ s += "\n</lista>\n"
return s