From 78e13f577ab9db6cb8cc181833e351ab792cebe9 Mon Sep 17 00:00:00 2001 From: Marcin Koziej Date: Mon, 12 Nov 2012 15:58:49 +0100 Subject: [PATCH 1/1] Fix --- .../management/commands/import_pad.py | 27 ++++++++++++------- apps/catalogue/management/edumed.py | 19 ++++++++----- 2 files changed, 31 insertions(+), 15 deletions(-) diff --git a/apps/catalogue/management/commands/import_pad.py b/apps/catalogue/management/commands/import_pad.py index 8032d947..6e3ff4a6 100644 --- a/apps/catalogue/management/commands/import_pad.py +++ b/apps/catalogue/management/commands/import_pad.py @@ -75,11 +75,25 @@ class Command(BaseCommand): except ValueError: print "pad '%s' does not exist" % pid continue - slug = slughifi(pid) - print "Importing %s..." % pid + + open("/tmp/pad_%s.txt" % pid, 'w').write(text.encode('utf-8')) + + if options.get('tag_edumed'): + auto_tagger = 'edumed' + else: + auto_tagger = options.get('auto_tagger') + if auto_tagger: + text = auto_taggers[auto_tagger](text) + try: + info = BookInfo.from_string(text.encode('utf-8')) + slug = info.url.slug + except (ParseError, ValidationError): + slug = slughifi(pid) + + print "Importing %s (slug %s)..." % (pid, slug) title = pid - print slugs, slug + # print slugs, slug previous_books = slugs.get(slug) if previous_books: if len(previous_books) > 1: @@ -94,6 +108,7 @@ class Command(BaseCommand): if previous_book: book = previous_book + book.slug = slug else: book = Book() book.slug = slug @@ -108,12 +123,6 @@ class Command(BaseCommand): else: chunk = book.add(slug, title) - if options.get('tag_edumed'): - auto_tagger = 'edumed' - else: - auto_tagger = options.get('auto_tagger') - if auto_tagger: - text = auto_taggers[auto_tagger](text) chunk.commit(text, **commit_args) book_count += 1 diff --git a/apps/catalogue/management/edumed.py b/apps/catalogue/management/edumed.py index ffad2ec1..55269d1e 100644 --- a/apps/catalogue/management/edumed.py +++ b/apps/catalogue/management/edumed.py @@ -15,7 +15,7 @@ class Tagger: def line(self, position): return self.lines[position] - ignore = [ re.compile(r"^[\[][PA][\]] - [^ ]+$") ] + ignore = [re.compile(r"^[\[][PA][\]] - [^ ]+$")] empty_line = re.compile(r"^\s+$") def skip_empty(self, position): @@ -26,7 +26,6 @@ class Tagger: position += 1 return position - def tag(self, position): """ Return None -- means that we can't tag it in any way @@ -36,6 +35,11 @@ Return None -- means that we can't tag it in any way def wrap(self, tagname, content): return u"<%s>%s" % (tagname, content, tagname) + @staticmethod + def anymatches(regex): + return lambda x: regex.match(x) + + class Section(Tagger): looks_like = re.compile(r"^[IVX]+[.]\s+(.*)$") @@ -87,11 +91,14 @@ class Informacje(Tagger): class List(Tagger): + point = re.compile(r"^[\s]*([-*])") + def tag(self, pos): self.items = [] while True: l = self.line(pos) - if l and l[0] in ('-', '*'): + m = self.point.match(l) + if l and m: self.items.append(l[1:].strip()) pos += 1 else: @@ -100,10 +107,10 @@ class List(Tagger): return pos def __unicode__(self): - s = "\n" + s = '' for i in self.items: - s += "%s\n" % i - s += "\n" + s += "\n%s" % i + s += "\n\n" return s -- 2.20.1