From e2d4fbf230c90dcf3e904351a70d399426478352 Mon Sep 17 00:00:00 2001 From: Marcin Koziej Date: Mon, 12 Nov 2012 15:13:35 +0100 Subject: [PATCH 1/1] import edumed z pad: poprawki --- apps/catalogue/management/commands/import_pad.py | 6 ++++-- apps/catalogue/management/edumed.py | 10 +++++++--- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/apps/catalogue/management/commands/import_pad.py b/apps/catalogue/management/commands/import_pad.py index 205c4765..8032d947 100644 --- a/apps/catalogue/management/commands/import_pad.py +++ b/apps/catalogue/management/commands/import_pad.py @@ -10,7 +10,7 @@ from django.core.management.base import BaseCommand from django.core.management.color import color_style from django.db import transaction from librarian.dcparser import BookInfo -from librarian import ParseError, ValidationError +from librarian import ParseError, ValidationError, WLURI from django.conf import settings from catalogue.models import Book from catalogue.management import auto_taggers @@ -54,7 +54,7 @@ class Command(BaseCommand): text = b.materialize().encode('utf-8') try: info = BookInfo.from_string(text) - slugs[info.slug].append(b) + slugs[info.url.slug].append(b) except (ParseError, ValidationError): slugs[b.slug].append(b) @@ -76,8 +76,10 @@ class Command(BaseCommand): print "pad '%s' does not exist" % pid continue slug = slughifi(pid) + print "Importing %s..." % pid title = pid + print slugs, slug previous_books = slugs.get(slug) if previous_books: if len(previous_books) > 1: diff --git a/apps/catalogue/management/edumed.py b/apps/catalogue/management/edumed.py index e5d5ee0a..ffad2ec1 100644 --- a/apps/catalogue/management/edumed.py +++ b/apps/catalogue/management/edumed.py @@ -15,14 +15,18 @@ class Tagger: def line(self, position): return self.lines[position] - empty_line = re.compile(r"\s+") + ignore = [ re.compile(r"^[\[][PA][\]] - [^ ]+$") ] + empty_line = re.compile(r"^\s+$") def skip_empty(self, position): while self.line(position) == "" or \ - self.empty_line.match(self.line(position)): + self.empty_line.match(self.line(position)) or \ + filter(lambda r: r.match(self.line(position)), + self.ignore[:]): position += 1 return position + def tag(self, position): """ Return None -- means that we can't tag it in any way @@ -105,7 +109,7 @@ class List(Tagger): class Paragraph(Tagger): remove_this = [ - re.compile(r"[\s]*opis zawartości[\s]*", re.I), + re.compile(r"[\s]*opis zawarto.ci[\s]*", re.I), re.compile(r"^[\s]*$") ] podrozdzial = [ -- 2.20.1