X-Git-Url: https://git.mdrn.pl/redakcja.git/blobdiff_plain/7e954ab5ac6399b7f348b972613748bb9c484670..b6b62a76505574e1961e15900a96a70a5461f108:/apps/catalogue/management/edumed.py diff --git a/apps/catalogue/management/edumed.py b/apps/catalogue/management/edumed.py index 9e90ba7d..05034368 100644 --- a/apps/catalogue/management/edumed.py +++ b/apps/catalogue/management/edumed.py @@ -137,7 +137,8 @@ class List(Tagger): class Paragraph(Tagger): remove_this = [ re.compile(r"[\s]*opis zawarto.ci[\s]*", re.I), - re.compile(r"^[\s]*$") + re.compile(r"^[\s]*$"), + re.compile(r"http://pad.nowoczesnapolska.org.pl/p/slowniczek") ] podrozdzial = [ re.compile(r"[\s]*(przebieg zaj..|opcje dodatkowe)[\s]*", re.I), @@ -258,6 +259,10 @@ dc_fixed = { } +class NotFound(Exception): + pass + + def find_block(content, title_re, begin=-1, end=-1): title_re = re.compile(title_re, re.I | re.UNICODE) @@ -283,11 +288,11 @@ def find_block(content, title_re, begin=-1, end=-1): break if rb >= 0: return rb, i + raise NotFound() def remove_block(content, title_re, removed=None): rb, re = find_block(content, title_re) - if removed is not None and isinstance(removed, list): removed += content[rb:re][:] content[rb:re] = [] @@ -416,8 +421,14 @@ def toxml(content, pretty_print=False): # some transformations content = mark_activities(content) content = mark_dictionary(content) - content = remove_block(content, r"wykorzyst(yw)?ane metody[+ PA\[\].]*") - content = remove_block(content, r"(pomoce|potrzebne materia.y)[+ PA\[\]]*") + try: + content = remove_block(content, r"wykorzyst(yw)?ane metody[+ PA\[\].]*") + except NotFound: + pass + try: + content = remove_block(content, r"(pomoce|potrzebne materia.y)[+ PA\[\]]*") + except NotFound: + pass content = move_evaluation(content) info = content.pop(0)