X-Git-Url: https://git.mdrn.pl/redakcja.git/blobdiff_plain/dd0db2db471a6bb8e08ca74c683fbde6b35144a9..6f1b6c8ed3980bad95c67be4709dc8e62ba4a593:/apps/catalogue/management/edumed.py

diff --git a/apps/catalogue/management/edumed.py b/apps/catalogue/management/edumed.py
index c319132a..25f30849 100644
--- a/apps/catalogue/management/edumed.py
+++ b/apps/catalogue/management/edumed.py
@@ -61,7 +61,7 @@ class Section(Tagger):
 
 
 class Meta(Tagger):
-    looks_like = re.compile(r"([^:]+): (.*)", re.UNICODE)
+    looks_like = re.compile(r"([^:]+): ?(.*)", re.UNICODE)
 
     def tag(self, pos):
         pos = self.skip_empty(pos)
@@ -137,7 +137,8 @@ class List(Tagger):
 class Paragraph(Tagger):
     remove_this = [
         re.compile(r"[\s]*opis zawarto.ci[\s]*", re.I),
-        re.compile(r"^[\s]*$")
+        re.compile(r"^[\s]*$"),
+        re.compile(r"http://pad.nowoczesnapolska.org.pl/p/slowniczek")
         ]
     podrozdzial = [
         re.compile(r"[\s]*(przebieg zaj..|opcje dodatkowe)[\s]*", re.I),
@@ -251,7 +252,7 @@ returns auto-tagged text
     return toxml(content, pretty_print=pretty_print)
 
 dc_fixed = {
-    'description': u'Publikacja zrealizowana w ramach projektu Cyfrowa Przyszłość (http://cyfrowaprzyszlosc.pl).',
+    'description': u'Publikacja zrealizowana w ramach projektu Cyfrowa Przyszłość (http://edukacjamedialna.edu.pl).',
     'relation': u'moduły powiązane linki',
     'description.material': u'linki do załączników',
     'rights': u'Creative Commons Uznanie autorstwa - Na tych samych warunkach 3.0',
@@ -388,12 +389,38 @@ def mark_dictionary(content):
                                        Container('definiens', m.groups()[1])])
 
                 else:
-                    slowniczek.append(e)
+                    slowniczek.append(e.line)
         i += 1
 
     return content
 
 
+def mark_czytelnia(content):  # Fold the elements under the "czytelnia" Section into a single List element.
+    db = -1  # index of the first element after the "czytelnia" heading; -1 means not found yet
+    de = -1  # index of the next Section, i.e. the exclusive end of the range to replace
+    i = 0
+    czy_czytelnia = re.compile(r"[\s]*czytelnia[\s]*", re.I)  # used with match(): anchored at the start of the title only
+    czytelnia = content[0].spawn(List)  # presumably a new List derived from the first element - TODO confirm spawn(); content[0] assumes non-empty content
+    czytelnia.type = 'czytelnia'
+    while i < len(content):
+        e = content[i]
+        if isinstance(e, Section):
+            if czy_czytelnia.match(e.title):
+                db = i + 1
+            elif db >= 1:  # first non-matching Section after the heading closes the range
+                de = i
+                content[db:de] = [czytelnia]  # whole range is replaced, so non-Paragraph elements in it are dropped
+                break
+        elif db >= 0:  # non-Section element seen after the heading was found
+            if isinstance(e, Paragraph):
+                if e.line:  # skip paragraphs whose line is empty
+                    czytelnia.append(e.line)
+        i += 1
+
+    return content  # NOTE(review): if no Section follows the heading, the collected list is never inserted - confirm this is intended
+
+
+
 def move_evaluation(content):
     evaluation = []
 
@@ -420,6 +447,8 @@ def toxml(content, pretty_print=False):
     # some transformations
     content = mark_activities(content)
     content = mark_dictionary(content)
+    content = mark_czytelnia(content)
+    
     try:
         content = remove_block(content, r"wykorzyst(yw)?ane metody[+ PA\[\].]*")
     except NotFound:
@@ -453,7 +482,7 @@ def toxml(content, pretty_print=False):
 
     p("<utwor>")
     p(u'<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">')
-    p(u'<rdf:Description rdf:about="http://redakcja.cyfrowaprzyszlosc.pl/documents/">')
+    p(u'<rdf:Description rdf:about="http://redakcja.edukacjamedialna.edu.pl/documents/">')
     authors = map(unicode.strip, meta[u'Autorzy'].split(u','))
     for author in authors:
         names = author.split(u' ')
@@ -472,7 +501,7 @@ def toxml(content, pretty_print=False):
     dc(u'description', dc_fixed['description'])
     dc(u'description.material', dc_fixed['description.material'])
     dc(u'relation', dc_fixed['relation'])
-    dc(u'identifier.url', u'http://cyfrowaprzyszlosc.pl/%s' % slug)
+    dc(u'identifier.url', u'http://edukacjamedialna.edu.pl/%s' % slug)
     dc(u'rights', dc_fixed['rights'])
     dc(u'rights.license', u'http://creativecommons.org/licenses/by-sa/3.0/')
     dc(u'format', u'xml')