master xml making script
author Marcin Koziej <marcin.koziej@nowoczesnapolska.org.pl>
Wed, 30 Jan 2013 14:35:08 +0000 (15:35 +0100)
committer Library Admin <librarian@szo.nowoczesnapolska.org.pl>
Wed, 30 Jan 2013 14:35:25 +0000 (15:35 +0100)
apps/catalogue/management/commands/make_master.py
scripts/fix_something.py

index 220c66c..5d907b9 100644 (file)
@@ -26,12 +26,12 @@ class Command(BaseCommand):
     )
     help = 'Create a master module skeleton'
 
-    def looks_like_syntetic(self, slug):
-        if re.match(r"^(gim|lic) \d[.]? ", slug):
+    def looks_like_synthetic(self, title):
+        if re.match(r"^(gim|lic)_\d[.]? ", title):
             return True
         return False
 
-    def gen_xml(self, options, syntetic_modules=[], course_modules=[], project_modules=[]):
+    def gen_xml(self, options, synthetic_modules=[], course_modules=[], project_modules=[]):
         holder = {}
         holder['xml'] = u""
 
@@ -52,8 +52,8 @@ class Command(BaseCommand):
         p(u'<rdf:Description rdf:about="http://redakcja.edukacjamedialna.edu.pl/documents/">')
 
         dc(u'title', options['title'])
-        for slug in syntetic_modules:
-            dc(u'relation.hasChild.syntetic', slug_url(slug))
+        for slug in synthetic_modules:
+            dc(u'relation.hasChild.synthetic', slug_url(slug))
         for slug in course_modules:
             dc(u'relation.hasChild.course', slug_url(slug))
         for slug in project_modules:
@@ -68,17 +68,24 @@ class Command(BaseCommand):
         dc(u'identifier.url', u'http://edukacjamedialna.edu.pl/%s' % options['slug'])
         dc(u'rights', dc_fixed['rights'])
         dc(u'rights.license', dc_fixed['rights_license'])
-        dc(u'format', u'syntetic, course, project')
+        dc(u'format', u'synthetic, course, project')
         dc(u'type', u'text')
         dc(u'date', date.strftime(date.today(), "%Y-%m-%d"))
         dc(u'audience', options['audience'])
         dc(u'language', u'pol')
         p(u'</rdf:Description>')
         p(u'</rdf:RDF>')
+        p(u'</utwor>')
 
         return holder['xml']
 
     def handle(self, *args, **options):
+        commit_args = {
+            "author_name": 'Platforma',
+            "description": 'Automatycznie zaimportowane z EtherPad',
+            "publishable": False,
+        }
+
         slug = options['slug']
         if not slug:
             slug = slughifi(options['title'])
@@ -99,7 +106,7 @@ class Command(BaseCommand):
         if len(master) == 0:
             master.add(slug, options['title'])
 
-        syntetic_modules = []
+        synthetic_modules = []
         course_modules = []
         if 'slugs_file' in options:
             f = open(options['slugs_file'], 'r')
@@ -113,18 +120,19 @@ class Command(BaseCommand):
                     except Book.DoesNotExist:
                         print "Book for title %s does not exist" % t
                         continue
-                    if self.looks_like_syntetic(t):
-                        syntetic_modules.append(b.slug)
+                    if self.looks_like_synthetic(t):
+                        synthetic_modules.append(b.slug)
                     else:
                         course_modules.append(b.slug)
             except Exception, e:
                 print "Error getting slug list (file %s): %s" % (options['slugs_file'], e)
 
-        print "syntetic: %s" % syntetic_modules
+        print "synthetic: %s" % synthetic_modules
         print "course: %s" % course_modules
 
-        xml = self.gen_xml(options, syntetic_modules, course_modules)
-
+        xml = self.gen_xml(options, synthetic_modules, course_modules)
+        c = master[0]
         print xml
+        if confirm("Commit?", True):
+            c.commit(xml, **commit_args)
 
-        #        master.save()
index 2d73ba1..c9f9a68 100755 (executable)
@@ -10,6 +10,7 @@ sys.path.append('./lib')
 
 from django.core.management import setup_environ
 from redakcja import settings
+from lxml import etree
 
 setup_environ(settings)
 
@@ -21,9 +22,6 @@ fixed = {}
 tag_with_name = r"<([^>]+)name=\"([^>]+)>"
 
 def fix(book, author, dry_run=True):
-    if len(book) == 0:
-        print "%s ==> does not contain chunks" % book.slug
-        return
     fc = book[0]
     txt = fc.materialize()
 
@@ -38,6 +36,25 @@ def fix(book, author, dry_run=True):
     else:
         print "%s ==> i would change this" % book.slug
 
+def fix_empty_opis(book, author, dry_run=True):
+    """Report empty <opis> and <cwiczenie> elements in the book's first chunk.
+
+    Read-only: author and dry_run are accepted but unused.
+    """
+    txt = book[0].materialize()
+    try:
+        t = etree.fromstring(txt)
+    except Exception:
+        # Exception, not bare except, so Ctrl-C still aborts the batch run.
+        # book.slug, not the module-level loop variable b.
+        print "%s didn't parse" % book.slug
+        return
+    # Elements with no child nodes at all (no text, no children).
+    for tag in ('opis', 'cwiczenie'):
+        empty = t.xpath('//%s[not(node())]' % tag)
+        if empty:
+            print "%s: %s/ x %d" % (book.slug, tag, len(empty))
+
 
 import sys
 import getopt
@@ -50,7 +67,10 @@ me = User.objects.get(username='marcinkoziej')
 if dry_run:
     print "This is a dry run, to really fix something, run with --seriously"
 for b in Book.objects.all():
-    fix(b, me, dry_run)
+    if len(b) == 0:
+        print "%s ==> does not contain chunks" % b.slug
+        continue
+    fix_empty_opis(b, me, dry_run)