ignore comments in xml when indexing; make-xml-zip script
author Marcin Koziej <marcin.koziej@nowoczesnapolska.org.pl>
Wed, 18 Jan 2012 14:08:25 +0000 (15:08 +0100)
committer Marcin Koziej <marcin.koziej@nowoczesnapolska.org.pl>
Wed, 18 Jan 2012 14:08:25 +0000 (15:08 +0100)
apps/search/index.py
scripts/make-xml-zip.py [new file with mode: 0755]

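diff --git a/apps/search/index.py b/apps/search/index.py
index 29e41d2..307376d 100644
--- a/apps/search/index.py
+++ b/apps/search/index.py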
@@ -25,6 +25,7 @@ import re
 import errno
 from librarian import dcparser
 from librarian.parser import WLDocument
+from lxml import etree
 import catalogue.models
 from multiprocessing.pool import ThreadPool
 from threading import current_thread
@@ -401,6 +402,8 @@ class Index(BaseIndex):
 
                 if header.tag in self.skip_header_tags:
                     continue
+                if header.tag is etree.Comment:
+                    continue
 
                 # section content
                 content = []
diff --git a/scripts/make-xml-zip.py b/scripts/make-xml-zip.py
new file mode 100755 (executable)
index 0000000..d8b3dde
--- /dev/null
+++ b/scripts/make-xml-zip.py
@@ -0,0 +1,31 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+import sys
+sys.path.insert(0, '../apps')
+sys.path.insert(0, '../lib')
+sys.path.insert(0, '../lib/librarian')
+sys.path.insert(0, '../wolnelektury')
+sys.path.insert(0, '..')
+
+from django.core.management import setup_environ
+from wolnelektury import settings
+import sys
+import zipfile
+
+setup_environ(settings)
+
+from catalogue.models import Book
+
+
+if len(sys.argv) < 2:
+    print "Provide a zip name as first argument"
+    sys.exit(-1)
+
+zip = zipfile.ZipFile(sys.argv[1], 'w')
+for book in Book.objects.all():
+    zip.write(book.xml_file.path, "%s.xml" % book.slug)
+zip.close()
+