From d60f44f5eceafa25705d55044ae1b11aceb28187 Mon Sep 17 00:00:00 2001
From: Marcin Koziej
Date: Wed, 18 Jan 2012 15:08:25 +0100
Subject: [PATCH 1/1] ignore comments in xml when indexing; make-xml-zip script

---
 apps/search/index.py    |  3 +++
 scripts/make-xml-zip.py | 31 +++++++++++++++++++++++++++++++
 2 files changed, 34 insertions(+)
 create mode 100755 scripts/make-xml-zip.py

diff --git a/apps/search/index.py b/apps/search/index.py
index 29e41d222..307376de9 100644
--- a/apps/search/index.py
+++ b/apps/search/index.py
@@ -25,6 +25,7 @@ import re
 import errno
 from librarian import dcparser
 from librarian.parser import WLDocument
+from lxml import etree
 import catalogue.models
 from multiprocessing.pool import ThreadPool
 from threading import current_thread
@@ -401,6 +402,8 @@ class Index(BaseIndex):
 
             if header.tag in self.skip_header_tags:
                 continue
+            if header.tag is etree.Comment:
+                continue
 
             # section content
             content = []
diff --git a/scripts/make-xml-zip.py b/scripts/make-xml-zip.py
new file mode 100755
index 000000000..d8b3dde88
--- /dev/null
+++ b/scripts/make-xml-zip.py
@@ -0,0 +1,31 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+import sys
+sys.path.insert(0, '../apps')
+sys.path.insert(0, '../lib')
+sys.path.insert(0, '../lib/librarian')
+sys.path.insert(0, '../wolnelektury')
+sys.path.insert(0, '..')
+
+from django.core.management import setup_environ
+from wolnelektury import settings
+import sys
+import zipfile
+
+setup_environ(settings)
+
+from catalogue.models import Book
+
+
+if len(sys.argv) < 2:
+    print "Provide a zip name as first argument"
+    sys.exit(-1)
+
+zip = zipfile.ZipFile(sys.argv[1], 'w')
+for book in Book.objects.all():
+    zip.write(book.xml_file.path, "%s.xml" % book.slug)
+zip.close()
+
-- 
2.20.1