From db6d2feee32d100fb893b389421bda2fd65a89cd Mon Sep 17 00:00:00 2001 From: zuber Date: Wed, 12 Aug 2009 14:26:21 +0200 Subject: [PATCH] =?utf8?q?Usuni=C4=99cie=20biblioteki=20librarian=20(zosta?= =?utf8?q?=C5=82a=20ona=20wydzielona=20do=20osobnego=20projektu)=20i=20dod?= =?utf8?q?anie=20prostego=20pliku=20README.txt=20z=20opisem=20zale=C5=BCno?= =?utf8?q?=C5=9Bci.?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit --- README.txt | 6 + lib/librarian/__init__.py | 0 lib/librarian/bin/book2html.py | 31 - lib/librarian/bin/book2txt.py | 68 -- lib/librarian/bin/bookfragments.py | 50 -- lib/librarian/bin/genslugs.py | 64 -- lib/librarian/bin/master.css | 207 ------ lib/librarian/bin/master.plain.css | 160 ----- lib/librarian/book2html.xslt | 615 ------------------ lib/librarian/dcparser.py | 197 ------ lib/librarian/html.py | 247 ------- lib/librarian/tests/__init__.py | 104 --- .../tests/andersen_brzydkie_kaczatko.xml | 24 - lib/librarian/tests/biedrzycki_akslop.xml | 25 - lib/librarian/tests/kochanowski_piesn7.xml | 27 - lib/librarian/tests/mickiewicz_rybka.xml | 28 - lib/librarian/tests/sofokles_antygona.xml | 25 - 17 files changed, 6 insertions(+), 1872 deletions(-) create mode 100644 README.txt delete mode 100644 lib/librarian/__init__.py delete mode 100755 lib/librarian/bin/book2html.py delete mode 100755 lib/librarian/bin/book2txt.py delete mode 100755 lib/librarian/bin/bookfragments.py delete mode 100755 lib/librarian/bin/genslugs.py delete mode 100644 lib/librarian/bin/master.css delete mode 100644 lib/librarian/bin/master.plain.css delete mode 100644 lib/librarian/book2html.xslt delete mode 100644 lib/librarian/dcparser.py delete mode 100644 lib/librarian/html.py delete mode 100644 lib/librarian/tests/__init__.py delete mode 100644 lib/librarian/tests/andersen_brzydkie_kaczatko.xml delete mode 100644 lib/librarian/tests/biedrzycki_akslop.xml delete mode 100644 lib/librarian/tests/kochanowski_piesn7.xml delete mode 100644 lib/librarian/tests/mickiewicz_rybka.xml delete mode 100644 lib/librarian/tests/sofokles_antygona.xml diff --git a/README.txt b/README.txt new file mode 100644 index 00000000..239441f9 --- /dev/null +++ b/README.txt @@ -0,0 +1,6 @@ +Zależności +========== + + * [Django 1.1](http://djangoproject.com/) + * [Mercurial 1.3.1](http://www.selenic.com/mercurial/) + * [librarian 1.1](http://redmine.nowoczesnapolska.org.pl/projects/show/librarian) diff --git a/lib/librarian/__init__.py b/lib/librarian/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/lib/librarian/bin/book2html.py b/lib/librarian/bin/book2html.py deleted file mode 100755 index a0229bbe..00000000 --- a/lib/librarian/bin/book2html.py +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env python -import os -import optparse - -from librarian import html - - -if __name__ == '__main__': - # Parse commandline arguments - usage = """Usage: %prog [options] SOURCE [SOURCE...] - Convert SOURCE files to HTML format.""" - - parser = optparse.OptionParser(usage=usage) - - parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, - help='print status messages to stdout') - - options, input_filenames = parser.parse_args() - - if len(input_filenames) < 1: - parser.print_help() - exit(1) - - # Do some real work - for input_filename in input_filenames: - if options.verbose: - print input_filename - - output_filename = os.path.splitext(input_filename)[0] + '.html' - html.transform(input_filename, output_filename) - diff --git a/lib/librarian/bin/book2txt.py b/lib/librarian/bin/book2txt.py deleted file mode 100755 index 9c470805..00000000 --- a/lib/librarian/bin/book2txt.py +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -import re -import os -import optparse -import codecs - - -HEADER = u"""\ -Kodowanie znaków w dokumencie: UTF-8. ------ -Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl/). Reprodukcja cyfrowa wykonana przez -Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN. Ten utwór nie jest chroniony prawem autorskim i znajduje -się w domenie publicznej, co oznacza, że możesz go swobodnie wykorzystywać, publikować i rozpowszechniać. ------ - -""" - - -REGEXES = [ - (r']*>(.|\n)*?', ''), - (r']*>(.|\n)*?', ''), - ('<(begin|end)\\sid=[\'|"][b|e]\\d+[\'|"]\\s/>', ''), - (r'(()|())', ''), - (r'(.|\n)*?', ''), - (r'(.|\n)*?', ''), - (r'<[^>]+>', ''), - (r'/\n', '\n'), - (r'---', u'—'), - (r'--', u'-'), - (r',,', u'„'), - (r'"', u'”'), -] - - -if __name__ == '__main__': - # Parse commandline arguments - usage = """Usage: %prog [options] SOURCE [SOURCE...] - Convert SOURCE files to TXT format.""" - - parser = optparse.OptionParser(usage=usage) - - parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, - help='print status messages to stdout') - - options, input_filenames = parser.parse_args() - - if len(input_filenames) < 1: - parser.print_help() - exit(1) - - # Do some real work - for input_filename in input_filenames: - if options.verbose: - print input_filename - - output_filename = os.path.splitext(input_filename)[0] + '.txt' - - xml = codecs.open(input_filename, 'r', encoding='utf-8').read() - for pattern, repl in REGEXES: - # print pattern, repl - xml, n = re.subn(pattern, repl, xml) - # print n - - output = codecs.open(output_filename, 'w', encoding='utf-8') - output.write(HEADER) - output.write(xml) - diff --git a/lib/librarian/bin/bookfragments.py b/lib/librarian/bin/bookfragments.py deleted file mode 100755 index f29e11e0..00000000 --- a/lib/librarian/bin/bookfragments.py +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/env python -import os -import optparse - -from librarian import html - - -if __name__ == '__main__': - # Parse commandline arguments - usage = """Usage: %prog [options] SOURCE [SOURCE...] - Extract theme fragments from SOURCE.""" - - parser = optparse.OptionParser(usage=usage) - - parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, - help='print status messages to stdout') - - options, input_filenames = parser.parse_args() - - if len(input_filenames) < 1: - parser.print_help() - exit(1) - - # Do some real work - for input_filename in input_filenames: - if options.verbose: - print input_filename - - output_filename = os.path.splitext(input_filename)[0] + '.fragments.html' - - closed_fragments, open_fragments = html.extract_fragments(input_filename) - - for fragment_id in open_fragments: - print '%s:warning:unclosed fragment #%s' % (input_filename, fragment_id) - - output_file = open(output_filename, 'w') - output_file.write(""" - - - bookfragments output - - - - """) - for fragment in closed_fragments.values(): - fragment_html = u'

[#%s] %s

%s
' % (fragment.id, fragment.themes, fragment) - output_file.write(fragment_html.encode('utf-8')) - output_file.write('') - output_file.close() - diff --git a/lib/librarian/bin/genslugs.py b/lib/librarian/bin/genslugs.py deleted file mode 100755 index 3391d8e5..00000000 --- a/lib/librarian/bin/genslugs.py +++ /dev/null @@ -1,64 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -import os -import optparse - -from lxml import etree -from librarian import html -from slughifi import slughifi - - -BOOK_URL = 'http://wolnelektury.pl/katalog/lektura/' - - -if __name__ == '__main__': - # Parse commandline arguments - usage = """Usage: %prog [options] SOURCE [SOURCE...] - Generate slugs for SOURCE.""" - - parser = optparse.OptionParser(usage=usage) - - parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, - help='print status messages to stdout') - parser.add_option('-f', '--force', action='store_true', dest='force', default=False, - help='overwrite current identifiers') - - options, input_filenames = parser.parse_args() - - if len(input_filenames) < 1: - parser.print_help() - exit(1) - - # Do some real work - for input_filename in input_filenames: - if options.verbose: - print input_filename - - doc = etree.parse(input_filename) - try: - title = doc.find('//{http://purl.org/dc/elements/1.1/}title').text - except AttributeError: - print '%s:error:Book title not found. Skipping.' % input_filename - continue - - parent = '' - try: - parent_url = doc.find('//{http://purl.org/dc/elements/1.1/}relation.isPartOf').text - parent = parent_url.rsplit('/', 1)[1] + ' ' - except AttributeError: - pass - except IndexError: - print '%s:error:Invalid parent URL "%s". Skipping.' % (input_filename, parent_url) - - book_url = doc.find('//{http://purl.org/dc/elements/1.1/}identifier.url') - if book_url is None: - book_description = doc.find('//{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description') - book_url = etree.SubElement(book_description, '{http://purl.org/dc/elements/1.1/}identifier.url') - if not options.force and book_url.text.startswith('http://'): - print '%s:Notice:Book already has identifier URL "%s". Skipping.' % (input_filename, book_url.text) - continue - - book_url.text = BOOK_URL + slughifi(parent + title)[:60] - - doc.write(input_filename, xml_declaration=True, pretty_print=True, encoding='utf-8') - diff --git a/lib/librarian/bin/master.css b/lib/librarian/bin/master.css deleted file mode 100644 index 98e142b9..00000000 --- a/lib/librarian/bin/master.css +++ /dev/null @@ -1,207 +0,0 @@ -body { - font-size: 16px; - font: Georgia, "Times New Roman", serif; - line-height: 1.5em; - margin: 3em; - max-width: 36em; -} - -a { - color: blue; - text-decoration: none; -} - -/* =================================================== */ -/* = Common elements: headings, paragraphs and lines = */ -/* =================================================== */ -h1 { - font-size: 3em; - margin: 1.5em 0; - text-align: center; - line-height: 1.5em; - font-weight: bold; -} - -h2 { - font-size: 2em; - margin: 1.5em 0 0; - font-weight: bold; - line-height: 1.5em; -} - -h3 { - font-size: 1.5em; - margin: 1.5em 0 0; - font-weight: normal; - line-height: 1.5em; -} - -h4 { - font-size: 1em; - margin: 1.5em 0 0; - line-height: 1.5em; -} - -p { - margin: 0; -} - -/* ======================== */ -/* = Footnotes and themes = */ -/* ======================== */ -.theme-begin { - border-left: 0.1em solid #DDDDDD; - color: #777; - padding: 0 0.5em; - width: 7.5em; - font-style: normal; - font-weight: normal; - font-size: 16px; - position: absolute; - left: 40em; - line-height: 1.5em; - text-align: left; -} - -.annotation { - font-style: normal; - font-weight: normal; - font-size: 12px; -} - -#footnotes .annotation { - display: block; - float: left; - width: 2.5em; - clear: both; -} - -#footnotes div { - margin: 1.5em 0 0 0; -} - -#footnotes p { - margin-left: 2.5em; -} - - -/* ============= */ -/* = Numbering = */ -/* ============= */ -.anchor { - float: left; - margin: -0.2em -0.5em -0.2em -3.5em; - color: #777; - font-size: 12px; - width: 2em; - text-align: center; - padding: 0.2em 0.5em; -} - -.anchor:hover, .anchor:active { - color: #FFF; - background-color: #CCC; -} - - -/* =================== */ -/* = Custom elements = */ -/* =================== */ -span.author { - font-size: 0.75em; - display: block; - line-height: 1.5em; - margin-bottom: 0.25em; -} - -span.collection { - font-size: 0.75em; - display: block; - line-height: 1.5em; - margin-bottom: -0.25em; -} - -span.subtitle { - font-size: 0.75em; - display: block; - line-height: 1.5em; - margin-top: -0.25em; -} - -div.didaskalia { - font-style: italic; - margin: 0.5em 0 0; -} - -div.kwestia { - margin: 0.5em 0 0; -} - -div.stanza { - margin: 1.5em 0 0; -} - -div.kwestia div.stanza { - margin: 0; -} - -p.paragraph { - text-align: justify; - margin: 1.5em 0 0; -} - -p.motto { - text-align: justify; - font-style: italic; - margin: 1.5em 0 0; -} - -p.motto_podpis { - font-size: 0.875em; -} - -div.fragment { - border-bottom: 0.1em solid #999; - padding-bottom: 1.5em; -} - -div.note p, div.dedication p, div.note p.paragraph, div.dedication p.paragraph { - text-align: right; - font-style: italic; -} - -hr.spacer { - height: 3em; - visibility: hidden; -} - -hr.spacer-line { - margin: 1.5em 0; - border: none; - border-bottom: 0.1em solid #000; -} - -p.spacer-asterisk { - padding: 0; - margin: 1.5em 0; - text-align: center; -} - -div.person-list ol { - list-style: none; - padding: 0 0 0 1.5em; -} - -p.place-and-time { - font-style: italic; -} - -em.math, em.foreign-word, em.book-title, em.didaskalia, em.author-emphasis { - font-style: italic; -} - -em.person { - font-style: normal; - font-variant: small-caps; -} - diff --git a/lib/librarian/bin/master.plain.css b/lib/librarian/bin/master.plain.css deleted file mode 100644 index 3210e881..00000000 --- a/lib/librarian/bin/master.plain.css +++ /dev/null @@ -1,160 +0,0 @@ -body { - font-size: 16px; - font: Georgia, "Times New Roman", serif; - line-height: 1.5em; - margin: 3em; - max-width: 36em; -} - -a { - color: blue; - text-decoration: none; -} - -/* =================================================== */ -/* = Common elements: headings, paragraphs and lines = */ -/* =================================================== */ -h1 { - font-size: 3em; - margin: 1.5em 0; - text-align: center; - line-height: 1.5em; - font-weight: bold; -} - -h2 { - font-size: 2em; - margin: 1.5em 0 0; - font-weight: bold; - line-height: 1.5em; -} - -h3 { - font-size: 1.5em; - margin: 1.5em 0 0; - font-weight: normal; - line-height: 1.5em; -} - -h4 { - font-size: 1em; - margin: 1.5em 0 0; - line-height: 1.5em; -} - -p { - margin: 0; -} - -/* ======================== */ -/* = Footnotes and themes = */ -/* ======================== */ -.theme-begin { - border-left: 0.1em solid #DDDDDD; - color: #666; - float: right; - margin: 0 -9.5em 0 0; - padding: 0 0.5em; - width: 7.5em; - font-style: normal; - font-weight: normal; - font-size: 16px; - display: none; -} - -.annotation { - font-style: normal; - font-weight: normal; - font-size: 16px; - display: none; -} - -#footnotes { - display: none; -} - -#footnotes .annotation { - display: block; - float: left; - width: 2.5em; - clear: both; -} - -#footnotes div { - margin: 1.5em 0 0 0; -} - -#footnotes p { - margin-left: 2.5em; -} - -/* =================== */ -/* = Custom elements = */ -/* =================== */ -span.author { - font-size: 0.75em; - display: block; - line-height: 1.5em; - margin-bottom: 0.25em; -} - -span.collection { - font-size: 0.75em; - display: block; - line-height: 1.5em; - margin-bottom: -0.25em; -} - -span.subtitle { - font-size: 0.75em; - display: block; - line-height: 1.5em; - margin-top: -0.25em; -} - -div.didaskalia { - font-style: italic; - margin: 0.5em 0 0; -} - -div.kwestia { - margin: 0.5em 0 0; -} - -div.stanza { - margin: 1.5em 0 0; -} - -div.kwestia div.stanza { - margin: 0; -} - -p.paragraph { - text-align: justify; - margin: 1.5em 0 0; -} - -p.motto { - text-align: justify; - font-style: italic; - margin: 1.5em 0 0; -} - -p.motto_podpis { - font-size: 0.875em; -} - -div.fragment { - border-bottom: 0.1em solid #999; - padding-bottom: 1.5em; -} - -div.note p, div.note p.paragraph { - text-align: right; - font-style: italic; -} - -hr.spacer { - height: 3em; - visibility: hidden; -} diff --git a/lib/librarian/book2html.xslt b/lib/librarian/book2html.xslt deleted file mode 100644 index 71f11820..00000000 --- a/lib/librarian/book2html.xslt +++ /dev/null @@ -1,615 +0,0 @@ - - - - - - - -
- - -
-

Przypisy

- -
- - [] - - -

-
- - - -
-
-
-
-
-
- -
- - - - - - - - -

- -

-
- -
- - - - - - - -
-
- - -
-

-
    - -
-
-
- - -
-
- - -
- -
-
- - -
-
- - -
-
- - - - - - - - - - - - - - - - - - - - - - - - - -

-
- - -

-
- - -

-
- - - -

-
- - -
-
- - -
  • -
    - - -

    -
    - - -
    - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -

    - - - padding-left: 1em - - - - - padding-left: em - - - padding-left: 1em - - - - - padding-left: 12em - - - -

    -
    - - -

    -
    - - - - - - - - - - [] - - - - - - - - - - - - - - - - - - „” - - - - - - - - - - - - - - - - - -
    -
    - - -

    *

    -
    - - -
    -
    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    - diff --git a/lib/librarian/dcparser.py b/lib/librarian/dcparser.py deleted file mode 100644 index 557509c9..00000000 --- a/lib/librarian/dcparser.py +++ /dev/null @@ -1,197 +0,0 @@ -# -*- coding: utf-8 -*- -from xml.parsers.expat import ExpatError -from datetime import date -import time - -# Import ElementTree from anywhere -try: - import xml.etree.ElementTree as etree # Python >= 2.5 -except ImportError: - try: - import elementtree.ElementTree as etree # effbot's pure Python module - except ImportError: - import lxml.etree as etree # ElementTree API using libxml2 - - -# ============== -# = Converters = -# ============== -class Person(object): - """Single person with last name and a list of first names.""" - def __init__(self, last_name, *first_names): - self.last_name = last_name - self.first_names = first_names - - - def __eq__(self, right): - return self.last_name == right.last_name and self.first_names == right.first_names - - - def __unicode__(self): - if len(self.first_names) > 0: - return '%s, %s' % (self.last_name, ' '.join(self.first_names)) - else: - return self.last_name - - - def __repr__(self): - return 'Person(last_name=%r, first_names=*%r)' % (self.last_name, self.first_names) - - -def str_to_unicode(value, previous): - return unicode(value) - - -def str_to_unicode_list(value, previous): - if previous is None: - previous = [] - previous.append(str_to_unicode(value, None)) - return previous - - -def str_to_person(value, previous): - comma_count = value.count(',') - - if comma_count == 0: - last_name, first_names = value, [] - elif comma_count == 1: - last_name, first_names = value.split(',') - first_names = [name for name in first_names.split(' ') if len(name)] - else: - raise ValueError("value contains more than one comma: %r" % value) - - return Person(last_name.strip(), *first_names) - - -def str_to_date(value, previous): - try: - t = time.strptime(value, '%Y-%m-%d') - except ValueError: - t = time.strptime(value, '%Y') - return date(t[0], t[1], t[2]) - - -# ========== -# = Parser = -# ========== -class ParseError(Exception): - def __init__(self, message): - super(ParseError, self).__init__(message) - - -class XMLNamespace(object): - '''Represents XML namespace.''' - - def __init__(self, uri): - self.uri = uri - - def __call__(self, tag): - return '{%s}%s' % (self.uri, tag) - - def __contains__(self, tag): - return tag.startswith(str(self)) - - def __repr__(self): - return 'XMLNamespace(%r)' % self.uri - - def __str__(self): - return '%s' % self.uri - - -class BookInfo(object): - RDF = XMLNamespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#') - DC = XMLNamespace('http://purl.org/dc/elements/1.1/') - - mapping = { - DC('creator') : ('author', str_to_person), - DC('title') : ('title', str_to_unicode), - DC('subject.period') : ('epoch', str_to_unicode), - DC('subject.type') : ('kind', str_to_unicode), - DC('subject.genre') : ('genre', str_to_unicode), - DC('date') : ('created_at', str_to_date), - DC('date.pd') : ('released_to_public_domain_at', str_to_date), - DC('contributor.translator') : ('translator', str_to_person), - DC('contributor.technical_editor') : ('technical_editor', str_to_person), - DC('publisher') : ('publisher', str_to_unicode), - DC('source') : ('source_name', str_to_unicode), - DC('source.URL') : ('source_url', str_to_unicode), - DC('identifier.url') : ('url', str_to_unicode), - DC('relation.hasPart') : ('parts', str_to_unicode_list), - DC('rights.license') : ('license', str_to_unicode), - DC('rights') : ('license_description', str_to_unicode), - } - - @classmethod - def from_string(cls, xml): - from StringIO import StringIO - return cls.from_file(StringIO(xml)) - - @classmethod - def from_file(cls, xml_file): - book_info = cls() - - try: - tree = etree.parse(xml_file) - except ExpatError, e: - raise ParseError(e) - - description = tree.find('//' + book_info.RDF('Description')) - book_info.wiki_url = description.get(cls.RDF('about'), None) - - if description is None: - raise ParseError('no Description tag found in document') - - for element in description.findall('*'): - book_info.parse_element(element) - - return book_info - - def parse_element(self, element): - try: - attribute, converter = self.mapping[element.tag] - setattr(self, attribute, converter(element.text, getattr(self, attribute, None))) - except KeyError: - pass - - def to_xml(self): - """XML representation of this object.""" - etree._namespace_map[str(self.RDF)] = 'rdf' - etree._namespace_map[str(self.DC)] = 'dc' - - root = etree.Element(self.RDF('RDF')) - description = etree.SubElement(root, self.RDF('Description')) - - if self.wiki_url: - description.set(self.RDF('about'), self.wiki_url) - - for tag, (attribute, converter) in self.mapping.iteritems(): - if hasattr(self, attribute): - e = etree.Element(tag) - e.text = unicode(getattr(self, attribute)) - description.append(e) - - return unicode(etree.tostring(root, 'utf-8'), 'utf-8') - - def to_dict(self): - etree._namespace_map[str(self.RDF)] = 'rdf' - etree._namespace_map[str(self.DC)] = 'dc' - - result = {'about': self.wiki_url} - for tag, (attribute, converter) in self.mapping.iteritems(): - if hasattr(self, attribute): - result[attribute] = unicode(getattr(self, attribute)) - - return result - - -def parse(file_name): - return BookInfo.from_file(file_name) - - -if __name__ == '__main__': - import sys - - info = parse(sys.argv[1]) - for attribute, _ in BookInfo.mapping.values(): - print '%s: %r' % (attribute, getattr(info, attribute, None)) - diff --git a/lib/librarian/html.py b/lib/librarian/html.py deleted file mode 100644 index ad18952f..00000000 --- a/lib/librarian/html.py +++ /dev/null @@ -1,247 +0,0 @@ -# -*- coding: utf-8 -*- -import os -import cStringIO -import re -import copy -import pkgutil - -from lxml import etree - - -ENTITY_SUBSTITUTIONS = [ - (u'---', u'—'), - (u'--', u'–'), - (u'...', u'…'), - (u',,', u'„'), - (u'"', u'”'), -] - - -def substitute_entities(context, text): - """XPath extension function converting all entites in passed text.""" - if isinstance(text, list): - text = ''.join(text) - for entity, substitutution in ENTITY_SUBSTITUTIONS: - text = text.replace(entity, substitutution) - return text - - -# Register substitute_entities function with lxml -ns = etree.FunctionNamespace('http://wolnelektury.pl/functions') -ns['substitute_entities'] = substitute_entities - - -def transform(input_filename, output_filename=None, is_file=True): - """Transforms file input_filename in XML to output_filename in XHTML.""" - # Parse XSLT - style_filename = os.path.join(os.path.dirname(__file__), 'book2html.xslt') - style = etree.parse(style_filename) - - doc_file = cStringIO.StringIO() - expr = re.compile(r'/\s', re.MULTILINE | re.UNICODE); - - if is_file: - f = open(input_filename, 'rb') - input_filename = f.read() - f.close() - - data = input_filename.decode('utf-8') - data = expr.sub(u'
    \n', data) - doc_file.write(data.encode('utf-8')) - doc_file.seek(0); - - parser = etree.XMLParser(remove_blank_text=True) - doc = etree.parse(doc_file, parser) - - result = doc.xslt(style) - if result.find('//p') is not None: - add_anchors(result.getroot()) - add_table_of_contents(result.getroot()) - - if output_filename is not None: - result.write(output_filename, xml_declaration=False, pretty_print=True, encoding='utf-8') - else: - return result - return True - else: - return False - - -class Fragment(object): - def __init__(self, id, themes): - super(Fragment, self).__init__() - self.id = id - self.themes = themes - self.events = [] - - def append(self, event, element): - self.events.append((event, element)) - - def closed_events(self): - stack = [] - for event, element in self.events: - if event == 'start': - stack.append(('end', element)) - elif event == 'end': - try: - stack.pop() - except IndexError: - print 'CLOSED NON-OPEN TAG:', element - - stack.reverse() - return self.events + stack - - def to_string(self): - result = [] - for event, element in self.closed_events(): - if event == 'start': - result.append(u'<%s %s>' % (element.tag, ' '.join('%s="%s"' % (k, v) for k, v in element.attrib.items()))) - if element.text: - result.append(element.text) - elif event == 'end': - result.append(u'' % element.tag) - if element.tail: - result.append(element.tail) - else: - result.append(element) - - return ''.join(result) - - def __unicode__(self): - return self.to_string() - - -def extract_fragments(input_filename): - """Extracts theme fragments from input_filename.""" - open_fragments = {} - closed_fragments = {} - - for event, element in etree.iterparse(input_filename, events=('start', 'end')): - # Process begin and end elements - if element.get('class', '') in ('theme-begin', 'theme-end'): - if not event == 'end': continue # Process elements only once, on end event - - # Open new fragment - if element.get('class', '') == 'theme-begin': - fragment = Fragment(id=element.get('fid'), themes=element.text) - - # Append parents - if element.getparent().get('id', None) != 'book-text': - parents = [element.getparent()] - while parents[-1].getparent().get('id', None) != 'book-text': - parents.append(parents[-1].getparent()) - - parents.reverse() - for parent in parents: - fragment.append('start', parent) - - open_fragments[fragment.id] = fragment - - # Close existing fragment - else: - try: - fragment = open_fragments[element.get('fid')] - except KeyError: - print '%s:closed not open fragment #%s' % (input_filename, element.get('fid')) - else: - closed_fragments[fragment.id] = fragment - del open_fragments[fragment.id] - - # Append element tail to lost_text (we don't want to lose any text) - if element.tail: - for fragment_id in open_fragments: - open_fragments[fragment_id].append('text', element.tail) - - - # Process all elements except begin and end - else: - # Omit annotation tags - if len(element.get('name', '')) or element.get('class', '') == 'annotation': - if event == 'end' and element.tail: - for fragment_id in open_fragments: - open_fragments[fragment_id].append('text', element.tail) - else: - for fragment_id in open_fragments: - open_fragments[fragment_id].append(event, copy.copy(element)) - - return closed_fragments, open_fragments - - -def add_anchor(element, prefix, with_link=True, with_target=True, link_text=None): - if with_link: - if link_text is None: - link_text = prefix - anchor = etree.Element('a', href='#%s' % prefix) - anchor.set('class', 'anchor') - anchor.text = unicode(link_text) - if element.text: - anchor.tail = element.text - element.text = u'' - element.insert(0, anchor) - - if with_target: - anchor_target = etree.Element('a', name='%s' % prefix) - anchor_target.set('class', 'target') - anchor_target.text = u' ' - if element.text: - anchor_target.tail = element.text - element.text = u'' - element.insert(0, anchor_target) - - -def any_ancestor(element, test): - for ancestor in element.iterancestors(): - if test(ancestor): - return True - return False - - -def add_anchors(root): - counter = 1 - for element in root.iterdescendants(): - if any_ancestor(element, lambda e: e.get('class') in ('note', 'motto', 'motto_podpis', 'dedication') - or e.tag == 'blockquote'): - continue - - if element.tag == 'p' and 'verse' in element.get('class', ''): - if counter == 1 or counter % 5 == 0: - add_anchor(element, "f%d" % counter, link_text=counter) - counter += 1 - elif 'paragraph' in element.get('class', ''): - add_anchor(element, "f%d" % counter, link_text=counter) - counter += 1 - - -def add_table_of_contents(root): - sections = [] - counter = 1 - for element in root.iterdescendants(): - if element.tag in ('h2', 'h3'): - if any_ancestor(element, lambda e: e.get('id') in ('footnotes',) or e.get('class') in ('person-list',)): - continue - - if element.tag == 'h3' and len(sections) and sections[-1][1] == 'h2': - sections[-1][3].append((counter, element.tag, ''.join(element.xpath('text()')), [])) - else: - sections.append((counter, element.tag, ''.join(element.xpath('text()')), [])) - add_anchor(element, "s%d" % counter, with_link=False) - counter += 1 - - toc = etree.Element('div') - toc.set('id', 'toc') - toc_header = etree.SubElement(toc, 'h2') - toc_header.text = u'Spis treści' - toc_list = etree.SubElement(toc, 'ol') - - for n, section, text, subsections in sections: - section_element = etree.SubElement(toc_list, 'li') - add_anchor(section_element, "s%d" % n, with_target=False, link_text=text) - - if len(subsections): - subsection_list = etree.SubElement(section_element, 'ol') - for n, subsection, text, _ in subsections: - subsection_element = etree.SubElement(subsection_list, 'li') - add_anchor(subsection_element, "s%d" % n, with_target=False, link_text=text) - - root.insert(0, toc) - diff --git a/lib/librarian/tests/__init__.py b/lib/librarian/tests/__init__.py deleted file mode 100644 index c9b7f4c0..00000000 --- a/lib/librarian/tests/__init__.py +++ /dev/null @@ -1,104 +0,0 @@ -# -*- coding: utf-8 -*- -import unittest -from os.path import dirname, join, realpath - -from librarian import dcparser - - -def test_file_path(file_name): - return realpath(join(dirname(__file__), file_name)) - - -class TestDCParser(unittest.TestCase): - KNOWN_RESULTS = ( - ('andersen_brzydkie_kaczatko.xml', { - 'publisher': u'Fundacja Nowoczesna Polska', - 'about': u'http://wiki.wolnepodreczniki.pl/Lektury:Andersen/Brzydkie_kaczątko', - 'source_name': u'Andersen, Hans Christian (1805-1875), Baśnie, Gebethner i Wolff, wyd. 7, Kraków, 1925', - 'author': u'Andersen, Hans Christian', - 'url': u'http://wolnelektury.pl/katalog/lektura/brzydkie-kaczatko', - 'created_at': u'2007-08-14', - 'title': u'Brzydkie kaczątko', - 'kind': u'Epika', - 'source_url': u'http://www.polona.pl/dlibra/doccontent2?id=3563&dirids=4', - 'translator': u'Niewiadomska, Cecylia', - 'released_to_public_domain_at': u'1925-01-01', - 'epoch': u'Romantyzm', - 'genre': u'Baśń', - 'technical_editor': u'Gałecki, Dariusz', - 'license_description': u'Domena publiczna - tłumacz Cecylia Niewiadomska zm. 1925', - }), - ('kochanowski_piesn7.xml', { - 'publisher': u'Fundacja Nowoczesna Polska', - 'about': u'http://wiki.wolnepodreczniki.pl/Lektury:Kochanowski/Pieśni/Pieśń_VII_(1)', - 'source_name': u'Kochanowski, Jan (1530-1584), Dzieła polskie, tom 1, oprac. Julian Krzyżanowski, wyd. 8, Państwowy Instytut Wydawniczy, Warszawa, 1976', - 'author': u'Kochanowski, Jan', - 'url': u'http://wolnelektury.pl/katalog/lektura/piesni-ksiegi-pierwsze-piesn-vii-trudna-rada-w-tej-mierze-pr', - 'created_at': u'2007-08-31', - 'title': u'Pieśń VII (Trudna rada w tej mierze: przyjdzie się rozjechać...)', - 'kind': u'Liryka', - 'source_url': u'http://www.polona.pl/Content/1499', - 'released_to_public_domain_at': u'1584-01-01', - 'epoch': u'Renesans', - 'genre': u'Pieśń', - 'technical_editor': u'Gałecki, Dariusz', - 'license_description': u'Domena publiczna - Jan Kochanowski zm. 1584 ', - }), - ('mickiewicz_rybka.xml', { - 'publisher': u'Fundacja Nowoczesna Polska', - 'about': 'http://wiki.wolnepodreczniki.pl/Lektury:Mickiewicz/Ballady/Rybka', - 'source_name': u'Mickiewicz, Adam (1798-1855), Poezje, tom 1 (Wiersze młodzieńcze - Ballady i romanse - Wiersze do r. 1824), Krakowska Spółdzielnia Wydawnicza, wyd. 2 zwiększone, Kraków, 1922', - 'author': u'Mickiewicz, Adam', - 'url': u'http://wolnelektury.pl/katalog/lektura/ballady-i-romanse-rybka', - 'created_at': u'2007-09-06', - 'title': u'Rybka', - 'kind': u'Liryka', - 'source_url': u'http://www.polona.pl/Content/2222', - 'released_to_public_domain_at': u'1855-01-01', - 'epoch': u'Romantyzm', - 'genre': u'Ballada', - 'technical_editor': u'Sutkowska, Olga', - 'license_description': u'Domena publiczna - Adam Mickiewicz zm. 1855', - }), - ('sofokles_antygona.xml', { - 'publisher': u'Fundacja Nowoczesna Polska', - 'about': 'http://wiki.wolnepodreczniki.pl/Lektury:Sofokles/Antygona', - 'source_name': u'Sofokles (496-406 a.C.), Antygona, Zakład Narodowy im. Ossolińskich, wyd. 7, Lwów, 1939', - 'author': u'Sofokles', - 'url': u'http://wolnelektury.pl/katalog/lektura/antygona', - 'created_at': u'2007-08-30', - 'title': u'Antygona', - 'kind': u'Dramat', - 'source_url': u'http://www.polona.pl/Content/3768', - 'translator': u'Morawski, Kazimierz', - 'released_to_public_domain_at': u'1925-01-01', - 'epoch': u'Starożytność', - 'genre': u'Tragedia', - 'technical_editor': u'Gałecki, Dariusz', - 'license_description': u'Domena publiczna - tłumacz Kazimierz Morawski zm. 1925', - }), - ('biedrzycki_akslop.xml', { - 'publisher': u'Fundacja Nowoczesna Polska', - 'about': 'http://wiki.wolnepodreczniki.pl/Lektury:Biedrzycki/Akslop', - 'source_name': u'Miłosz Biedrzycki, * ("Gwiazdka"), Fundacja "brulion", Kraków-Warszawa, 1993', - 'author': u'Biedrzycki, Miłosz', - 'url': u'http://wolnelektury.pl/katalog/lektura/akslop', - 'created_at': u'2009-06-04', - 'title': u'Akslop', - 'kind': u'Liryka', - 'source_url': u'http://free.art.pl/mlb/gwiazdka.html#t1', - 'epoch': u'Współczesność', - 'genre': u'Wiersz', - 'technical_editor': u'Sutkowska, Olga', - 'license': u'http://creativecommons.org/licenses/by-sa/3.0/', - 'license_description': u'Creative Commons Uznanie Autorstwa - Na Tych Samych Warunkach 3.0.PL' - }), - ) - - def test_parse(self): - for file_name, result in self.KNOWN_RESULTS: - self.assertEqual(dcparser.parse(test_file_path(file_name)).to_dict(), result) - - -if __name__ == '__main__': - unittest.main() \ No newline at end of file diff --git a/lib/librarian/tests/andersen_brzydkie_kaczatko.xml b/lib/librarian/tests/andersen_brzydkie_kaczatko.xml deleted file mode 100644 index d653a9b5..00000000 --- a/lib/librarian/tests/andersen_brzydkie_kaczatko.xml +++ /dev/null @@ -1,24 +0,0 @@ - - - Andersen, Hans Christian - Brzydkie kaczątko - Niewiadomska, Cecylia - Gałecki, Dariusz - Fundacja Nowoczesna Polska - Romantyzm - Epika - Baśń - Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN. - http://wolnelektury.pl/katalog/lektura/brzydkie-kaczatko - http://www.polona.pl/dlibra/doccontent2?id=3563&dirids=4 - Andersen, Hans Christian (1805-1875), Baśnie, Gebethner i Wolff, wyd. 7, Kraków, 1925 - Domena publiczna - tłumacz Cecylia Niewiadomska zm. 1925 - 1925 - xml - text - text - 2007-08-14 - SP1 - pol - - \ No newline at end of file diff --git a/lib/librarian/tests/biedrzycki_akslop.xml b/lib/librarian/tests/biedrzycki_akslop.xml deleted file mode 100644 index da0cd9fa..00000000 --- a/lib/librarian/tests/biedrzycki_akslop.xml +++ /dev/null @@ -1,25 +0,0 @@ - - - Biedrzycki, Miłosz - Akslop - Sekuła, Aleksandra - Sutkowska, Olga - Fundacja Nowoczesna Polska - Współczesność - Liryka - Wiersz - Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). - http://wolnelektury.pl/katalog/lektura/akslop - http://free.art.pl/mlb/gwiazdka.html#t1 - Miłosz Biedrzycki, * ("Gwiazdka"), Fundacja "brulion", Kraków-Warszawa, 1993 - Creative Commons Uznanie Autorstwa - Na Tych Samych Warunkach 3.0.PL - http://creativecommons.org/licenses/by-sa/3.0/ - xml - text - text - 2009-06-04 - L - pol - - \ No newline at end of file diff --git a/lib/librarian/tests/kochanowski_piesn7.xml b/lib/librarian/tests/kochanowski_piesn7.xml deleted file mode 100644 index 96be1ae0..00000000 --- a/lib/librarian/tests/kochanowski_piesn7.xml +++ /dev/null @@ -1,27 +0,0 @@ - - - Kochanowski, Jan - Pieśń VII (Trudna rada w tej mierze: przyjdzie się rozjechać...) - http://www.wolnelektury.pl/lektura/piesni-ksiegi-pierwsze - Sekuła, Aleksandra - Krzyżanowski, Julian - Otwinowska, Barbara - Gałecki, Dariusz - Fundacja Nowoczesna Polska - Renesans - Liryka - Pieśń - Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN. - http://wolnelektury.pl/katalog/lektura/piesni-ksiegi-pierwsze-piesn-vii-trudna-rada-w-tej-mierze-pr - http://www.polona.pl/Content/1499 - Kochanowski, Jan (1530-1584), Dzieła polskie, tom 1, oprac. Julian Krzyżanowski, wyd. 8, Państwowy Instytut Wydawniczy, Warszawa, 1976 - Domena publiczna - Jan Kochanowski zm. 1584 - 1584 - xml - text - text - 2007-08-31 - L - pol - - \ No newline at end of file diff --git a/lib/librarian/tests/mickiewicz_rybka.xml b/lib/librarian/tests/mickiewicz_rybka.xml deleted file mode 100644 index 0796a5b0..00000000 --- a/lib/librarian/tests/mickiewicz_rybka.xml +++ /dev/null @@ -1,28 +0,0 @@ - - - Mickiewicz, Adam - Rybka - http://www.wolnelektury.pl/lektura/ballady-i-romanse - Sekuła, Aleksandra - Kallenbach, Józef - Sutkowska, Olga - Fundacja Nowoczesna Polska - Romantyzm - Liryka - Ballada - Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN. - http://wolnelektury.pl/katalog/lektura/ballady-i-romanse-rybka - http://www.polona.pl/Content/2222 - Mickiewicz, Adam (1798-1855), Poezje, tom 1 (Wiersze młodzieńcze - Ballady i romanse - Wiersze do r. 1824), Krakowska Spółdzielnia Wydawnicza, wyd. 2 zwiększone, Kraków, 1922 - Domena publiczna - Adam Mickiewicz zm. 1855 - 1855 - xml - text - text - 2007-09-06 - SP2 - G - L - pol - - \ No newline at end of file diff --git a/lib/librarian/tests/sofokles_antygona.xml b/lib/librarian/tests/sofokles_antygona.xml deleted file mode 100644 index 4acb2d4f..00000000 --- a/lib/librarian/tests/sofokles_antygona.xml +++ /dev/null @@ -1,25 +0,0 @@ - - - Sofokles - Antygona - Sekuła, Aleksandra - Morawski, Kazimierz - Gałecki, Dariusz - Fundacja Nowoczesna Polska - Starożytność - Dramat - Tragedia - Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN. - http://wolnelektury.pl/katalog/lektura/antygona - http://www.polona.pl/Content/3768 - Sofokles (496-406 a.C.), Antygona, Zakład Narodowy im. Ossolińskich, wyd. 7, Lwów, 1939 - Domena publiczna - tłumacz Kazimierz Morawski zm. 1925 - 1925 - xml - text - text - 2007-08-30 - G - pol - - \ No newline at end of file -- 2.20.1