From: Marek Stępniowski Date: Thu, 6 Aug 2009 09:37:23 +0000 (+0200) Subject: Pierwszy poważny import. X-Git-Url: https://git.mdrn.pl/redakcja.git/commitdiff_plain/38343a3fc11f5509c8522fec94c0ae7085b7244f Pierwszy poważny import. --- diff --git a/apps/explorer/__init__.py b/apps/explorer/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/apps/explorer/forms.py b/apps/explorer/forms.py new file mode 100644 index 00000000..065a8153 --- /dev/null +++ b/apps/explorer/forms.py @@ -0,0 +1,6 @@ +from django import forms + + +class BookForm(forms.Form): + text = forms.CharField(widget=forms.Textarea) + diff --git a/apps/explorer/models.py b/apps/explorer/models.py new file mode 100644 index 00000000..e69de29b diff --git a/apps/explorer/views.py b/apps/explorer/views.py new file mode 100644 index 00000000..b1513e0c --- /dev/null +++ b/apps/explorer/views.py @@ -0,0 +1,42 @@ +from librarian import html +import hg + +from django.views.generic.simple import direct_to_template +from django.conf import settings +from django.http import HttpResponseRedirect + +from explorer import forms + + +repo = hg.Repository(settings.REPOSITORY_PATH) + + +def file_list(request): + return direct_to_template(request, 'explorer/file_list.html', extra_context={ + 'objects': repo.all_files(), + }) + + +def file_xml(request, path): + if request.method == 'POST': + form = forms.BookForm(request.POST) + if form.is_valid(): + repo.add_file(path, form.cleaned_data['text']) + repo.commit() + return HttpResponseRedirect('/') + else: + form = forms.BookForm() + form.fields['text'].initial = repo.get_file(path).data() + + return direct_to_template(request, 'explorer/file_xml.html', extra_context={ + 'hash': path, + 'form': form, + }) + + +def file_html(request, path): + return direct_to_template(request, 'explorer/file_html.html', extra_context={ + 'object': html.transform(repo.get_file(path).data(), is_file=False), + 'hash': path, + }) + \ No newline at end of file diff --git 
# -*- coding: utf-8 -*-
import os
import codecs
from mercurial import localrepo, ui, error, match, node


class RepositoryDoesNotExist(Exception):
    """Raised when no repository can be opened and creation was not requested."""


class Repository(object):
    """Abstraction of a Mercurial repository. Works with Mercurial 1.3.1."""

    def __init__(self, path, create=False):
        """Open the repository at ``path``.

        Arguments::

         - path: directory of the repository; resolved with os.path.realpath
         - create: when true, initialise a repository if none can be opened

        Raises RepositoryDoesNotExist when nothing can be opened and
        ``create`` is false.
        """
        self.ui = ui.ui()
        # ui.config() only *reads* a setting (its third argument is the
        # fallback value), so it never changed anything; setconfig() is the
        # call that actually applies the option.
        self.ui.setconfig('ui', 'quiet', 'true')
        self.ui.setconfig('ui', 'interactive', 'false')

        self.real_path = os.path.realpath(path)
        self.repo = self.open_repository(self.real_path, create)
        # Paths written through add_file() and not yet committed.
        self._pending_files = []

    def open_repository(self, path, create=False):
        """Return a localrepository for ``path``, creating it if allowed.

        Raises RepositoryDoesNotExist when ``path`` holds no repository and
        ``create`` is false.
        """
        if os.path.isdir(path):
            try:
                return localrepo.localrepository(self.ui, path)
            except error.RepoError:
                # dir is not an hg repo, we must init it
                if create:
                    return localrepo.localrepository(self.ui, path, create=1)
        elif create:
            os.makedirs(path)
            return localrepo.localrepository(self.ui, path, create=1)
        raise RepositoryDoesNotExist("Repository %s does not exist." % path)

    def all_files(self):
        """Return a list built by iterating the repository's 'tip' context."""
        return list(self.repo['tip'])

    def get_file(self, path):
        """Return the file context of ``path`` taken from ``changectx(None)``."""
        ctx = self.repo.changectx(None)
        return ctx.filectx(path)

    def add_file(self, path, value):
        """Write ``value`` (UTF-8 encoded) to ``path`` and mark it as pending.

        ``path`` is joined onto the repository directory. Nothing is
        committed until commit() is called.
        """
        # NOTE(review): ``path`` is joined without any normalisation; if it
        # can come from a request, confirm the caller rejects '..' segments.
        f = codecs.open(os.path.join(self.real_path, path), 'w', encoding='utf-8')
        try:
            f.write(value)
        finally:
            # Close the handle even when encoding or writing fails.
            f.close()

        if path not in self._pending_files:
            self._pending_files.append(path)

    def commit(self, message='hgshelve auto commit', key=None, user=None):
        """
        Commit unsynchronized data to disk.

        Arguments::

         - message: mercurial's changeset message
         - key: supply to sync only one key
         - user: passed through to the repository's commit() call

        Always returns None. If ``key`` is given but is not pending, nothing
        is done.
        """
        if key is None:
            # Commit everything. Work on a snapshot: the clean-up loop below
            # removes entries from self._pending_files, and iterating the
            # very list being mutated skipped every other pending file.
            pending_files = list(self._pending_files)
        elif key in self._pending_files:
            pending_files = [key]
        else:
            # key isn't changed
            return None

        # Ask the repository once instead of once per pending path.
        tracked = set(self.all_files())
        files_to_add = []
        files_to_remove = []
        for path in pending_files:
            if path not in tracked:
                # file added
                files_to_add.append(path)
            elif not os.path.exists(os.path.join(self.real_path, path)):
                # file removed
                files_to_remove.append(path)

        # hg add
        if files_to_add:
            self.repo.add(files_to_add)
        # hg forget
        if files_to_remove:
            self.repo.forget(files_to_remove)
        # hg commit, restricted to the pending paths
        if pending_files:
            matcher = match.match(self.repo.root, self.repo.root, pending_files, default='path')
            self.repo.commit(message, user=user, match=matcher)

        # clean pending keys
        for path in pending_files:
            self._pending_files.remove(path)
        # NOTE(review): the original carried a disabled
        # ``return node.hex(rev)``; the method still returns None so that
        # existing callers see no change.
        return None
+ Convert SOURCE files to HTML format.""" + + parser = optparse.OptionParser(usage=usage) + + parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, + help='print status messages to stdout') + + options, input_filenames = parser.parse_args() + + if len(input_filenames) < 1: + parser.print_help() + exit(1) + + # Do some real work + for input_filename in input_filenames: + if options.verbose: + print input_filename + + output_filename = os.path.splitext(input_filename)[0] + '.html' + html.transform(input_filename, output_filename) + diff --git a/lib/librarian/bin/book2txt.py b/lib/librarian/bin/book2txt.py new file mode 100755 index 00000000..9c470805 --- /dev/null +++ b/lib/librarian/bin/book2txt.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +import re +import os +import optparse +import codecs + + +HEADER = u"""\ +Kodowanie znaków w dokumencie: UTF-8. +----- +Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl/). Reprodukcja cyfrowa wykonana przez +Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN. Ten utwór nie jest chroniony prawem autorskim i znajduje +się w domenie publicznej, co oznacza, że możesz go swobodnie wykorzystywać, publikować i rozpowszechniać. +----- + +""" + + +REGEXES = [ + (r']*>(.|\n)*?', ''), + (r']*>(.|\n)*?', ''), + ('<(begin|end)\\sid=[\'|"][b|e]\\d+[\'|"]\\s/>', ''), + (r'(()|())', ''), + (r'(.|\n)*?', ''), + (r'(.|\n)*?', ''), + (r'<[^>]+>', ''), + (r'/\n', '\n'), + (r'---', u'—'), + (r'--', u'-'), + (r',,', u'„'), + (r'"', u'”'), +] + + +if __name__ == '__main__': + # Parse commandline arguments + usage = """Usage: %prog [options] SOURCE [SOURCE...] 
+ Convert SOURCE files to TXT format.""" + + parser = optparse.OptionParser(usage=usage) + + parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, + help='print status messages to stdout') + + options, input_filenames = parser.parse_args() + + if len(input_filenames) < 1: + parser.print_help() + exit(1) + + # Do some real work + for input_filename in input_filenames: + if options.verbose: + print input_filename + + output_filename = os.path.splitext(input_filename)[0] + '.txt' + + xml = codecs.open(input_filename, 'r', encoding='utf-8').read() + for pattern, repl in REGEXES: + # print pattern, repl + xml, n = re.subn(pattern, repl, xml) + # print n + + output = codecs.open(output_filename, 'w', encoding='utf-8') + output.write(HEADER) + output.write(xml) + diff --git a/lib/librarian/bin/bookfragments.py b/lib/librarian/bin/bookfragments.py new file mode 100755 index 00000000..f29e11e0 --- /dev/null +++ b/lib/librarian/bin/bookfragments.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python +import os +import optparse + +from librarian import html + + +if __name__ == '__main__': + # Parse commandline arguments + usage = """Usage: %prog [options] SOURCE [SOURCE...] 
+ Extract theme fragments from SOURCE.""" + + parser = optparse.OptionParser(usage=usage) + + parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, + help='print status messages to stdout') + + options, input_filenames = parser.parse_args() + + if len(input_filenames) < 1: + parser.print_help() + exit(1) + + # Do some real work + for input_filename in input_filenames: + if options.verbose: + print input_filename + + output_filename = os.path.splitext(input_filename)[0] + '.fragments.html' + + closed_fragments, open_fragments = html.extract_fragments(input_filename) + + for fragment_id in open_fragments: + print '%s:warning:unclosed fragment #%s' % (input_filename, fragment_id) + + output_file = open(output_filename, 'w') + output_file.write(""" + + + bookfragments output + + + + """) + for fragment in closed_fragments.values(): + fragment_html = u'

[#%s] %s

%s
' % (fragment.id, fragment.themes, fragment) + output_file.write(fragment_html.encode('utf-8')) + output_file.write('') + output_file.close() + diff --git a/lib/librarian/bin/genslugs.py b/lib/librarian/bin/genslugs.py new file mode 100755 index 00000000..3391d8e5 --- /dev/null +++ b/lib/librarian/bin/genslugs.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +import os +import optparse + +from lxml import etree +from librarian import html +from slughifi import slughifi + + +BOOK_URL = 'http://wolnelektury.pl/katalog/lektura/' + + +if __name__ == '__main__': + # Parse commandline arguments + usage = """Usage: %prog [options] SOURCE [SOURCE...] + Generate slugs for SOURCE.""" + + parser = optparse.OptionParser(usage=usage) + + parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, + help='print status messages to stdout') + parser.add_option('-f', '--force', action='store_true', dest='force', default=False, + help='overwrite current identifiers') + + options, input_filenames = parser.parse_args() + + if len(input_filenames) < 1: + parser.print_help() + exit(1) + + # Do some real work + for input_filename in input_filenames: + if options.verbose: + print input_filename + + doc = etree.parse(input_filename) + try: + title = doc.find('//{http://purl.org/dc/elements/1.1/}title').text + except AttributeError: + print '%s:error:Book title not found. Skipping.' % input_filename + continue + + parent = '' + try: + parent_url = doc.find('//{http://purl.org/dc/elements/1.1/}relation.isPartOf').text + parent = parent_url.rsplit('/', 1)[1] + ' ' + except AttributeError: + pass + except IndexError: + print '%s:error:Invalid parent URL "%s". Skipping.' 
% (input_filename, parent_url) + + book_url = doc.find('//{http://purl.org/dc/elements/1.1/}identifier.url') + if book_url is None: + book_description = doc.find('//{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description') + book_url = etree.SubElement(book_description, '{http://purl.org/dc/elements/1.1/}identifier.url') + if not options.force and book_url.text.startswith('http://'): + print '%s:Notice:Book already has identifier URL "%s". Skipping.' % (input_filename, book_url.text) + continue + + book_url.text = BOOK_URL + slughifi(parent + title)[:60] + + doc.write(input_filename, xml_declaration=True, pretty_print=True, encoding='utf-8') + diff --git a/lib/librarian/bin/master.css b/lib/librarian/bin/master.css new file mode 100644 index 00000000..98e142b9 --- /dev/null +++ b/lib/librarian/bin/master.css @@ -0,0 +1,207 @@ +body { + font-size: 16px; + font: Georgia, "Times New Roman", serif; + line-height: 1.5em; + margin: 3em; + max-width: 36em; +} + +a { + color: blue; + text-decoration: none; +} + +/* =================================================== */ +/* = Common elements: headings, paragraphs and lines = */ +/* =================================================== */ +h1 { + font-size: 3em; + margin: 1.5em 0; + text-align: center; + line-height: 1.5em; + font-weight: bold; +} + +h2 { + font-size: 2em; + margin: 1.5em 0 0; + font-weight: bold; + line-height: 1.5em; +} + +h3 { + font-size: 1.5em; + margin: 1.5em 0 0; + font-weight: normal; + line-height: 1.5em; +} + +h4 { + font-size: 1em; + margin: 1.5em 0 0; + line-height: 1.5em; +} + +p { + margin: 0; +} + +/* ======================== */ +/* = Footnotes and themes = */ +/* ======================== */ +.theme-begin { + border-left: 0.1em solid #DDDDDD; + color: #777; + padding: 0 0.5em; + width: 7.5em; + font-style: normal; + font-weight: normal; + font-size: 16px; + position: absolute; + left: 40em; + line-height: 1.5em; + text-align: left; +} + +.annotation { + font-style: normal; + font-weight: 
normal; + font-size: 12px; +} + +#footnotes .annotation { + display: block; + float: left; + width: 2.5em; + clear: both; +} + +#footnotes div { + margin: 1.5em 0 0 0; +} + +#footnotes p { + margin-left: 2.5em; +} + + +/* ============= */ +/* = Numbering = */ +/* ============= */ +.anchor { + float: left; + margin: -0.2em -0.5em -0.2em -3.5em; + color: #777; + font-size: 12px; + width: 2em; + text-align: center; + padding: 0.2em 0.5em; +} + +.anchor:hover, .anchor:active { + color: #FFF; + background-color: #CCC; +} + + +/* =================== */ +/* = Custom elements = */ +/* =================== */ +span.author { + font-size: 0.75em; + display: block; + line-height: 1.5em; + margin-bottom: 0.25em; +} + +span.collection { + font-size: 0.75em; + display: block; + line-height: 1.5em; + margin-bottom: -0.25em; +} + +span.subtitle { + font-size: 0.75em; + display: block; + line-height: 1.5em; + margin-top: -0.25em; +} + +div.didaskalia { + font-style: italic; + margin: 0.5em 0 0; +} + +div.kwestia { + margin: 0.5em 0 0; +} + +div.stanza { + margin: 1.5em 0 0; +} + +div.kwestia div.stanza { + margin: 0; +} + +p.paragraph { + text-align: justify; + margin: 1.5em 0 0; +} + +p.motto { + text-align: justify; + font-style: italic; + margin: 1.5em 0 0; +} + +p.motto_podpis { + font-size: 0.875em; +} + +div.fragment { + border-bottom: 0.1em solid #999; + padding-bottom: 1.5em; +} + +div.note p, div.dedication p, div.note p.paragraph, div.dedication p.paragraph { + text-align: right; + font-style: italic; +} + +hr.spacer { + height: 3em; + visibility: hidden; +} + +hr.spacer-line { + margin: 1.5em 0; + border: none; + border-bottom: 0.1em solid #000; +} + +p.spacer-asterisk { + padding: 0; + margin: 1.5em 0; + text-align: center; +} + +div.person-list ol { + list-style: none; + padding: 0 0 0 1.5em; +} + +p.place-and-time { + font-style: italic; +} + +em.math, em.foreign-word, em.book-title, em.didaskalia, em.author-emphasis { + font-style: italic; +} + +em.person { + 
font-style: normal; + font-variant: small-caps; +} + diff --git a/lib/librarian/bin/master.plain.css b/lib/librarian/bin/master.plain.css new file mode 100644 index 00000000..3210e881 --- /dev/null +++ b/lib/librarian/bin/master.plain.css @@ -0,0 +1,160 @@ +body { + font-size: 16px; + font: Georgia, "Times New Roman", serif; + line-height: 1.5em; + margin: 3em; + max-width: 36em; +} + +a { + color: blue; + text-decoration: none; +} + +/* =================================================== */ +/* = Common elements: headings, paragraphs and lines = */ +/* =================================================== */ +h1 { + font-size: 3em; + margin: 1.5em 0; + text-align: center; + line-height: 1.5em; + font-weight: bold; +} + +h2 { + font-size: 2em; + margin: 1.5em 0 0; + font-weight: bold; + line-height: 1.5em; +} + +h3 { + font-size: 1.5em; + margin: 1.5em 0 0; + font-weight: normal; + line-height: 1.5em; +} + +h4 { + font-size: 1em; + margin: 1.5em 0 0; + line-height: 1.5em; +} + +p { + margin: 0; +} + +/* ======================== */ +/* = Footnotes and themes = */ +/* ======================== */ +.theme-begin { + border-left: 0.1em solid #DDDDDD; + color: #666; + float: right; + margin: 0 -9.5em 0 0; + padding: 0 0.5em; + width: 7.5em; + font-style: normal; + font-weight: normal; + font-size: 16px; + display: none; +} + +.annotation { + font-style: normal; + font-weight: normal; + font-size: 16px; + display: none; +} + +#footnotes { + display: none; +} + +#footnotes .annotation { + display: block; + float: left; + width: 2.5em; + clear: both; +} + +#footnotes div { + margin: 1.5em 0 0 0; +} + +#footnotes p { + margin-left: 2.5em; +} + +/* =================== */ +/* = Custom elements = */ +/* =================== */ +span.author { + font-size: 0.75em; + display: block; + line-height: 1.5em; + margin-bottom: 0.25em; +} + +span.collection { + font-size: 0.75em; + display: block; + line-height: 1.5em; + margin-bottom: -0.25em; +} + +span.subtitle { + font-size: 0.75em; + 
display: block; + line-height: 1.5em; + margin-top: -0.25em; +} + +div.didaskalia { + font-style: italic; + margin: 0.5em 0 0; +} + +div.kwestia { + margin: 0.5em 0 0; +} + +div.stanza { + margin: 1.5em 0 0; +} + +div.kwestia div.stanza { + margin: 0; +} + +p.paragraph { + text-align: justify; + margin: 1.5em 0 0; +} + +p.motto { + text-align: justify; + font-style: italic; + margin: 1.5em 0 0; +} + +p.motto_podpis { + font-size: 0.875em; +} + +div.fragment { + border-bottom: 0.1em solid #999; + padding-bottom: 1.5em; +} + +div.note p, div.note p.paragraph { + text-align: right; + font-style: italic; +} + +hr.spacer { + height: 3em; + visibility: hidden; +} diff --git a/lib/librarian/book2html.xslt b/lib/librarian/book2html.xslt new file mode 100644 index 00000000..71f11820 --- /dev/null +++ b/lib/librarian/book2html.xslt @@ -0,0 +1,615 @@ + + + + + + + +
+ + +
+

Przypisy

+ +
+ + [] + + +

+
+ + + +
+
+
+
+
+
+ +
+ + + + + + + + +

+ +

+
+ +
+ + + + + + + +
+
+ + +
+

+
    + +
+
+
+ + +
+
+ + +
+ +
+
+ + +
+
+ + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + +

+
+ + +

+
+ + +

+
+ + + +

+
+ + +
+
+ + +
  • +
    + + +

    +
    + + +
    + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + + +

    + + + padding-left: 1em + + + + + padding-left: em + + + padding-left: 1em + + + + + padding-left: 12em + + + +

    +
    + + +

    +
    + + + + + + + + + + [] + + + + + + + + + + + + + + + + + + „” + + + + + + + + + + + + + + + + + +
    +
    + + +

    *

    +
    + + +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    # -*- coding: utf-8 -*-
from xml.parsers.expat import ExpatError
from datetime import date
import sys
import time

# Import ElementTree from anywhere
try:
    import xml.etree.ElementTree as etree  # Python >= 2.5
except ImportError:
    try:
        import elementtree.ElementTree as etree  # effbot's pure Python module
    except ImportError:
        import lxml.etree as etree  # ElementTree API using libxml2


# ==============
# = Converters =
# ==============
class Person(object):
    """Single person with last name and a list of first names."""

    def __init__(self, last_name, *first_names):
        self.last_name = last_name
        self.first_names = first_names

    def __eq__(self, right):
        # Objects without the two name attributes are simply "not equal"
        # instead of blowing up with AttributeError.
        try:
            return (self.last_name, self.first_names) == (right.last_name, right.first_names)
        except AttributeError:
            return NotImplemented

    def __ne__(self, right):
        # Python 2 does not derive != from ==, so spell it out.
        result = self.__eq__(right)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        # Keep hashing consistent with __eq__.
        return hash((self.last_name, self.first_names))

    def __unicode__(self):
        if len(self.first_names) > 0:
            return '%s, %s' % (self.last_name, ' '.join(self.first_names))
        else:
            return self.last_name

    def __repr__(self):
        return 'Person(last_name=%r, first_names=*%r)' % (self.last_name, self.first_names)


def str_to_unicode(value, previous):
    """Convert ``value`` to unicode; ``previous`` is ignored."""
    return unicode(value)


def str_to_unicode_list(value, previous):
    """Append ``value`` (as unicode) to the list ``previous`` and return it.

    A new list is started when ``previous`` is None.
    """
    if previous is None:
        previous = []
    previous.append(str_to_unicode(value, None))
    return previous


def str_to_person(value, previous):
    """Parse "Last name, First Names" (or a bare last name) into a Person.

    Raises ValueError when ``value`` contains more than one comma.
    """
    comma_count = value.count(',')

    if comma_count == 0:
        last_name, first_names = value, []
    elif comma_count == 1:
        last_name, first_names = value.split(',')
        first_names = [name for name in first_names.split(' ') if len(name)]
    else:
        raise ValueError("value contains more than one comma: %r" % value)

    return Person(last_name.strip(), *first_names)


def str_to_date(value, previous):
    """Parse 'YYYY-MM-DD', falling back to a bare 'YYYY', into a date."""
    try:
        t = time.strptime(value, '%Y-%m-%d')
    except ValueError:
        t = time.strptime(value, '%Y')
    return date(t[0], t[1], t[2])


# ==========
# = Parser =
# ==========
class ParseError(Exception):
    """Raised when a document cannot be parsed or has no Description tag."""

    def __init__(self, message):
        super(ParseError, self).__init__(message)


class XMLNamespace(object):
    '''Represents XML namespace.'''

    def __init__(self, uri):
        self.uri = uri

    def __call__(self, tag):
        """Return ``tag`` qualified with this namespace as '{uri}tag'."""
        return '{%s}%s' % (self.uri, tag)

    def __contains__(self, tag):
        # __call__ produces '{uri}tag', which never starts with the bare
        # URI, so the old check could not match qualified names. The bare
        # URI prefix is still accepted for backward compatibility.
        return tag.startswith(('{%s}' % self.uri, str(self)))

    def __repr__(self):
        return 'XMLNamespace(%r)' % self.uri

    def __str__(self):
        return '%s' % self.uri


class BookInfo(object):
    """Metadata read from the rdf:Description element of a document."""

    RDF = XMLNamespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#')
    DC = XMLNamespace('http://purl.org/dc/elements/1.1/')

    # Value of rdf:about on the Description element; set by from_file().
    wiki_url = None

    # Qualified element name -> (attribute name, converter).
    mapping = {
        DC('creator'): ('author', str_to_person),
        DC('title'): ('title', str_to_unicode),
        DC('subject.period'): ('epoch', str_to_unicode),
        DC('subject.type'): ('kind', str_to_unicode),
        DC('subject.genre'): ('genre', str_to_unicode),
        DC('date'): ('created_at', str_to_date),
        DC('date.pd'): ('released_to_public_domain_at', str_to_date),
        DC('contributor.translator'): ('translator', str_to_person),
        DC('contributor.technical_editor'): ('technical_editor', str_to_person),
        DC('publisher'): ('publisher', str_to_unicode),
        DC('source'): ('source_name', str_to_unicode),
        DC('source.URL'): ('source_url', str_to_unicode),
        DC('identifier.url'): ('url', str_to_unicode),
        DC('relation.hasPart'): ('parts', str_to_unicode_list),
        DC('rights.license'): ('license', str_to_unicode),
        DC('rights'): ('license_description', str_to_unicode),
    }

    @classmethod
    def from_string(cls, xml):
        """Build a BookInfo from XML held in a string."""
        from StringIO import StringIO
        return cls.from_file(StringIO(xml))

    @classmethod
    def from_file(cls, xml_file):
        """Build a BookInfo from a file name or file object.

        Raises ParseError when the XML is malformed or contains no
        rdf:Description element.
        """
        book_info = cls()

        try:
            tree = etree.parse(xml_file)
        except (ExpatError, SyntaxError):
            # Depending on which ElementTree implementation was imported
            # above, malformed XML surfaces as ExpatError or as a
            # SyntaxError subclass. sys.exc_info() avoids the version
            # specific "except X, e" / "except X as e" spellings.
            raise ParseError(sys.exc_info()[1])

        description = tree.find('.//' + cls.RDF('Description'))

        # Check before touching the element: calling .get() on None hid the
        # intended ParseError behind an AttributeError.
        if description is None:
            raise ParseError('no Description tag found in document')

        book_info.wiki_url = description.get(cls.RDF('about'), None)

        for element in description.findall('*'):
            book_info.parse_element(element)

        return book_info

    def parse_element(self, element):
        """Store the converted text of ``element``; unmapped tags are ignored."""
        try:
            attribute, converter = self.mapping[element.tag]
        except KeyError:
            # Only an unknown tag is ignored; a KeyError raised inside a
            # converter is no longer swallowed.
            return
        setattr(self, attribute, converter(element.text, getattr(self, attribute, None)))

    def to_xml(self):
        """XML representation of this object."""
        etree._namespace_map[str(self.RDF)] = 'rdf'
        etree._namespace_map[str(self.DC)] = 'dc'

        root = etree.Element(self.RDF('RDF'))
        description = etree.SubElement(root, self.RDF('Description'))

        if self.wiki_url:
            description.set(self.RDF('about'), self.wiki_url)

        for tag, (attribute, converter) in self.mapping.iteritems():
            if hasattr(self, attribute):
                e = etree.Element(tag)
                e.text = unicode(getattr(self, attribute))
                description.append(e)

        return unicode(etree.tostring(root, 'utf-8'), 'utf-8')

    def to_dict(self):
        """Return the parsed attributes as unicode values, plus 'about'."""
        etree._namespace_map[str(self.RDF)] = 'rdf'
        etree._namespace_map[str(self.DC)] = 'dc'

        result = {'about': self.wiki_url}
        for tag, (attribute, converter) in self.mapping.iteritems():
            if hasattr(self, attribute):
                result[attribute] = unicode(getattr(self, attribute))

        return result


def parse(file_name):
    """Shortcut for BookInfo.from_file(file_name)."""
    return BookInfo.from_file(file_name)


if __name__ == '__main__':
    info = parse(sys.argv[1])
    for attribute, _ in BookInfo.mapping.values():
        print('%s: %r' % (attribute, getattr(info, attribute, None)))
# -*- coding: utf-8 -*-
import os
import cStringIO
import re
import copy
import pkgutil

from lxml import etree


# Applied in order, so the longer '---' is replaced before '--'.
ENTITY_SUBSTITUTIONS = [
    (u'---', u'—'),
    (u'--', u'–'),
    (u'...', u'…'),
    (u',,', u'„'),
    (u'"', u'”'),
]


def substitute_entities(context, text):
    """XPath extension function converting all entities in passed text."""
    if isinstance(text, list):
        text = ''.join(text)
    for entity, substitution in ENTITY_SUBSTITUTIONS:
        text = text.replace(entity, substitution)
    return text


# Register substitute_entities function with lxml
ns = etree.FunctionNamespace('http://wolnelektury.pl/functions')
ns['substitute_entities'] = substitute_entities


def transform(input_filename, output_filename=None, is_file=True):
    """Transforms file input_filename in XML to output_filename in XHTML.

    When ``is_file`` is false, ``input_filename`` is taken to be the UTF-8
    encoded document itself rather than a path.

    Returns False when the transformed document contains no <p> element.
    Otherwise returns True after writing ``output_filename``, or the
    transformed tree when no output file name was given.
    """
    # Parse XSLT
    style_filename = os.path.join(os.path.dirname(__file__), 'book2html.xslt')
    style = etree.parse(style_filename)

    expr = re.compile(r'/\s', re.MULTILINE | re.UNICODE)

    raw = input_filename
    if is_file:
        f = open(input_filename, 'rb')
        try:
            raw = f.read()
        finally:
            # Do not leak the handle when reading fails.
            f.close()

    data = raw.decode('utf-8')
    # A slash followed by whitespace becomes an explicit line-break element.
    # NOTE(review): the replacement literal was eaten by the tag-stripping
    # that produced this copy of the file; '<br />' is a reconstruction.
    data = expr.sub(u'<br />\n', data)

    doc_file = cStringIO.StringIO()
    doc_file.write(data.encode('utf-8'))
    doc_file.seek(0)

    parser = etree.XMLParser(remove_blank_text=True)
    doc = etree.parse(doc_file, parser)

    result = doc.xslt(style)
    if result.find('.//p') is None:
        return False

    add_anchors(result.getroot())
    add_table_of_contents(result.getroot())

    if output_filename is None:
        return result
    result.write(output_filename, xml_declaration=False, pretty_print=True, encoding='utf-8')
    return True


class Fragment(object):
    """A themed fragment: an id, its themes and the recorded parse events."""

    def __init__(self, id, themes):
        super(Fragment, self).__init__()
        self.id = id
        self.themes = themes
        self.events = []

    def append(self, event, element):
        """Record one (event, element) pair."""
        self.events.append((event, element))

    def closed_events(self):
        """Return the events followed by 'end' events for still-open elements."""
        stack = []
        for event, element in self.events:
            if event == 'start':
                stack.append(('end', element))
            elif event == 'end':
                try:
                    stack.pop()
                except IndexError:
                    print('CLOSED NON-OPEN TAG: %s' % (element,))

        stack.reverse()
        return self.events + stack

    def to_string(self):
        """Serialise the balanced event list to markup."""
        result = []
        for event, element in self.closed_events():
            if event == 'start':
                result.append(u'<%s %s>' % (element.tag, ' '.join('%s="%s"' % (k, v) for k, v in element.attrib.items())))
                if element.text:
                    result.append(element.text)
            elif event == 'end':
                # NOTE(review): this literal was stripped down to u'' in
                # this copy (which makes the % formatting raise TypeError);
                # the closing tag is a reconstruction.
                result.append(u'</%s>' % element.tag)
                if element.tail:
                    result.append(element.tail)
            else:
                # 'text' events carry the string itself.
                result.append(element)

        return ''.join(result)

    def __unicode__(self):
        return self.to_string()


def extract_fragments(input_filename):
    """Extracts theme fragments from input_filename.

    Returns a pair (closed_fragments, open_fragments) of dicts keyed by
    fragment id; the second holds fragments whose end marker was not seen.
    """
    open_fragments = {}
    closed_fragments = {}

    for event, element in etree.iterparse(input_filename, events=('start', 'end')):
        element_class = element.get('class', '')

        # Process begin and end elements
        if element_class in ('theme-begin', 'theme-end'):
            # Process elements only once, on end event
            if event != 'end':
                continue

            # Open new fragment
            if element_class == 'theme-begin':
                fragment = Fragment(id=element.get('fid'), themes=element.text)

                # Append parents
                if element.getparent().get('id', None) != 'book-text':
                    parents = [element.getparent()]
                    while parents[-1].getparent().get('id', None) != 'book-text':
                        parents.append(parents[-1].getparent())

                    parents.reverse()
                    for parent in parents:
                        fragment.append('start', parent)

                open_fragments[fragment.id] = fragment

            # Close existing fragment
            else:
                try:
                    fragment = open_fragments[element.get('fid')]
                except KeyError:
                    print('%s:closed not open fragment #%s' % (input_filename, element.get('fid')))
                else:
                    closed_fragments[fragment.id] = fragment
                    del open_fragments[fragment.id]

            # Append element tail to lost_text (we don't want to lose any text)
            if element.tail:
                for fragment_id in open_fragments:
                    open_fragments[fragment_id].append('text', element.tail)

        # Process all elements except begin and end
        else:
            # Omit annotation tags
            if len(element.get('name', '')) or element_class == 'annotation':
                if event == 'end' and element.tail:
                    for fragment_id in open_fragments:
                        open_fragments[fragment_id].append('text', element.tail)
            else:
                for fragment_id in open_fragments:
                    open_fragments[fragment_id].append(event, copy.copy(element))

    return closed_fragments, open_fragments


def add_anchor(element, prefix, with_link=True, with_target=True, link_text=None):
    """Insert a link anchor and/or a named target as first children of element.

    The element's leading text is moved behind the inserted anchor.
    """
    if with_link:
        if link_text is None:
            link_text = prefix
        anchor = etree.Element('a', href='#%s' % prefix)
        anchor.set('class', 'anchor')
        anchor.text = unicode(link_text)
        if element.text:
            anchor.tail = element.text
            element.text = u''
        element.insert(0, anchor)

    if with_target:
        anchor_target = etree.Element('a', name='%s' % prefix)
        anchor_target.set('class', 'target')
        anchor_target.text = u' '
        if element.text:
            anchor_target.tail = element.text
            element.text = u''
        element.insert(0, anchor_target)


def any_ancestor(element, test):
    """Return True when ``test`` holds for at least one ancestor of element."""
    for ancestor in element.iterancestors():
        if test(ancestor):
            return True
    return False


def add_anchors(root):
    """Number verses and paragraphs, anchoring verse 1 and every fifth verse."""
    counter = 1
    for element in root.iterdescendants():
        if any_ancestor(element, lambda e: e.get('class') in ('note', 'motto', 'motto_podpis', 'dedication')
                or e.tag == 'blockquote'):
            continue

        if element.tag == 'p' and 'verse' in element.get('class', ''):
            if counter == 1 or counter % 5 == 0:
                add_anchor(element, "f%d" % counter, link_text=counter)
            counter += 1
        elif 'paragraph' in element.get('class', ''):
            add_anchor(element, "f%d" % counter, link_text=counter)
            counter += 1


def add_table_of_contents(root):
    """Build a two-level table of contents from h2/h3 and insert it first."""
    sections = []
    counter = 1
    for element in root.iterdescendants():
        if element.tag in ('h2', 'h3'):
            if any_ancestor(element, lambda e: e.get('id') in ('footnotes',) or e.get('class') in ('person-list',)):
                continue

            entry = (counter, element.tag, ''.join(element.xpath('text()')), [])
            # An h3 is nested under the preceding top-level entry only when
            # that entry is an h2.
            if element.tag == 'h3' and len(sections) and sections[-1][1] == 'h2':
                sections[-1][3].append(entry)
            else:
                sections.append(entry)
            add_anchor(element, "s%d" % counter, with_link=False)
            counter += 1

    toc = etree.Element('div')
    toc.set('id', 'toc')
    toc_header = etree.SubElement(toc, 'h2')
    toc_header.text = u'Spis treści'
    toc_list = etree.SubElement(toc, 'ol')

    for n, section, text, subsections in sections:
        section_element = etree.SubElement(toc_list, 'li')
        add_anchor(section_element, "s%d" % n, with_target=False, link_text=text)

        if len(subsections):
            subsection_list = etree.SubElement(section_element, 'ol')
            for n, subsection, text, _ in subsections:
                subsection_element = etree.SubElement(subsection_list, 'li')
                add_anchor(subsection_element, "s%d" % n, with_target=False, link_text=text)

    root.insert(0, toc)
b/lib/librarian/tests/__init__.py @@ -0,0 +1,104 @@ +# -*- coding: utf-8 -*- +import unittest +from os.path import dirname, join, realpath + +from librarian import dcparser + + +def test_file_path(file_name): + return realpath(join(dirname(__file__), file_name)) + + +class TestDCParser(unittest.TestCase): + KNOWN_RESULTS = ( + ('andersen_brzydkie_kaczatko.xml', { + 'publisher': u'Fundacja Nowoczesna Polska', + 'about': u'http://wiki.wolnepodreczniki.pl/Lektury:Andersen/Brzydkie_kaczątko', + 'source_name': u'Andersen, Hans Christian (1805-1875), Baśnie, Gebethner i Wolff, wyd. 7, Kraków, 1925', + 'author': u'Andersen, Hans Christian', + 'url': u'http://wolnelektury.pl/katalog/lektura/brzydkie-kaczatko', + 'created_at': u'2007-08-14', + 'title': u'Brzydkie kaczątko', + 'kind': u'Epika', + 'source_url': u'http://www.polona.pl/dlibra/doccontent2?id=3563&dirids=4', + 'translator': u'Niewiadomska, Cecylia', + 'released_to_public_domain_at': u'1925-01-01', + 'epoch': u'Romantyzm', + 'genre': u'Baśń', + 'technical_editor': u'Gałecki, Dariusz', + 'license_description': u'Domena publiczna - tłumacz Cecylia Niewiadomska zm. 1925', + }), + ('kochanowski_piesn7.xml', { + 'publisher': u'Fundacja Nowoczesna Polska', + 'about': u'http://wiki.wolnepodreczniki.pl/Lektury:Kochanowski/Pieśni/Pieśń_VII_(1)', + 'source_name': u'Kochanowski, Jan (1530-1584), Dzieła polskie, tom 1, oprac. Julian Krzyżanowski, wyd. 8, Państwowy Instytut Wydawniczy, Warszawa, 1976', + 'author': u'Kochanowski, Jan', + 'url': u'http://wolnelektury.pl/katalog/lektura/piesni-ksiegi-pierwsze-piesn-vii-trudna-rada-w-tej-mierze-pr', + 'created_at': u'2007-08-31', + 'title': u'Pieśń VII (Trudna rada w tej mierze: przyjdzie się rozjechać...)', + 'kind': u'Liryka', + 'source_url': u'http://www.polona.pl/Content/1499', + 'released_to_public_domain_at': u'1584-01-01', + 'epoch': u'Renesans', + 'genre': u'Pieśń', + 'technical_editor': u'Gałecki, Dariusz', + 'license_description': u'Domena publiczna - Jan Kochanowski zm. 
1584 ', + }), + ('mickiewicz_rybka.xml', { + 'publisher': u'Fundacja Nowoczesna Polska', + 'about': 'http://wiki.wolnepodreczniki.pl/Lektury:Mickiewicz/Ballady/Rybka', + 'source_name': u'Mickiewicz, Adam (1798-1855), Poezje, tom 1 (Wiersze młodzieńcze - Ballady i romanse - Wiersze do r. 1824), Krakowska Spółdzielnia Wydawnicza, wyd. 2 zwiększone, Kraków, 1922', + 'author': u'Mickiewicz, Adam', + 'url': u'http://wolnelektury.pl/katalog/lektura/ballady-i-romanse-rybka', + 'created_at': u'2007-09-06', + 'title': u'Rybka', + 'kind': u'Liryka', + 'source_url': u'http://www.polona.pl/Content/2222', + 'released_to_public_domain_at': u'1855-01-01', + 'epoch': u'Romantyzm', + 'genre': u'Ballada', + 'technical_editor': u'Sutkowska, Olga', + 'license_description': u'Domena publiczna - Adam Mickiewicz zm. 1855', + }), + ('sofokles_antygona.xml', { + 'publisher': u'Fundacja Nowoczesna Polska', + 'about': 'http://wiki.wolnepodreczniki.pl/Lektury:Sofokles/Antygona', + 'source_name': u'Sofokles (496-406 a.C.), Antygona, Zakład Narodowy im. Ossolińskich, wyd. 7, Lwów, 1939', + 'author': u'Sofokles', + 'url': u'http://wolnelektury.pl/katalog/lektura/antygona', + 'created_at': u'2007-08-30', + 'title': u'Antygona', + 'kind': u'Dramat', + 'source_url': u'http://www.polona.pl/Content/3768', + 'translator': u'Morawski, Kazimierz', + 'released_to_public_domain_at': u'1925-01-01', + 'epoch': u'Starożytność', + 'genre': u'Tragedia', + 'technical_editor': u'Gałecki, Dariusz', + 'license_description': u'Domena publiczna - tłumacz Kazimierz Morawski zm. 
1925', + }), + ('biedrzycki_akslop.xml', { + 'publisher': u'Fundacja Nowoczesna Polska', + 'about': 'http://wiki.wolnepodreczniki.pl/Lektury:Biedrzycki/Akslop', + 'source_name': u'Miłosz Biedrzycki, * ("Gwiazdka"), Fundacja "brulion", Kraków-Warszawa, 1993', + 'author': u'Biedrzycki, Miłosz', + 'url': u'http://wolnelektury.pl/katalog/lektura/akslop', + 'created_at': u'2009-06-04', + 'title': u'Akslop', + 'kind': u'Liryka', + 'source_url': u'http://free.art.pl/mlb/gwiazdka.html#t1', + 'epoch': u'Współczesność', + 'genre': u'Wiersz', + 'technical_editor': u'Sutkowska, Olga', + 'license': u'http://creativecommons.org/licenses/by-sa/3.0/', + 'license_description': u'Creative Commons Uznanie Autorstwa - Na Tych Samych Warunkach 3.0.PL' + }), + ) + + def test_parse(self): + for file_name, result in self.KNOWN_RESULTS: + self.assertEqual(dcparser.parse(test_file_path(file_name)).to_dict(), result) + + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/lib/librarian/tests/andersen_brzydkie_kaczatko.xml b/lib/librarian/tests/andersen_brzydkie_kaczatko.xml new file mode 100644 index 00000000..d653a9b5 --- /dev/null +++ b/lib/librarian/tests/andersen_brzydkie_kaczatko.xml @@ -0,0 +1,24 @@ + + + Andersen, Hans Christian + Brzydkie kaczątko + Niewiadomska, Cecylia + Gałecki, Dariusz + Fundacja Nowoczesna Polska + Romantyzm + Epika + Baśń + Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN. + http://wolnelektury.pl/katalog/lektura/brzydkie-kaczatko + http://www.polona.pl/dlibra/doccontent2?id=3563&dirids=4 + Andersen, Hans Christian (1805-1875), Baśnie, Gebethner i Wolff, wyd. 7, Kraków, 1925 + Domena publiczna - tłumacz Cecylia Niewiadomska zm. 
1925 + 1925 + xml + text + text + 2007-08-14 + SP1 + pol + + \ No newline at end of file diff --git a/lib/librarian/tests/biedrzycki_akslop.xml b/lib/librarian/tests/biedrzycki_akslop.xml new file mode 100644 index 00000000..da0cd9fa --- /dev/null +++ b/lib/librarian/tests/biedrzycki_akslop.xml @@ -0,0 +1,25 @@ + + + Biedrzycki, Miłosz + Akslop + Sekuła, Aleksandra + Sutkowska, Olga + Fundacja Nowoczesna Polska + Współczesność + Liryka + Wiersz + Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). + http://wolnelektury.pl/katalog/lektura/akslop + http://free.art.pl/mlb/gwiazdka.html#t1 + Miłosz Biedrzycki, * ("Gwiazdka"), Fundacja "brulion", Kraków-Warszawa, 1993 + Creative Commons Uznanie Autorstwa - Na Tych Samych Warunkach 3.0.PL + http://creativecommons.org/licenses/by-sa/3.0/ + xml + text + text + 2009-06-04 + L + pol + + \ No newline at end of file diff --git a/lib/librarian/tests/kochanowski_piesn7.xml b/lib/librarian/tests/kochanowski_piesn7.xml new file mode 100644 index 00000000..96be1ae0 --- /dev/null +++ b/lib/librarian/tests/kochanowski_piesn7.xml @@ -0,0 +1,27 @@ + + + Kochanowski, Jan + Pieśń VII (Trudna rada w tej mierze: przyjdzie się rozjechać...) + http://www.wolnelektury.pl/lektura/piesni-ksiegi-pierwsze + Sekuła, Aleksandra + Krzyżanowski, Julian + Otwinowska, Barbara + Gałecki, Dariusz + Fundacja Nowoczesna Polska + Renesans + Liryka + Pieśń + Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN. + http://wolnelektury.pl/katalog/lektura/piesni-ksiegi-pierwsze-piesn-vii-trudna-rada-w-tej-mierze-pr + http://www.polona.pl/Content/1499 + Kochanowski, Jan (1530-1584), Dzieła polskie, tom 1, oprac. Julian Krzyżanowski, wyd. 8, Państwowy Instytut Wydawniczy, Warszawa, 1976 + Domena publiczna - Jan Kochanowski zm. 
1584 + 1584 + xml + text + text + 2007-08-31 + L + pol + + \ No newline at end of file diff --git a/lib/librarian/tests/mickiewicz_rybka.xml b/lib/librarian/tests/mickiewicz_rybka.xml new file mode 100644 index 00000000..0796a5b0 --- /dev/null +++ b/lib/librarian/tests/mickiewicz_rybka.xml @@ -0,0 +1,28 @@ + + + Mickiewicz, Adam + Rybka + http://www.wolnelektury.pl/lektura/ballady-i-romanse + Sekuła, Aleksandra + Kallenbach, Józef + Sutkowska, Olga + Fundacja Nowoczesna Polska + Romantyzm + Liryka + Ballada + Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN. + http://wolnelektury.pl/katalog/lektura/ballady-i-romanse-rybka + http://www.polona.pl/Content/2222 + Mickiewicz, Adam (1798-1855), Poezje, tom 1 (Wiersze młodzieńcze - Ballady i romanse - Wiersze do r. 1824), Krakowska Spółdzielnia Wydawnicza, wyd. 2 zwiększone, Kraków, 1922 + Domena publiczna - Adam Mickiewicz zm. 1855 + 1855 + xml + text + text + 2007-09-06 + SP2 + G + L + pol + + \ No newline at end of file diff --git a/lib/librarian/tests/sofokles_antygona.xml b/lib/librarian/tests/sofokles_antygona.xml new file mode 100644 index 00000000..4acb2d4f --- /dev/null +++ b/lib/librarian/tests/sofokles_antygona.xml @@ -0,0 +1,25 @@ + + + Sofokles + Antygona + Sekuła, Aleksandra + Morawski, Kazimierz + Gałecki, Dariusz + Fundacja Nowoczesna Polska + Starożytność + Dramat + Tragedia + Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN. + http://wolnelektury.pl/katalog/lektura/antygona + http://www.polona.pl/Content/3768 + Sofokles (496-406 a.C.), Antygona, Zakład Narodowy im. Ossolińskich, wyd. 7, Lwów, 1939 + Domena publiczna - tłumacz Kazimierz Morawski zm. 
1925 + 1925 + xml + text + text + 2007-08-30 + G + pol + + \ No newline at end of file diff --git a/project/settings.py b/project/settings.py index 6f0f6941..ea7114d8 100644 --- a/project/settings.py +++ b/project/settings.py @@ -100,8 +100,11 @@ INSTALLED_APPS = ( 'django.contrib.sites', 'django.contrib.admin', 'django.contrib.admindocs', + + 'explorer' ) +REPOSITORY_PATH = '/Users/zuber/Projekty/books/01' try: from localsettings import * diff --git a/project/static/css/master.css b/project/static/css/master.css new file mode 100644 index 00000000..1bc59daa --- /dev/null +++ b/project/static/css/master.css @@ -0,0 +1,69 @@ +body { + margin: 0; + font: 100%/1.5 Georgia, Verdana, sans-serif; +} + +#breadcrumbs { + padding: 5px 10px; + background-color: #CDCDCD; + border-bottom: 1px solid #858585; +} + +ul { + margin: 20px; + padding: 0; +} + +li { + margin: 0; + padding: 0; + list-style: none; +} + +a { + text-decoration: none; +} + +a:hover { + text-decoration: underline; +} + +li a { + display: block; + padding: 5px 20px; + width: 480px; +} + +li a:hover { + background-color: #BCD6E9; +} + +#tabs { + border-bottom: 1px solid #858585; + background-color: #B7B7B7; +} + +#tabs a { + display: block; + border-right: 1px solid #858585; + float: left; + padding: 5px 10px; +} + +#tabs a.active { + background-color: #909090; +} + +.theme-begin { + display: none; +} + +textarea { + width: 97%; + height: 42em; + padding: 5px 10px; +} + +#file-text { + padding: 5px 10px; +} diff --git a/project/templates/base.html b/project/templates/base.html new file mode 100644 index 00000000..7e2457bd --- /dev/null +++ b/project/templates/base.html @@ -0,0 +1,17 @@ + + + + + {% block title %}Platforma Redakcyjna{% endblock %} + + {% block extrahead %} + {% endblock %} + + + + {% block maincontent %} + + {% endblock %} + + diff --git a/project/templates/explorer/file_html.html b/project/templates/explorer/file_html.html new file mode 100644 index 00000000..8fed0828 --- /dev/null +++ 
b/project/templates/explorer/file_html.html @@ -0,0 +1,10 @@ +{% extends "base.html" %} + +{% block breadcrumbs %}Platforma Redakcyjna ❯ plik {{ hash }}{% endblock breadcrumbs %} + +{% block maincontent %} + +
    + {{ object|safe }} +
    +{% endblock maincontent %} \ No newline at end of file diff --git a/project/templates/explorer/file_list.html b/project/templates/explorer/file_list.html new file mode 100644 index 00000000..7063d293 --- /dev/null +++ b/project/templates/explorer/file_list.html @@ -0,0 +1,9 @@ +{% extends "base.html" %} + +{% block maincontent %} +
      +{% for blob in objects %} +
    • {{ blob }}
    • +{% endfor %} +
    +{% endblock maincontent %} diff --git a/project/templates/explorer/file_xml.html b/project/templates/explorer/file_xml.html new file mode 100644 index 00000000..14ae61ec --- /dev/null +++ b/project/templates/explorer/file_xml.html @@ -0,0 +1,11 @@ +{% extends "base.html" %} + +{% block breadcrumbs %}Platforma Redakcyjna ❯ plik {{ hash }}{% endblock breadcrumbs %} + +{% block maincontent %} + +
    + {{ form }} +

    +
    +{% endblock maincontent %} diff --git a/project/urls.py b/project/urls.py index e3399662..c3b14c87 100644 --- a/project/urls.py +++ b/project/urls.py @@ -8,7 +8,9 @@ admin.autodiscover() urlpatterns = patterns('', # Example: - # (r'^platforma/', include('platforma.foo.urls')), + url(r'^$', 'explorer.views.file_list', name='file_list'), + url(r'^file/(?P<path>[^/]+)/$', 'explorer.views.file_xml', name='file_xml'), + url(r'^html/(?P<path>[^/]+)/', 'explorer.views.file_html', name='file_html'), # Admin panel url(r'^admin/doc/', include('django.contrib.admindocs.urls')),