From 07fdba2c7fe8e11b6867712d47bdd608e88c29fb Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20St=C4=99pniowski?= Date: Fri, 19 Mar 2010 16:59:37 +0100 Subject: [PATCH 1/1] Extracted from project "Wolnelektury.pl". Version 1.1 --- .gitignore | 4 + MANIFEST.in | 2 + librarian/__init__.py | 0 librarian/__init__.pyc | Bin 0 -> 151 bytes librarian/book2html.xslt | 615 ++++++++++++++++++ librarian/book2txt.xslt | 321 +++++++++ librarian/dcparser.py | 197 ++++++ librarian/dcparser.pyc | Bin 0 -> 8696 bytes librarian/html.py | 242 +++++++ librarian/html.pyc | Bin 0 -> 8882 bytes librarian/tests/__init__.py | 115 ++++ librarian/tests/files/dcparser/.DS_Store | Bin 0 -> 6148 bytes .../dcparser/andersen_brzydkie_kaczatko.xml | 24 + .../files/dcparser/biedrzycki_akslop.xml | 25 + .../files/dcparser/kochanowski_piesn7.xml | 27 + .../tests/files/dcparser/mickiewicz_rybka.xml | 28 + .../files/dcparser/sofokles_antygona.xml | 25 + .../files/erroneous/asnyk_miedzy_nami.html | 46 ++ .../files/erroneous/asnyk_miedzy_nami.xml | 25 + librarian/text.py | 89 +++ librarian/text.pyc | Bin 0 -> 2993 bytes scripts/book2html | 31 + scripts/book2txt | 31 + scripts/bookfragments | 50 ++ scripts/genslugs | 64 ++ scripts/master.css | 207 ++++++ scripts/master.plain.css | 160 +++++ setup.py | 19 + 28 files changed, 2347 insertions(+) create mode 100644 .gitignore create mode 100644 MANIFEST.in create mode 100644 librarian/__init__.py create mode 100644 librarian/__init__.pyc create mode 100644 librarian/book2html.xslt create mode 100644 librarian/book2txt.xslt create mode 100644 librarian/dcparser.py create mode 100644 librarian/dcparser.pyc create mode 100644 librarian/html.py create mode 100644 librarian/html.pyc create mode 100644 librarian/tests/__init__.py create mode 100644 librarian/tests/files/dcparser/.DS_Store create mode 100644 librarian/tests/files/dcparser/andersen_brzydkie_kaczatko.xml create mode 100644 librarian/tests/files/dcparser/biedrzycki_akslop.xml create mode 100644 librarian/tests/files/dcparser/kochanowski_piesn7.xml create mode 100644 librarian/tests/files/dcparser/mickiewicz_rybka.xml create mode 100644 librarian/tests/files/dcparser/sofokles_antygona.xml create mode 100644 librarian/tests/files/erroneous/asnyk_miedzy_nami.html create mode 100644 librarian/tests/files/erroneous/asnyk_miedzy_nami.xml create mode 100644 librarian/text.py create mode 100644 librarian/text.pyc create mode 100755 scripts/book2html create mode 100755 scripts/book2txt create mode 100755 scripts/bookfragments create mode 100755 scripts/genslugs create mode 100644 scripts/master.css create mode 100644 scripts/master.plain.css create mode 100644 setup.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7189e7b --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.DS_Store +*.pyc +MANIFEST +dist diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..4c76fc3 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,2 @@ +include librarian/*.xslt +recursive-include librarian/tests/files/ *.xml diff --git a/librarian/__init__.py b/librarian/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/librarian/__init__.pyc b/librarian/__init__.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3d4eb1301d13decc7aa425513c20d7c2e466f51b GIT binary patch literal 151 zcmdn|iI+?LgQ{mT0~9a + + + + + + +
+ + +
+

Przypisy

+ +
+ + [] + + +

+
+ + + +
+
+
+
+
+
+ +
+ + + + + + + + +

+ +

+
+ +
+ + + + + + + +
+
+ + +
+

+
    + +
+
+
+ + +
+
+ + +
+ +
+
+ + +
+
+ + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + +

+
+ + +

+
+ + +

+
+ + + +

+
+ + +
+
+ + +
  • +
    + + +

    +
    + + +
    + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + + +

    + + + padding-left: 1em + + + + + padding-left: em + + + padding-left: 1em + + + + + padding-left: 12em + + + +

    +
    + + +

    +
    + + + + + + + + + + [] + + + + + + + + + + + + + + + + + + „” + + + + + + + + + + + + + + + + + +
    +
    + + +

    *

    +
    + + +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/librarian/book2txt.xslt b/librarian/book2txt.xslt new file mode 100644 index 0000000..cd98524 --- /dev/null +++ b/librarian/book2txt.xslt @@ -0,0 +1,321 @@ + + + + + + + + + +Kodowanie znaków w dokumencie: UTF-8. +----- +Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl/). Reprodukcja cyfrowa wykonana przez +Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN. Ten utwór nie jest chroniony prawem autorskim i znajduje +się w domenie publicznej, co oznacza, że możesz go swobodnie wykorzystywać, publikować i rozpowszechniać. + +Wersja lektury w opracowaniu merytorycznym i krytycznym (przypisy i motywy) dostępna jest na stronie %s. +----- + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +/ / + + + + + * + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +„” + + + +** + + + + + + + + + + + + + + + + + + + + + + +* + + + + + + + +------------------------------------------------ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/librarian/dcparser.py b/librarian/dcparser.py new file mode 100644 index 0000000..557509c --- /dev/null +++ b/librarian/dcparser.py @@ -0,0 +1,197 @@ +# -*- coding: utf-8 -*- +from xml.parsers.expat import ExpatError +from datetime import date +import time + +# Import ElementTree from anywhere +try: + import xml.etree.ElementTree as etree # Python >= 2.5 +except ImportError: + try: + import elementtree.ElementTree as etree # effbot's pure Python module + except ImportError: + import lxml.etree as etree # ElementTree API using libxml2 + + +# ============== +# = Converters = +# ============== +class Person(object): + """Single person with last name and a list of first names.""" + def __init__(self, last_name, *first_names): + self.last_name = last_name + self.first_names = first_names + + + def __eq__(self, right): + return self.last_name == right.last_name and self.first_names == right.first_names + + + def __unicode__(self): + if len(self.first_names) > 0: + return '%s, %s' % (self.last_name, ' '.join(self.first_names)) + else: + return self.last_name + + + def __repr__(self): + return 'Person(last_name=%r, first_names=*%r)' % (self.last_name, self.first_names) + + +def str_to_unicode(value, previous): + return unicode(value) + + +def str_to_unicode_list(value, previous): + if previous is None: + previous = [] + previous.append(str_to_unicode(value, None)) + return previous + + +def str_to_person(value, previous): + comma_count = value.count(',') + + if comma_count == 0: + last_name, first_names = value, [] + elif comma_count == 1: + last_name, first_names = value.split(',') + first_names = [name for name in first_names.split(' ') if len(name)] + else: + raise ValueError("value contains more than one comma: %r" % value) + + return Person(last_name.strip(), *first_names) + + +def str_to_date(value, previous): + try: + t = time.strptime(value, '%Y-%m-%d') + except ValueError: + t = time.strptime(value, '%Y') + return date(t[0], t[1], t[2]) + + +# ========== +# = Parser = +# ========== +class ParseError(Exception): + def __init__(self, message): + super(ParseError, self).__init__(message) + + +class XMLNamespace(object): + '''Represents XML namespace.''' + + def __init__(self, uri): + self.uri = uri + + def __call__(self, tag): + return '{%s}%s' % (self.uri, tag) + + def __contains__(self, tag): + return tag.startswith(str(self)) + + def __repr__(self): + return 'XMLNamespace(%r)' % self.uri + + def __str__(self): + return '%s' % self.uri + + +class BookInfo(object): + RDF = XMLNamespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#') + DC = XMLNamespace('http://purl.org/dc/elements/1.1/') + + mapping = { + DC('creator') : ('author', str_to_person), + DC('title') : ('title', str_to_unicode), + DC('subject.period') : ('epoch', str_to_unicode), + DC('subject.type') : ('kind', str_to_unicode), + DC('subject.genre') : ('genre', str_to_unicode), + DC('date') : ('created_at', str_to_date), + DC('date.pd') : ('released_to_public_domain_at', str_to_date), + DC('contributor.translator') : ('translator', str_to_person), + DC('contributor.technical_editor') : ('technical_editor', str_to_person), + DC('publisher') : ('publisher', str_to_unicode), + DC('source') : ('source_name', str_to_unicode), + DC('source.URL') : ('source_url', str_to_unicode), + DC('identifier.url') : ('url', str_to_unicode), + DC('relation.hasPart') : ('parts', str_to_unicode_list), + DC('rights.license') : ('license', str_to_unicode), + DC('rights') : ('license_description', str_to_unicode), + } + + @classmethod + def from_string(cls, xml): + from StringIO import StringIO + return cls.from_file(StringIO(xml)) + + @classmethod + def from_file(cls, xml_file): + book_info = cls() + + try: + tree = etree.parse(xml_file) + except ExpatError, e: + raise ParseError(e) + + description = tree.find('//' + book_info.RDF('Description')) + book_info.wiki_url = description.get(cls.RDF('about'), None) + + if description is None: + raise ParseError('no Description tag found in document') + + for element in description.findall('*'): + book_info.parse_element(element) + + return book_info + + def parse_element(self, element): + try: + attribute, converter = self.mapping[element.tag] + setattr(self, attribute, converter(element.text, getattr(self, attribute, None))) + except KeyError: + pass + + def to_xml(self): + """XML representation of this object.""" + etree._namespace_map[str(self.RDF)] = 'rdf' + etree._namespace_map[str(self.DC)] = 'dc' + + root = etree.Element(self.RDF('RDF')) + description = etree.SubElement(root, self.RDF('Description')) + + if self.wiki_url: + description.set(self.RDF('about'), self.wiki_url) + + for tag, (attribute, converter) in self.mapping.iteritems(): + if hasattr(self, attribute): + e = etree.Element(tag) + e.text = unicode(getattr(self, attribute)) + description.append(e) + + return unicode(etree.tostring(root, 'utf-8'), 'utf-8') + + def to_dict(self): + etree._namespace_map[str(self.RDF)] = 'rdf' + etree._namespace_map[str(self.DC)] = 'dc' + + result = {'about': self.wiki_url} + for tag, (attribute, converter) in self.mapping.iteritems(): + if hasattr(self, attribute): + result[attribute] = unicode(getattr(self, attribute)) + + return result + + +def parse(file_name): + return BookInfo.from_file(file_name) + + +if __name__ == '__main__': + import sys + + info = parse(sys.argv[1]) + for attribute, _ in BookInfo.mapping.values(): + print '%s: %r' % (attribute, getattr(info, attribute, None)) + diff --git a/librarian/dcparser.pyc b/librarian/dcparser.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0e911b8d97be8a856f2b1d6fcbfb500ab12c811a GIT binary patch literal 8696 zcmcIpTXPi074Df`t+YZ{1S@1?vKTgz9kgpms$x4hq%hdV7a5OiV`T7Tw9}GCTFore zy+}$RRS7)iAuoAwRo?UHO64&Rsmf2vA4tCM^y~#(NySxb!Swc=zMu1*)90M|+rMU~ z{}Eq*zpaYDdHntok9q1T7306B_Lb^*YSU95-+i_3t4>L6mfU+u?U&VNSsmA;J65+y^UV&Z z*xy{AIzOPEQt^UJby{g0bW!d3D*I}{fn~M1tTft|RdPnfC)LUmaBM{#T~+#&N?uUO zi(K=@{U51&zS=yiHeXWllG10DKBx2wD>(6!lG^rF?cMhteB#|n$?uZdSUm-4B>%?W zDkP%pG>V%?-N@d|^DK9?GH7vR^}4N~c3bBrO9v9=sOxyl6%-nb0{c+<6fOByqD)yS za}TUcsJJYZiV8MXCph+JC^nLGr=#m#nZACQ*xh<3GPa&Z2f7}maXqSc61->I_3b1t z8ceh8V{8czf8WJpYA7Cg>XA|*D!x=?QyX%e^;Pz?RTP{*~>n6WQ$K8c-2k{gz42DVSj~i()7Ssf|Og0>W?zuIXA-Pv722QpR5w!lrz8+XE7r4Ai(!wtU7f}Of$2bX619MQ9P=qf zG|!weq-9^jN@fa$@@BnhsEx>r@FDL-Y5t0e(-^1@4)hr3gURs494?BH01N!bCH1(h z(i{XaC^VLzN-NBy8;N_;^+Nyh;|cYsOcRQW3A5g~e@;EFC^E0Y_v{45qDw`ce9%@N zp5IQ$0$35I2HpbpR71z#dt~V&Sg;B81UoIOKak->{koW^OJcOJ5ExrUVSb20P*rbd zsg07<)DN;;*X?eU)*%>>4-TR?>y2Dy5&PZFda0F~>2?wu2#8sHPI$__EgImu*gWZq zk_H2D%55hThcegDdI%uGFE8Jh3iWx+qNm0e0CyMtPf1unp}cv2)?0+=PkYz=1z&_u zkP;Ch{7qEg20~xKKyaLbgcNJJu1Kdj=aqu842Z$B^l$HEUz?-5Y zBvKOmHSVr84%Qm6=w0LPP-eKEpbS($;g3z=IdFS8QI{{li+BkrU2z~rI4(A2%)I1{ zQ}B}B*>%1W2FfczdFH%jPfYP`x*LLuHXHfJ-AMOER5V;Nu^~@U@#&JWS=i6xQUt<& zUhLp2a17pqsbB`o2~bbJ{TZu5bu~T6H)x`ov?&Zl)N&)YkG4qML{tVCa z*BKX=EfS4Gm&lW7WkdF~F0RyX8s>4sI1f(t$HO_lmUGx>XS_Qu+?saO>4f1ISmD3! zoHrV8+<|(CV)T`Q+Z0taHp;DGXd@mfly`*VucFtO_fI5-u<`C@3Bxb(-aiB7&SwA+ znv((dYkj`cG>?LLl`bd8IfO`!YCYbai)Z*1ru~k)USEo=9 zh8Bm+6SEWin6)Mg>S54AG31!k#^FB(oGHL#D0w)(ezoNO36!sA+5W9`J8Pp+{wTQX zc+5B8lQT#f7bDC&(hDWUx&Rrm`!3uh_*r^_TIp*E=F#hF8P}+;42a@CfjBWr6ekMA z$x-4I7Xh(4N}Mi;CzPHX%sEpKXGe*11#y0KOA7^Yag=znAR-?!+~-n3JT*!@T@Y)d z#N~o`W|X*65MNNpKJ^)b9E_X@S(o8LFBL@OVTQyX2yqifpH^|5xeziOhsNvq{wp$1 zFSE|f(*>y@^K?-v^SlHa6dZB2Yi;+!WKkztY+^fFYPy@X zcMY*_dRu$CZJW68lPng`Xg^6~!C=459(Uca9i8IF5;w3ICKNG`vXG9$$chhT&tmc% zTZ;^c2#LDAtxnQLD0u)!8NCc+1G+xVZY&enJW5T6o0TLG4PTyY_3+*tHd$(=)$Lt` z>QN`ual-yw>zV$~b&!0KY{2a5Ts&x#_42lM`6IcJ<=q*2*8~oWN9KAqKMOt-yH7G1 zm&D+CvYj9q1d?zSxgl%BAI8BWn`CKoH!^VHtcU`~*QwDmTGBYCS&%HRprRhe+O*;J zbGQi8brqM67iAfbT(}odfrdk`_c0i;BeI}y5a?d_ojU+?Lma8~ktE?1#%+ML>CUZ> z-3@jF6k*YJo*jhSNk_XB8kcK#j2!#upcAm4m^&%PVe?$-j;q2NV@KG3kw^0vh7lx@ zB1irwD(Lb=4vTDY5h;k7LqK>ZJwFH~P9u~+7|9S1ksKI-@J_-A26!-l!X-p{U>_Qn z@i2~*^fZ}>2IO4Dvxc_}hK5sYja{&g+#TkP2lK*>KDsEQs|+YH?#BHkMxhv(Oc#Nu z_uwz;4ha2Vh1XrHB^Rn`@`9lC0#FqQuV??cRoVi$sGP@5t|twgK~(a?2bh`$WvZ|+ zH7+E5r#}eT5Yz~Uws21&E4rPzD2rR+J?^r!quAyO{rOOJ$QlQzf}5D_cX$k=9_3g4 zlDFh7c#Hm7?;OI^1%Cn8>!J%%%jm)TR`7KXmE-G#7@R@6gq~WtClM&n<?P_KX>T@E`Sm~eB4QwfWnFdw38Chy6+RiIEyD21|J}61Jnof9))1EjjYWb>-3Sc z8xRnQLm_>jk6nVFn->>4xXT%;DAm>xOZAzM_ zqeT^tE`S1EbBgLX2*yD{u0rvip*rV-B+1C2m(xJ>MOKV-sFQ?1Y?9t!ICDs7np9a` z@M6z`JP!ARGJejd|mNBcs+$0fKb{Nl37FD?)*%Q6iBw2>0U>_|Gj4lQ@vksULs zG^3ylgC1OPAhIxw;we2kyq7K`^$7#fS7-ClcrmEl+LAqq)Dx)d|x*AN&B*4W6Q zGR=**1#35*+aOooMsKTl4L)JF5(;23VAe)}lmJ6`4vY-NttN<+Men0eaE}GQ-G~*j znKS3kPKfJ4sODK_1BPM(_yD1-3Vy}n4vO(2RBldq9WYIQ#^i-1sChO2jQ65H?bTre zPQoV4duP1UF2T=j@)6t;+}{PaySSKv3t-m=Y_9^bN4EuR^0iaIc1>XB7mx$qz$!vX z3`h17z*c3TIUo^T_>b8htOHT&xIB2BlacE|gfiG-K^%zf7wHFW(gyMlV!l$OBFX^U zQ~_+83I?K+Z+I>ar`7_Cy^eiOKx3AT*uN}{leYZ^OBNYK3I`$)v7Gl;CH$hJ-iZ^kbpA^N}mEtlpe=1)~8t4nR#PCnFh!62_)S`aZ~K*MN8BH$8oE{W$wN9nkU z;t$U@mScT{rT&hm2I)nSLE}65mP5_ zP#B20tMQv!b*_4%Hc?xueO@ik L0\n', line) + doc_file.write(line.encode('utf-8')) + f.close() + + doc_file.seek(0); + + parser = etree.XMLParser(remove_blank_text=True) + doc = etree.parse(doc_file, parser) + + result = doc.xslt(style) + if result.find('//p') is not None: + add_anchors(result.getroot()) + add_table_of_contents(result.getroot()) + result.write(output_filename, xml_declaration=False, pretty_print=True, encoding='utf-8') + return True + else: + return False + + +class Fragment(object): + def __init__(self, id, themes): + super(Fragment, self).__init__() + self.id = id + self.themes = themes + self.events = [] + + def append(self, event, element): + self.events.append((event, element)) + + def closed_events(self): + stack = [] + for event, element in self.events: + if event == 'start': + stack.append(('end', element)) + elif event == 'end': + try: + stack.pop() + except IndexError: + print 'CLOSED NON-OPEN TAG:', element + + stack.reverse() + return self.events + stack + + def to_string(self): + result = [] + for event, element in self.closed_events(): + if event == 'start': + result.append(u'<%s %s>' % (element.tag, ' '.join('%s="%s"' % (k, v) for k, v in element.attrib.items()))) + if element.text: + result.append(element.text) + elif event == 'end': + result.append(u'' % element.tag) + if element.tail: + result.append(element.tail) + else: + result.append(element) + + return ''.join(result) + + def __unicode__(self): + return self.to_string() + + +def extract_fragments(input_filename): + """Extracts theme fragments from input_filename.""" + open_fragments = {} + closed_fragments = {} + + for event, element in etree.iterparse(input_filename, events=('start', 'end')): + # Process begin and end elements + if element.get('class', '') in ('theme-begin', 'theme-end'): + if not event == 'end': continue # Process elements only once, on end event + + # Open new fragment + if element.get('class', '') == 'theme-begin': + fragment = Fragment(id=element.get('fid'), themes=element.text) + + # Append parents + if element.getparent().get('id', None) != 'book-text': + parents = [element.getparent()] + while parents[-1].getparent().get('id', None) != 'book-text': + parents.append(parents[-1].getparent()) + + parents.reverse() + for parent in parents: + fragment.append('start', parent) + + open_fragments[fragment.id] = fragment + + # Close existing fragment + else: + try: + fragment = open_fragments[element.get('fid')] + except KeyError: + print '%s:closed not open fragment #%s' % (input_filename, element.get('fid')) + else: + closed_fragments[fragment.id] = fragment + del open_fragments[fragment.id] + + # Append element tail to lost_text (we don't want to lose any text) + if element.tail: + for fragment_id in open_fragments: + open_fragments[fragment_id].append('text', element.tail) + + + # Process all elements except begin and end + else: + # Omit annotation tags + if len(element.get('name', '')) or element.get('class', '') == 'annotation': + if event == 'end' and element.tail: + for fragment_id in open_fragments: + open_fragments[fragment_id].append('text', element.tail) + else: + for fragment_id in open_fragments: + open_fragments[fragment_id].append(event, copy.copy(element)) + + return closed_fragments, open_fragments + + +def add_anchor(element, prefix, with_link=True, with_target=True, link_text=None): + if with_link: + if link_text is None: + link_text = prefix + anchor = etree.Element('a', href='#%s' % prefix) + anchor.set('class', 'anchor') + anchor.text = unicode(link_text) + if element.text: + anchor.tail = element.text + element.text = u'' + element.insert(0, anchor) + + if with_target: + anchor_target = etree.Element('a', name='%s' % prefix) + anchor_target.set('class', 'target') + anchor_target.text = u' ' + if element.text: + anchor_target.tail = element.text + element.text = u'' + element.insert(0, anchor_target) + + +def any_ancestor(element, test): + for ancestor in element.iterancestors(): + if test(ancestor): + return True + return False + + +def add_anchors(root): + counter = 1 + for element in root.iterdescendants(): + if any_ancestor(element, lambda e: e.get('class') in ('note', 'motto', 'motto_podpis', 'dedication') + or e.tag == 'blockquote'): + continue + + if element.tag == 'p' and 'verse' in element.get('class', ''): + if counter == 1 or counter % 5 == 0: + add_anchor(element, "f%d" % counter, link_text=counter) + counter += 1 + elif 'paragraph' in element.get('class', ''): + add_anchor(element, "f%d" % counter, link_text=counter) + counter += 1 + + +def add_table_of_contents(root): + sections = [] + counter = 1 + for element in root.iterdescendants(): + if element.tag in ('h2', 'h3'): + if any_ancestor(element, lambda e: e.get('id') in ('footnotes',) or e.get('class') in ('person-list',)): + continue + + if element.tag == 'h3' and len(sections) and sections[-1][1] == 'h2': + sections[-1][3].append((counter, element.tag, ''.join(element.xpath('text()')), [])) + else: + sections.append((counter, element.tag, ''.join(element.xpath('text()')), [])) + add_anchor(element, "s%d" % counter, with_link=False) + counter += 1 + + toc = etree.Element('div') + toc.set('id', 'toc') + toc_header = etree.SubElement(toc, 'h2') + toc_header.text = u'Spis treści' + toc_list = etree.SubElement(toc, 'ol') + + for n, section, text, subsections in sections: + section_element = etree.SubElement(toc_list, 'li') + add_anchor(section_element, "s%d" % n, with_target=False, link_text=text) + + if len(subsections): + subsection_list = etree.SubElement(section_element, 'ol') + for n, subsection, text, _ in subsections: + subsection_element = etree.SubElement(subsection_list, 'li') + add_anchor(subsection_element, "s%d" % n, with_target=False, link_text=text) + + root.insert(0, toc) + diff --git a/librarian/html.pyc b/librarian/html.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dfb837db2bd1077d904719c9d591cb02dd42d5bc GIT binary patch literal 8882 zcmb_iU2GiJb-pu8F1ag`qDY!DEj!vsp}n$2GF4IAP-(@sC`&>lN}ZKTnH95H?hMHx zmov+`vl5v~K@hqr`c@zT`cNcq$#c=X1n5%>B#$jx6etP=C{UmeeJarRqW!+_?k*|C zE)bZa=JNjEbMHClJLlZXpa1u0`O&|ipLwxqvNJ9Mg%#Jau{k&&ZyT$}1XG}C=Hn8{vYYvV1bam8(lWHC_(P{n%wc(Wc48)u< z;VWvIFyXW^lO{Z?%oMxTs&8O}@&DrAa@9jNA4C%6Srl~&$j!~o>G!{X??)Y9eMI{NVP3PwTaZAJ>q1_~7=v79hYgy1)I5Un?DtTH-JNa+4 z+*Un>B%|@Jr$}Fczlv9ryVOR&!Y^9kMMVr9lG|eFct}Sn@ zx1xHwQ5TXVxz*C2G+H*Qa=cLFML7^FUXlO@Hu(YO>WPacvaHg1a`7PZo}^zu!M=^e6u#=6_D8(e{nJ3LqIcSx@Lu!E z{-{6UO`vYt8^_OiZ`jv%KFJ!JyI^ROL-~)=O&zkoha`gah58v8Mi_HX8CY^(8JO{c zGK18~&(OJOf|?9OlVZ{Pu5-RCNPv4dE?^Z<0w(Q{(4ul3_6wHFnEHU(g!w}Ta~dc$ z=~=GwP#q&W?I;W0XOT)$ug6K8*XvS`IMhk!ThVT0ofJO7)@+q@h(R`LZ3MrMQm}~R z1epWbgqJ?V-_z!iCyHaE+Dn zM7S|20!$qFcanRdr*I3OB_YOZk_7|!$ON(SFOUJ3;9~3_bhop#mK<|MBr4nRUe1ca z*efIkeo1$)d*i4^AK$Rg?-=i3Ozw-5h?TwXngQs}cG;xph<}i>n;3ob7FxVV9F32z z4ICcHkWLikvo*OC%UzvqWVv(@=O=@H{m#AGjit)+z2&)k_irp$9(;83s-#t?9dO_3 z-%i_daBe4Iw0|SZ(o9F4Mevb;Cp?QIR=@*l?gSsQ?js~8$OxEqfHKiDtYc|N@;6ao zUqxcPqF07bGU}bhSMbL0Gwqj;ctt!Hul@s>F0cNf%PSn&(%RpC9=jgB$S$ZTjy{II zOS{1W9C$p)Ghc$i#(yw$nx>mgMUzhT#=A0G_Wl_koLKUEQ2c(k*9rav`yHEN=%p#( zqa2?C42R0V&0*!FS%D7%;=p%Q)T-3H<$y}q;qDh_ZDrP86FgU%l;HwC^3L+Rgk=8$ zPh{3h(eb=H1xVb)$)c5LR1f}Z)WYn>gXPfkzP>JRNry;kONY$caoS64@?f}aM)O-Omr;~y)alJ5?HVAreOz8L|ukK z{BWEhkpbPaBl?v4(t(U@BH5)ADte`2sZbg#jq3z?>}0l1@XtZ|me8B2Z}y-1E*`WA9~)>+PD43aODe(J?Q>JH(>6e`^Zd~Z7BPXJgPka0wCTVf<|6YX!>&JqLlt0 z;iseXaBr>>brv2ydlSTcgfYwuT)5aUgLf`=%4`=*`aOU~uXz)yiYVtBjB?=k&wSya za~Ome1~Ja@j#${+`<3w&-zdED2n39nrWFt&zQNrC437jN#23^D$9JGcjimsS@&GjkFa_3Rx$%?##ynHo3%gx2kd>yq?Zxu zU*alDYi}4tTI?{49fLlYO!dJTd3%5Dt?Yf@Cks486Wyg0=b;%JZeyu#ZJ&$dVhrN2 zJ{MC$^KZBCJAQxR{dXLp7hfb4v%`?ZI0F}4lH5PSs0*;**tl#+tvW@?okMbCA3kj} zw-vc@m5r`{ibyU?cVCL+=AGRpi5Z3wHZ6(CwU{`3at)}u|_RS11lhDRVjQ09Ik z4&@K3ok8CmBQ{LNK34#>`2G*>_g@sBj#VL01SgJyUM!CM?h6A*mDg#NA|>YLGl zoS8tEOq>M)vGBi{whyYDB)H7ZT$tOe_fJ0`M^)Dq=~o_^2uTxL8Q}U&;Pa!v0M!`9vc^4$xTS$yw^ostp8S!Vlsltr+)xvp%MQHNN?agY!92Xi~@eqwJj#2d9L6PAPL~R9$%;>BW6X*wISQn%f=gUZt*;gWU!$mq$SzIWnu4|K} z1x~E#=s?wRwIJIp4@T?bf@LG#*Y(5GINz!xSl^LMs^k_|D;soY6Tm(ayQt5+d8oTi zCn>lBi~iNdPqCE!8WQ6T!3ipRGYHjB<2N1sDSt@iQn%bBgiwk3NgBV746TI@_>fNi zIofw{A2JFfkPb^SYI~!A;8C*z&$Sm{a}Mp4D8n0Fo8zV>AT7Z=NL=_y_rm#$XqV3S zx}M~jP@Edc0k3;R#^0bUr^diZgrkVlXwu;$&Ha${KT&{|F(m!<@e}xaU90K@+@%F1 zH>I;O>6{haLpUEM{WkPLH|SImTt7(6>FLl$LW4bdI%iyA$V|gWVf+d&gUSmncbZ18 zh8Mb?(?|_`kJs_M%7?yT$(QDz{lXdCvc1uYdHNcMSO}QRgdZ`fhRvnOemg+`Qxq3Sd+ql@LpoUQxH{FdTi|ehlxwF^7 zKo>gfqb@jL@*O6Bj6`NGSY`1pizHM)5uK#K3}e>CR%3TPY+U;#@&c7z_TzTOfb8gs zNw<h&87=~kRkI}Oig{SwNb_|*s zEHl%+;kkNLS%mW{Wx>o`3=uqx9r7?XdLDqpg3Jy1ie9pyk-xTJgGt4=0%r3)XhXsD5tYVKaesUC9KI?J@BU#&Q3qgm)9r^o^3 zpRn!+NUDUN;QLI7pALC=3FO>+0=MvOUr$1$Rn}TTL$J-_4!aPoYn}D31Ld40ddF2F zL~zJ1#84aQG1vyscX#y@{2ALg96PCRMU4=C6}z&i_g zzU^e^?d;o-?E-M)_t`Zt1Tdfr_Dx`a-FgAMXY5`Ch2d;v}A=-vSuB5 zMx3?ABfa2Sqy|}8MTORQVzlbaw2$wG)np2o0*9*r&up>5X+UdD0aL&f*eW332TvDF zA~pf_)4@is0K`7SYV6BhLUMe>Bw`bgPiQWr5<{wsR}2@@8TWBq60r#w(&2Pw^2C{4 zyrDRqopGtDuXqqz`rW+4Xmw^kpKVy literal 0 HcmV?d00001 diff --git a/librarian/tests/files/dcparser/andersen_brzydkie_kaczatko.xml b/librarian/tests/files/dcparser/andersen_brzydkie_kaczatko.xml new file mode 100644 index 0000000..d653a9b --- /dev/null +++ b/librarian/tests/files/dcparser/andersen_brzydkie_kaczatko.xml @@ -0,0 +1,24 @@ + + + Andersen, Hans Christian + Brzydkie kaczątko + Niewiadomska, Cecylia + Gałecki, Dariusz + Fundacja Nowoczesna Polska + Romantyzm + Epika + Baśń + Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN. + http://wolnelektury.pl/katalog/lektura/brzydkie-kaczatko + http://www.polona.pl/dlibra/doccontent2?id=3563&dirids=4 + Andersen, Hans Christian (1805-1875), Baśnie, Gebethner i Wolff, wyd. 7, Kraków, 1925 + Domena publiczna - tłumacz Cecylia Niewiadomska zm. 1925 + 1925 + xml + text + text + 2007-08-14 + SP1 + pol + + \ No newline at end of file diff --git a/librarian/tests/files/dcparser/biedrzycki_akslop.xml b/librarian/tests/files/dcparser/biedrzycki_akslop.xml new file mode 100644 index 0000000..da0cd9f --- /dev/null +++ b/librarian/tests/files/dcparser/biedrzycki_akslop.xml @@ -0,0 +1,25 @@ + + + Biedrzycki, Miłosz + Akslop + Sekuła, Aleksandra + Sutkowska, Olga + Fundacja Nowoczesna Polska + Współczesność + Liryka + Wiersz + Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). + http://wolnelektury.pl/katalog/lektura/akslop + http://free.art.pl/mlb/gwiazdka.html#t1 + Miłosz Biedrzycki, * ("Gwiazdka"), Fundacja "brulion", Kraków-Warszawa, 1993 + Creative Commons Uznanie Autorstwa - Na Tych Samych Warunkach 3.0.PL + http://creativecommons.org/licenses/by-sa/3.0/ + xml + text + text + 2009-06-04 + L + pol + + \ No newline at end of file diff --git a/librarian/tests/files/dcparser/kochanowski_piesn7.xml b/librarian/tests/files/dcparser/kochanowski_piesn7.xml new file mode 100644 index 0000000..96be1ae --- /dev/null +++ b/librarian/tests/files/dcparser/kochanowski_piesn7.xml @@ -0,0 +1,27 @@ + + + Kochanowski, Jan + Pieśń VII (Trudna rada w tej mierze: przyjdzie się rozjechać...) + http://www.wolnelektury.pl/lektura/piesni-ksiegi-pierwsze + Sekuła, Aleksandra + Krzyżanowski, Julian + Otwinowska, Barbara + Gałecki, Dariusz + Fundacja Nowoczesna Polska + Renesans + Liryka + Pieśń + Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN. + http://wolnelektury.pl/katalog/lektura/piesni-ksiegi-pierwsze-piesn-vii-trudna-rada-w-tej-mierze-pr + http://www.polona.pl/Content/1499 + Kochanowski, Jan (1530-1584), Dzieła polskie, tom 1, oprac. Julian Krzyżanowski, wyd. 8, Państwowy Instytut Wydawniczy, Warszawa, 1976 + Domena publiczna - Jan Kochanowski zm. 1584 + 1584 + xml + text + text + 2007-08-31 + L + pol + + \ No newline at end of file diff --git a/librarian/tests/files/dcparser/mickiewicz_rybka.xml b/librarian/tests/files/dcparser/mickiewicz_rybka.xml new file mode 100644 index 0000000..0796a5b --- /dev/null +++ b/librarian/tests/files/dcparser/mickiewicz_rybka.xml @@ -0,0 +1,28 @@ + + + Mickiewicz, Adam + Rybka + http://www.wolnelektury.pl/lektura/ballady-i-romanse + Sekuła, Aleksandra + Kallenbach, Józef + Sutkowska, Olga + Fundacja Nowoczesna Polska + Romantyzm + Liryka + Ballada + Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN. + http://wolnelektury.pl/katalog/lektura/ballady-i-romanse-rybka + http://www.polona.pl/Content/2222 + Mickiewicz, Adam (1798-1855), Poezje, tom 1 (Wiersze młodzieńcze - Ballady i romanse - Wiersze do r. 1824), Krakowska Spółdzielnia Wydawnicza, wyd. 2 zwiększone, Kraków, 1922 + Domena publiczna - Adam Mickiewicz zm. 1855 + 1855 + xml + text + text + 2007-09-06 + SP2 + G + L + pol + + \ No newline at end of file diff --git a/librarian/tests/files/dcparser/sofokles_antygona.xml b/librarian/tests/files/dcparser/sofokles_antygona.xml new file mode 100644 index 0000000..4acb2d4 --- /dev/null +++ b/librarian/tests/files/dcparser/sofokles_antygona.xml @@ -0,0 +1,25 @@ + + + Sofokles + Antygona + Sekuła, Aleksandra + Morawski, Kazimierz + Gałecki, Dariusz + Fundacja Nowoczesna Polska + Starożytność + Dramat + Tragedia + Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN. + http://wolnelektury.pl/katalog/lektura/antygona + http://www.polona.pl/Content/3768 + Sofokles (496-406 a.C.), Antygona, Zakład Narodowy im. Ossolińskich, wyd. 7, Lwów, 1939 + Domena publiczna - tłumacz Kazimierz Morawski zm. 1925 + 1925 + xml + text + text + 2007-08-30 + G + pol + + \ No newline at end of file diff --git a/librarian/tests/files/erroneous/asnyk_miedzy_nami.html b/librarian/tests/files/erroneous/asnyk_miedzy_nami.html new file mode 100644 index 0000000..1d7e17f --- /dev/null +++ b/librarian/tests/files/erroneous/asnyk_miedzy_nami.html @@ -0,0 +1,46 @@ +
    +
    +

    Spis treści

    +
      +
    +

    + Adam Asnyk + Między nami nic nie było +

    +
    +

    1Między nami nic nie było!

    +

    + Żadnych zwierzeń, wyznań żadnych!

    +

    + Nic nas z sobą nie łączyło —

    +

    + Prócz wiosennych marzeń zdradnych;

    +
    +
    +

    5Prócz tych woni, barw i blasków,

    +

    + Unoszących się w przestrzeni;

    +

    + Prócz szumiących śpiewem lasków

    +

    + I tej świeżej łąk zieleni;

    +
    +
    +

    Prócz tych kaskad i potoków,

    +

    10 + Zraszających każdy parów,

    +

    + Prócz girlandy tęcz, obłoków,

    +

    + Prócz natury słodkich czarów;

    +
    +
    +

    Prócz tych wspólnych, jasnych zdrojów,

    +

    + Z których serce zachwyt piło;

    +

    15 + Prócz pierwiosnków i powojów,—

    +

    + Między nami nic nie było!

    +
    +
    diff --git a/librarian/tests/files/erroneous/asnyk_miedzy_nami.xml b/librarian/tests/files/erroneous/asnyk_miedzy_nami.xml new file mode 100644 index 0000000..aa5ef17 --- /dev/null +++ b/librarian/tests/files/erroneous/asnyk_miedzy_nami.xml @@ -0,0 +1,25 @@ + + + Adam Asnyk + Między nami nic nie było + + Między nami nic nie było!/ + Żadnych zwierzeń, wyznań żadnych!/ + Nic nas z sobą nie łączyło ---/ + Prócz wiosennych marzeń zdradnych; + + Prócz tych woni, barw i blasków,/ + Unoszących się w przestrzeni;/ + Prócz szumiących śpiewem lasków/ + I tej świeżej łąk zieleni; + + Prócz tych kaskad i potoków,/ + Zraszających każdy parów,/ + Prócz girlandy tęcz, obłoków,/ + Prócz natury słodkich czarów; + + Prócz tych wspólnych, jasnych zdrojów,/ + Z których serce zachwyt piło;/ + Prócz pierwiosnków i powojów,---/ + Między nami nic nie było! + diff --git a/librarian/text.py b/librarian/text.py new file mode 100644 index 0000000..db0d2b2 --- /dev/null +++ b/librarian/text.py @@ -0,0 +1,89 @@ +# -*- coding: utf-8 -*- +import os +import cStringIO +import re +import codecs + +from lxml import etree + +from librarian import dcparser + + +ENTITY_SUBSTITUTIONS = [ + (u'---', u'—'), + (u'--', u'–'), + (u'...', u'…'), + (u',,', u'„'), + (u'"', u'”'), +] + + +MAX_LINE_LENGTH = 80 + + +def strip(context, text): + """Remove unneeded whitespace from beginning and end""" + if isinstance(text, list): + text = ''.join(text) + return re.sub(r'\s+', ' ', text).strip() + + +def substitute_entities(context, text): + """XPath extension function converting all entites in passed text.""" + if isinstance(text, list): + text = ''.join(text) + for entity, substitutution in ENTITY_SUBSTITUTIONS: + text = text.replace(entity, substitutution) + return text + + +def wrap_words(context, text): + """XPath extension function automatically wrapping words in passed text""" + if isinstance(text, list): + text = ''.join(text) + words = re.split(r'\s', text) + + line_length = 0 + lines = [[]] + for word in words: + line_length += len(word) + 1 + if line_length > MAX_LINE_LENGTH: + # Max line length was exceeded. We create new line + lines.append([]) + line_length = len(word) + lines[-1].append(word) + return '\n'.join(' '.join(line) for line in lines) + + +# Register substitute_entities function with lxml +ns = etree.FunctionNamespace('http://wolnelektury.pl/functions') +ns['strip'] = strip +ns['substitute_entities'] = substitute_entities +ns['wrap_words'] = wrap_words + + +def transform(input_filename, output_filename): + """Transforms file input_filename in XML to output_filename in TXT.""" + # Parse XSLT + style_filename = os.path.join(os.path.dirname(__file__), 'book2txt.xslt') + style = etree.parse(style_filename) + + doc_file = cStringIO.StringIO() + expr = re.compile(r'/\s', re.MULTILINE | re.UNICODE); + + f = open(input_filename, 'r') + for line in f: + line = line.decode('utf-8') + line = expr.sub(u'
    \n', line) + doc_file.write(line.encode('utf-8')) + f.close() + + doc_file.seek(0) + + parser = etree.XMLParser(remove_blank_text=True) + doc = etree.parse(doc_file, parser) + + result = doc.xslt(style) + output_file = codecs.open(output_filename, 'wb', encoding='utf-8') + output_file.write(unicode(result) % dcparser.parse(input_filename).url) + diff --git a/librarian/text.pyc b/librarian/text.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6c6eb91802d9dea061615b0f83adc4b5dc39c1c6 GIT binary patch literal 2993 zcmb_eUvC>l5T8B&N$kW;nxtvcLgj*lEY#RlUnmetX%irjy3xgH#<<7a? zYaHZAmC83LukgqlPdxH%_y~Lf_|2S?lp-V~>OXrkyK_6UJHOxT{`yCyZ0paz>eBEt zj^|zc?JsBo{1ojIZJo)Aw$y3AM_b;hoTIIr$R6$IDJ;-dffT==qx~XGY1o`3rKAx6)9nnCMQ}n{4cpB?hWIXP|l1>ywid6qS zayeSszDAEdI>9f;<0el780Ppz7#_Y)kI=hbV=wkoG@EN3_oS-(PDj?nQFS+sx{ifz z96i&CGtpkP6NXhCIpeggnyA|ASgV7o!%KApMpWYsa%D8en#j6N)YXpH5E|=5cp96? z&7fzw*;;9R+g{(ew+>;WweqmJ?nW?>=w8_A>KeZ$9q`W{7?CKB-35p?-LVKM z#TVh$&_mMLk0CWSkQ>|hF5S^dV=alF>V0?IIEuqahY-``xZVpJq1oYo(n(AwYRGze z$8P%7$#q*?8g1ven?X(9q?%SEG_GcqWQzmi9guAO2H`(s3#meCf-HxUFW?&~+auaT zO7qm;=EN;+Z_r7B^9K+P3Y_jBPnSR7>&BplCem$8vge-22`>62`mYr4<>oa@& zc{D;B)|mN1o7>1GXajrzl|uml&kN$E?4hJ#(Wc0>02Y94lnFCRFNo>lN@P&-VFO`I z+HtzaMXx}v!f*n=SS?b2nFd8L>k~QwBQOW5zW8Ak9wwMA=x1y6fLW7bc5!G1=jt#* z9R-7?*uAiCn!qR?;vi?3Snd)A)ye2-*r>#a7L%4}fFNh3Dq|sKa+k>;=Zk!z0>aGbjighn_Z5E@> z0jt%`2dh;VSL4*ZKGNE3)olfn?8Nc@Er;6Px1qC)+s0!nTuUsoEOomJw^Ih{;!e`I zQx;;oi_jB&5I@uHov;(_x20w?7ms!v16oJjI6$Fj)~-Smg2WkkdpLnI)-W6blW?%| zPFrGcw|(?yWRO!AIR=K-U4rRW9*X6u;!m + + bookfragments output + + + + """) + for fragment in closed_fragments.values(): + fragment_html = u'

    [#%s] %s

    %s
    ' % (fragment.id, fragment.themes, fragment) + output_file.write(fragment_html.encode('utf-8')) + output_file.write('') + output_file.close() + diff --git a/scripts/genslugs b/scripts/genslugs new file mode 100755 index 0000000..3391d8e --- /dev/null +++ b/scripts/genslugs @@ -0,0 +1,64 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +import os +import optparse + +from lxml import etree +from librarian import html +from slughifi import slughifi + + +BOOK_URL = 'http://wolnelektury.pl/katalog/lektura/' + + +if __name__ == '__main__': + # Parse commandline arguments + usage = """Usage: %prog [options] SOURCE [SOURCE...] + Generate slugs for SOURCE.""" + + parser = optparse.OptionParser(usage=usage) + + parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, + help='print status messages to stdout') + parser.add_option('-f', '--force', action='store_true', dest='force', default=False, + help='overwrite current identifiers') + + options, input_filenames = parser.parse_args() + + if len(input_filenames) < 1: + parser.print_help() + exit(1) + + # Do some real work + for input_filename in input_filenames: + if options.verbose: + print input_filename + + doc = etree.parse(input_filename) + try: + title = doc.find('//{http://purl.org/dc/elements/1.1/}title').text + except AttributeError: + print '%s:error:Book title not found. Skipping.' % input_filename + continue + + parent = '' + try: + parent_url = doc.find('//{http://purl.org/dc/elements/1.1/}relation.isPartOf').text + parent = parent_url.rsplit('/', 1)[1] + ' ' + except AttributeError: + pass + except IndexError: + print '%s:error:Invalid parent URL "%s". Skipping.' % (input_filename, parent_url) + + book_url = doc.find('//{http://purl.org/dc/elements/1.1/}identifier.url') + if book_url is None: + book_description = doc.find('//{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description') + book_url = etree.SubElement(book_description, '{http://purl.org/dc/elements/1.1/}identifier.url') + if not options.force and book_url.text.startswith('http://'): + print '%s:Notice:Book already has identifier URL "%s". Skipping.' % (input_filename, book_url.text) + continue + + book_url.text = BOOK_URL + slughifi(parent + title)[:60] + + doc.write(input_filename, xml_declaration=True, pretty_print=True, encoding='utf-8') + diff --git a/scripts/master.css b/scripts/master.css new file mode 100644 index 0000000..98e142b --- /dev/null +++ b/scripts/master.css @@ -0,0 +1,207 @@ +body { + font-size: 16px; + font: Georgia, "Times New Roman", serif; + line-height: 1.5em; + margin: 3em; + max-width: 36em; +} + +a { + color: blue; + text-decoration: none; +} + +/* =================================================== */ +/* = Common elements: headings, paragraphs and lines = */ +/* =================================================== */ +h1 { + font-size: 3em; + margin: 1.5em 0; + text-align: center; + line-height: 1.5em; + font-weight: bold; +} + +h2 { + font-size: 2em; + margin: 1.5em 0 0; + font-weight: bold; + line-height: 1.5em; +} + +h3 { + font-size: 1.5em; + margin: 1.5em 0 0; + font-weight: normal; + line-height: 1.5em; +} + +h4 { + font-size: 1em; + margin: 1.5em 0 0; + line-height: 1.5em; +} + +p { + margin: 0; +} + +/* ======================== */ +/* = Footnotes and themes = */ +/* ======================== */ +.theme-begin { + border-left: 0.1em solid #DDDDDD; + color: #777; + padding: 0 0.5em; + width: 7.5em; + font-style: normal; + font-weight: normal; + font-size: 16px; + position: absolute; + left: 40em; + line-height: 1.5em; + text-align: left; +} + +.annotation { + font-style: normal; + font-weight: normal; + font-size: 12px; +} + +#footnotes .annotation { + display: block; + float: left; + width: 2.5em; + clear: both; +} + +#footnotes div { + margin: 1.5em 0 0 0; +} + +#footnotes p { + margin-left: 2.5em; +} + + +/* ============= */ +/* = Numbering = */ +/* ============= */ +.anchor { + float: left; + margin: -0.2em -0.5em -0.2em -3.5em; + color: #777; + font-size: 12px; + width: 2em; + text-align: center; + padding: 0.2em 0.5em; +} + +.anchor:hover, .anchor:active { + color: #FFF; + background-color: #CCC; +} + + +/* =================== */ +/* = Custom elements = */ +/* =================== */ +span.author { + font-size: 0.75em; + display: block; + line-height: 1.5em; + margin-bottom: 0.25em; +} + +span.collection { + font-size: 0.75em; + display: block; + line-height: 1.5em; + margin-bottom: -0.25em; +} + +span.subtitle { + font-size: 0.75em; + display: block; + line-height: 1.5em; + margin-top: -0.25em; +} + +div.didaskalia { + font-style: italic; + margin: 0.5em 0 0; +} + +div.kwestia { + margin: 0.5em 0 0; +} + +div.stanza { + margin: 1.5em 0 0; +} + +div.kwestia div.stanza { + margin: 0; +} + +p.paragraph { + text-align: justify; + margin: 1.5em 0 0; +} + +p.motto { + text-align: justify; + font-style: italic; + margin: 1.5em 0 0; +} + +p.motto_podpis { + font-size: 0.875em; +} + +div.fragment { + border-bottom: 0.1em solid #999; + padding-bottom: 1.5em; +} + +div.note p, div.dedication p, div.note p.paragraph, div.dedication p.paragraph { + text-align: right; + font-style: italic; +} + +hr.spacer { + height: 3em; + visibility: hidden; +} + +hr.spacer-line { + margin: 1.5em 0; + border: none; + border-bottom: 0.1em solid #000; +} + +p.spacer-asterisk { + padding: 0; + margin: 1.5em 0; + text-align: center; +} + +div.person-list ol { + list-style: none; + padding: 0 0 0 1.5em; +} + +p.place-and-time { + font-style: italic; +} + +em.math, em.foreign-word, em.book-title, em.didaskalia, em.author-emphasis { + font-style: italic; +} + +em.person { + font-style: normal; + font-variant: small-caps; +} + diff --git a/scripts/master.plain.css b/scripts/master.plain.css new file mode 100644 index 0000000..3210e88 --- /dev/null +++ b/scripts/master.plain.css @@ -0,0 +1,160 @@ +body { + font-size: 16px; + font: Georgia, "Times New Roman", serif; + line-height: 1.5em; + margin: 3em; + max-width: 36em; +} + +a { + color: blue; + text-decoration: none; +} + +/* =================================================== */ +/* = Common elements: headings, paragraphs and lines = */ +/* =================================================== */ +h1 { + font-size: 3em; + margin: 1.5em 0; + text-align: center; + line-height: 1.5em; + font-weight: bold; +} + +h2 { + font-size: 2em; + margin: 1.5em 0 0; + font-weight: bold; + line-height: 1.5em; +} + +h3 { + font-size: 1.5em; + margin: 1.5em 0 0; + font-weight: normal; + line-height: 1.5em; +} + +h4 { + font-size: 1em; + margin: 1.5em 0 0; + line-height: 1.5em; +} + +p { + margin: 0; +} + +/* ======================== */ +/* = Footnotes and themes = */ +/* ======================== */ +.theme-begin { + border-left: 0.1em solid #DDDDDD; + color: #666; + float: right; + margin: 0 -9.5em 0 0; + padding: 0 0.5em; + width: 7.5em; + font-style: normal; + font-weight: normal; + font-size: 16px; + display: none; +} + +.annotation { + font-style: normal; + font-weight: normal; + font-size: 16px; + display: none; +} + +#footnotes { + display: none; +} + +#footnotes .annotation { + display: block; + float: left; + width: 2.5em; + clear: both; +} + +#footnotes div { + margin: 1.5em 0 0 0; +} + +#footnotes p { + margin-left: 2.5em; +} + +/* =================== */ +/* = Custom elements = */ +/* =================== */ +span.author { + font-size: 0.75em; + display: block; + line-height: 1.5em; + margin-bottom: 0.25em; +} + +span.collection { + font-size: 0.75em; + display: block; + line-height: 1.5em; + margin-bottom: -0.25em; +} + +span.subtitle { + font-size: 0.75em; + display: block; + line-height: 1.5em; + margin-top: -0.25em; +} + +div.didaskalia { + font-style: italic; + margin: 0.5em 0 0; +} + +div.kwestia { + margin: 0.5em 0 0; +} + +div.stanza { + margin: 1.5em 0 0; +} + +div.kwestia div.stanza { + margin: 0; +} + +p.paragraph { + text-align: justify; + margin: 1.5em 0 0; +} + +p.motto { + text-align: justify; + font-style: italic; + margin: 1.5em 0 0; +} + +p.motto_podpis { + font-size: 0.875em; +} + +div.fragment { + border-bottom: 0.1em solid #999; + padding-bottom: 1.5em; +} + +div.note p, div.note p.paragraph { + text-align: right; + font-style: italic; +} + +hr.spacer { + height: 3em; + visibility: hidden; +} diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..0988321 --- /dev/null +++ b/setup.py @@ -0,0 +1,19 @@ +# -*- coding: utf-8 -*- +from distutils.core import setup + + +setup( + name='librarian', + version='1.1', + description='Converter from WolneLektury.pl XML-based language to XHTML, TXT and other formats', + author='Marek Stępniowski', + author_email='marek@stepniowski.com', + url='http://redmine.nowoczesnapolska.org.pl/', + packages=['librarian', 'librarian.tests'], + package_dir={'librarian': 'librarian'}, + package_data={ + 'librarian': ['*.xslt'], + 'librarian.tests': ['files/dcparser/*.xml', 'files/erroneous/*.xml'], + }, + scripts=['scripts/book2html', 'scripts/book2txt', 'scripts/bookfragments', 'scripts/genslugs'], +) -- 2.20.1