src/librarian/text.py

   1 # -*- coding: utf-8 -*-
   2 #
   3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
   4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   5 #
   6 from __future__ import unicode_literals
   7
   8 import copy
   9 from librarian import functions, OutputFile, get_resource
  10 from lxml import etree
  11 import io
  12 import os
  13 import six
  14
  15
  16 functions.reg_substitute_entities()
  17 functions.reg_wrap_words()
  18 functions.reg_strip()
  19 functions.reg_person_name()
  20
  21
  22 with io.open(get_resource("res/text/template.txt")) as f:
  23     TEMPLATE = f.read()
  24
  25
  26 def transform(wldoc, flags=None, **options):
  27     """
  28     Transforms input_file in XML to output_file in TXT.
  29     possible flags: raw-text,
  30     """
  31     # Parse XSLT
  32     style_filename = os.path.join(os.path.dirname(__file__),
  33                                   'xslt/book2txt.xslt')
  34     style = etree.parse(style_filename)
  35
  36     document = copy.deepcopy(wldoc)
  37     del wldoc
  38     document.swap_endlines()
  39
  40     if flags:
  41         for flag in flags:
  42             document.edoc.getroot().set(flag, 'yes')
  43     if 'wrapping' in options:
  44         options['wrapping'] = str(options['wrapping'])
  45
  46     result = document.transform(style, **options)
  47
  48     if not flags or 'raw-text' not in flags:
  49         if document.book_info:
  50             parsed_dc = document.book_info
  51             description = parsed_dc.description
  52             url = document.book_info.url
  53
  54             license_description = parsed_dc.license_description
  55             license = parsed_dc.license
  56             if license:
  57                 license_description = (
  58                     u"Ten utwór jest udostępniony na licencji %s: \n%s" % (
  59                         license_description, license
  60                     )
  61                 )
  62             else:
  63                 license_description = (
  64                     "Ten utwór nie jest objęty majątkowym prawem autorskim "
  65                     "i znajduje się w domenie publicznej, co oznacza że "
  66                     "możesz go swobodnie wykorzystywać, publikować "
  67                     "i rozpowszechniać. Jeśli utwór opatrzony jest "
  68                     "dodatkowymi materiałami (przypisy, motywy literackie "
  69                     "etc.), które podlegają prawu autorskiemu, to te "
  70                     "dodatkowe materiały udostępnione są na licencji "
  71                     "Creative Commons Uznanie Autorstwa – Na Tych Samych "
  72                     "Warunkach 3.0 PL "
  73                     "(http://creativecommons.org/licenses/by-sa/3.0/)"
  74                 )
  75
  76             source = parsed_dc.source_name
  77             if source:
  78                 source = "\n\nTekst opracowany na podstawie: " + source
  79             else:
  80                 source = ''
  81
  82             contributors = ', '.join(
  83                 person.readable()
  84                 for person in sorted(set(
  85                     p for p in (
  86                         parsed_dc.technical_editors + parsed_dc.editors
  87                     ) if p))
  88             )
  89             if contributors:
  90                 contributors = (
  91                     "\n\nOpracowanie redakcyjne i przypisy: %s."
  92                     % contributors
  93                 )
  94             funders = ', '.join(parsed_dc.funders)
  95             if funders:
  96                 funders = u"\n\nPublikację wsparli i wsparły: %s." % funders
  97             publisher = '\n\nWydawca: ' + ', '.join(parsed_dc.publisher)
  98             isbn = getattr(parsed_dc, 'isbn_txt', None)
  99             if isbn:
 100                 isbn = '\n\n' + isbn
 101             else:
 102                 isbn = ''
 103         else:
 104             description = ("Publikacja zrealizowana w ramach projektu "
 105                            "Wolne Lektury (http://wolnelektury.pl).")
 106             url = '*' * 10
 107             license_description = ""
 108             source = ""
 109             contributors = ""
 110             funders = ""
 111             publisher = ""
 112             isbn = ""
 113         result = (TEMPLATE % {
 114             'description': description,
 115             'url': url,
 116             'license_description': license_description,
 117             'text': six.text_type(result),
 118             'source': source,
 119             'contributors': contributors,
 120             'funders': funders,
 121             'publisher': publisher,
 122             'isbn': isbn,
 123         }).encode('utf-8')
 124     else:
 125         result = six.text_type(result).encode('utf-8')
 126     return OutputFile.from_bytes(b"\r\n".join(result.splitlines()) + b"\r\n")