src/librarian/text.py

   1 # -*- coding: utf-8 -*-
   2 #
   3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
   4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   5 #
   6 from __future__ import unicode_literals
   7
import copy
import io
import os

from lxml import etree
import six

from librarian import functions, OutputFile, get_resource
  13
  14
  15 functions.reg_substitute_entities()
  16 functions.reg_wrap_words()
  17 functions.reg_strip()
  18 functions.reg_person_name()
  19
  20
  21 with open(get_resource("res/text/template.txt")) as f:
  22     TEMPLATE = f.read()
  23
  24
  25 def transform(wldoc, flags=None, **options):
  26     """
  27     Transforms input_file in XML to output_file in TXT.
  28     possible flags: raw-text,
  29     """
  30     # Parse XSLT
  31     style_filename = os.path.join(os.path.dirname(__file__),
  32                                   'xslt/book2txt.xslt')
  33     style = etree.parse(style_filename)
  34
  35     document = copy.deepcopy(wldoc)
  36     del wldoc
  37     document.swap_endlines()
  38
  39     if flags:
  40         for flag in flags:
  41             document.edoc.getroot().set(flag, 'yes')
  42     if 'wrapping' in options:
  43         options['wrapping'] = str(options['wrapping'])
  44
  45     result = document.transform(style, **options)
  46
  47     if not flags or 'raw-text' not in flags:
  48         if document.book_info:
  49             parsed_dc = document.book_info
  50             description = parsed_dc.description
  51             url = document.book_info.url
  52
  53             license_description = parsed_dc.license_description
  54             license = parsed_dc.license
  55             if license:
  56                 license_description = (
  57                     u"Ten utwór jest udostępniony na licencji %s: \n%s" % (
  58                         license_description, license
  59                     )
  60                 )
  61             else:
  62                 license_description = (
  63                     "Ten utwór nie jest objęty majątkowym prawem autorskim "
  64                     "i znajduje się w domenie publicznej, co oznacza że "
  65                     "możesz go swobodnie wykorzystywać, publikować "
  66                     "i rozpowszechniać. Jeśli utwór opatrzony jest "
  67                     "dodatkowymi materiałami (przypisy, motywy literackie "
  68                     "etc.), które podlegają prawu autorskiemu, to te "
  69                     "dodatkowe materiały udostępnione są na licencji "
  70                     "Creative Commons Uznanie Autorstwa – Na Tych Samych "
  71                     "Warunkach 3.0 PL "
  72                     "(http://creativecommons.org/licenses/by-sa/3.0/)"
  73                 )
  74
  75             source = parsed_dc.source_name
  76             if source:
  77                 source = "\n\nTekst opracowany na podstawie: " + source
  78             else:
  79                 source = ''
  80
  81             contributors = ', '.join(
  82                 person.readable()
  83                 for person in sorted(set(
  84                     p for p in (
  85                         parsed_dc.technical_editors + parsed_dc.editors
  86                     ) if p))
  87             )
  88             if contributors:
  89                 contributors = (
  90                     "\n\nOpracowanie redakcyjne i przypisy: %s."
  91                     % contributors
  92                 )
  93             funders = ', '.join(parsed_dc.funders)
  94             if funders:
  95                 funders = u"\n\nPublikację wsparli i wsparły: %s." % funders
  96             publisher = '\n\nWydawca: ' + ', '.join(parsed_dc.publisher)
  97             isbn = getattr(parsed_dc, 'isbn_txt', None)
  98             if isbn:
  99                 isbn = '\n\n' + isbn
 100             else:
 101                 isbn = ''
 102         else:
 103             description = ("Publikacja zrealizowana w ramach projektu "
 104                            "Wolne Lektury (http://wolnelektury.pl).")
 105             url = '*' * 10
 106             license_description = ""
 107             source = ""
 108             contributors = ""
 109             funders = ""
 110             publisher = ""
 111             isbn = ""
 112         result = (TEMPLATE % {
 113             'description': description,
 114             'url': url,
 115             'license_description': license_description,
 116             'text': six.text_type(result),
 117             'source': source,
 118             'contributors': contributors,
 119             'funders': funders,
 120             'publisher': publisher,
 121             'isbn': isbn,
 122         }).encode('utf-8')
 123     else:
 124         result = six.text_type(result).encode('utf-8')
 125     return OutputFile.from_bytes(b"\r\n".join(result.splitlines()) + b"\r\n")