From: Marcin Koziej <marcin@rho-desktop.(none)> Date: Wed, 5 Oct 2011 14:54:25 +0000 (+0200) Subject: #1242 generowanie pdf przy imporcie xml X-Git-Url: https://git.mdrn.pl/wolnelektury.git/commitdiff_plain/46705d393ae76a1d8e0eb2dc8bf121269f6adb67?ds=sidebyside #1242 generowanie pdf przy imporcie xml --- diff --git a/apps/catalogue/management/commands/importbooks.py b/apps/catalogue/management/commands/importbooks.py index 72878fe25..ea3768ba7 100644 --- a/apps/catalogue/management/commands/importbooks.py +++ b/apps/catalogue/management/commands/importbooks.py @@ -24,6 +24,8 @@ class Command(BaseCommand): help='Don\'t build EPUB file'), make_option('-T', '--no-build-txt', action='store_false', dest='build_txt', default=True, help='Don\'t build TXT file'), + make_option('-T', '--no-build-pdf', action='store_false', dest='build_pdf', default=True, + help='Don\'t build PDF file'), make_option('-w', '--wait-until', dest='wait_until', metavar='TIME', help='Wait until specified time (Y-M-D h:m:s)'), ) @@ -81,7 +83,8 @@ class Command(BaseCommand): try: book = Book.from_xml_file(file_path, overwrite=force, build_epub=options.get('build_epub'), - build_txt=options.get('build_txt')) + build_txt=options.get('build_txt'), + build_pdf=options.get('build_pdf')) files_imported += 1 if os.path.isfile(file_base + '.pdf'): diff --git a/apps/catalogue/models.py b/apps/catalogue/models.py index ead5ba040..930f98d14 100644 --- a/apps/catalogue/models.py +++ b/apps/catalogue/models.py @@ -22,14 +22,14 @@ from django.conf import settings from newtagging.models import TagBase, tags_updated from newtagging import managers from catalogue.fields import JSONField, OverwritingFileField -from catalogue.utils import ExistingFile +from catalogue.utils import ExistingFile, BookImportDocProvider from librarian import dcparser, html, epub, NoDublinCore import mutagen from mutagen import id3 from slughifi import slughifi from sortify import sortify - +from os import unlink TAG_CATEGORIES = ( 
('author', _('author')), @@ -51,6 +51,7 @@ MEDIA_FORMATS = ( # not quite, but Django wants you to set a timeout CACHE_FOREVER = 2419200 # 28 days + class TagSubcategoryManager(models.Manager): def __init__(self, subcategory): super(TagSubcategoryManager, self).__init__() @@ -295,9 +296,9 @@ class Book(models.Model): xml_file = models.FileField(_('XML file'), upload_to=book_upload_path('xml'), blank=True) html_file = models.FileField(_('HTML file'), upload_to=book_upload_path('html'), blank=True) pdf_file = models.FileField(_('PDF file'), upload_to=book_upload_path('pdf'), blank=True) - epub_file = models.FileField(_('EPUB file'), upload_to=book_upload_path('epub'), blank=True) - txt_file = models.FileField(_('TXT file'), upload_to=book_upload_path('txt'), blank=True) - + epub_file = models.FileField(_('EPUB file'), upload_to=book_upload_path('epub'), blank=True) + txt_file = models.FileField(_('TXT file'), upload_to=book_upload_path('txt'), blank=True) + parent = models.ForeignKey('self', blank=True, null=True, related_name='children') objects = models.Manager() tagged = managers.ModelTaggedItemManager(Tag) @@ -503,34 +504,49 @@ class Book(models.Model): return bool(self.has_media("ogg")) has_ogg_file.short_description = 'OGG' has_ogg_file.boolean = True - + def has_daisy_file(self): return bool(self.has_media("daisy")) has_daisy_file.short_description = 'DAISY' - has_daisy_file.boolean = True - + has_daisy_file.boolean = True + + def build_pdf(self): + """ (Re)builds the pdf file. 
+ + """ + from librarian import pdf, ParseError + from tempfile import NamedTemporaryFile + import os + + try: + path, fname = os.path.realpath(self.xml_file.path).rsplit('/', 1) + try: + pdf_file = NamedTemporaryFile(delete=False) + + pdf.transform(BookImportDocProvider(self), + file_path=str(self.xml_file.path), + output_file=pdf_file, + ) + + self.pdf_file.save('%s.pdf' % self.slug, File(open(pdf_file.name))) + finally: + unlink(pdf_file.name) + + except ParseError, e: + print '%(file)s:%(name)s:%(message)s; use -v to see more output' % { + 'file': self.xml_file.path, + 'name': e.__class__.__name__, + 'message': e + } + def build_epub(self, remove_descendants=True): """ (Re)builds the epub file. If book has a parent, does nothing. Unless remove_descendants is False, descendants' epubs are removed. """ - from StringIO import StringIO from hashlib import sha1 from django.core.files.base import ContentFile - from librarian import DocProvider - - class BookImportDocProvider(DocProvider): - """ used for joined EPUBs """ - - def __init__(self, book): - self.book = book - - def by_slug(self, slug): - if slug == self.book.slug: - return self.book.xml_file - else: - return Book.objects.get(slug=slug).xml_file if self.parent: # don't need an epub @@ -633,7 +649,7 @@ class Book(models.Model): xml_file.close() @classmethod - def from_text_and_meta(cls, raw_file, book_info, overwrite=False, build_epub=True, build_txt=True): + def from_text_and_meta(cls, raw_file, book_info, overwrite=False, build_epub=True, build_txt=True, build_pdf=True): import re # check for parts before we do anything @@ -706,6 +722,9 @@ class Book(models.Model): if not settings.NO_BUILD_EPUB and build_epub: book.root_ancestor.build_epub() + if not settings.NO_BUILD_PDF and build_pdf: + book.root_ancestor.build_pdf() + book_descendants = list(book.children.all()) # add l-tag to descendants and their fragments # delete unnecessary EPUB files diff --git a/apps/catalogue/test_utils.py 
b/apps/catalogue/test_utils.py index 1dcd7266a..7905efbb3 100644 --- a/apps/catalogue/test_utils.py +++ b/apps/catalogue/test_utils.py @@ -10,7 +10,7 @@ class WLTestCase(TestCase): """ def setUp(self): self._MEDIA_ROOT, settings.MEDIA_ROOT = settings.MEDIA_ROOT, tempfile.mkdtemp(prefix='djangotest_') - settings.NO_BUILD_EPUB = settings.NO_BUILD_TXT = True + settings.NO_BUILD_PDF = settings.NO_BUILD_EPUB = settings.NO_BUILD_TXT = True def tearDown(self): shutil.rmtree(settings.MEDIA_ROOT, True) diff --git a/apps/catalogue/tests/book_import.py b/apps/catalogue/tests/book_import.py index 57dbf27a3..29be73177 100644 --- a/apps/catalogue/tests/book_import.py +++ b/apps/catalogue/tests/book_import.py @@ -1,10 +1,13 @@ # -*- coding: utf-8 -*- -from django.core.files.base import ContentFile +from __future__ import with_statement + +from django.core.files.base import ContentFile, File from catalogue.test_utils import * from catalogue import models from nose.tools import raises - +import tempfile +from os import unlink,path class BookImportLogicTests(WLTestCase): @@ -228,3 +231,37 @@ class ChildImportTests(WLTestCase): self.assertEqual(['Kot'], [tag.name for tag in themes], 'wrong related theme list') + + +class BookImportGenerateTest(WLTestCase): + def setUp(self): + WLTestCase.setUp(self) + self.book_info = BookInfoStub( + url=u"http://wolnelektury.pl/example/default-book", + about=u"http://wolnelektury.pl/example/URI/default_book", + title=u"Default Book", + author=PersonStub(("Jim",), "Lazy"), + kind="X-Kind", + genre="X-Genre", + epoch="X-Epoch", + ) + + self.expected_tags = [ + ('author', 'jim-lazy'), + ('genre', 'x-genre'), + ('epoch', 'x-epoch'), + ('kind', 'x-kind'), + ] + self.expected_tags.sort() + + def test_gen_pdf(self): + input = open(path.dirname(__file__) + '/but-w-butonierce-but-w-butonierce.xml') + book = models.Book.from_text_and_meta(File(input), self.book_info, overwrite=True) + book.build_pdf() + self.assertTrue(path.exists(book.pdf_file.path)) + + def 
test_gen_pdf_child(self): + input = open(path.dirname(__file__) + "/fraszka-do-anusie.xml") + book = models.Book.from_text_and_meta(File(input), self.book_info, overwrite=True) + book.build_pdf() + self.assertTrue(path.exists(book.pdf_file.path)) diff --git a/apps/catalogue/tests/but-w-butonierce-but-w-butonierce.xml b/apps/catalogue/tests/but-w-butonierce-but-w-butonierce.xml new file mode 100755 index 000000000..8b52addfb --- /dev/null +++ b/apps/catalogue/tests/but-w-butonierce-but-w-butonierce.xml @@ -0,0 +1,67 @@ +<?xml version='1.0' encoding='utf-8'?> +<utwor><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" +xmlns:dc="http://purl.org/dc/elements/1.1/"> +<rdf:Description rdf:about="http://wiki.wolnepodreczniki.pl/Lektury:Jasie%C5%84ski/But/But_w_butonierce"> +<dc:creator xml:lang="pl">Jasieński, Bruno</dc:creator> +<dc:title xml:lang="pl">But w butonierce</dc:title> +<dc:relation.isPartOf xml:lang="pl">http://wolnelektury.pl/katalog/lektura/but-w-butonierce</dc:relation.isPartOf> +<dc:contributor.editor xml:lang="pl">Sekuła, Aleksandra</dc:contributor.editor> +<dc:contributor.technical_editor xml:lang="pl">Sutkowska, Olga</dc:contributor.technical_editor> +<dc:publisher xml:lang="pl">Fundacja Nowoczesna Polska</dc:publisher> +<dc:subject.period xml:lang="pl">Dwudziestolecie międzywojenne</dc:subject.period> +<dc:subject.type xml:lang="pl">Liryka</dc:subject.type> +<dc:subject.genre xml:lang="pl">Wiersz sylabotoniczny</dc:subject.genre> +<dc:description xml:lang="pl">Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl).
Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN.</dc:description> +<dc:identifier.url xml:lang="pl">http://wolnelektury.pl/katalog/lektura/but-w-butonierce-but-w-butonierce</dc:identifier.url> +<dc:source.URL xml:lang="pl">http://www.polona.pl/Content/14667/27384_But_w_butoni.html</dc:source.URL> +<dc:source xml:lang="pl">Jasieński, Bruno (1901-1938), But w butonierce, Klub Futurystów "Katarynka", Warszawa, 1921</dc:source> +<dc:rights xml:lang="pl">Domena publiczna - Bruno Jasieński zm. 1938</dc:rights> +<dc:date.pd xml:lang="pl">1938</dc:date.pd> +<dc:format xml:lang="pl">xml</dc:format> +<dc:type xml:lang="pl">text</dc:type> +<dc:type xml:lang="en">text</dc:type> +<dc:date xml:lang="pl">2009-02-23</dc:date> +<dc:audience xml:lang="pl">L</dc:audience> +<dc:language xml:lang="pl">pol</dc:language> +</rdf:Description> +</rdf:RDF><liryka_l> + +<autor_utworu>Bruno Jasieński</autor_utworu> + +<dzielo_nadrzedne>But w butonierce</dzielo_nadrzedne> + +<nazwa_utworu>But w butonierce</nazwa_utworu> + + + +<strofa>Zmarnowałem podeszwy w całodziennych spieszeniach,/ +Teraz jestem słoneczny, siebiepewny i rad./ +Idę młody, genialny, trzymam ręce w kieszeniach,/ +Stawiam kroki milowe, zamaszyste, jak świat.</strofa> + +<strofa>Nie zatrzymam się nigdzie na rozstajach, na wiorstach,/ +Bo mnie niesie coś wiecznie, motorycznie i przed./ +Mijam strachy na wróble w eleganckich windhorstach,/ +Wszystkim kłaniam się grzecznie i poprawiam im pled.</strofa> + +<strofa>W parkocieniu krokietni --- jakiś meeting panieński./ +Dyskutują o sztuce, objawiając swój traf./ +One jeszcze nie wiedzą, że, gdy nastał Jasieński,/ +Bezpowrotnie umarli i Tetmajer i Staff.</strofa> + +<strofa>One jeszcze nie wiedzą, one jeszcze nie wierzą./ +Poezyjność, futuryzm --- niewiadoma i X./ +Chodźmy biegać, panienki, niech się główki oświeżą, ---/ +Będzie lepiej smakować poobiedni jour-fixe.</strofa> + +<strofa>Przeleciało gdzieś auto w białych kłębach
benzyny,/ +Zafurkotał na wietrze trzepocący się szal./ +Pojechała mi bajka poza góry doliny/ +I nic jakoś mi nie żal, a powinno być żal...</strofa> + +<strofa>Tak mi dobrze, tak mojo, aż rechoce się serce./ +Same nogi mnie niosą gdzieś --- i po co mi, gdzie?/ +Idę młody, genialny, niosę BUT W BUTONIERCE<extra>wersaliki</extra>,/ +Tym co za mną nie zdążą echopowiem: --- Adieu! ---</strofa> + +</liryka_l></utwor> diff --git a/apps/catalogue/tests/fraszka-do-anusie.xml b/apps/catalogue/tests/fraszka-do-anusie.xml new file mode 100755 index 000000000..3bbda155e --- /dev/null +++ b/apps/catalogue/tests/fraszka-do-anusie.xml @@ -0,0 +1,49 @@ +<?xml version='1.0' encoding='utf-8'?> +<utwor> + <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/"> +<rdf:Description rdf:about="http://wiki.wolnepodreczniki.pl/index.php?title=Lektury:S%C4%99p-Szarzy%C5%84ski/Rytmy/Fraszka_do_Anusie"> +<dc:creator xml:lang="pl">Sęp Szarzyński, Mikołaj</dc:creator> +<dc:title xml:lang="pl">Fraszka do Anusie</dc:title> +<dc:contributor.editor xml:lang="pl">Sekuła, Aleksandra</dc:contributor.editor> +<dc:contributor.technical_editor xml:lang="pl">Sutkowska, Olga</dc:contributor.technical_editor> +<dc:publisher xml:lang="pl">Fundacja Nowoczesna Polska</dc:publisher> +<dc:subject.period xml:lang="pl">Barok</dc:subject.period> +<dc:subject.type xml:lang="pl">Liryka</dc:subject.type> +<dc:subject.genre xml:lang="pl">Fraszka</dc:subject.genre> +<dc:description xml:lang="pl">Publikacja zrealizowana w ramach projektu Wolne Lektury (http://wolnelektury.pl).
Reprodukcja cyfrowa wykonana przez Bibliotekę Narodową z egzemplarza pochodzącego ze zbiorów BN.</dc:description> +<dc:identifier.url xml:lang="pl">http://wolnelektury.pl/katalog/lektura/fraszka-do-anusie</dc:identifier.url> +<dc:source.URL xml:lang="pl">http://www.polona.pl/Content/8759</dc:source.URL> +<dc:source xml:lang="pl">Szarzyński Sęp, Mikołaj (ca 1550-1581), Rytmy abo Wiersze polskie w wyborze, E. Wende, Warszawa, 1914</dc:source> +<dc:rights xml:lang="pl">Domena publiczna - Mikołaj Sęp Szarzyński zm. 1581</dc:rights> +<dc:date.pd xml:lang="pl">1581</dc:date.pd> +<dc:format xml:lang="pl">xml</dc:format> +<dc:type xml:lang="pl">text</dc:type> +<dc:type xml:lang="en">text</dc:type> +<dc:date xml:lang="pl">2008-12-29</dc:date> +<dc:audience xml:lang="pl">L</dc:audience> +<dc:audience xml:lang="pl">L</dc:audience> +<dc:language xml:lang="pl">pol</dc:language> +</rdf:Description> +</rdf:RDF> + <liryka_l> + +<autor_utworu>Mikołaj Sęp Szarzyński</autor_utworu> + +<nazwa_utworu>Fraszka do Anusie</nazwa_utworu> + + + +<strofa><begin id="b1230084410751"/><motyw id="m1230084410751">Kochanek, Łzy, Miłość, Oko, Serce, Wzrok</motyw>Jeśli oczu hamować swoich nie umiały/ +Leśnych krynic boginie, aby nie płakały,/ +Gdy baczyły<pe><slowo_obce>baczyły</slowo_obce> --- tu: zobaczyły, patrzyły na.</pe> przy studni Narcyza pięknego,/ +A on umarł prze miłość oblicza swojego;/ +Jeśli nieśmiertelnym stanom żałość rozkazuje,/ +Gdy niebaczna fortuna co niesłusznie psuje:</strofa> + +<strofa>Jakoż ja mam hamować, by na lice moje/ +Z oczu smutnych żałośne nie płynęły zdroje?/ +Jako serce powściągać, aby nie wzdychało/ +I od ciężkiej żałości omdlewać nie miało?<end id="e1230084410751"/></strofa> + +</liryka_l> +</utwor> diff --git a/apps/catalogue/utils.py b/apps/catalogue/utils.py index 566eaf4cd..02e5b6d93 100644 --- a/apps/catalogue/utils.py +++ b/apps/catalogue/utils.py @@ -10,6 +10,8 @@ from django.core.files.uploadedfile import UploadedFile from django.utils.hashcompat import
sha_constructor from django.conf import settings +from librarian import DocProvider + # Use the system (hardware-based) random number generator if it exists. if hasattr(random, 'SystemRandom'): @@ -44,3 +46,16 @@ class ExistingFile(UploadedFile): def close(self): pass + + +class BookImportDocProvider(DocProvider): + """ used for joined EPUBs """ + + def __init__(self, book): + self.book = book + + def by_slug(self, slug): + if slug == self.book.slug: + return self.book.xml_file + else: + return Book.objects.get(slug=slug).xml_file