Epub debugging.
author Radek Czajka <rczajka@rczajka.pl>
Tue, 5 Dec 2023 14:08:34 +0000 (15:08 +0100)
committer Radek Czajka <rczajka@rczajka.pl>
Tue, 5 Dec 2023 14:08:34 +0000 (15:08 +0100)
requirements/requirements.txt
src/depot/publishers/base.py
src/documents/templates/documents/book_detail.html
src/documents/templates/documents/book_epubcheck.html [new file with mode: 0644]
src/documents/urls.py
src/documents/views.py
src/documents/xml_tools.py
src/sources/document.py

index 7dcf4fd..8feffa8 100644 (file)
@@ -10,7 +10,7 @@ python-slugify==8.0.1
 python-docx==0.8.11
 Wikidata==0.7
 
-librarian==23.08
+librarian==23.12
 
 ## Django
 Django==4.1.9
index 88ee56b..8fff399 100644 (file)
@@ -44,10 +44,11 @@ class BasePublisher:
             ) if p is not None else ''
             for p in wlbook.meta.authors
         ) + '<br>'
-        description += '<a href="https://wolnelektury.pl/katalog/lektura/{}/">{}</a><br>'.format(
-            wlbook.meta.url.slug,
-            wlbook.meta.title
-        )
+        if wlbook.meta.url is not None:
+            description += '<a href="https://wolnelektury.pl/katalog/lektura/{}/">{}</a><br>'.format(
+                wlbook.meta.url.slug,
+                wlbook.meta.title
+            )
         if wlbook.meta.translators:
             description += 'tłum. ' + ', '.join(p.readable() for p in wlbook.meta.translators) + '<br>'
         description += 'Epoka: ' + ', '.join(
index a286302..2d58734 100644 (file)
               <a href="{% url 'documents_book_txt' book.slug %}" rel="nofollow">{% trans "TXT version" %}</a><br/>
               <a href="{% url 'documents_book_pdf' book.slug %}" rel="nofollow">{% trans "PDF version" %}</a><br/>
               <a href="{% url 'documents_book_pdf_mobile' book.slug %}" rel="nofollow">{% trans "PDF version for mobiles" %}</a><br/>
-              <a href="{% url 'documents_book_epub' book.slug %}" rel="nofollow">{% trans "EPUB version" %}</a><br/>
+              <a href="{% url 'documents_book_epub' book.slug %}" rel="nofollow">{% trans "EPUB version" %}</a>
+              <a href="{% url 'documents_book_epubcheck' book.slug %}" rel="nofollow">sprawdź</a><br/>
               <a href="{% url 'documents_book_mobi' book.slug %}" rel="nofollow">{% trans "MOBI version" %}</a><br/>
             </p>
 
diff --git a/src/documents/templates/documents/book_epubcheck.html b/src/documents/templates/documents/book_epubcheck.html
new file mode 100644 (file)
index 0000000..ca0a0f9
--- /dev/null
@@ -0,0 +1,43 @@
+{% extends "documents/base.html" %}
+{% load book_list i18n %}
+{% load bootstrap4 %}
+{% load depot %}
+{% load isbn %}
+{# EPUB verification report for a single book: one alert box per epubcheck message. #}
+{# Context variables read below: `book` (title, slug, get_absolute_url) and `messages`. #}
+
+
+{% block titleextra %}{{ book.title }}{% endblock %}
+
+
+{% block content %}
+
+  <div class="card mt-4">
+    <div class="card-header">
+      <h1>Weryfikacja epub: <a href="{{ book.get_absolute_url }}">{{ book.title }}</a></h1>
+    </div>
+    <div class="card-body">
+      {% for msg in messages %}
+        {# Messages with severity "ERROR" use the "danger" alert class; every other severity uses "info". #}
+        <div class="alert alert-{% if msg.severity == "ERROR" %}danger{% else %}info{% endif %}">
+          {{ msg.message }}
+          {% if msg.suggestion %}
+            <br><br>
+            {{ msg.suggestion }}
+          {% endif %}
+          {% for loc in msg.locations %}
+            {# A location that carries wl_chunk becomes a button opening that chunk in the wiki editor, #}
+            {# labelled with the chunk title (when set) and wl_line; any other location is printed as-is. #}
+            {% if loc.wl_chunk %}
+              <br>
+              <a class="btn btn-primary" target="_blank" href="{% url 'wiki_editor' book.slug loc.wl_chunk.slug %}#CodeMirrorPerspective">
+                {% if loc.wl_chunk.title %}
+                  {{ loc.wl_chunk.title }}:
+                {% endif %}
+                Linia {{ loc.wl_line }}
+              </a>
+            {% else %}
+              <br><small><tt>{{ loc }}</tt></small>
+            {% endif %}
+          {% endfor %}
+        </div>
+      {% endfor %}
+    </div>
+  </div>
+
+{% endblock content %}
index 71ce10a..d9114cc 100644 (file)
@@ -41,6 +41,7 @@ urlpatterns = [
     path('book/<slug:slug>/txt', views.book_txt, name="documents_book_txt"),
     path('book/<slug:slug>/html', views.book_html, name="documents_book_html"),
     path('book/<slug:slug>/epub', views.book_epub, name="documents_book_epub"),
+    path('book/<slug:slug>/epubcheck', views.book_epubcheck, name="documents_book_epubcheck"),
     path('book/<slug:slug>/mobi', views.book_mobi, name="documents_book_mobi"),
     path('book/<slug:slug>/pdf', views.book_pdf, name="documents_book_pdf"),
     path('book/<slug:slug>/pdf-mobile', views.book_pdf, kwargs={'mobile': True}, name="documents_book_pdf_mobile"),
index ecc9219..905e885 100644 (file)
@@ -23,6 +23,7 @@ from django.utils.translation import gettext_lazy as _
 from django.views.decorators.http import require_POST
 from django_cas_ng.decorators import user_passes_test
 
+from librarian import epubcheck
 from apiclient import api_call, NotAuthorizedError
 from . import forms
 from . import helpers
@@ -300,7 +301,8 @@ def book_epub(request, slug):
 
     from librarian.builders import EpubBuilder
     epub = EpubBuilder(
-        base_url='file://' + book.gallery_path() + '/'
+        base_url='file://' + book.gallery_path() + '/',
+        debug=True
     ).build(doc).get_bytes()
     response = HttpResponse(content_type='application/epub+zip')
     response['Content-Disposition'] = 'attachment; filename=%s' % book.slug + '.epub'
@@ -308,6 +310,35 @@ def book_epub(request, slug):
     return response
 
 
+# View: build the book's EPUB (with debug=True), run epubcheck on the built
+# file and render the reported messages with 'documents/book_epubcheck.html'.
+# Responds with 404 when no Book has the given slug and with 403 when
+# book.accessible(request) is false.
+@login_required
+@never_cache
+def book_epubcheck(request, slug):
+    book = get_object_or_404(Book, slug=slug)
+    if not book.accessible(request):
+        return HttpResponseForbidden("Not authorized.")
+
+    # TODO: move to celery (the build and the check below run inside the request)
+    doc = book.wldocument(librarian2=True)
+    # TODO: error handling
+
+    from librarian.builders import EpubBuilder
+    epub = EpubBuilder(
+        base_url='file://' + book.gallery_path() + '/',
+        debug=True
+    ).build(doc)
+    # epubcheck is handed a file name rather than the EPUB bytes.
+    fname = epub.get_filename()
+
+    messages = epubcheck.epubcheck(fname)
+    # Each message is read as a dict; 'locations' is optional.  Where a location
+    # has a 'wl_chunk' entry, replace it with book[<that value>] so the template
+    # can read loc.wl_chunk.slug and loc.wl_chunk.title.
+    # NOTE(review): presumably the original value identifies a chunk of this
+    # book and book[...] returns the chunk object -- confirm against Book.
+    for message in messages:
+        for location in message.get('locations', []):
+            if 'wl_chunk' in location:
+                location['wl_chunk'] = book[location['wl_chunk']]
+    # NOTE(review): 'messages' is also the variable name supplied by Django's
+    # messages context processor (if enabled); confirm this overlap is intended.
+    return render(request, 'documents/book_epubcheck.html', {
+        'messages': messages,
+        'book': book,
+    })
+
+
 @login_required
 @never_cache
 def book_mobi(request, slug):
index f2c885d..ac145db 100644 (file)
@@ -21,7 +21,11 @@ def _trim(text, trim_begin=True, trim_end=True):
         that eg. one big XML file can be compiled from many small XML files.
     """
     if trim_begin:
-        text = RE_TRIM_BEGIN.split(text, maxsplit=1)[-1]
+        parts = RE_TRIM_BEGIN.split(text, maxsplit=1)
+        text = parts[-1]
+        if len(parts) > 1:
+            lines = parts[0].count('\n')
+            text = f'<!--TRIM:{lines}-->' + text
     if trim_end:
         text = RE_TRIM_END.split(text, maxsplit=1)[0]
     return text
index e46ce2f..876bda9 100644 (file)
@@ -48,7 +48,7 @@ def add_rdf(root, book_source):
     # created_at
     etree.SubElement(desc, DCNS('date')).text = date.today().isoformat()
     # date.pd
-    etree.SubElement(desc, DCNS('date.pd')).text = book.pd_year
+    etree.SubElement(desc, DCNS('date.pd')).text = str(book.pd_year)
     #publisher
     etree.SubElement(desc, DCNS('publisher')). text = 'Fundacja Wolne Lektury'
     #language