Epub debugging.

author Radek Czajka <rczajka@rczajka.pl>

Tue, 5 Dec 2023 14:08:34 +0000 (15:08 +0100)

committer Radek Czajka <rczajka@rczajka.pl>

Tue, 5 Dec 2023 14:08:34 +0000 (15:08 +0100)
author Radek Czajka <rczajka@rczajka.pl>
Tue, 5 Dec 2023 14:08:34 +0000 (15:08 +0100)
committer Radek Czajka <rczajka@rczajka.pl>
Tue, 5 Dec 2023 14:08:34 +0000 (15:08 +0100)
diff --git a/requirements/requirements.txt b/requirements/requirements.txt

index 7dcf4fd..8feffa8 100644 (file)
--- a/requirements/requirements.txt
+++ b/requirements/requirements.txt
@@ -10,7 +10,7 @@ python-slugify==8.0.1
  python-docx==0.8.11
  Wikidata==0.7
  
-librarian==23.08
+librarian==23.12
  
  ## Django
  Django==4.1.9
diff --git a/src/depot/publishers/base.py b/src/depot/publishers/base.py

index 88ee56b..8fff399 100644 (file)
--- a/src/depot/publishers/base.py
+++ b/src/depot/publishers/base.py
@@ -44,10 +44,11 @@ class BasePublisher:
              ) if p is not None else ''
              for p in wlbook.meta.authors
          ) + '<br>'
-        description += '<a href="https://wolnelektury.pl/katalog/lektura/{}/">{}</a><br>'.format(
-            wlbook.meta.url.slug,
-            wlbook.meta.title
-        )
+        if wlbook.meta.url is not None:
+            description += '<a href="https://wolnelektury.pl/katalog/lektura/{}/">{}</a><br>'.format(
+                wlbook.meta.url.slug,
+                wlbook.meta.title
+            )
          if wlbook.meta.translators:
              description += 'tłum. ' + ', '.join(p.readable() for p in wlbook.meta.translators) + '<br>'
          description += 'Epoka: ' + ', '.join(
diff --git a/src/documents/templates/documents/book_detail.html b/src/documents/templates/documents/book_detail.html

index a286302..2d58734 100644 (file)
--- a/src/documents/templates/documents/book_detail.html
+++ b/src/documents/templates/documents/book_detail.html
@@ -122,7 +122,8 @@
                <a href="{% url 'documents_book_txt' book.slug %}" rel="nofollow">{% trans "TXT version" %}</a><br/>
                <a href="{% url 'documents_book_pdf' book.slug %}" rel="nofollow">{% trans "PDF version" %}</a><br/>
                <a href="{% url 'documents_book_pdf_mobile' book.slug %}" rel="nofollow">{% trans "PDF version for mobiles" %}</a><br/>
-              <a href="{% url 'documents_book_epub' book.slug %}" rel="nofollow">{% trans "EPUB version" %}</a><br/>
+              <a href="{% url 'documents_book_epub' book.slug %}" rel="nofollow">{% trans "EPUB version" %}</a>
+              <a href="{% url 'documents_book_epubcheck' book.slug %}" rel="nofollow">sprawdź</a><br/>
                <a href="{% url 'documents_book_mobi' book.slug %}" rel="nofollow">{% trans "MOBI version" %}</a><br/>
              </p>
  
diff --git a/src/documents/templates/documents/book_epubcheck.html b/src/documents/templates/documents/book_epubcheck.html

new file mode 100644 (file)

index 0000000..ca0a0f9
--- /dev/null
+++ b/src/documents/templates/documents/book_epubcheck.html
@@ -0,0 +1,43 @@
+{% extends "documents/base.html" %}
+{% load book_list i18n %}
+{% load bootstrap4 %}
+{% load depot %}
+{% load isbn %}
+
+
+{% block titleextra %}{{ book.title }}{% endblock %}
+
+
+{% block content %}
+
+  <div class="card mt-4">
+    <div class="card-header">
+      <h1>Weryfikacja epub: <a href="{{ book.get_absolute_url }}">{{ book.title }}</a></h1>
+    </div>
+    <div class="card-body">
+      {% for msg in messages %}
+        <div class="alert alert-{% if msg.severity == "ERROR" %}danger{% else %}info{% endif %}">
+          {{ msg.message }}
+          {% if msg.suggestion %}
+            <br><br>
+            {{ msg.suggestion }}
+          {% endif %}
+          {% for loc in msg.locations %}
+            {% if loc.wl_chunk %}
+              <br>
+              <a class="btn btn-primary" target="_blank" href="{% url 'wiki_editor' book.slug loc.wl_chunk.slug %}#CodeMirrorPerspective">
+                {% if loc.wl_chunk.title %}
+                  {{ loc.wl_chunk.title }}:
+                {% endif %}
+                Linia {{ loc.wl_line }}
+              </a>
+            {% else %}
+              <br><small><tt>{{ loc }}</tt></small>
+            {% endif %}
+          {% endfor %}
+        </div>
+      {% endfor %}
+    </div>
+  </div>
+
+{% endblock content %}
diff --git a/src/documents/urls.py b/src/documents/urls.py

index 71ce10a..d9114cc 100644 (file)
--- a/src/documents/urls.py
+++ b/src/documents/urls.py
@@ -41,6 +41,7 @@ urlpatterns = [
      path('book/<slug:slug>/txt', views.book_txt, name="documents_book_txt"),
      path('book/<slug:slug>/html', views.book_html, name="documents_book_html"),
      path('book/<slug:slug>/epub', views.book_epub, name="documents_book_epub"),
+    path('book/<slug:slug>/epubcheck', views.book_epubcheck, name="documents_book_epubcheck"),
      path('book/<slug:slug>/mobi', views.book_mobi, name="documents_book_mobi"),
      path('book/<slug:slug>/pdf', views.book_pdf, name="documents_book_pdf"),
      path('book/<slug:slug>/pdf-mobile', views.book_pdf, kwargs={'mobile': True}, name="documents_book_pdf_mobile"),
diff --git a/src/documents/views.py b/src/documents/views.py

index ecc9219..905e885 100644 (file)
--- a/src/documents/views.py
+++ b/src/documents/views.py
@@ -23,6 +23,7 @@ from django.utils.translation import gettext_lazy as _
  from django.views.decorators.http import require_POST
  from django_cas_ng.decorators import user_passes_test
  
+from librarian import epubcheck
  from apiclient import api_call, NotAuthorizedError
  from . import forms
  from . import helpers
@@ -300,7 +301,8 @@ def book_epub(request, slug):
  
      from librarian.builders import EpubBuilder
      epub = EpubBuilder(
-        base_url='file://' + book.gallery_path() + '/'
+        base_url='file://' + book.gallery_path() + '/',
+        debug=True
      ).build(doc).get_bytes()
      response = HttpResponse(content_type='application/epub+zip')
      response['Content-Disposition'] = 'attachment; filename=%s' % book.slug + '.epub'
@@ -308,6 +310,35 @@ def book_epub(request, slug):
      return response
  
  
+@login_required
+@never_cache
+def book_epubcheck(request, slug):
+    book = get_object_or_404(Book, slug=slug)
+    if not book.accessible(request):
+        return HttpResponseForbidden("Not authorized.")
+
+    # TODO: move to celery
+    doc = book.wldocument(librarian2=True)
+    # TODO: error handling
+
+    from librarian.builders import EpubBuilder
+    epub = EpubBuilder(
+        base_url='file://' + book.gallery_path() + '/',
+        debug=True
+    ).build(doc)
+    fname = epub.get_filename()
+
+    messages = epubcheck.epubcheck(fname)
+    for message in messages:
+        for location in message.get('locations', []):
+            if 'wl_chunk' in location:
+                location['wl_chunk'] = book[location['wl_chunk']]
+    return render(request, 'documents/book_epubcheck.html', {
+        'messages': messages,
+        'book': book,
+    })
+
+
  @login_required
  @never_cache
  def book_mobi(request, slug):
diff --git a/src/documents/xml_tools.py b/src/documents/xml_tools.py

index f2c885d..ac145db 100644 (file)
--- a/src/documents/xml_tools.py
+++ b/src/documents/xml_tools.py
@@ -21,7 +21,11 @@ def _trim(text, trim_begin=True, trim_end=True):
          that eg. one big XML file can be compiled from many small XML files.
      """
      if trim_begin:
-        text = RE_TRIM_BEGIN.split(text, maxsplit=1)[-1]
+        parts = RE_TRIM_BEGIN.split(text, maxsplit=1)
+        text = parts[-1]
+        if len(parts) > 1:
+            lines = parts[0].count('\n')
+            text = f'<!--TRIM:{lines}-->' + text
      if trim_end:
          text = RE_TRIM_END.split(text, maxsplit=1)[0]
      return text
diff --git a/src/sources/document.py b/src/sources/document.py

index e46ce2f..876bda9 100644 (file)
--- a/src/sources/document.py
+++ b/src/sources/document.py
@@ -48,7 +48,7 @@ def add_rdf(root, book_source):
      # created_at
      etree.SubElement(desc, DCNS('date')).text = date.today().isoformat()
      # date.pd
-    etree.SubElement(desc, DCNS('date.pd')).text = book.pd_year
+    etree.SubElement(desc, DCNS('date.pd')).text = str(book.pd_year)
      #publisher
      etree.SubElement(desc, DCNS('publisher')). text = 'Fundacja Wolne Lektury'
      #language
author	Radek Czajka <rczajka@rczajka.pl>
	Tue, 5 Dec 2023 14:08:34 +0000 (15:08 +0100)
committer	Radek Czajka <rczajka@rczajka.pl>
	Tue, 5 Dec 2023 14:08:34 +0000 (15:08 +0100)
requirements/requirements.txt		patch | blob | history
src/depot/publishers/base.py		patch | blob | history
src/documents/templates/documents/book_detail.html		patch | blob | history
src/documents/templates/documents/book_epubcheck.html	[new file with mode: 0644]	patch | blob
src/documents/urls.py		patch | blob | history
src/documents/views.py		patch | blob | history
src/documents/xml_tools.py		patch | blob | history
src/sources/document.py		patch | blob | history