Hard linking of texts to catalogue. master
author Radek Czajka <rczajka@rczajka.pl>
Tue, 15 Oct 2024 13:43:24 +0000 (15:43 +0200)
committer Radek Czajka <rczajka@rczajka.pl>
Tue, 15 Oct 2024 13:43:24 +0000 (15:43 +0200)
13 files changed:
src/catalogue/admin.py
src/documents/ebook_utils.py
src/documents/management/commands/add_parent.py [deleted file]
src/documents/management/commands/insert_isbn.py [deleted file]
src/documents/migrations/0017_rename_catalogue_book.py [new file with mode: 0644]
src/documents/migrations/0018_book_catalogue_book_alter_book_catalogue_book_slug.py [new file with mode: 0644]
src/documents/migrations/0019_fk_uses_id.py [new file with mode: 0644]
src/documents/migrations/0020_alter_book_dc_slug.py [new file with mode: 0644]
src/documents/models/book.py
src/documents/templates/documents/book_detail.html
src/documents/urls.py
src/documents/views.py
src/sources/models.py

index 399f9fd..c319af5 100644 (file)
@@ -328,13 +328,13 @@ class SourcesInline(admin.TabularInline):
     extra = 1
 
 
-class SourcesInline(admin.TabularInline):
+class EditorNoteInline(admin.TabularInline):
     model = models.EditorNote
     extra = 1
 
 
 class BookAdmin(WikidataAdminMixin, NumericFilterModelAdmin, VersionAdmin):
-    inlines = [SourcesInline]
+    inlines = [EditorNoteInline, SourcesInline]
     list_display = [
         "smart_title",
         "authors_str",
index c5a758f..5c36cbe 100644 (file)
@@ -15,7 +15,7 @@ class RedakcjaDocProvider(DocProvider):
 
     def by_slug(self, slug):
         print(slug)
-        return BytesIO(Book.objects.get(catalogue_book_id=slug
+        return BytesIO(Book.objects.get(dc_slug=slug
                     ).materialize(publishable=self.publishable
                     ).encode('utf-8'))
 
diff --git a/src/documents/management/commands/add_parent.py b/src/documents/management/commands/add_parent.py
deleted file mode 100644 (file)
index 4bd242d..0000000
+++ /dev/null
@@ -1,99 +0,0 @@
-# This file is part of FNP-Redakcja, licensed under GNU Affero GPLv3 or later.
-# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
-#
-import sys
-
-from datetime import date
-from lxml import etree
-
-from django.core.management import BaseCommand
-
-from documents.models import Book
-from librarian import RDFNS, DCNS
-
-TEMPLATE = '''<utwor>
-<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
-<rdf:Description rdf:about="http://redakcja.wolnelektury.pl/documents/book/%(slug)s/">
-%(dc)s
-</rdf:Description>
-</rdf:RDF>
-
-</utwor>
-'''
-
-DC_TEMPLATE = '<dc:%(tag)s xml:lang="pl" xmlns:dc="http://purl.org/dc/elements/1.1/">%(value)s</dc:%(tag)s>'
-
-DC_TAGS = (
-    'creator',
-    'title',
-    'relation.hasPart',
-    'contributor.translator',
-    'contributor.editor',
-    'contributor.technical_editor',
-    'contributor.funding',
-    'contributor.thanks',
-    'publisher',
-    'subject.period',
-    'subject.type',
-    'subject.genre',
-    'description',
-    'identifier.url',
-    'source',
-    'source.URL',
-    'rights.license',
-    'rights',
-    'date.pd',
-    'format',
-    'type',
-    'date',
-    'audience',
-    'language',
-)
-
-IDENTIFIER_PREFIX = 'http://wolnelektury.pl/katalog/lektura/'
-
-
-def dc_desc_element(book):
-    xml = book.materialize()
-    tree = etree.fromstring(xml)
-    return tree.find(".//" + RDFNS("Description"))
-
-
-def distinct_dc_values(tag, desc_elements):
-    values = set()
-    for desc in desc_elements:
-        values.update(elem.text for elem in desc.findall(DCNS(tag)))
-    return values
-
-
-class Command(BaseCommand):
-    args = 'slug'
-
-    def handle(self, slug, **options):
-        children_slugs = [line.strip() for line in sys.stdin]
-        children = Book.objects.filter(catalogue_book_id__in=children_slugs)
-        desc_elements = [dc_desc_element(child) for child in children]
-        title = u'Utwory wybrane'
-        own_attributes = {
-            'title': title,
-            'relation.hasPart': [IDENTIFIER_PREFIX + child_slug for child_slug in children_slugs],
-            'identifier.url': IDENTIFIER_PREFIX + slug,
-            'date': date.today().isoformat(),
-        }
-        dc_tags = []
-        for tag in DC_TAGS:
-            if tag in own_attributes:
-                values = own_attributes[tag]
-                if not isinstance(values, list):
-                    values = [values]
-            else:
-                values = distinct_dc_values(tag, desc_elements)
-            for value in values:
-                dc_tags.append(DC_TEMPLATE % {'tag': tag, 'value': value})
-        xml = TEMPLATE % {'slug': slug, 'dc': '\n'.join(dc_tags)}
-        Book.create(
-            text=xml,
-            creator=None,
-            slug=slug,
-            title=title,
-            gallery=slug)
diff --git a/src/documents/management/commands/insert_isbn.py b/src/documents/management/commands/insert_isbn.py
deleted file mode 100644 (file)
index 15104d7..0000000
+++ /dev/null
@@ -1,95 +0,0 @@
-# This file is part of FNP-Redakcja, licensed under GNU Affero GPLv3 or later.
-# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
-#
-import csv
-
-import sys
-from django.contrib.auth.models import User
-from lxml import etree
-from collections import defaultdict
-from django.core.management import BaseCommand
-
-from documents.models import Book
-from librarian import RDFNS, DCNS
-
-CONTENT_TYPES = {
-    'pdf':  'application/pdf',
-    'epub': 'application/epub+zip',
-    'mobi': 'application/x-mobipocket-ebook',
-    'txt':  'text/plain',
-    'html': 'text/html',
-}
-
-
-ISBN_TEMPLATES = (
-    r'<dc:relation.hasFormat id="%(format)s" xmlns:dc="http://purl.org/dc/elements/1.1/">%(url)s'
-    r'</dc:relation.hasFormat>',
-    r'<meta refines="#%(format)s" id="%(format)s-id" property="dcterms:identifier">ISBN-%(isbn)s</meta>',
-    r'<meta refines="#%(format)s-id" property="identifier-type">ISBN</meta>',
-    r'<meta refines="#%(format)s" property="dcterms:format">%(content_type)s</meta>',
-)
-
-
-def url_for_format(slug, format):
-    if format == 'html':
-        return 'https://wolnelektury.pl/katalog/lektura/%s.html' % slug
-    else:
-        return 'http://wolnelektury.pl/media/book/%(format)s/%(slug)s.%(format)s' % {'slug': slug, 'format': format}
-
-
-class Command(BaseCommand):
-    args = 'csv_file'
-
-    def add_arguments(self, parser):
-        self.add_argument(
-            '-u', '--username', dest='username', metavar='USER',
-            help='Assign commits to this user (required, preferably yourself).')
-
-    def handle(self, csv_file, **options):
-        username = options.get('username')
-
-        if username:
-            user = User.objects.get(username=username)
-        else:
-            print('Please provide a username.')
-            sys.exit(1)
-
-        csvfile = open(csv_file, 'rb')
-        isbn_lists = defaultdict(list)
-        for slug, format, isbn in csv.reader(csvfile, delimiter=','):
-            isbn_lists[slug].append((format, isbn))
-        csvfile.close()
-
-        for slug, isbn_list in isbn_lists.iteritems():
-            print('processing %s' % slug)
-            book = Book.objects.get(catalogue_book_id=slug)
-            chunk = book.chunk_set.first()
-            old_head = chunk.head
-            src = old_head.materialize()
-            tree = etree.fromstring(src)
-            isbn_node = tree.find('.//' + DCNS("relation.hasFormat"))
-            if isbn_node is not None:
-                print('%s already contains ISBN metadata, skipping' % slug)
-                continue
-            desc = tree.find(".//" + RDFNS("Description"))
-            for format, isbn in isbn_list:
-                for template in ISBN_TEMPLATES:
-                    isbn_xml = template % {
-                        'format': format,
-                        'isbn': isbn,
-                        'content_type': CONTENT_TYPES[format],
-                        'url': url_for_format(slug, format),
-                    }
-                    element = etree.XML(isbn_xml)
-                    element.tail = '\n'
-                    desc.append(element)
-            new_head = chunk.commit(
-                etree.tostring(tree, encoding='unicode'),
-                author=user,
-                description='automatyczne dodanie isbn'
-            )
-            print('committed %s' % slug)
-            if old_head.publishable:
-                new_head.set_publishable(True)
-            else:
-                print('Warning: %s not publishable' % slug)
diff --git a/src/documents/migrations/0017_rename_catalogue_book.py b/src/documents/migrations/0017_rename_catalogue_book.py
new file mode 100644 (file)
index 0000000..2b717eb
--- /dev/null
@@ -0,0 +1,18 @@
+# Generated by Django 4.1.9 on 2024-10-15 14:21
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+    """Rename the ``catalogue_book`` field of ``documents.Book`` to ``dc_slug``."""
+
+    dependencies = [
+        ("documents", "0016_project_can_sell_project_private_notes"),
+    ]
+
+    operations = [
+        # Pure rename: the field definition itself is not altered here, and
+        # the name ``catalogue_book`` becomes free for reuse.
+        migrations.RenameField(
+            model_name="book",
+            old_name="catalogue_book",
+            new_name="dc_slug",
+        ),
+    ]
diff --git a/src/documents/migrations/0018_book_catalogue_book_alter_book_catalogue_book_slug.py b/src/documents/migrations/0018_book_catalogue_book_alter_book_catalogue_book_slug.py
new file mode 100644 (file)
index 0000000..2ee2a13
--- /dev/null
@@ -0,0 +1,43 @@
+# Generated by Django 4.1.9 on 2024-10-15 14:22
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+    """Add a new ``catalogue_book`` foreign key next to the slug-based ``dc_slug`` link."""
+
+    dependencies = [
+        ("catalogue", "0055_book_parent_book_parent_number_editornote"),
+        ("documents", "0017_rename_catalogue_book"),
+    ]
+
+    operations = [
+        # New nullable, non-editable link to catalogue.book. No ``to_field``
+        # is given, so it references the target's primary key; PROTECT blocks
+        # deleting a catalogue book that still has document books attached.
+        migrations.AddField(
+            model_name="book",
+            name="catalogue_book",
+            field=models.ForeignKey(
+                blank=True,
+                null=True,
+                editable=False,
+                on_delete=django.db.models.deletion.PROTECT,
+                related_name="document_books",
+                related_query_name="document_book",
+                to="catalogue.book",
+            ),
+        ),
+        # ``dc_slug`` is declared as an unconstrained (db_constraint=False)
+        # foreign key to catalogue.book via ``slug``. Its related names carry
+        # a ``_by_slug`` suffix, so they differ from the ones used by the
+        # ``catalogue_book`` field added above.
+        migrations.AlterField(
+            model_name="book",
+            name="dc_slug",
+            field=models.ForeignKey(
+                blank=True,
+                db_constraint=False,
+                editable=False,
+                null=True,
+                on_delete=django.db.models.deletion.DO_NOTHING,
+                related_name="document_books_by_slug",
+                related_query_name="document_book_by_slug",
+                to="catalogue.book",
+                to_field="slug",
+            ),
+        ),
+    ]
diff --git a/src/documents/migrations/0019_fk_uses_id.py b/src/documents/migrations/0019_fk_uses_id.py
new file mode 100644 (file)
index 0000000..ccd562e
--- /dev/null
@@ -0,0 +1,32 @@
+# Generated by Django 4.1.9 on 2024-10-15 14:23
+
+from django.db import migrations
+
+
+def copy_slug_to_fk(apps, schema_editor):
+    """Copy every document book's slug-based catalogue link into ``catalogue_book``.
+
+    For each ``documents.Book`` the catalogue book reachable through
+    ``dc_slug`` is resolved and stored in the ``catalogue_book`` field.
+    Books with no slug-based link, or whose link resolves to no existing
+    catalogue record, are left unchanged.
+    """
+    CatalogueBook = apps.get_model('catalogue', 'Book')
+    DocumentBook = apps.get_model('documents', 'Book')
+
+    for document in DocumentBook.objects.all():
+        try:
+            linked = document.dc_slug
+        except CatalogueBook.DoesNotExist:
+            # The stored slug matches no catalogue record; nothing to copy.
+            continue
+        if linked is None:
+            continue
+        document.catalogue_book = linked
+        document.save()
+
+
+class Migration(migrations.Migration):
+    """Data migration: fill ``catalogue_book`` from the slug-based ``dc_slug`` link."""
+
+    dependencies = [
+        ("documents", "0018_book_catalogue_book_alter_book_catalogue_book_slug"),
+    ]
+
+    operations = [
+        # Forward: run copy_slug_to_fk. Reverse: a no-op, so unapplying this
+        # migration leaves the copied values in place.
+        migrations.RunPython(
+            copy_slug_to_fk,
+            migrations.RunPython.noop,
+        )
+    ]
diff --git a/src/documents/migrations/0020_alter_book_dc_slug.py b/src/documents/migrations/0020_alter_book_dc_slug.py
new file mode 100644 (file)
index 0000000..47974b0
--- /dev/null
@@ -0,0 +1,20 @@
+# Generated by Django 4.1.9 on 2024-10-15 14:39
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    """Turn ``dc_slug`` into a plain nullable ``CharField``."""
+
+    dependencies = [
+        ("documents", "0019_fk_uses_id"),
+    ]
+
+    operations = [
+        # After this step ``dc_slug`` is an ordinary text column (up to 2048
+        # characters) rather than a relation to catalogue.book.
+        migrations.AlterField(
+            model_name="book",
+            name="dc_slug",
+            field=models.CharField(
+                blank=True, editable=False, max_length=2048, null=True
+            ),
+        ),
+    ]
index 9c12085..293d2c4 100644 (file)
@@ -47,13 +47,17 @@ class Book(models.Model):
         db_index=True, on_delete=models.SET_NULL, editable=False)
     dc = models.JSONField(null=True, editable=False)
     cover = models.FileField(blank=True, upload_to='documents/cover')
+
+    dc_slug = models.CharField(
+        max_length=2048,
+        null=True, blank=True,
+        editable=False,
+    )
     catalogue_book = models.ForeignKey(
         'catalogue.Book',
-        models.DO_NOTHING,
-        to_field='slug',
+        models.PROTECT,
         null=True, blank=True,
-        db_constraint=False,
-        editable=False, db_index=True,
+        editable=False,
         related_name='document_books',
         related_query_name='document_book',
     )
@@ -366,13 +370,13 @@ class Book(models.Model):
 
     def refresh_dc_cache(self):
         update = {
-            'catalogue_book_id': None,
+            'dc_slug': None,
             'dc_cover_image': None,
         }
 
         info = self.book_info()
         if info is not None:
-            update['catalogue_book_id'] = info.url.slug
+            update['dc_slug'] = info.url.slug
             if info.cover_source:
                 try:
                     image = Image.objects.get(pk=int(info.cover_source.rstrip('/').rsplit('/', 1)[-1]))
index b9dd6ab..a9c4d2b 100644 (file)
@@ -16,7 +16,7 @@
     </div>
     <div class="card-body">
       <div class="row">
-       <div class="col-md-8">
+       <div class="col-md-6">
 
 
           {% if editable %}<form method='POST'>{% csrf_token %}{% endif %}
 
 
        </div>
-       <div class="col-md-4">
+       <div class="col-md-6">
           {% with cbook=book.catalogue_book %}
             {% if cbook %}
-             W katalogu:
-             <a href="{% url 'catalogue_book' cbook.slug %}">{{ cbook }}</a>
+              <p>
+               W katalogu:
+               <a href="{% url 'catalogue_book' cbook.slug %}">{{ cbook }}</a>
+              </p>
+
+              {% if cbook.slug != book.dc_slug %}
+                <p class="alert alert-warning">
+                  Slug książki w metadanych tekstu nie zgadza się ze slugiem książki w katalogu.</p>
+                <p>
+                Upewnij się, że tekst jest przypisany do właściwego rekordu w katalogu:<br>
+                <tt>{{ book.dc_slug }}</tt> (tekst)<br>
+                <tt>{{ cbook.slug }}</tt> (<a href="/admin/catalogue/book/{{ cbook.pk }}/change/">katalog</a>)<br>
+                </p>
+              {% endif %}
+
+            {% else %}
+              <p class="alert alert-warning">Tekst nie przypisany do rekordu książki w katalogu.</p>
+
+              {% if book.dc_slug %}
+                {% if cbook_by_slug %}
+                  W katalogu znajduje się
+                  <a href="/admin/catalogue/book/{{ cbook_by_slug.id }}/change/">rekord książki</a> o slugu:<br>
+                  {{ book.dc_slug }}<br><br>
+                  {% if request.user.is_staff %}
+                    <form method="POST" action="{% url 'documents_book_attach_to_catalogue' book.id %}">
+                      {% csrf_token %}
+                      <button class="btn btn-primary">Przypisz tekst do tego rekordu w katalogu</button>
+                    </form>
+                  {% endif %}
+                {% else %}
+                  <p>
+                    W katalogu brak rekordu o slugu:<br><tt>{{ book.dc_slug }}</tt>
+                  </p>
+                {% endif %}
+              {% else %}
+                <p>
+                  Ten tekst nie ma slugu określonego w metadanych.
+                </p>
+              {% endif %}
             {% endif %}
           {% endwith %}
        </div>
                         {{ error }}
                       </div>
                     {% endfor %}
+                    {% for info in site.info %}
+                      <div class="alert alert-info">
+                        {{ info }}
+                      </div>
+                    {% endfor %}
                   {% endif %}
                   {% for warning in site.warnings %}
                     <div class="alert alert-warning">
                     id:{{ site.id }}
                   {% endif %}
                   {% with last=site.last %}
-                  {% if last %}
-                    {{ last.created_at }} &rarr;
-                    {{ last.started_at }} &rarr;
-                    {{ last.finished_at }}
-                    <span title="{{ last.error }}">
-                      ({{ last.get_status_display }})
-                    </span>
-                    <!-- {{ site.last.id }} -->
-                  {% endif %}
+                    {% if last %}
+                      {{ last.created_at }} &rarr;
+                      {{ last.started_at }} &rarr;
+                      {{ last.finished_at }}
+                      <span title="{{ last.error }}">
+                        ({{ last.get_status_display }})
+                      </span>
+                      <!-- {{ site.last.id }} -->
+                    {% endif %}
                   {% endwith %}
                 {% endfor %}
               {% endif %}
index e7b68c2..60aa3fa 100644 (file)
@@ -46,6 +46,7 @@ urlpatterns = [
     path('book/<slug:slug>/pdf', views.book_pdf, name="documents_book_pdf"),
     path('book/<slug:slug>/pdf-mobile', views.book_pdf, kwargs={'mobile': True}, name="documents_book_pdf_mobile"),
     path('book/<slug:slug>/synchro', views.synchro, name="documents_book_synchro"),
+    path('book/<int:pk>/attach/', views.attach_book_to_catalogue, name="documents_book_attach_to_catalogue"),
 
     path('chunk_add/<slug:slug>/<slug:chunk>/',
         views.chunk_add, name="documents_chunk_add"),
index 8f40303..97d2ad6 100644 (file)
@@ -35,6 +35,7 @@ from . import helpers
 from .helpers import active_tab
 from .models import (Book, Chunk, Image, BookPublishRecord, 
         ChunkPublishRecord, ImagePublishRecord, Project)
+import catalogue.models
 from fileupload.views import UploadView
 
 #
@@ -237,7 +238,7 @@ def book_xml(request, slug):
 
 @never_cache
 def book_xml_dc(request, slug):
-    book = get_object_or_404(Book, catalogue_book_id=slug)
+    book = get_object_or_404(Book, dc_slug=slug)
     return serve_xml(request, book, slug)
 
 
@@ -409,14 +410,18 @@ def book(request, slug):
         except:
             pass
 
-    if book.catalogue_book_id:
-        audio_items = requests.get(f'https://audio.wolnelektury.pl/archive/book/{book.catalogue_book_id}.json').json()['items']
+    cbook_by_slug = None
+    if book.dc_slug:
+        audio_items = requests.get(f'https://audio.wolnelektury.pl/archive/book/{book.dc_slug}.json').json()['items']
         has_audio = bool(audio_items)
         can_sell_audio = has_audio and all(x['project']['can_sell'] for x in audio_items)
+
+        if book.catalogue_book is None or book.dc_slug != book.catalogue_book.slug:
+            cbook_by_slug = catalogue.models.Book.objects.filter(slug=book.dc_slug).first()
     else:
         has_audio = None
         can_sell_audio = None
-        
+
     return render(request, "documents/book_detail.html", {
         "book": book,
         "doc": doc,
@@ -428,6 +433,7 @@ def book(request, slug):
         "editable": editable,
         "has_audio": has_audio,
         "can_sell_audio": can_sell_audio,
+        "cbook_by_slug": cbook_by_slug,
     })
 
 
@@ -840,3 +846,13 @@ def synchro(request, slug):
         'table': table,
         'error': error,
     })
+
+
+@permission_required('documents.change_book')
+def attach_book_to_catalogue(request, pk):
+    """Link a document book to the catalogue record matching its DC slug.
+
+    Looks up the document book by primary key and, if it has a ``dc_slug``,
+    stores the catalogue book with that slug in ``catalogue_book``.
+
+    Responds with 405 for anything but POST, with 404 if the document book
+    or the matching catalogue book does not exist, and otherwise redirects
+    to ``dbook.get_absolute_url()``.
+    """
+    # This view writes to the database, so it must not be reachable through
+    # safe methods such as GET (prefetching, crawlers, cross-site links).
+    if request.method != 'POST':
+        return http.HttpResponseNotAllowed(['POST'])
+
+    dbook = get_object_or_404(Book, pk=pk)
+    if dbook.dc_slug:
+        cbook = get_object_or_404(catalogue.models.Book, slug=dbook.dc_slug)
+        dbook.catalogue_book = cbook
+        dbook.save()
+    return http.HttpResponseRedirect(dbook.get_absolute_url())
index dd521da..ed2775e 100644 (file)
@@ -130,7 +130,7 @@ class BookSource(models.Model):
         return f'{self.source} -> {self.book}'
 
     def get_absolute_url(self):
-        return reverse('source_book_prepare', args=[self.pk])
+        return reverse('source_book_prepare', args=[self.book.pk])
 
     def get_view_files(self):
         # TODO: won't work for PDFs.
@@ -162,6 +162,7 @@ class BookSource(models.Model):
         if dbook is None:
             dbook = DBook.create(
                 user, texts[0],
+                catalogue_book=book,
                 title=book.title,
                 slug=str(uuid.uuid4()),
             )