Preliminary source objects.
author Radek Czajka <rczajka@rczajka.pl>
Tue, 3 Oct 2023 13:31:56 +0000 (15:31 +0200)
committer Radek Czajka <rczajka@rczajka.pl>
Tue, 3 Oct 2023 13:31:56 +0000 (15:31 +0200)
27 files changed:
src/catalogue/admin.py
src/catalogue/models.py
src/catalogue/templates/catalogue/book_detail.html
src/redakcja/settings/__init__.py
src/redakcja/static/js/wiki/view_gallery.js
src/redakcja/static/js/wiki/view_properties.js
src/redakcja/static/js/wiki/wikiapi.js
src/redakcja/urls.py
src/sources/__init__.py [new file with mode: 0644]
src/sources/admin.py [new file with mode: 0644]
src/sources/apps.py [new file with mode: 0644]
src/sources/conversion.py [new file with mode: 0644]
src/sources/document.py [new file with mode: 0644]
src/sources/migrations/0001_initial.py [new file with mode: 0644]
src/sources/migrations/0002_source_wikisource.py [new file with mode: 0644]
src/sources/migrations/0003_source_modified_at_source_processed_at.py [new file with mode: 0644]
src/sources/migrations/__init__.py [new file with mode: 0644]
src/sources/models.py [new file with mode: 0644]
src/sources/ocr.py [new file with mode: 0644]
src/sources/templates/sources/prepare.html [new file with mode: 0644]
src/sources/templates/sources/source_detail.html [new file with mode: 0644]
src/sources/urls.py [new file with mode: 0644]
src/sources/utils.py [new file with mode: 0644]
src/sources/views.py [new file with mode: 0644]
src/wiki/templates/wiki/document_details_base.html
src/wiki/urls.py
src/wiki/views.py

index af08369..a035cc8 100644 (file)
@@ -15,6 +15,7 @@ from fnpdjango.actions import export_as_csv_action
 from modeltranslation.admin import TabbedTranslationAdmin
 from . import models
 import documents.models
 from modeltranslation.admin import TabbedTranslationAdmin
 from . import models
 import documents.models
+import sources.models
 from .wikidata import WikidataAdminMixin
 
 
 from .wikidata import WikidataAdminMixin
 
 
@@ -301,7 +302,13 @@ class FirstPublicationYearFilter(admin.ListFilter):
         ]
 
 
         ]
 
 
+class SourcesInline(admin.TabularInline):  # Tabular inline for BookSource rows; listed in BookAdmin.inlines.
+    model = sources.models.BookSource
+    extra = 1  # number of extra blank inline forms offered by the admin
+
+
 class BookAdmin(WikidataAdminMixin, NumericFilterModelAdmin):
 class BookAdmin(WikidataAdminMixin, NumericFilterModelAdmin):
+    inlines = [SourcesInline]
     list_display = [
         "smart_title",
         "authors_str",
     list_display = [
         "smart_title",
         "authors_str",
index 9074fa8..9768057 100644 (file)
@@ -321,6 +321,12 @@ class Book(WikidataModel):
     def get_absolute_url(self):
         return reverse("catalogue_book", args=[self.slug])
 
     def get_absolute_url(self):
         return reverse("catalogue_book", args=[self.slug])
 
+    def is_text_public(self):  # Truthy when free_license is set, or pd_year is set and not later than the current year.
+        return self.free_license or (self.pd_year is not None and self.pd_year <= date.today().year)
+
+    def audio_status(self):  # Placeholder: currently always returns an empty dict.
+        return {}
+    
     @property
     def wluri(self):
         return f'https://wolnelektury.pl/katalog/lektura/{self.slug}/'
     @property
     def wluri(self):
         return f'https://wolnelektury.pl/katalog/lektura/{self.slug}/'
@@ -369,6 +375,15 @@ class Book(WikidataModel):
             for work_type in WorkType.objects.all()
         }
 
             for work_type in WorkType.objects.all()
         }
 
+    def scans_gallery(self):
+        """Return the pk of the first related BookSource, or '' when there is none."""
+        first_source = self.booksource_set.first()
+        return '' if first_source is None else first_source.pk
+
+    def is_published(self):  # True when at least one object in document_books reports is_published().
+        return any(b.is_published() for b in self.document_books.all())
+    
     def update_monthly_stats(self):
         # Find publication date.
         # By default, get previous 12 months.
     def update_monthly_stats(self):
         # Find publication date.
         # By default, get previous 12 months.
index 85cfcc9..3c98787 100644 (file)
 {% block content %}
   <div class="card">
     <div class="card-header">
 {% block content %}
   <div class="card">
     <div class="card-header">
-      <h1>{% trans "Catalogue" %}</h1>
+      <h1>{{ book }}</h1>
     </div>
     <div class="card-body">
       <table class="table">
     </div>
     <div class="card-body">
       <table class="table">
+        {% with is_published=book.is_published %}
+          {% if is_published %}
+            <a href="https://wolnelektury.pl/katalog/lektura/{{ book.slug }}" target="_blank">
+          {% endif %}
+          {% if is_published %}
+            </a>
+          {% endif %}
+        {% endwith %}
+
         {% for author in book.authors.all %}
           <tr>
             <th>
         {% for author in book.authors.all %}
           <tr>
             <th>
             {{ book.get_priorty_display }}
           </td>
           <td>
             {{ book.get_priorty_display }}
           </td>
           <td>
-            {% for b in book.document_books.all %}
-              <a href="{{ b.get_absolute_url }}">
-                {{ b }}
-              </a>
-            {% endfor %}
           </td>
         </tr>
       </table>
     </div>
   </div>
           </td>
         </tr>
       </table>
     </div>
   </div>
+
+  <div class="card mt-4">
+    <div class="card-header">
+      Tekst
+    </div>
+    <div class="card-body">
+      {% for b in book.document_books.all %}
+        <a href="{{ b.get_absolute_url }}">
+          {{ b }}
+        </a>
+      {% empty %}
+        Brak tekstu. Jeśli masz źródło ze skanami, utwórz z niego tekst niżej.
+      {% endfor %}
+    </div>
+  </div>
+
+  <div class="card mt-4">
+    <div class="card-header">
+      Audio
+    </div>
+    <div class="card-body">
+      {% with audio=book.audio_status.items %}
+        {% if audio %}
+          <table class="table">
+          {% for s in audio %}
+            <tr><td>{{ s.part }}</td><td>{{ s.youtube_status }}</td></tr>
+          {% endfor %}
+          </table>
+        {% endif %}
+      {% endwith %}
+      <!-- tbd -->
+    </div>
+  </div>
+
+  <div class="card mt-4">
+    <div class="card-header">
+      Źródło
+    </div>
+    <div class="card-body">
+      {% for bs in book.booksource_set.all %}
+        <a href="{{ bs.source.get_absolute_url }}">
+          {{ bs.source }}
+        </a>
+        {% if bs.page_start or bs.page_end %}
+          ({{ bs.page_start }}—{{ bs.page_end }})
+        {% else %}
+          (całość)
+        {% endif %}
+        <form method="post" action="{% url 'source_book_prepare' bs.pk %}">
+          {% csrf_token %}
+          <button class="btn btn-primary">Utwórz tekst książki</button>
+        </form>
+      {% empty %}
+        Brak źródła.
+        Możesz je dodać.
+      {% endfor %}
+    </div>
+  </div>
+
+
+
 {% endblock content %}
 {% endblock content %}
index 1630f00..f4f494c 100644 (file)
@@ -107,6 +107,7 @@ INSTALLED_APPS = (
     'wlxml.apps.WlxmlConfig',
     'alerts',
     'team',
     'wlxml.apps.WlxmlConfig',
     'alerts',
     'team',
+    'sources',
     'isbn',
 )
 
     'isbn',
 )
 
index 64bb46f..c238ebf 100644 (file)
         }
 
         setPage(newPage) {
         }
 
         setPage(newPage) {
-            newPage = normalizeNumber(newPage, this.doc.galleryImages.length);
+            newPage = normalizeNumber(newPage, this.galleryImages.length);
             this.$numberInput.val(newPage);
             this.config().page = newPage;
             this.$numberInput.val(newPage);
             this.config().page = newPage;
-            $('.gallery-image img', this.$element).attr('src', this.doc.galleryImages[newPage - 1].url);
+            $('.gallery-image img', this.$element).attr('src', this.galleryImages[newPage - 1].url);
         }
 
         alterZoom(delta) {
         }
 
         alterZoom(delta) {
          */
         refreshGallery(success, failure) {
             var self = this;
          */
         refreshGallery(success, failure) {
             var self = this;
-            this.doc.refreshGallery({
-                success: function(doc, data){
+            this.doc.refreshScansGallery({
+                
+                success: function(galleryImages) {
+                    self.galleryImages = galleryImages;
                     self.$image.show();
                     console.log("gconfig:", self.config().page );
                     self.setPage( self.config().page );
                     self.$image.show();
                     console.log("gconfig:", self.config().page );
                     self.setPage( self.config().page );
-                    $('#imagesCount').html("/" + doc.galleryImages.length);
+                    $('#imagesCount').html("/" + galleryImages.length);
 
                     $('.error_message', self.$element).hide();
                     if(success) success();
                 },
 
                     $('.error_message', self.$element).hide();
                     if(success) success();
                 },
-                failure: function(doc, message){
+                failure: function(message) {
                     self.$image.hide();
                     $('.error_message', self.$element).show().html(message);
                     if(failure) failure();
                     self.$image.hide();
                     $('.error_message', self.$element).show().html(message);
                     if(failure) failure();
index 3d30fd3..51f26d5 100644 (file)
                     modal.data('target-input', input);
                     var imglist = modal.find('.modal-body');
                     imglist.html('');
                     modal.data('target-input', input);
                     var imglist = modal.find('.modal-body');
                     imglist.html('');
-                    $.each(self.doc.galleryImages, (i, imgItem) => {
-                        let img = $("<img>").attr("src", imgItem.thumb).attr('title', imgItem.url).data('url', imgItem.url).on('click', function() {
-                            imglist.find('img').removeClass('active');
-                            $(this).addClass('active');
-                        });
-                        imglist.append(img);
+
+                    self.doc.refreshImageGallery({
+                        success: function(galleryImages) {
+                            // Iterate the list handed to this callback: the document object
+                            // no longer stores the fetched images in `doc.galleryImages`.
+                            $.each(galleryImages, (i, imgItem) => {
+                                let img = $("<img>").attr("src", imgItem.thumb).attr('title', imgItem.url).data('url', imgItem.url).on('click', function() {
+                                    imglist.find('img').removeClass('active');
+                                    $(this).addClass('active');
+                                });
+                                imglist.append(img);
+                            });
+                        }
                     });
                 })
                 $('#media-chooser .ctrl-ok').on('click', function (event) {
                     });
                 })
                 $('#media-chooser .ctrl-ok').on('click', function (event) {
index 1bf6ebb..14b8337 100644 (file)
@@ -35,6 +35,9 @@
            return base_path + "/history/" + arguments[1] + '/';
        }
 
            return base_path + "/history/" + arguments[1] + '/';
        }
 
+       if (vname == "ajax_document_scans") {
+            return base_path + "/scans/" + arguments[1] + '/';
+       }
        if (vname == "ajax_document_gallery") {
            return base_path + "/gallery/" + arguments[1] + '/';
        }
        if (vname == "ajax_document_gallery") {
            return base_path + "/gallery/" + arguments[1] + '/';
        }
            this.readonly = !!$("*[data-key='readonly']", meta).text();
 
            this.bookSlug = $("*[data-key='book-slug']", meta).text();
            this.readonly = !!$("*[data-key='readonly']", meta).text();
 
            this.bookSlug = $("*[data-key='book-slug']", meta).text();
+           this.scansLink = $("*[data-key='scans']", meta).text();
            this.galleryLink = $("*[data-key='gallery']", meta).text();
             this.galleryStart = parseInt($("*[data-key='gallery-start']", meta).text());
             this.fullUri = $("*[data-key='full-uri']", meta).text();
 
            this.galleryLink = $("*[data-key='gallery']", meta).text();
             this.galleryStart = parseInt($("*[data-key='gallery-start']", meta).text());
             this.fullUri = $("*[data-key='full-uri']", meta).text();
 
-           this.galleryImages = [];
            this.text = null;
            this.has_local_changes = false;
             this.active = true;
            this.text = null;
            this.has_local_changes = false;
             this.active = true;
             });
         }
 
             });
         }
 
+        refreshImageGallery(params) {  // Fetch the image gallery; params carries the success/failure callbacks.
+            if (this.galleryLink) {
+                params = $.extend({}, params, {  // copy params instead of mutating the caller's object
+                    url: reverse("ajax_document_gallery", this.galleryLink)
+                });
+            }
+            this.refreshGallery(params);  // with no url set, refreshGallery calls params.failure('Brak galerii.')
+        }
+
+        refreshScansGallery(params) {  // Fetch the scans gallery when scansLink is set; otherwise use the image gallery.
+            if (this.scansLink) {
+                params = $.extend({}, params, {  // copy params instead of mutating the caller's object
+                    url: reverse("ajax_document_scans", this.scansLink)
+                });
+                this.refreshGallery(params);
+            } else {
+                // Fallback to image gallery.
+                this.refreshImageGallery(params)
+            }
+        }
+        
         /*
          * Fetch gallery
          */
         refreshGallery(params) {
            params = $.extend({}, noops, params);
            var self = this;
         /*
          * Fetch gallery
          */
         refreshGallery(params) {
            params = $.extend({}, noops, params);
            var self = this;
-           if (!self.galleryLink) {
-               params['failure'](self, 'Brak galerii.');
+            if (!params.url) {
+               params.failure('Brak galerii.');
                return;
                return;
-           }
+            }
            $.ajax({
                method: "GET",
            $.ajax({
                method: "GET",
-               url: reverse("ajax_document_gallery", self.galleryLink),
+               url: params.url,
                dataType: 'json',
                dataType: 'json',
-               // data: {},
                success: function(data) {
                success: function(data) {
-                   self.galleryImages = data;
-                   params['success'](self, data);
+                   params.success(data);
                },
                error: function(xhr) {
                     switch (xhr.status) {
                },
                error: function(xhr) {
                     switch (xhr.status) {
                         var msg = 'Galerie dostępne tylko dla zalogowanych użytkowników.';
                         break;
                     case 404:
                         var msg = 'Galerie dostępne tylko dla zalogowanych użytkowników.';
                         break;
                     case 404:
-                        var msg = "Nie znaleziono galerii o nazwie: '" + self.galleryLink + "'.";
+                        var msg = "Nie znaleziono galerii.";
+                        break;  // without this, 404 falls through to `default` and the message is overwritten
                     default:
                     default:
-                        var msg = "Nie udało się wczytać galerii o nazwie: '" + self.galleryLink + "'.";
+                        var msg = "Nie udało się wczytać galerii.";
                     }
                     }
-                   self.galleryImages = [];
-                   params['failure'](self, msg);
+                   params.failure(msg);
                }
            });
         }
                }
            });
         }
index 6fb77fb..fc5fac3 100644 (file)
@@ -28,6 +28,7 @@ urlpatterns = [
     path('depot/', include('depot.urls')),
     path('wlxml/', include('wlxml.urls')),
     path('isbn/', include('isbn.urls')),
     path('depot/', include('depot.urls')),
     path('wlxml/', include('wlxml.urls')),
     path('isbn/', include('isbn.urls')),
+    path('sources/', include('sources.urls')),
 
     path('api/', include('redakcja.api.urls')),
 ]
 
     path('api/', include('redakcja.api.urls')),
 ]
diff --git a/src/sources/__init__.py b/src/sources/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/src/sources/admin.py b/src/sources/admin.py
new file mode 100644 (file)
index 0000000..97e008f
--- /dev/null
@@ -0,0 +1,14 @@
+from django.contrib import admin
+from . import models
+
+
+@admin.register(models.Source)
+class SourceAdmin(admin.ModelAdmin):  # Admin configuration for Source.
+    list_display = ['name', 'modified_at', 'processed_at']
+    exclude = ['wikisource']  # the wikisource field is left out of the admin form
+
+    
+@admin.register(models.BookSource)
+class BookSourceAdmin(admin.ModelAdmin):  # Admin configuration for BookSource.
+    list_display = ['source', 'page_start', 'page_end', 'book']
+    raw_id_fields = ['source', 'book']  # both foreign keys are edited as raw ids rather than select boxes
diff --git a/src/sources/apps.py b/src/sources/apps.py
new file mode 100644 (file)
index 0000000..a5fe4e4
--- /dev/null
@@ -0,0 +1,6 @@
+from django.apps import AppConfig
+
+
+class SourcesConfig(AppConfig):  # Django app configuration for the "sources" app.
+    default_auto_field = "django.db.models.BigAutoField"  # matches the BigAutoField ids in this app's migrations
+    name = "sources"
diff --git a/src/sources/conversion.py b/src/sources/conversion.py
new file mode 100644 (file)
index 0000000..e1a6a9f
--- /dev/null
@@ -0,0 +1,29 @@
+import subprocess
+
+
+def resize_image(source_file, out_dir):
+    """Run ``convert -thumbnail '1000x1000>'`` on source_file, writing into out_dir.
+
+    The output keeps the source's file name (the part after the last '/').
+    The exit status of the command is not checked.
+    """
+    file_name = source_file.rpartition('/')[2]
+    target = f'{out_dir}/{file_name}'
+    command = ['convert', source_file, '-thumbnail', '1000x1000>', target]
+    subprocess.run(command)
+    
+
+def convert_image(source_file, out_dir):
+    """Run ``convert -resize 1000x1000`` on source_file, writing a '.jpg' into out_dir.
+
+    The output name is the source's file name (the part after the last '/')
+    with '.jpg' appended. The exit status of the command is not checked.
+    """
+    file_name = source_file.rpartition('/')[2]
+    target = f'{out_dir}/{file_name}.jpg'
+    command = ['convert', source_file, '-resize', '1000x1000', target]
+    subprocess.run(command)
+
+
+def convert_pdf(source_file, out_dir):  # Stub: PDF conversion is not implemented; does nothing.
+    # TODO
+    pass
+
+
+def convert_djvu(source_file, view_dir):  # Stub: DjVu conversion is not implemented; does nothing.
+    # TODO
+    pass
diff --git a/src/sources/document.py b/src/sources/document.py
new file mode 100644 (file)
index 0000000..e46ce2f
--- /dev/null
@@ -0,0 +1,74 @@
+import os
+from librarian import RDFNS, DCNS
+from lxml import etree
+from datetime import date
+from . import ocr
+from django.conf import settings
+
+
+def build_document_texts(book_source):
+    """Return one serialized XML text per entry in ``text_builders``, in order.
+
+    Each text is an 'utwor' root holding the metadata written by ``add_rdf``
+    and a 'powiesc' element filled page by page from
+    ``book_source.get_ocr_files()``.
+    """
+    def render(build_page):
+        utwor = etree.Element('utwor')
+        # add meta
+        add_rdf(utwor, book_source)
+        # add master
+        body = etree.SubElement(utwor, 'powiesc')
+        for ocr_file in book_source.get_ocr_files():
+            build_page(body, ocr_file)
+        return etree.tostring(utwor, encoding='unicode', pretty_print=True)
+
+    return [render(build_page) for build_page in text_builders]
+
+
+text_builders = [  # page builders; build_document_texts produces one text per entry, in this order
+    ocr.add_page_to_master,
+    ocr.add_page_to_master_as_stanzas,
+    ocr.add_page_to_master_as_p,
+]
+
+
+def add_rdf(root, book_source):
+    """Append an RDF metadata block describing the book to ``root``.
+
+    Creates RDFNS('RDF') / RDFNS('Description') under ``root`` and fills it
+    with DCNS-namespaced elements taken from ``book_source.book`` (authors,
+    translators, title, dates, language, url, tags) and from
+    ``book_source.source`` (source name).
+    """
+    book = book_source.book
+
+    # TODO: to librarian
+    rdf = etree.SubElement(root, RDFNS('RDF'))
+    desc = etree.SubElement(rdf, RDFNS('Description'))
+
+    def add(name, text):
+        # One DCNS element per call, appended in call order.
+        etree.SubElement(desc, DCNS(name)).text = text
+
+    # author
+    for author in book.authors.all():
+        add('creator', f'{author.last_name_pl}, {author.first_name_pl}')
+    # translator
+    for tr in book.translators.all():
+        add('contributor.translator', f'{tr.last_name_pl}, {tr.first_name_pl}')
+    # title
+    add('title', book.title)
+    # created_at
+    add('date', date.today().isoformat())
+    # date.pd
+    # lxml rejects non-string text and pd_year is numeric, so convert it;
+    # a missing year still yields an empty element, as before.
+    add('date.pd', None if book.pd_year is None else str(book.pd_year))
+    # publisher
+    add('publisher', 'Fundacja Wolne Lektury')
+    # language
+    add('language', book.language)  # 3to2?
+    # description
+    # source_name
+    add('source', book_source.source.name)
+    # url
+    add('identifier.url', f'https://wolnelektury.pl/katalog/lektura/{book.slug}/')
+    # license?
+    # license_description?
+    add('rights', '')
+    # epochs
+    for tag in book.epochs.all():
+        add('subject.period', tag.name)
+    # kinds
+    for tag in book.kinds.all():
+        add('subject.type', tag.name)
+    # genres
+    for tag in book.genres.all():
+        add('subject.genre', tag.name)
+
+    
diff --git a/src/sources/migrations/0001_initial.py b/src/sources/migrations/0001_initial.py
new file mode 100644 (file)
index 0000000..08f1d4f
--- /dev/null
@@ -0,0 +1,63 @@
+# Generated by Django 4.1.9 on 2023-09-08 14:03
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+
+    initial = True
+
+    dependencies = [
+        ("catalogue", "0050_audience_woblink"),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name="Source",
+            fields=[
+                (
+                    "id",
+                    models.BigAutoField(
+                        auto_created=True,
+                        primary_key=True,
+                        serialize=False,
+                        verbose_name="ID",
+                    ),
+                ),
+                ("name", models.CharField(max_length=1024, verbose_name="name")),
+                ("notes", models.TextField(blank=True, help_text="private")),
+            ],
+        ),
+        migrations.CreateModel(
+            name="BookSource",
+            fields=[
+                (
+                    "id",
+                    models.BigAutoField(
+                        auto_created=True,
+                        primary_key=True,
+                        serialize=False,
+                        verbose_name="ID",
+                    ),
+                ),
+                ("page_start", models.IntegerField(blank=True, null=True)),
+                ("page_end", models.IntegerField(blank=True, null=True)),
+                (
+                    "book",
+                    models.ForeignKey(
+                        on_delete=django.db.models.deletion.CASCADE, to="catalogue.book"
+                    ),
+                ),
+                (
+                    "source",
+                    models.ForeignKey(
+                        on_delete=django.db.models.deletion.CASCADE, to="sources.source"
+                    ),
+                ),
+            ],
+            options={
+                "ordering": ("page_start",),
+            },
+        ),
+    ]
diff --git a/src/sources/migrations/0002_source_wikisource.py b/src/sources/migrations/0002_source_wikisource.py
new file mode 100644 (file)
index 0000000..196b2d3
--- /dev/null
@@ -0,0 +1,18 @@
+# Generated by Django 4.1.9 on 2023-09-19 10:44
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("sources", "0001_initial"),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="source",
+            name="wikisource",
+            field=models.CharField(blank=True, max_length=1024),
+        ),
+    ]
diff --git a/src/sources/migrations/0003_source_modified_at_source_processed_at.py b/src/sources/migrations/0003_source_modified_at_source_processed_at.py
new file mode 100644 (file)
index 0000000..fbb1c6d
--- /dev/null
@@ -0,0 +1,23 @@
+# Generated by Django 4.1.9 on 2023-10-03 12:13
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("sources", "0002_source_wikisource"),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="source",
+            name="modified_at",
+            field=models.DateTimeField(blank=True, null=True),
+        ),
+        migrations.AddField(
+            model_name="source",
+            name="processed_at",
+            field=models.DateTimeField(blank=True, null=True),
+        ),
+    ]
diff --git a/src/sources/migrations/__init__.py b/src/sources/migrations/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/src/sources/models.py b/src/sources/models.py
new file mode 100644 (file)
index 0000000..0b55c89
--- /dev/null
@@ -0,0 +1,160 @@
+import os
+import subprocess
+import uuid
+from django.apps import apps
+from django.conf import settings
+from django.db import models
+from django.urls import reverse
+from django.utils.timezone import now
+from django.utils.translation import gettext_lazy as _
+from . import conversion
+from . import document
+from . import utils
+
+
+class Source(models.Model):  # A named source whose uploaded files are turned into "view" images and OCR output.
+    name = models.CharField(_('name'), max_length=1024)
+    notes = models.TextField(blank=True, help_text=_('private'))
+    wikisource = models.CharField(max_length=1024, blank=True)
+    modified_at = models.DateTimeField(null=True, blank=True)  # set by touch()
+    processed_at = models.DateTimeField(null=True, blank=True)  # set at the end of process()
+
+    def __str__(self):
+        return self.name
+
+    def get_absolute_url(self):
+        return reverse('source', args=[self.pk])
+
+    def touch(self):  # Stamp modified_at with the current time, saving only that field.
+        self.modified_at = now()
+        self.save(update_fields=['modified_at'])
+    
+    def get_upload_directory(self):  # relative path; process() joins it with settings.MEDIA_ROOT
+        return f'sources/upload/{self.pk}/'
+
+    def get_view_directory(self):  # relative path; process() joins it with settings.MEDIA_ROOT
+        return f'sources/view/{self.pk}/'
+
+    def get_ocr_directory(self):  # relative path; process() joins it with settings.MEDIA_ROOT
+        return f'sources/ocr/{self.pk}/'
+
+    def get_view_files(self):  # Name-sorted view file paths, relative (prefixed with the view directory only).
+        d = self.get_view_directory()
+        return [
+            d + name
+            for name in sorted(os.listdir(
+                    os.path.join(settings.MEDIA_ROOT, d)
+            ))
+        ]
+
+    def get_ocr_files(self):  # Name-sorted OCR file paths, prefixed with MEDIA_ROOT (unlike get_view_files).
+        d = os.path.join(settings.MEDIA_ROOT, self.get_ocr_directory())
+        return [
+            d + name  # plain concatenation: relies on the trailing '/' of get_ocr_directory()
+            for name in sorted(os.listdir(d))
+        ]
+
+    def process(self):  # Rebuild the view and OCR directories from the uploads, then stamp processed_at.
+        updir = os.path.join(
+            settings.MEDIA_ROOT,
+            self.get_upload_directory()
+        )
+        view_dir = os.path.join(
+            settings.MEDIA_ROOT,
+            self.get_view_directory()
+        )
+        ocr_dir = os.path.join(
+            settings.MEDIA_ROOT,
+            self.get_ocr_directory()
+        )
+        with utils.replace_dir(view_dir) as d:  # d is a temporary directory swapped into place on success
+            self.build_view_directory(updir, d)
+        with utils.replace_dir(ocr_dir) as d:
+            self.build_ocr_directory(updir, d)
+        self.processed_at = now()
+        self.save(update_fields=['processed_at'])
+    
+    def build_view_directory(self, srcpath, targetpath):  # Convert each file in srcpath according to its extension.
+        for source_file_name in os.listdir(srcpath):
+            print(source_file_name)  # progress output on stdout
+            src = os.path.join(srcpath, source_file_name)
+            ext = source_file_name.rsplit('.', 1)[-1].lower()
+            if ext in ('png', 'jpg', 'jpeg'):
+                conversion.resize_image(src, targetpath)
+                # cp?
+                # maybe resize
+            elif ext in ('tiff', 'tif'):
+                conversion.convert_image(src, targetpath)
+            elif ext == 'pdf':
+                conversion.convert_pdf(src, targetpath)
+            elif ext == 'djvu':
+                conversion.convert_djvu(src, targetpath)
+            else:
+                pass  # files with any other extension are skipped
+
+    def build_ocr_directory(self, srcpath, targetpath):  # Run tesseract (language 'pol') on every file in srcpath.
+        for source_file_name in os.listdir(srcpath):
+            print(source_file_name)  # progress output on stdout
+            subprocess.run([
+                'tesseract',
+                os.path.join(srcpath, source_file_name),
+                os.path.join(targetpath, source_file_name),  # passed as tesseract's output argument
+                '-l', 'pol'
+            ])
+
+
+class BookSource(models.Model):
+    """Link between a catalogue Book and a Source, optionally limited to a page range."""
+    book = models.ForeignKey('catalogue.Book', models.CASCADE)
+    source = models.ForeignKey(Source, models.CASCADE)
+    page_start = models.IntegerField(null=True, blank=True)
+    page_end = models.IntegerField(null=True, blank=True)
+
+    class Meta:
+        ordering = ('page_start',)
+
+    def __str__(self):
+        return f'{self.source} -> {self.book}'
+
+    def get_absolute_url(self):
+        """Return the URL of the 'source_book_prepare' view for this assignment."""
+        return reverse('source_book_prepare', args=[self.pk])
+
+    def _select_pages(self, files):
+        """Cut ``files`` down to the page range, treating both bounds as 1-based and inclusive.
+
+        A bound that is unset (or 0) leaves that end of the list open.
+        """
+        if self.page_end:
+            files = files[:self.page_end]
+        if self.page_start:
+            files = files[self.page_start - 1:]
+        return files
+
+    def get_view_files(self):
+        """Return the source's view files that fall within this page range."""
+        # TODO: won't work for PDFs.
+        return self._select_pages(self.source.get_view_files())
+
+    def get_ocr_files(self):
+        """Return the source's OCR files that fall within this page range."""
+        # TODO: won't work for PDFs.
+        return self._select_pages(self.source.get_ocr_files())
+
+    def get_document(self):
+        """Return the first object in ``book.document_books``, or None when there is none."""
+        return self.book.document_books.first()
+
+    def prepare_document(self, user=None):
+        """Store the OCR-built texts in the book's document, creating it if needed.
+
+        Every text from ``document.build_document_texts`` is saved in order:
+        the first one creates the document when none exists, and the others
+        are committed to ``dbook[0]`` with the description 'OCR'. The
+        resulting head is then marked publishable.
+
+        Returns the document book.
+        """
+        DBook = apps.get_model('documents', 'Book')
+        texts = document.build_document_texts(self)
+
+        dbook = self.get_document()
+        if dbook is None:
+            dbook = DBook.create(
+                user, texts[0],
+                title=self.book.title,
+                slug=str(uuid.uuid4()),
+            )
+        else:
+            dbook[0].commit(text=texts[0], description='OCR', author=user)
+        for text in texts[1:]:
+            dbook[0].commit(text=text, description='OCR', author=user)
+
+        dbook[0].head.set_publishable(True)
+        return dbook
+
diff --git a/src/sources/ocr.py b/src/sources/ocr.py
new file mode 100644 (file)
index 0000000..43b7d1c
--- /dev/null
@@ -0,0 +1,37 @@
+from lxml import etree
+
+
+def add_page_to_master(master, ocr_filename):
+    """Append a page's OCR text to ``master`` as plain text, with no markup.
+
+    The stripped text, padded with blank lines on both sides, is appended to
+    the tail of master's last child, or to ``master.text`` when master has no
+    children.
+    """
+    # Decode explicitly as UTF-8 instead of relying on the locale default.
+    with open(ocr_filename, encoding='utf-8') as f:
+        txt = f.read().strip()
+
+    padded = '\n\n' + txt + '\n\n'
+    if len(master):
+        last = master[-1]
+        last.tail = (last.tail or '') + padded
+    else:
+        master.text = (master.text or '') + padded
+
+
+def add_page_to_master_as_stanzas(master, ocr_filename):
+    """Append a page's OCR text to ``master`` as a single 'strofa' element.
+
+    Every non-blank line of the file becomes one verse terminated by '/'
+    and a newline; blank lines are dropped.
+    """
+    # Decode explicitly as UTF-8 instead of relying on the locale default.
+    with open(ocr_filename, encoding='utf-8') as f:
+        txt = f.read()
+
+    strofa = etree.SubElement(master, 'strofa')
+    verses = [line for line in txt.split('\n') if line.strip()]
+    strofa.text = '\n' + ''.join(line + '/\n' for line in verses)
+    
+
+def add_page_to_master_as_p(master, ocr_filename):
+    """Append a page's OCR text to ``master`` as 'akap' elements.
+
+    The stripped text is split on blank lines ('\\n\\n'); each non-blank
+    piece becomes the text of its own 'akap' element.
+    """
+    # Decode explicitly as UTF-8 instead of relying on the locale default.
+    with open(ocr_filename, encoding='utf-8') as f:
+        txt = f.read()
+
+    for piece in txt.strip().split('\n\n'):
+        if not piece.strip():
+            continue
+        etree.SubElement(master, 'akap').text = piece
diff --git a/src/sources/templates/sources/prepare.html b/src/sources/templates/sources/prepare.html
new file mode 100644 (file)
index 0000000..07a6194
--- /dev/null
@@ -0,0 +1,21 @@
+{% extends "documents/base.html" %}
+
+{% block content %}
+  <h1>{{ book_source.book }} z: {{ book_source.source }}</h1>
+
+  {% with doc=book_source.get_document %}
+    {% if doc %}
+      Tekst:
+      <a href="{{ doc.get_absolute_url }}">
+        {{ doc }}
+      </a>
+    {% endif %}
+  {% endwith %}
+
+  <form method="post">
+    {% csrf_token %}
+    <button class="btn btn-primary">Utwórz tekst</button>
+  </form>
+
+{% endblock %}
diff --git a/src/sources/templates/sources/source_detail.html b/src/sources/templates/sources/source_detail.html
new file mode 100644 (file)
index 0000000..240a2b3
--- /dev/null
@@ -0,0 +1,26 @@
+{% extends 'documents/base.html' %}
+{% load sorl_thumbnail %}
+
+{% block content %}
+  <h1>{{ source }}</h1>
+
+  <p>
+  <a href="{% url 'source_upload' source.pk %}">
+    Skany
+  </a>
+  </p>
+
+  <h2>Przypisania do książek:</h2>
+  {% for bs in source.booksource_set.all %}
+    <div>
+      <a href="{% url 'source_book_prepare' bs.pk %}">
+        {{ bs.book }} {{ bs.page_start }}—{{ bs.page_end }}
+      </a>
+    </div>
+    {# BookSource defines get_view_files (no get_files); its paths are prefixed only with the view directory. #}
+    {% for f in bs.get_view_files %}
+      {% thumbnail f '200x200' as c %}
+      <a href="{{ MEDIA_URL }}{{ f }}"><img src="{{ c.url }}"></a>
+      {% endthumbnail %}
+    {% endfor %}
+  {% endfor %}
+{% endblock %}
diff --git a/src/sources/urls.py b/src/sources/urls.py
new file mode 100644 (file)
index 0000000..49e2763
--- /dev/null
@@ -0,0 +1,9 @@
+from django.urls import path
+from . import views
+
+
+urlpatterns = [  # routes of the sources app; the names are used by {% url %} in its templates
+    path('source/<int:pk>/', views.SourceView.as_view(), name='source'),  # detail page of one Source
+    path('upload/<int:sid>/', views.SourceUploadView.as_view(), name='source_upload'),  # file upload for one Source
+    path('prepare/<int:bsid>/', views.prepare, name='source_book_prepare'),  # prepare a text from one BookSource
+]
diff --git a/src/sources/utils.py b/src/sources/utils.py
new file mode 100644 (file)
index 0000000..1326ea3
--- /dev/null
@@ -0,0 +1,25 @@
+from contextlib import contextmanager
+import os
+import shutil
+from time import time
+
+
+@contextmanager
+def replace_dir(d):
+    # create tmp dir
+    d = d.rstrip('/')
+    ts = int(time())
+    dnew = f'{d}.{ts}.new'
+    dold = f'{d}.{ts}.old'
+    os.makedirs(dnew)
+    try:
+        yield dnew
+    except:
+        shutil.rmtree(dnew)
+        raise
+    else:
+        if os.path.exists(d):
+            shutil.move(d, dold)
+        shutil.move(dnew, d)
+        if os.path.exists(dold):
+            shutil.rmtree(dold)
diff --git a/src/sources/views.py b/src/sources/views.py
new file mode 100644 (file)
index 0000000..a5664f7
--- /dev/null
@@ -0,0 +1,53 @@
+from django.http import HttpResponse
+from django.shortcuts import render, get_object_or_404, redirect
+from django.utils.translation import gettext as _
+from django.views.generic import DetailView
+from fileupload.views import UploadView
+from . import models
+
+
+# TODO 
+class SourceView(DetailView):  # generic detail page for a single Source
+    model = models.Source
+
+
+class SourceUploadView(UploadView):
+    def get_object(self, request, sid):
+        source = get_object_or_404(models.Source, id=sid)
+        return source
+
+    def breadcrumbs(self):
+        return [
+            (_('sources'),),
+            (self.object.name, self.object.get_absolute_url()),
+            (_('upload'),)
+        ]
+
+    def get_directory(self):
+        return self.object.get_upload_directory()
+
+    def form_valid(self, form):
+        response = super().form_valid(form)
+        self.object.touch()
+        return response
+
+    def delete(self, request, *args, **kwargs):
+        response = super().delete(request, *args, **kwargs)
+        self.object.touch()
+        return response
+
+
+def prepare(request, bsid):
+    bs = get_object_or_404(models.BookSource, id=bsid)
+
+    if request.POST:
+        dbook = bs.prepare_document(request.user)
+        return redirect('wiki_editor', dbook.slug, dbook[0].slug)
+    else:
+        return render(
+            request,
+            'sources/prepare.html',
+            {
+                'book_source': bs,
+            }
+        )
index 18290a1..a7009c5 100644 (file)
@@ -26,6 +26,7 @@
        data-chunk-id="{{ chunk.pk }}" style="display:none">
 
     <span data-key="book-slug">{{ chunk.book.slug }}</span>
        data-chunk-id="{{ chunk.pk }}" style="display:none">
 
     <span data-key="book-slug">{{ chunk.book.slug }}</span>
+    <span data-key="scans">{{ chunk.book.catalogue_book.scans_gallery }}</span>
     <span data-key="gallery">{{ chunk.book.gallery }}</span>
     <span data-key="gallery-start">{% if chunk.gallery_start %}{{ chunk.gallery_start }}{% endif %}</span>
     <span data-key="revision">{{ revision }}</span>
     <span data-key="gallery">{{ chunk.book.gallery }}</span>
     <span data-key="gallery-start">{% if chunk.gallery_start %}{{ chunk.gallery_start }}{% endif %}</span>
     <span data-key="revision">{{ revision }}</span>
index 002bdb6..bf7f5b8 100644 (file)
@@ -15,6 +15,7 @@ urlpatterns = [
          views.editor_readonly, name="wiki_editor_readonly"),
 
     path('gallery/<directory>/', views.gallery, name="wiki_gallery"),
          views.editor_readonly, name="wiki_editor_readonly"),
 
     path('gallery/<directory>/', views.gallery, name="wiki_gallery"),
+    path('scans/<int:pk>/', views.scans_list, name="wiki_scans"),
     path('history/<int:chunk_id>/', views.history, name="wiki_history"),
     path('rev/<int:chunk_id>/', views.revision, name="wiki_revision"),
     path('text/<int:chunk_id>/', views.text, name="wiki_text"),
     path('history/<int:chunk_id>/', views.history, name="wiki_history"),
     path('rev/<int:chunk_id>/', views.revision, name="wiki_revision"),
     path('text/<int:chunk_id>/', views.text, name="wiki_text"),
index 062a566..26f031b 100644 (file)
@@ -21,6 +21,7 @@ from django.shortcuts import get_object_or_404, render
 from sorl.thumbnail import get_thumbnail
 
 from documents.models import Book, Chunk
 from sorl.thumbnail import get_thumbnail
 
 from documents.models import Book, Chunk
+import sources.models
 from . import nice_diff
 from wiki import forms
 from wiki.helpers import (JSONResponse, JSONFormInvalid, JSONServerError,
 from . import nice_diff
 from wiki import forms
 from wiki.helpers import (JSONResponse, JSONFormInvalid, JSONServerError,
@@ -250,6 +251,19 @@ def gallery(request, directory):
         raise http.Http404
 
 
         raise http.Http404
 
 
+@never_cache
+def scans_list(request, pk):
+    bs = get_object_or_404(sources.models.BookSource, pk=pk)
+    def map_to_url(filename):
+        return quote(("%s/%s" % (settings.MEDIA_URL, filename)))
+    images = [
+        {
+            "url": map_to_url(f),
+        } for f in bs.get_view_files()
+    ]
+    return JSONResponse(images)
+
+
 @never_cache
 def diff(request, chunk_id):
     revA = int(request.GET.get('from', 0))
 @never_cache
 def diff(request, chunk_id):
     revA = int(request.GET.get('from', 0))