Librarian.
[wolnelektury.git] / src / dictionary / models.py
1 # This file is part of Wolne Lektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
3 #
4 from django.db import models, transaction
5 from celery import shared_task
6 from sortify import sortify
7 from celery.utils.log import get_task_logger
8
9 from catalogue.models import Book
10
11 task_logger = get_task_logger(__name__)
12
13
class Qualifier(models.Model):
    """A qualifier code that can be attached to notes, with a display name."""
    qualifier = models.CharField(max_length=128, db_index=True, unique=True)
    name = models.CharField(max_length=255)

    class Meta:
        ordering = ['qualifier']

    def __str__(self):
        # Prefer the display name; fall back to the raw code when it is empty.
        if self.name:
            return self.name
        return self.qualifier
23
24
class Note(models.Model):
    """Represents a single annotation from a book.

    build_notes() reuses an existing Note when one with the same sort key,
    type, language and HTML already exists; each occurrence in a book is
    recorded separately as a NoteSource.
    """
    # Annotation body as HTML, as returned by the annotation extractor.
    html = models.TextField()
    # Ordering/lookup key; build_notes() derives it from the annotation's
    # plain text via sortify(), stripped and truncated to 128 characters.
    sort_key = models.CharField(max_length=128, db_index=True)
    # Annotation type as reported by the annotation extractor.
    fn_type = models.CharField(max_length=10, db_index=True)
    # Qualifier objects attached to this note.
    qualifiers = models.ManyToManyField(Qualifier)
    # Copied from the language of the book the note was extracted from.
    language = models.CharField(max_length=10, db_index=True)

    class Meta:
        ordering = ['sort_key']
35
36
class NoteSource(models.Model):
    """Records one occurrence of a note: the book and the anchor within it."""
    note = models.ForeignKey(Note, on_delete=models.CASCADE)
    book = models.ForeignKey(Book, on_delete=models.CASCADE)
    anchor = models.CharField(max_length=64)

    class Meta:
        ordering = ['book']
45
46
@shared_task(ignore_result=True)
def build_notes(book):
    """Rebuild the dictionary notes extracted from a book's HTML file.

    Does nothing when the book is not findable. Otherwise, inside a single
    transaction, removes the book's existing NoteSource rows, re-extracts the
    annotations from the book's HTML file (if it has one), links each
    annotation to a matching or newly created Note, and finally deletes every
    Note that no longer has any source.

    :param book: the catalogue Book whose notes should be rebuilt.
    """
    if not book.findable:
        return
    task_logger.info(book.slug)
    with transaction.atomic():
        # Drop this book's old links first; they are recreated below.
        book.notesource_set.all().delete()
        if book.html_file:
            # Imported lazily, as in the original code.
            from librarian import html
            from librarian.fn_qualifiers import FN_QUALIFIERS

            language = book.language
            # Qualifier rows already resolved during this run, keyed by code,
            # so that each distinct code costs one get_or_create at most.
            known_qualifiers = {}

            annotations = html.extract_annotations(book.html_file.path)
            for anchor, fn_type, qualifiers, text_str, html_str in annotations:
                # sort_key must fit the Note.sort_key column (128 chars).
                sort_key = sortify(text_str).strip()[:128]

                # Reuse an identical note if one exists; .first() fetches a
                # single row instead of loading every match.
                note = Note.objects.filter(
                    sort_key=sort_key,
                    fn_type=fn_type,
                    language=language,
                    html=html_str,
                ).first()
                if note is None:
                    note = Note.objects.create(
                        sort_key=sort_key,
                        html=html_str,
                        fn_type=fn_type,
                        language=language,
                    )

                qualifier_objects = []
                for qualifier in qualifiers:
                    obj = known_qualifiers.get(qualifier)
                    if obj is None:
                        obj, _created = Qualifier.objects.get_or_create(
                            qualifier=qualifier,
                            defaults={
                                'name': FN_QUALIFIERS.get(qualifier, ''),
                            },
                        )
                        known_qualifiers[qualifier] = obj
                    qualifier_objects.append(obj)
                note.qualifiers.set(qualifier_objects)
                note.notesource_set.create(book=book, anchor=anchor)

        # Garbage-collect notes left without any source.
        Note.objects.filter(notesource=None).delete()
84
85
def notes_from_book(sender, instance, **kwargs):
    """Signal receiver: queue a notes rebuild for a findable book.

    :param sender: the signal sender (unused).
    :param instance: the book the signal was sent for.
    """
    if not instance.findable:
        return
    build_notes.delay(instance)
# Register notes_from_book as a receiver of Book.html_built
# (presumably sent once a book's HTML has been built; confirm in catalogue).
Book.html_built.connect(notes_from_book)