Remove ssify.
[wolnelektury.git] / src / dictionary / models.py
1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
3 #
4 from django.db import models, transaction
5 from celery.task import task
6 from sortify import sortify
7 from celery.utils.log import get_task_logger
8
9 from catalogue.models import Book
10
11 task_logger = get_task_logger(__name__)
12
13
class Qualifier(models.Model):
    """A qualifier that can be attached to notes (see ``Note.qualifiers``).

    ``qualifier`` is the unique code; ``name`` is its display label.
    """
    qualifier = models.CharField(max_length=128, unique=True, db_index=True)
    name = models.CharField(max_length=255)

    class Meta:
        ordering = ['qualifier']

    def __str__(self):
        # Prefer the display label; fall back to the code when it is empty.
        if self.name:
            return self.name
        return self.qualifier
23
24
class Note(models.Model):
    """An annotation extracted from book HTML.

    A note is linked to the books it appears in through ``NoteSource``.
    """
    # Rendered HTML of the annotation.
    html = models.TextField()
    # Key used for the default ordering of notes (see Meta.ordering).
    sort_key = models.CharField(db_index=True, max_length=128)
    fn_type = models.CharField(db_index=True, max_length=10)
    qualifiers = models.ManyToManyField(Qualifier)
    language = models.CharField(db_index=True, max_length=10)

    class Meta:
        ordering = ['sort_key']
35
36
class NoteSource(models.Model):
    """Links a Note to a Book in which it occurs, at a given anchor."""
    note = models.ForeignKey(Note, on_delete=models.CASCADE)
    book = models.ForeignKey(Book, on_delete=models.CASCADE)
    # Anchor string for this occurrence of the note within the book.
    anchor = models.CharField(max_length=64)

    class Meta:
        ordering = ['book']
45
46
@task(ignore_result=True)
def build_notes(book):
    """Rebuild the dictionary notes for a single book.

    Everything happens inside one database transaction:

    1. All existing ``NoteSource`` rows of the book are deleted.
    2. If the book has an HTML file, its annotations are extracted and each
       one is linked to a ``Note`` -- an existing note with identical
       sort key, type, language and HTML is reused, otherwise a new one is
       created. The note's qualifiers are replaced with those of the
       annotation being processed.
    3. Notes left without any ``NoteSource`` are deleted.

    Args:
        book: the ``Book`` instance whose annotations should be re-indexed.
    """
    task_logger.info(book.slug)
    with transaction.atomic():
        book.notesource_set.all().delete()
        if book.html_file:
            # librarian is imported only when there is an HTML file to parse.
            from librarian import html
            from librarian.fn_qualifiers import FN_QUALIFIERS

            language = book.language  # same for every annotation of the book
            annotations = html.extract_annotations(book.html_file.path)
            for anchor, fn_type, qualifiers, text_str, html_str in annotations:
                note_fields = {
                    # Truncated to fit Note.sort_key (max_length=128).
                    'sort_key': sortify(text_str).strip()[:128],
                    'fn_type': fn_type,
                    'language': language,
                    'html': html_str,
                }
                # Not get_or_create(): identical notes may already exist more
                # than once, which would raise MultipleObjectsReturned.
                # first() fetches a single row instead of evaluating the
                # whole queryset just to take its first element.
                note = Note.objects.filter(**note_fields).first()
                if note is None:
                    note = Note.objects.create(**note_fields)

                qualifier_objects = [
                    Qualifier.objects.get_or_create(
                        qualifier=qualifier,
                        defaults={'name': FN_QUALIFIERS.get(qualifier, '')},
                    )[0]
                    for qualifier in qualifiers
                ]
                # Direct assignment to a many-to-many attribute raises
                # TypeError since Django 2.0; set() is the supported way
                # to replace the related objects.
                note.qualifiers.set(qualifier_objects)
                note.notesource_set.create(book=book, anchor=anchor)

        # Drop notes that are no longer referenced by any book.
        Note.objects.filter(notesource=None).delete()
83
84
def notes_from_book(sender, instance, **kwargs):
    """Handler for ``Book.html_built``: schedule ``build_notes`` for the book.

    ``sender`` and any extra keyword arguments are accepted but unused.
    """
    book = instance
    build_notes.delay(book)


Book.html_built.connect(notes_from_book)