librarian
[wolnelektury.git] / src / dictionary / models.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from django.db import models, transaction
6 from celery.task import task
7 from sortify import sortify
8 from celery.utils.log import get_task_logger
9
10 from catalogue.models import Book
11
12 task_logger = get_task_logger(__name__)
13
14
class Qualifier(models.Model):
    """An annotation qualifier: a unique short code plus a display name."""
    # Short qualifier code; unique across all qualifiers.
    qualifier = models.CharField(max_length=128, db_index=True, unique=True)
    # Display name; may be empty, in which case the code is shown instead.
    name = models.CharField(max_length=255)

    class Meta:
        ordering = ['qualifier']

    def __unicode__(self):
        """Return the display name, or the qualifier code if the name is empty."""
        if self.name:
            return self.name
        return self.qualifier
24
25
class Note(models.Model):
    """An annotation extracted from a book's HTML.

    Books point at a note through ``NoteSource`` rows. ``build_notes`` reuses
    an existing note when its sort key, type, language and HTML all match,
    and creates a new one otherwise.
    """
    # Annotation body as HTML.
    html = models.TextField()
    # Sort key; build_notes derives it from the annotation text (max 128 chars).
    sort_key = models.CharField(max_length=128, db_index=True)
    # Annotation type, as reported by the annotation extractor.
    fn_type = models.CharField(max_length=10, db_index=True)
    # Qualifiers attached to this annotation.
    qualifiers = models.ManyToManyField(Qualifier)
    # Language, copied from the book the note was built from.
    language = models.CharField(max_length=10, db_index=True)

    class Meta:
        ordering = ['sort_key']
36
37
class NoteSource(models.Model):
    """Links a note to one place in a book where that note occurs."""
    # CASCADE was the implicit default for these keys; it is spelled out
    # because newer Django versions require the argument.
    note = models.ForeignKey(Note, on_delete=models.CASCADE)
    book = models.ForeignKey(Book, on_delete=models.CASCADE)
    # Anchor of the annotation, as yielded by the annotation extractor.
    anchor = models.CharField(max_length=64)

    class Meta:
        ordering = ['book']
46
47
@task(ignore_result=True)
def build_notes(book):
    """Rebuild the annotation index for ``book``.

    Runs in a single transaction: removes the book's existing ``NoteSource``
    rows, re-extracts annotations from the book's HTML file (when it has
    one), attaching each to a matching or newly created ``Note``, and finally
    deletes every ``Note`` that no ``NoteSource`` refers to any more.
    """
    task_logger.info(book.slug)
    with transaction.atomic():
        book.notesource_set.all().delete()
        if book.html_file:
            from librarian import html
            from librarian.fn_qualifiers import FN_QUALIFIERS

            annotations = html.extract_annotations(book.html_file.path)
            for anchor, fn_type, qualifiers, text_str, html_str in annotations:
                # The same values serve as lookup filter and as creation data.
                note_fields = {
                    'sort_key': sortify(text_str).strip()[:128],
                    'html': html_str,
                    'fn_type': fn_type,
                    'language': book.language,
                }
                matches = Note.objects.filter(**note_fields)
                note = matches[0] if matches else Note.objects.create(**note_fields)

                # Replace the note's qualifiers with the ones found here,
                # creating unknown qualifiers with their catalogued name.
                note.qualifiers = [
                    Qualifier.objects.get_or_create(
                        qualifier=code,
                        defaults={'name': FN_QUALIFIERS.get(code, '')},
                    )[0]
                    for code in qualifiers
                ]
                note.notesource_set.create(book=book, anchor=anchor)

        # Drop notes left without any source.
        Note.objects.filter(notesource=None).delete()
84
85
def notes_from_book(sender, instance, **kwargs):
    """Signal receiver: queue ``build_notes`` for ``instance``."""
    build_notes.delay(instance)


# Rebuild notes whenever Book.html_built fires.
Book.html_built.connect(notes_from_book)