Obey length limits for wikidata import.
[redakcja.git] / src / depot / publishers / woblink.py
1 from datetime import date
2 import io
3 import json
4 import re
5 from time import sleep
6 from django.conf import settings
7 from django.utils.html import escape, format_html
8 from django.utils.safestring import mark_safe
9 from librarian.builders.html import SnippetHtmlBuilder
10 from librarian.functions import lang_code_3to2
11 from catalogue.models import Audience, Author, Thema
12 from .. import models
13 from .base import BasePublisher
14 from .woblink_constants import WOBLINK_CATEGORIES
15
16
class WoblinkError(ValueError):
    """Base class for Woblink problems that ``can_publish`` lists as errors."""
19
class NoPrice(WoblinkError):
    """Error recorded by ``Woblink.get_price`` when no price is available.

    The first exception argument is the site object the price was looked
    up for.
    """

    def as_html(self):
        """Return an HTML message linking to the admin page of the site."""
        site = self.args[0]
        template = 'Brak <a href="/admin/depot/site/{site}">określonej ceny</a>.'
        return format_html(template, site=site.id)
26
class NoIsbn(WoblinkError):
    """Error recorded by ``Woblink.get_isbn`` when the EPUB ISBN is missing."""

    def as_html(self):
        """Return the user-facing message (a plain string, no markup)."""
        message = 'Brak ISBN.'
        return message
30
class AuthorLiteralForeign(WoblinkError):
    """Error for an obligatory person literal whose language is not Polish.

    The first exception argument is the person literal; its ``lang``
    attribute and string form are used in the message.
    """

    def as_html(self):
        """Return an HTML-escaped message naming the person and language."""
        literal = self.args[0]
        template = 'Nie obsługiwane: autor „{author}” w języku {lang}.'
        return format_html(template, author=str(literal), lang=literal.lang)
38
class AuthorNotInCatalogue(WoblinkError):
    """Error for an obligatory person literal with no catalogue ``Author``.

    The first exception argument is the person literal that was looked up.
    """

    def as_html(self):
        """Return an HTML-escaped message naming the missing person."""
        literal = self.args[0]
        template = 'Brak autora „{author}” w katalogu.'
        return format_html(template, author=str(literal))
45
class AuthorNoWoblink(WoblinkError):
    """Error for an obligatory catalogue author without a Woblink identifier.

    The first exception argument is the catalogue author object.
    """

    def as_html(self):
        """Return an HTML message linking to the author's admin page."""
        author = self.args[0]
        template = (
            'Autor <a href="/admin/catalogue/author/{author_id}/">{author}</a> '
            'bez identyfikatora Woblink.'
        )
        return format_html(template, author_id=author.id, author=author.name)
53
class NoThema(WoblinkError):
    """Error recorded by ``Woblink.get_genres`` when no Thema code is found."""

    def as_html(self):
        """Return the message as a safe HTML string.

        The text is a constant without placeholders, so it is marked safe
        directly: calling ``format_html()`` without any arguments is
        deprecated since Django 5.0 and rejected by later versions.
        """
        return mark_safe('Brak Thema.')
57
class UnknownThema(WoblinkError):
    """Error for a Thema code with no matching ``Thema`` object.

    The first exception argument is the unknown code.
    """

    def as_html(self):
        """Return an HTML-escaped message containing the unknown code."""
        code = self.args[0]
        template = 'Nieznana Thema {code}.'
        return format_html(template, code=code)
64
65
class ThemaUnknownWoblink(WoblinkError):
    """Error for a Thema whose Woblink category is not in WOBLINK_CATEGORIES.

    The first exception argument is the ``Thema`` object.
    """

    def as_html(self):
        """Return an HTML message linking to the Thema's admin page."""
        thema = self.args[0]
        template = (
            'Thema <a href="/admin/catalogue/thema/{id}/">{code}</a> '
            'przypisana do nieznanej kategorii Woblink.'
        )
        return format_html(template, id=thema.id, code=thema.code)
73
class NoWoblinkCategory(WoblinkError):
    """Error recorded when no Woblink category could be derived for a book."""

    def as_html(self):
        """Return the user-facing message (a plain string, no markup)."""
        message = 'Brak kategorii Woblink.'
        return message
77
class WoblinkWarning(Warning):
    """Base class for Woblink problems that ``can_publish`` lists as warnings."""
80
class NoMainThemaWarning(WoblinkWarning):
    """Warning recorded by ``Woblink.get_genres`` when no main Thema is set."""

    def as_html(self):
        """Return the message as a safe HTML string.

        The text is a constant without placeholders, so it is marked safe
        directly: calling ``format_html()`` without any arguments is
        deprecated since Django 5.0 and rejected by later versions.
        """
        return mark_safe('Brak głównej kategorii Thema.')
86
class ThemaNoWoblink(WoblinkWarning):
    """Warning for a Thema that has no Woblink category assigned.

    The first exception argument is the ``Thema`` object.
    """

    def as_html(self):
        """Return an HTML message linking to the Thema's admin page."""
        thema = self.args[0]
        template = (
            'Thema <a href="/admin/catalogue/thema/{id}/">{code}</a> '
            'nie przypisana do kategorii Woblink.'
        )
        return format_html(template, id=thema.id, code=thema.code)
94
class AuthorLiteralForeignWarning(WoblinkWarning):
    """Warning for a non-obligatory person literal that is not in Polish.

    The first exception argument is the person literal; its ``lang``
    attribute and string form are used in the message.
    """

    def as_html(self):
        """Return an HTML-escaped message naming the person and language."""
        literal = self.args[0]
        template = 'Nie obsługiwane: autor „{author}” w języku {lang}.'
        return format_html(template, author=str(literal), lang=literal.lang)
102
class AuthorNotInCatalogueWarning(WoblinkWarning):
    """Warning for a non-obligatory person literal missing from the catalogue.

    The first exception argument is the person literal that was looked up.
    """

    def as_html(self):
        """Return an HTML-escaped message naming the missing person."""
        literal = self.args[0]
        template = 'Brak autora „{author}” w katalogu.'
        return format_html(template, author=str(literal))
109
class AuthorNoWoblinkWarning(WoblinkWarning):
    """Warning for a non-obligatory catalogue author without a Woblink id.

    The first exception argument is the catalogue author object.
    """

    def as_html(self):
        """Return an HTML message linking to the author's admin page."""
        author = self.args[0]
        template = (
            'Autor <a href="/admin/catalogue/author/{author_id}/">{author}</a> '
            'bez identyfikatora Woblink.'
        )
        return format_html(template, author_id=author.id, author=author.name)
117
118
119
120
class Woblink(BasePublisher):
    """Publisher that sends books to the Woblink publisher panel.

    All communication happens through ``self.session`` as HTTP requests
    against the URLs defined below.
    """
    # Root of the publisher panel; every other URL is built from it.
    BASE_URL = 'https://publisher.woblink.com/'
    # Form used by ``create_book`` to register a new publication.
    ADD_URL = BASE_URL + 'catalog/add'
    # Consecutive steps of the publication edit form; %s is the Woblink id.
    STEP1_URL = BASE_URL + 'catalog/edit/%s'
    STEP2_URL = BASE_URL + 'catalog/edit/%s/2'
    STEP3_URL = BASE_URL + 'catalog/edit/%s/3'
    STEP4_URL = BASE_URL + 'catalog/edit/%s/4'
    STEP5_URL = BASE_URL + 'catalog/edit/%s/5'
    # File upload endpoint; %s is the lower-cased field name (see upload_file).
    UPLOAD_URL = BASE_URL + 'file/upload-%s'
    # Endpoint polled by ``wait_for_job``.
    JOB_STATUS_URL = BASE_URL + 'task/status'
    # Demo generation: (file format, Woblink id, percent of the book).
    GENERATE_DEMO_URL = BASE_URL + 'task/run/generate-%s-demo/%s/%d'
    # Demo check: (file format, Woblink id).
    CHECK_DEMO_URL = BASE_URL + 'task/run/check-%s-demo/%s'

    # Autocomplete search; filled in with ``str.format`` by search_catalogue.
    SEARCH_CATALOGUE_URL = BASE_URL + '{category}/autocomplete/{term}'

    # Contributor role codes, sent as ``AhpType`` in step 1 of the edit form.
    ROLE_AUTHOR = 1
    ROLE_TRANSLATOR = 4
138
139     def login(self):
140         response = self.session.get('https://publisher.woblink.com/login')
141         token = re.search(
142             r'name="_csrf_token" value="([^"]+)"',
143             response.text
144         ).group(1)
145         data = {
146             '_csrf_token': token,
147             '_username': self.username,
148             '_password': self.password,
149         }
150         response = self.session.post(
151             'https://publisher.woblink.com/login_check',
152             data=data,
153         )
154
155     def search_catalogue(self, category, term):
156         return self.session.get(
157             self.SEARCH_CATALOGUE_URL.format(category=category, term=term)
158         ).json()
159
160     def search_author_catalogue(self, term):
161         return [
162             {
163                 'id': item['autId'],
164                 'text': item['autFullname']
165             }
166             for item in self.search_catalogue('author', term)
167         ]
168     def search_series_catalogue(self, term):
169         return [
170             {
171                 'id': item['id'],
172                 'text': item['name']
173             }
174             for item in self.search_catalogue('series', term)
175         ]
176         
177     def get_isbn(self, meta, errors=None):
178         if not meta.isbn_epub:
179             if errors is not None:
180                 errors.append(NoIsbn())
181         return meta.isbn_epub
182
183     def get_authors_data(self, meta, errors=None):
184         authors = []
185         for role, items, obligatory in [
186                 (self.ROLE_AUTHOR, meta.authors, True),
187                 (self.ROLE_TRANSLATOR, meta.translators, False)
188         ]:
189             for person_literal in items:
190                 if person_literal is None: continue
191                 if person_literal.lang != 'pl':
192                     if errors is not None:
193                         if obligatory:
194                              errors.append(AuthorLiteralForeign(person_literal))
195                         else:
196                             errors.append(AuthorLiteralForeignWarning(person_literal))
197                     continue
198                 aobj = Author.get_by_literal(str(person_literal))
199                 if aobj is None:
200                     if errors is not None:
201                         if obligatory:
202                              errors.append(AuthorNotInCatalogue(person_literal))
203                         else:
204                             errors.append(AuthorNotInCatalogueWarning(person_literal))
205                     continue
206                 if not aobj.woblink:
207                     if errors is not None:
208                         if obligatory:
209                              errors.append(AuthorNoWoblink(aobj))
210                         else:
211                             errors.append(AuthorNoWoblinkWarning(aobj))
212                     continue
213                 authors.append((role, aobj.woblink))
214         return authors
215
216     def get_genres(self, meta, errors=None):
217         thema_codes = []
218         if meta.thema_main:
219             thema_codes.append(meta.thema_main)
220         else:
221             if errors is not None:
222                 errors.append(NoMainThemaWarning())
223         thema_codes.extend(meta.thema)
224
225         thema_codes.extend(
226             Audience.objects.filter(code__in=meta.audiences).exclude(
227                 thema='').values_list('thema', flat=True)
228         )
229
230         if not thema_codes:
231             if errors is not None:
232                 errors.append(NoThema())
233         category_ids = []
234         for code in thema_codes:
235             try:
236                 thema = Thema.objects.get(code=code)
237             except Thema.DoesNotExist:
238                 if errors is not None:
239                     errors.append(UnknownThema(code))
240             else:
241                 if thema.woblink_category is None:
242                     if errors is not None:
243                         errors.append(ThemaNoWoblink(thema))
244                 elif thema.woblink_category not in WOBLINK_CATEGORIES:
245                     if errors is not None:
246                         errors.append(ThemaUnknownWoblink(thema))
247                 elif thema.woblink_category not in category_ids:
248                     category_ids.append(thema.woblink_category)
249         if not category_ids:
250             if errors is not None:
251                 errors.append(NoWoblinkCategory())
252         return category_ids
253
254     def get_series(self, meta, errors=None):
255         return list(Audience.objects.filter(code__in=meta.audiences).exclude(
256             woblink=None).values_list('woblink', flat=True))
257
258     def get_abstract(self, wldoc, errors=None, description_add=None):
259         description = self.get_description(wldoc, description_add)
260         parts = description.split('\n', 1)
261         if len(parts) == 1 or len(parts[0]) > 240:
262             # No newline found here.
263             # Try to find last sentence end..
264             parts = re.split(r' \.', description[240::-1], 1)
265             if len(parts) == 2:
266                 p1 = parts[1][::-1] + '.'
267                 p2 = description[len(p1) + 1:]
268             else:
269                 # No sentence end found.
270                 # Just find a space.
271                 p1 = description[:240].rsplit(' ', 1)[0]
272                 p2 = description[len(p1) + 1:]
273                 p1 += '…'
274                 p2 = '…' + p2
275             parts = [p1, p2]
276
277         m = re.search(r'<[^>]+$', parts[0])
278         if m is not None:
279             parts[0] = parts[0][:-len(m.group(0))]
280             parts[1] = m.group(0) + parts[1]
281
282         opened = []
283         for tag in re.findall(r'<[^>]*[^/>]>', parts[0]):
284             if tag[1] == '/':
285                 opened.pop()
286             else:
287                 opened.append(tag)
288         for tag in reversed(opened):
289             parts[0] += '</' + tag[1:-1].split()[0] + '>'
290             parts[1] = tag + parts[1]
291         return {
292             'header': parts[0],
293             'rest': parts[1],
294         }
295
296     def get_lang2code(self, meta, errors=None):
297         return lang_code_3to2(meta.language)
298
299     def get_price(self, site, wldoc, errors=None):
300         try:
301             stats = wldoc.get_statistics()['total']
302         except:
303             if errors:
304                 errors.append(NoPrice(site))
305             return 0
306         words = stats['words_with_fn']
307         pages = stats['chars_with_fn'] / 1800
308         price = site.get_price(words, pages)
309         if price is None:
310             if errors:
311                 errors.append(NoPrice(site))
312             return 0
313
314         return price
315
316     def can_publish(self, site, book):
317         d = {
318             'warnings': [],
319             'errors': [],
320             'info': [],
321         }
322         try:
323             wldoc = book.wldocument(librarian2=True)
324         except:
325             d['errors'].append('Nieprawidłowy dokument.')
326             return d
327         errors = []
328         book_data = self.get_book_data(site, wldoc, errors)
329         for error in errors:
330             if not isinstance(error, Warning):
331                 errlist = d['errors']
332             else:
333                 errlist = d['warnings']
334             errlist.append(error.as_html())
335
336         if book_data.get('isbn'):
337             d['info'].append(format_html(
338                 'ISBN: {isbn}',
339                 isbn=book_data['isbn'],
340             ))
341
342         if book_data.get('genres'):
343             d['info'].append(format_html(
344                 'W kategoriach: {cat} ({price} zł)',
345                 cat=', '.join(self.describe_category(g) for g in book_data['genres']),
346                 price=book_data['price'],
347             ))
348         d['info'].append(mark_safe(
349             '<strong>' + book_data['abstract']['header'] +
350             '</strong><br/>' + book_data['abstract']['rest']
351         ))
352
353         return d
354
355     def describe_category(self, category):
356         t = []
357         while category:
358             c = WOBLINK_CATEGORIES[category]
359             t.append(c['name'])
360             category = c.get('parent')
361         return ' / '.join(reversed(t))
362
363     def create_book(self, isbn):
364         isbn = ''.join(c for c in isbn if c.isdigit())
365         assert len(isbn) == 13
366         response = self.session.post(
367             self.ADD_URL,
368             data={
369                 'AddPublication[pubType]': 'ebook',
370                 'AddPublication[pubHasIsbn]': '1',
371                 'AddPublication[pubIsbn]': isbn,
372                  ##AddPubation[save]
373             }
374         )
375         m = re.search(r'/(\d+)$', response.url)
376         if m is not None:
377             return m.group(1)
378
379     def send_book(self, site_book_publish, changes=None):
380         site_book = site_book_publish.site_book
381         book = site_book.book
382         site = site_book.site
383         wldoc = book.wldocument(librarian2=True, changes=changes, publishable=False) # TODO pub
384         meta = wldoc.meta
385
386         book_data = self.get_book_data(site, wldoc)
387
388         if not site_book.external_id:
389             woblink_id = self.create_book(book_data['isbn'])
390             assert woblink_id
391             site_book.external_id = woblink_id
392             site_book.save(update_fields=['external_id'])
393         woblink_id = site_book.external_id
394
395         self.edit_step1(woblink_id, book_data)
396         self.edit_step2(woblink_id, book_data)
397         self.edit_step3(woblink_id, book_data)
398         cover_id = self.send_cover(woblink_id, wldoc)
399
400         texts = site.get_texts()
401         epub_id, epub_demo = self.send_epub(
402             woblink_id, wldoc, book.gallery_path(),
403             fundraising=texts
404         )
405         mobi_id, mobi_demo = self.send_mobi(
406             woblink_id, wldoc, book.gallery_path(),
407             fundraising=texts
408         )
409         self.edit_step4(
410             woblink_id, book_data,
411             cover_id, epub_id, epub_demo, mobi_id, mobi_demo,
412         )
413         self.edit_step5(woblink_id, book_data)
414
415     def get_book_data(self, site, wldoc, errors=None):
416         return {
417             "title": wldoc.meta.title,
418             "isbn": self.get_isbn(wldoc.meta, errors=errors),
419             "authors": self.get_authors_data(wldoc.meta, errors=errors),
420             "abstract": self.get_abstract(
421                 wldoc, errors=errors, description_add=site.description_add
422             ),
423             "lang2code": self.get_lang2code(wldoc.meta, errors=errors),
424             "genres": self.get_genres(wldoc.meta, errors=errors),
425             "price": self.get_price(site, wldoc, errors=errors),
426             "series": self.get_series(wldoc.meta, errors=errors),
427         }
428
429     def with_form_name(self, data, name):
430         return {
431             f"{name}[{k}]": v
432             for (k, v) in data.items()
433         }
434
435     def edit_step1(self, woblink_id, book_data):
436         data = book_data
437
438         authors_data = [
439             {
440                 "AhpPubId": woblink_id,
441                 "AhpAutId": author_id,
442                 "AhpType": author_type,
443             }
444             for (author_type, author_id) in data['authors']
445         ]
446
447         series_data = [
448             {
449                 'PublicationId': woblink_id,
450                 'SeriesId': series_id,
451             }
452             for series_id in data['series']
453         ]
454
455         d = {
456             'pubTitle': book_data['title'],
457             'npwAuthorHasPublications': json.dumps(authors_data),
458             'pubShortNote': data['abstract']['header'],
459             'pubNote': data['abstract']['rest'],
460             'pubCulture': data['lang2code'],
461             'npwPublicationHasAwards': '[]',
462             'npwPublicationHasSeriess': json.dumps(series_data),
463         }
464         d = self.with_form_name(d, 'EditPublicationStep1')
465         d['roles'] = [author_type for (author_type, author_id) in data['authors']]
466         r = self.session.post(self.STEP1_URL % woblink_id, data=d)
467         return r
468
469
470     def edit_step2(self, woblink_id, book_data):
471         gd = {}
472         legacy = None
473         for i, g in enumerate(book_data['genres']):
474             gdata = WOBLINK_CATEGORIES[g]
475             if legacy is None:
476                 legacy = gdata.get('legacy')
477             if p := gdata.get('parent'):
478                 gd.setdefault(p, {'isMain': False})
479                 gd[p].setdefault('children', [])
480                 gd[p]['children'].append(str(g))
481                 gd[p].setdefault('mainChild', str(g))
482                 if legacy is None:
483                     legacy = WOBLINK_CATEGORIES[p].get('legacy')
484             else:
485                 gd.setdefault(g, {})
486                 gd[g]['isMain'] = True
487         gd = [
488             {
489                 "pubId": woblink_id,
490                 "category": str(k),
491                 **v
492             }
493             for k, v in gd.items()
494         ]
495
496         data = {
497             'npwPublicationHasNewGenres': json.dumps(gd),
498             'genre': legacy or '',
499         }
500         data = self.with_form_name(data, 'AddPublicationStep2')
501         return self.session.post(self.STEP2_URL % woblink_id, data=data)
502
503     def edit_step3(self, woblink_id, book_data):
504         d = {
505             'pubBasePrice': book_data['price'],
506             'pubPremiereDate': date.today().isoformat(),
507             'pubIsLicenseIndefinite': '1',
508             'pubFileFormat': 'epub+mobi',
509             'pubIsAcs': '0',
510             'pubPublisherIndex': '',
511             'save_and_continue': '',
512         }
513         d = self.with_form_name(d, 'EditPublicationStep3')
514         return self.session.post(self.STEP3_URL % woblink_id, data=d)
515
516     def edit_step4(self, woblink_id, book_data, cover_id, epub_id, epub_demo, mobi_id, mobi_demo):
517         d = {
518             'pubCoverResId': cover_id,
519             'pubEpubResId': epub_id,
520             'pubEpubDemoResId': epub_demo,
521             'pubMobiResId': mobi_id,
522             'pubMobiDemoResId': mobi_demo,
523             'pubFileFormat': 'epub+mobi',
524             'pubId': woblink_id,
525             'save_and_continue': '',
526         }
527         d = self.with_form_name(d, 'EditPublicationStep4')
528         return self.session.post(self.STEP4_URL % woblink_id, data=d)
529
530     def edit_step5(self, woblink_id, book_data):
531         d = {'save': ''}
532         d = self.with_form_name(d, 'EditPublicationStep5')
533         return self.session.post(self.STEP5_URL % woblink_id, data=d)
534
535     def wait_for_job(self, job_id):
536         while True:
537             response = self.session.post(
538                 self.JOB_STATUS_URL,
539                 data={'ids[]': job_id}
540             )
541             data = response.json()[job_id]
542             if data['ready']:
543                 assert data['successful']
544                 return data.get('returnValue')
545             sleep(2)
546
547     def upload_file(self, woblink_id, filename, content, field_name, mime_type):
548         form_name = f'Upload{field_name}'
549         id_field = f'pub{field_name}ResId'
550         field_name = field_name.lower()
551
552         data = {
553             'pubId': woblink_id,
554         }
555         files = {
556             field_name: (filename, content, mime_type)
557         }
558         
559         response = self.session.post(
560             self.UPLOAD_URL % field_name,
561             data=self.with_form_name(data, form_name),
562             files=self.with_form_name(files, form_name),
563         )
564         resp_data = response.json()
565         assert resp_data['success'] is True
566         file_id = resp_data[id_field]
567         if 'jobId' in resp_data:
568             self.wait_for_job(resp_data['jobId'])
569         return file_id
570
571     def generate_demo(self, woblink_id, file_format, check=True):
572         percent = 10
573         while True:
574             job_id = self.session.get(
575                 self.GENERATE_DEMO_URL % (file_format, woblink_id, percent),
576             ).json()['jobId']
577             try:
578                 file_id = self.wait_for_job(job_id)
579                 if check:
580                     self.wait_for_job(
581                         self.session.get(
582                             self.CHECK_DEMO_URL % (file_format, woblink_id)
583                         ).json()['jobId']
584                     )
585             except AssertionError:
586                 if percent < 50:
587                     percent += 10
588                 else:
589                     raise
590             else:
591                 break
592
593         return file_id
594
595     def send_epub(self, woblink_id, doc, gallery_path, fundraising=None):
596         from librarian.builders import EpubBuilder
597         content = EpubBuilder(
598             base_url='file://' + gallery_path + '/',
599             fundraising=fundraising or [],
600         ).build(doc).get_file()
601         file_id = self.upload_file(
602             woblink_id,
603             doc.meta.url.slug + '.epub',
604             content,
605             'Epub',
606             'application/epub+zip'
607         )
608         demo_id = self.generate_demo(woblink_id, 'epub')
609         return file_id, demo_id
610
611     def send_mobi(self, woblink_id, doc, gallery_path, fundraising=None):
612         from librarian.builders import MobiBuilder
613         content = MobiBuilder(
614             base_url='file://' + gallery_path + '/',
615             fundraising=fundraising or [],
616         ).build(doc).get_file()
617         file_id = self.upload_file(
618             woblink_id,
619             doc.meta.url.slug + '.mobi',
620             content,
621             'Mobi',
622             'application/x-mobipocket-ebook'
623         )
624         demo_id = self.generate_demo(woblink_id, 'mobi', check=False)
625         return file_id, demo_id
626
627     def send_cover(self, woblink_id, doc):
628         from librarian.cover import make_cover
629         # TODO Labe
630         # A5 @ 300ppi.
631         cover = make_cover(doc.meta, cover_class='m-label', width=1748, height=2480)
632         content = io.BytesIO()
633         cover.final_image().save(content, cover.format)
634         content.seek(0)
635         file_id = self.upload_file(
636             woblink_id,
637             doc.meta.url.slug + '.jpeg',
638             content,
639             'Cover',
640             cover.mime_type()
641         )
642         return file_id