8472a180b6169506ec932ad702e934f1fc1d6fd7
[redakcja.git] / src / depot / publishers / woblink.py
from datetime import date
import io
import json
import re
from time import sleep
from urllib.parse import quote

from django.conf import settings
from django.utils.html import escape, format_html
from django.utils.safestring import mark_safe
from librarian.builders.html import SnippetHtmlBuilder
from librarian.functions import lang_code_3to2

from catalogue.models import Audience, Author, Thema
from .. import models
from .base import BasePublisher
from .woblink_constants import WOBLINK_CATEGORIES
15
16
class WoblinkError(ValueError):
    """Base class for the Woblink-specific error conditions in this module."""
19
class NoPrice(WoblinkError):
    """No price could be determined; ``args[0]`` is the shop object."""

    def as_html(self):
        """Return an HTML message linking to the shop in the admin."""
        shop = self.args[0]
        template = 'Brak <a href="/admin/depot/shop/{price}">określonej ceny</a>.'
        return format_html(template, price=shop.id)
26
class NoIsbn(WoblinkError):
    """The document metadata carries no EPUB ISBN."""

    def as_html(self):
        """Return the (static) message describing the problem."""
        message = 'Brak ISBN.'
        return message
30
class AuthorLiteralForeign(WoblinkError):
    """An author literal is given in a language other than Polish.

    ``args[0]`` is the person literal; it must have a ``lang`` attribute.
    """

    def as_html(self):
        """Return an HTML message naming the author and the language."""
        literal = self.args[0]
        return format_html(
            'Nie obsługiwane: autor „{author}” w języku {lang}.',
            author=str(literal),
            lang=literal.lang,
        )
38
class AuthorNotInCatalogue(WoblinkError):
    """An author literal has no matching catalogue entry.

    ``args[0]`` is the person literal that could not be resolved.
    """

    def as_html(self):
        """Return an HTML message naming the missing author."""
        author_name = str(self.args[0])
        return format_html(
            'Brak autora „{author}” w katalogu.',
            author=author_name
        )
45
class AuthorNoWoblink(WoblinkError):
    """A catalogue author has no Woblink identifier.

    ``args[0]`` is the catalogue author object (``id`` and ``name`` are read).
    """

    def as_html(self):
        """Return an HTML message linking to the author in the admin."""
        author = self.args[0]
        template = (
            'Autor <a href="/admin/catalogue/author/{author_id}/">{author}</a>'
            ' bez identyfikatora Woblink.'
        )
        return format_html(template, author_id=author.id, author=author.name)
53
class NoThema(WoblinkError):
    """The document metadata lists no Thema codes at all."""

    def as_html(self):
        """Return the (static) message as a safe HTML string.

        ``mark_safe`` is used instead of ``format_html`` because calling
        ``format_html()`` without any args/kwargs is deprecated in Django 5.0;
        for a template without placeholders both yield the same safe string.
        """
        return mark_safe('Brak Thema.')
57
class UnknownThema(WoblinkError):
    """A Thema code from the metadata is not present in the catalogue.

    ``args[0]`` is the unknown code.
    """

    def as_html(self):
        """Return an HTML message naming the unknown code."""
        code = self.args[0]
        return format_html('Nieznana Thema {code}.', code=code)
64
65
class ThemaUnknownWoblink(WoblinkError):
    """A Thema entry points at a Woblink category that is not known.

    ``args[0]`` is the Thema object (``id`` and ``code`` are read).
    """

    def as_html(self):
        """Return an HTML message linking to the Thema entry in the admin."""
        thema = self.args[0]
        template = (
            'Thema <a href="/admin/catalogue/thema/{id}/">{code}</a>'
            ' przypisana do nieznanej kategorii Woblink.'
        )
        return format_html(template, id=thema.id, code=thema.code)
73
class NoWoblinkCategory(WoblinkError):
    """None of the book's Thema codes resolved to a Woblink category."""

    def as_html(self):
        """Return the (static) message describing the problem."""
        message = 'Brak kategorii Woblink.'
        return message
77
class WoblinkWarning(Warning):
    """Base class for the Woblink-specific warning conditions in this module."""
80
class NoMainThemaWarning(WoblinkWarning):
    """The document metadata has no main Thema category."""

    def as_html(self):
        """Return the (static) message as a safe HTML string.

        ``mark_safe`` is used instead of ``format_html`` because calling
        ``format_html()`` without any args/kwargs is deprecated in Django 5.0;
        for a template without placeholders both yield the same safe string.
        """
        return mark_safe('Brak głównej kategorii Thema.')
86
class ThemaNoWoblink(WoblinkWarning):
    """A Thema entry has no Woblink category assigned.

    ``args[0]`` is the Thema object (``id`` and ``code`` are read).
    """

    def as_html(self):
        """Return an HTML message linking to the Thema entry in the admin."""
        thema = self.args[0]
        template = (
            'Thema <a href="/admin/catalogue/thema/{id}/">{code}</a>'
            ' nie przypisana do kategorii Woblink.'
        )
        return format_html(template, id=thema.id, code=thema.code)
94
class AuthorLiteralForeignWarning(WoblinkWarning):
    """Warning counterpart of ``AuthorLiteralForeign``.

    ``args[0]`` is the person literal; it must have a ``lang`` attribute.
    """

    def as_html(self):
        """Return an HTML message naming the person and the language."""
        literal = self.args[0]
        return format_html(
            'Nie obsługiwane: autor „{author}” w języku {lang}.',
            author=str(literal),
            lang=literal.lang,
        )
102
class AuthorNotInCatalogueWarning(WoblinkWarning):
    """Warning counterpart of ``AuthorNotInCatalogue``.

    ``args[0]`` is the person literal that could not be resolved.
    """

    def as_html(self):
        """Return an HTML message naming the missing person."""
        author_name = str(self.args[0])
        return format_html(
            'Brak autora „{author}” w katalogu.',
            author=author_name
        )
109
class AuthorNoWoblinkWarning(WoblinkWarning):
    """Warning counterpart of ``AuthorNoWoblink``.

    ``args[0]`` is the catalogue author object (``id`` and ``name`` are read).
    """

    def as_html(self):
        """Return an HTML message linking to the author in the admin."""
        author = self.args[0]
        template = (
            'Autor <a href="/admin/catalogue/author/{author_id}/">{author}</a>'
            ' bez identyfikatora Woblink.'
        )
        return format_html(template, author_id=author.id, author=author.name)
117
118
119
120
class Woblink(BasePublisher):
    """Publisher that drives the Woblink publisher panel over HTTP."""

    # Root of the publisher panel; every other URL is built on top of it.
    BASE_URL = 'https://publisher.woblink.com/'
    # Endpoint used to create a new publication.
    ADD_URL = BASE_URL + 'catalog/add'
    # Edit-form steps; ``%s`` is filled with the Woblink publication id.
    STEP1_URL = BASE_URL + 'catalog/edit/%s'
    STEP2_URL = BASE_URL + 'catalog/edit/%s/2'
    STEP3_URL = BASE_URL + 'catalog/edit/%s/3'
    # ``%s`` is the upload field name (callers pass 'epub', 'mobi', 'cover').
    UPLOAD_URL = BASE_URL + 'file/upload-%s'
    # Polled with a job id to learn whether a background job has finished.
    JOB_STATUS_URL = BASE_URL + 'task/status'
    # Placeholders: file format, publication id, demo percentage.
    GENERATE_DEMO_URL = BASE_URL + 'task/run/generate-%s-demo/%s/%d'
    # Placeholders: file format, publication id.
    CHECK_DEMO_URL = BASE_URL + 'task/run/check-%s-demo/%s'

    # Filled in with ``str.format``: catalogue category and search term.
    SEARCH_CATALOGUE_URL = BASE_URL + '{category}/autocomplete/{term}'

    # Role codes sent with each person: ROLE_AUTHOR for ``meta.authors``,
    # ROLE_TRANSLATOR for ``meta.translators``.
    ROLE_AUTHOR = 1
    ROLE_TRANSLATOR = 4
136
137     def login(self):
138         response = self.session.get('https://publisher.woblink.com/login')
139         token = re.search(
140             r'name="_csrf_token" value="([^"]+)"',
141             response.text
142         ).group(1)
143         data = {
144             '_csrf_token': token,
145             '_username': self.username,
146             '_password': self.password,
147         }
148         response = self.session.post(
149             'https://publisher.woblink.com/login_check',
150             data=data,
151         )
152
153     def search_catalogue(self, category, term):
154         return self.session.get(
155             self.SEARCH_CATALOGUE_URL.format(category=category, term=term)
156         ).json()
157
158     def search_author_catalogue(self, term):
159         return [
160             {
161                 'id': item['autId'],
162                 'text': item['autFullname']
163             }
164             for item in self.search_catalogue('author', term)
165         ]
166     def search_series_catalogue(self, term):
167         return [
168             {
169                 'id': item['id'],
170                 'text': item['name']
171             }
172             for item in self.search_catalogue('series', term)
173         ]
174         
175     def get_isbn(self, meta, errors=None):
176         if not meta.isbn_epub:
177             if errors is not None:
178                 errors.append(NoIsbn())
179         return meta.isbn_epub
180
181     def get_authors_data(self, meta, errors=None):
182         authors = []
183         for role, items, obligatory in [
184                 (self.ROLE_AUTHOR, meta.authors, True),
185                 (self.ROLE_TRANSLATOR, meta.translators, False)
186         ]:
187             for person_literal in items:
188                 if person_literal.lang != 'pl':
189                     if errors is not None:
190                         if obligatory:
191                              errors.append(AuthorLiteralForeign(person_literal))
192                         else:
193                             errors.append(AuthorLiteralForeignWarning(person_literal))
194                     continue
195                 aobj = Author.get_by_literal(str(person_literal))
196                 if aobj is None:
197                     if errors is not None:
198                         if obligatory:
199                              errors.append(AuthorNotInCatalogue(person_literal))
200                         else:
201                             errors.append(AuthorNotInCatalogueWarning(person_literal))
202                     continue
203                 if not aobj.woblink:
204                     if errors is not None:
205                         if obligatory:
206                              errors.append(AuthorNoWoblink(aobj))
207                         else:
208                             errors.append(AuthorNoWoblinkWarning(aobj))
209                     continue
210                 authors.append((role, aobj.woblink))
211         return authors
212
213     def get_genres(self, meta, errors=None):
214         thema_codes = []
215         if meta.thema_main:
216             thema_codes.append(meta.thema_main)
217         else:
218             if errors is not None:
219                 errors.append(NoMainThemaWarning())
220         thema_codes.extend(meta.thema)
221         if not thema_codes:
222             if errors is not None:
223                 errors.append(NoThema())
224         category_ids = []
225         for code in thema_codes:
226             try:
227                 thema = Thema.objects.get(code=code)
228             except Thema.DoesNotExist:
229                 if errors is not None:
230                     errors.append(UnknownThema(code))
231             else:
232                 if thema.woblink_category is None:
233                     if errors is not None:
234                         errors.append(ThemaNoWoblink(thema))
235                 elif thema.woblink_category not in WOBLINK_CATEGORIES:
236                     if errors is not None:
237                         errors.append(ThemaUnknownWoblink(thema))
238                 elif thema.woblink_category not in category_ids:
239                     category_ids.append(thema.woblink_category)
240         if not category_ids:
241             if errors is not None:
242                 errors.append(NoWoblinkCategory())
243         return category_ids
244
245     def get_series(self, meta, errors=None):
246         return list(Audience.objects.filter(code__in=meta.audiences).exclude(
247             woblink=None).values_list('woblink', flat=True))
248
    def get_abstract(self, wldoc, errors=None, description_add=None):
        """Split the book description into a short header and the rest.

        Returns a dict with keys ``'header'`` and ``'rest'``. The description
        comes from ``self.get_description`` (defined outside this view;
        presumably an HTML string -- TODO confirm). ``errors`` is accepted for
        signature parity with the other getters and is not used here.
        """
        description = self.get_description(wldoc, description_add)
        # First choice: the header is everything up to the first newline.
        parts = description.split('\n', 1)
        if len(parts) == 1 or len(parts[0]) > 240:
            # No newline at all, or the first line is longer than 240 chars.
            # Look for the last sentence end within the leading characters:
            # the slice is reversed, so ' \.' matches ". " in reading order.
            parts = re.split(r' \.', description[240::-1], 1)
            if len(parts) == 2:
                # parts[1] is the (reversed) text before that full stop.
                p1 = parts[1][::-1] + '.'
                # Skip the space that followed the full stop.
                p2 = description[len(p1) + 1:]
            else:
                # No sentence end found.
                # Cut at the last space within the first 240 characters and
                # mark the cut with an ellipsis on both sides.
                p1 = description[:240].rsplit(' ', 1)[0]
                p2 = description[len(p1) + 1:]
                p1 += '…'
                p2 = '…' + p2
            parts = [p1, p2]

        # If the header ends in the middle of a tag ("<..." with no closing
        # ">"), move that fragment to the beginning of the rest.
        m = re.search(r'<[^>]+$', parts[0])
        if m is not None:
            parts[0] = parts[0][:-len(m.group(0))]
            parts[1] = m.group(0) + parts[1]

        # Track tags left open in the header; tags ending in "/>" are not
        # matched by the pattern.
        # NOTE(review): a closing tag with nothing open would make
        # ``opened.pop()`` raise IndexError -- confirm input is well-formed.
        opened = []
        for tag in re.findall(r'<[^>]*[^/>]>', parts[0]):
            if tag[1] == '/':
                opened.pop()
            else:
                opened.append(tag)
        # Close the still-open tags at the end of the header (innermost
        # first) and re-open them at the start of the rest.
        for tag in reversed(opened):
            parts[0] += '</' + tag[1:-1].split()[0] + '>'
            parts[1] = tag + parts[1]
        return {
            'header': parts[0],
            'rest': parts[1],
        }
286
287     def get_lang2code(self, meta, errors=None):
288         return lang_code_3to2(meta.language)
289
290     def get_price(self, shop, wldoc, errors=None):
291         try:
292             stats = wldoc.get_statistics()['total']
293         except:
294             if errors:
295                 errors.append(NoPrice(shop))
296             return 0
297         words = stats['words_with_fn']
298         pages = stats['chars_with_fn'] / 1800
299         price = shop.get_price(words, pages)
300         if price is None:
301             if errors:
302                 errors.append(NoPrice(shop))
303             return 0
304
305         return price
306
307     def can_publish(self, shop, book):
308         wldoc = book.wldocument(librarian2=True)
309         d = {
310             'warnings': [],
311             'errors': [],
312         }
313         errors = []
314         book_data = self.get_book_data(shop, wldoc, errors)
315         for error in errors:
316             if not isinstance(error, Warning):
317                 errlist = d['errors']
318             else:
319                 errlist = d['warnings']
320             errlist.append(error.as_html())
321
322         if book_data.get('genres'):
323             d['comment'] = format_html(
324                 'W kategoriach: {cat} ({price} zł)',
325                 cat=', '.join(self.describe_category(g) for g in book_data['genres']),
326                 price=book_data['price']
327             )
328
329         return d
330
331     def describe_category(self, category):
332         t = []
333         while category:
334             c = WOBLINK_CATEGORIES[category]
335             t.append(c['name'])
336             category = c.get('parent')
337         return ' / '.join(reversed(t))
338
339     def create_book(self, isbn):
340         isbn = ''.join(c for c in isbn if c.isdigit())
341         assert len(isbn) == 13
342         response = self.session.post(
343             self.ADD_URL,
344             data={
345                 'AddPublication[pubType]': 'ebook',
346                 'AddPublication[pubHasIsbn]': '1',
347                 'AddPublication[pubIsbn]': isbn,
348                  ##AddPubation[save]
349             }
350         )
351         m = re.search(r'/(\d+)$', response.url)
352         if m is not None:
353             return m.group(1)
354
355     def send_book(self, shop, book, changes=None):
356         wldoc = book.wldocument(librarian2=True, changes=changes, publishable=False) # TODO pub
357         meta = wldoc.meta
358
359         book_data = self.get_book_data(shop, wldoc)
360
361         if not book.woblink_id:
362             #book.woblink_id = 2959868
363             woblink_id = self.create_book(book_data['isbn'])
364             assert woblink_id
365             book.woblink_id = woblink_id
366             book.save(update_fields=['woblink_id'])
367
368         self.edit_step1(book.woblink_id, book_data)
369         self.edit_step2(book.woblink_id, book_data)
370         self.edit_step3(book.woblink_id, book_data)
371         self.send_cover(book.woblink_id, wldoc)
372         texts = shop.get_texts()
373         self.send_epub(
374             book.woblink_id, wldoc, book.gallery_path(),
375             fundraising=texts
376         )
377         self.send_mobi(
378             book.woblink_id, wldoc, book.gallery_path(),
379             fundraising=texts
380         )
381
382     def get_book_data(self, shop, wldoc, errors=None):
383         return {
384             "title": wldoc.meta.title,
385             "isbn": self.get_isbn(wldoc.meta, errors=errors),
386             "authors": self.get_authors_data(wldoc.meta, errors=errors),
387             "abstract": self.get_abstract(
388                 wldoc, errors=errors, description_add=shop.description_add
389             ),
390             "lang2code": self.get_lang2code(wldoc.meta, errors=errors),
391             "genres": self.get_genres(wldoc.meta, errors=errors),
392             "price": self.get_price(shop, wldoc, errors=errors),
393             "series": self.get_series(wldoc.meta, errors=errors),
394         }
395
396     def with_form_name(self, data, name):
397         return {
398             f"{name}[{k}]": v
399             for (k, v) in data.items()
400         }
401
402     def edit_step1(self, woblink_id, book_data):
403         data = book_data
404
405         authors_data = [
406             {
407                 "AhpPubId": woblink_id,
408                 "AhpAutId": author_id,
409                 "AhpType": author_type,
410             }
411             for (author_type, author_id) in data['authors']
412         ]
413
414         series_data = [
415             {
416                 'PublicationId': woblink_id,
417                 'SeriesId': series_id,
418             }
419             for series_id in data['series']
420         ]
421
422         d = {
423             'pubTitle': book_data['title'],
424             'npwAuthorHasPublications': json.dumps(authors_data),
425             'pubShortNote': data['abstract']['header'],
426             'pubNote': data['abstract']['rest'],
427             'pubCulture': data['lang2code'],
428             'npwPublicationHasAwards': '[]',
429             'npwPublicationHasSeriess': json.dumps(series_data),
430         }
431         d = self.with_form_name(d, 'EditPublicationStep1')
432         d['roles'] = [author_type for (author_type, author_id) in data['authors']]
433         r = self.session.post(self.STEP1_URL % woblink_id, data=d)
434         return r
435
436
437     def edit_step2(self, woblink_id, book_data):
438         gd = {}
439         legacy = None
440         for i, g in enumerate(book_data['genres']):
441             gdata = WOBLINK_CATEGORIES[g]
442             if legacy is None:
443                 legacy = gdata.get('legacy')
444             if p := gdata.get('parent'):
445                 gd.setdefault(p, {'isMain': False})
446                 gd[p].setdefault('children', [])
447                 gd[p]['children'].append(str(g))
448                 gd[p].setdefault('mainChild', str(g))
449                 if legacy is None:
450                     legacy = WOBLINK_CATEGORIES[p].get('legacy')
451             else:
452                 gd.setdefault(g, {})
453                 gd[g]['isMain'] = True
454         gd = [
455             {
456                 "pubId": woblink_id,
457                 "category": str(k),
458                 **v
459             }
460             for k, v in gd.items()
461         ]
462
463         data = {
464             'npwPublicationHasNewGenres': json.dumps(gd),
465             'genre': legacy or '',
466         }
467         data = self.with_form_name(data, 'AddPublicationStep2')
468         return self.session.post(self.STEP2_URL % woblink_id, data=data)
469
470     def edit_step3(self, woblink_id, book_data):
471         d = {
472             'pubBasePrice': book_data['price'],
473             'pubPremiereDate': date.today().isoformat(),
474             'pubIsLicenseIndefinite': '1',
475             'pubFileFormat': 'epub+mobi',
476             'pubIsAcs': '0',
477             'pubPublisherIndex': '',
478         }
479         d = self.with_form_name(d, 'EditPublicationStep3')
480         return self.session.post(self.STEP3_URL % woblink_id, data=d)
481
482     def wait_for_job(self, job_id):
483         while True:
484             response = self.session.post(
485                 self.JOB_STATUS_URL,
486                 data={'ids[]': job_id}
487             )
488             data = response.json()[job_id]
489             if data['ready']:
490                 assert data['successful']
491                 return
492             sleep(2)
493
494     def upload_file(self, woblink_id, filename, content, form_name, field_name, mime_type):
495         data = {
496             'pubId': woblink_id,
497         }
498         files = {
499             field_name: (filename, content, mime_type)
500         }
501         response = self.session.post(
502             self.UPLOAD_URL % field_name,
503             data=self.with_form_name(data, form_name),
504             files=self.with_form_name(files, form_name),
505         )
506         resp_data = response.json()
507         assert resp_data['success'] is True
508         if 'jobId' in resp_data:
509             self.wait_for_job(resp_data['jobId'])
510
511     def generate_demo(self, woblink_id, file_format, check=True):
512         percent = 10
513         while True:
514             job_id = self.session.get(
515                 self.GENERATE_DEMO_URL % (file_format, woblink_id, percent),
516             ).json()['jobId']
517             try:
518                 self.wait_for_job(job_id)
519             except AssertionError:
520                 if percent < 50:
521                     percent += 10
522                 else:
523                     raise
524             else:
525                 break
526
527         if check:
528             self.wait_for_job(
529                 self.session.get(
530                     self.CHECK_DEMO_URL % (file_format, woblink_id)
531                 ).json()['jobId']
532             )
533
534     def send_epub(self, woblink_id, doc, gallery_path, fundraising=None):
535         from librarian.builders import EpubBuilder
536         content = EpubBuilder(
537             base_url='file://' + gallery_path + '/',
538             fundraising=fundraising or [],
539         ).build(doc).get_file()
540         self.upload_file(
541             woblink_id,
542             doc.meta.url.slug + '.epub',
543             content,
544             'UploadEpub',
545             'epub',
546             'application/epub+zip'
547         )
548         self.generate_demo(woblink_id, 'epub')
549
550     def send_mobi(self, woblink_id, doc, gallery_path, fundraising=None):
551         from librarian.builders import MobiBuilder
552         content = MobiBuilder(
553             base_url='file://' + gallery_path + '/',
554             fundraising=fundraising or [],
555         ).build(doc).get_file()
556         self.upload_file(
557             woblink_id,
558             doc.meta.url.slug + '.mobi',
559             content,
560             'UploadMobi',
561             'mobi',
562             'application/x-mobipocket-ebook'
563         )
564         self.generate_demo(woblink_id, 'mobi', check=False)
565
566     def send_cover(self, woblink_id, doc):
567         from librarian.cover import make_cover
568         # TODO Labe
569         # A5 @ 300ppi.
570         cover = make_cover(doc.meta, cover_class='m-label', width=1748, height=2480)
571         content = io.BytesIO()
572         cover.final_image().save(content, cover.format)
573         content.seek(0)
574         self.upload_file(
575             woblink_id,
576             doc.meta.url.slug + '.jpeg',
577             content,
578             'UploadCover',
579             'cover',
580             cover.mime_type()
581         )