00dca5ec22eb96aee95803b07fdfdd187452fe3e
[redakcja.git] / src / depot / publishers / woblink.py
1 from datetime import date
2 import io
3 import json
4 import re
5 from time import sleep
6 from django.conf import settings
7 from django.utils.html import escape, format_html
8 from django.utils.safestring import mark_safe
9 from librarian.builders.html import SnippetHtmlBuilder
10 from librarian.functions import lang_code_3to2
11 from catalogue.models import Author, Thema
12 from .. import models
13 from .base import BasePublisher
14 from .woblink_constants import WOBLINK_CATEGORIES
15
16
class WoblinkError(ValueError):
    """Base class for Woblink problems reported as errors.

    ``Woblink.can_publish`` lists every collected problem that is not a
    ``Warning`` under ``'errors'``.
    """
19
class NoPrice(WoblinkError):
    """Error: the shop returned no price for the book.

    ``args[0]`` is the shop object; its ``id`` is used in the admin link.
    """

    def as_html(self):
        """Return the message as HTML, linking to the shop in the admin."""
        shop = self.args[0]
        return format_html(
            'Brak <a href="/admin/depot/shop/{price}">określonej ceny</a>.',
            price=shop.id,
        )
26
class NoIsbn(WoblinkError):
    """Error: the book metadata carries no EPUB ISBN."""

    def as_html(self):
        """Return the message; it is a plain string without any markup."""
        message = 'Brak ISBN.'
        return message
30
class AuthorLiteralForeign(WoblinkError):
    """Error: an obligatory person literal is not in Polish.

    ``args[0]`` is the literal; its string form and ``lang`` are shown.
    """

    def as_html(self):
        """Return the escaped message naming the person and the language."""
        literal = self.args[0]
        return format_html(
            'Nie obsługiwane: autor „{author}” w języku {lang}.',
            author=str(literal),
            lang=literal.lang,
        )
38
class AuthorNotInCatalogue(WoblinkError):
    """Error: an obligatory person literal has no matching catalogue author.

    ``args[0]`` is the literal that could not be resolved.
    """

    def as_html(self):
        """Return the escaped message naming the missing person."""
        literal = self.args[0]
        return format_html(
            'Brak autora „{author}” w katalogu.',
            author=str(literal),
        )
45
class AuthorNoWoblink(WoblinkError):
    """Error: an obligatory catalogue author has no Woblink identifier.

    ``args[0]`` is the catalogue author; ``id`` and ``name`` are rendered.
    """

    def as_html(self):
        """Return the message as HTML, linking to the author in the admin."""
        author = self.args[0]
        return format_html(
            'Autor <a href="/admin/catalogue/author/{author_id}/">{author}</a> bez identyfikatora Woblink.',
            author_id=author.id,
            author=author.name,
        )
53
class NoThema(WoblinkError):
    """Error: the book metadata lists no Thema codes at all."""

    def as_html(self):
        """Return the constant message as a safe HTML string."""
        # The message has no placeholders. Calling format_html() without
        # args or kwargs is deprecated since Django 5.0, so the constant
        # literal is marked safe directly instead.
        return mark_safe('Brak Thema.')
57
class UnknownThema(WoblinkError):
    """Error: a Thema code from the metadata is not in the catalogue.

    ``args[0]`` is the offending code.
    """

    def as_html(self):
        """Return the escaped message naming the unknown code."""
        code = self.args[0]
        return format_html(
            'Nieznana Thema {code}.',
            code=code,
        )
64
65
class ThemaUnknownWoblink(WoblinkError):
    """Error: a Thema maps to a category missing from ``WOBLINK_CATEGORIES``.

    ``args[0]`` is the Thema object; ``id`` and ``code`` are rendered.
    """

    def as_html(self):
        """Return the message as HTML, linking to the Thema in the admin."""
        thema = self.args[0]
        return format_html(
            'Thema <a href="/admin/catalogue/thema/{id}/">{code}</a> przypisana do nieznanej kategorii Woblink.',
            id=thema.id,
            code=thema.code,
        )
73
class NoWoblinkCategory(WoblinkError):
    """Error: none of the book's Thema codes produced a Woblink category."""

    def as_html(self):
        """Return the message; it is a plain string without any markup."""
        message = 'Brak kategorii Woblink.'
        return message
77
class WoblinkWarning(Warning):
    """Base class for Woblink problems reported as warnings.

    ``Woblink.can_publish`` lists every collected ``Warning`` instance under
    ``'warnings'`` rather than ``'errors'``.
    """
80
class NoMainThemaWarning(WoblinkWarning):
    """Warning: the book metadata has no main Thema category."""

    def as_html(self):
        """Return the constant message as a safe HTML string."""
        # The message has no placeholders. Calling format_html() without
        # args or kwargs is deprecated since Django 5.0, so the constant
        # literal is marked safe directly instead.
        return mark_safe('Brak głównej kategorii Thema.')
86
class ThemaNoWoblink(WoblinkWarning):
    """Warning: a Thema has no Woblink category assigned.

    ``args[0]`` is the Thema object; ``id`` and ``code`` are rendered.
    """

    def as_html(self):
        """Return the message as HTML, linking to the Thema in the admin."""
        thema = self.args[0]
        return format_html(
            'Thema <a href="/admin/catalogue/thema/{id}/">{code}</a> nie przypisana do kategorii Woblink.',
            id=thema.id,
            code=thema.code,
        )
94
class AuthorLiteralForeignWarning(WoblinkWarning):
    """Warning: a non-obligatory person literal is not in Polish.

    ``args[0]`` is the literal; its string form and ``lang`` are shown.
    """

    def as_html(self):
        """Return the escaped message naming the person and the language."""
        literal = self.args[0]
        return format_html(
            'Nie obsługiwane: autor „{author}” w języku {lang}.',
            author=str(literal),
            lang=literal.lang,
        )
102
class AuthorNotInCatalogueWarning(WoblinkWarning):
    """Warning: a non-obligatory person literal has no catalogue author.

    ``args[0]`` is the literal that could not be resolved.
    """

    def as_html(self):
        """Return the escaped message naming the missing person."""
        literal = self.args[0]
        return format_html(
            'Brak autora „{author}” w katalogu.',
            author=str(literal),
        )
109
class AuthorNoWoblinkWarning(WoblinkWarning):
    """Warning: a non-obligatory catalogue author has no Woblink identifier.

    ``args[0]`` is the catalogue author; ``id`` and ``name`` are rendered.
    """

    def as_html(self):
        """Return the message as HTML, linking to the author in the admin."""
        author = self.args[0]
        return format_html(
            'Autor <a href="/admin/catalogue/author/{author_id}/">{author}</a> bez identyfikatora Woblink.',
            author_id=author.id,
            author=author.name,
        )
117
118
119
120
class Woblink(BasePublisher):
    """Publisher that talks to the Woblink publisher panel over HTTP."""

    # Root of the publisher panel; every endpoint below is derived from it.
    BASE_URL = 'https://publisher.woblink.com/'

    # Catalogue endpoints; the ``%s`` slot takes the publication id.
    ADD_URL = f'{BASE_URL}catalog/add'
    STEP1_URL = f'{BASE_URL}catalog/edit/%s'
    STEP2_URL = f'{BASE_URL}catalog/edit/%s/2'
    STEP3_URL = f'{BASE_URL}catalog/edit/%s/3'

    # File upload endpoint; ``%s`` is the upload field name.
    UPLOAD_URL = f'{BASE_URL}file/upload-%s'

    # Background job endpoints: status polling, demo generation
    # (format, publication id, percent) and demo check (format, id).
    JOB_STATUS_URL = f'{BASE_URL}task/status'
    GENERATE_DEMO_URL = f'{BASE_URL}task/run/generate-%s-demo/%s/%d'
    CHECK_DEMO_URL = f'{BASE_URL}task/run/check-%s-demo/%s'

    # Role ids sent as ``AhpType`` for authors and translators.
    ROLE_AUTHOR = 1
    ROLE_TRANSLATOR = 4
134
135     def login(self):
136         response = self.session.get('https://publisher.woblink.com/login')
137         token = re.search(
138             r'name="_csrf_token" value="([^"]+)"',
139             response.text
140         ).group(1)
141         data = {
142             '_csrf_token': token,
143             '_username': self.username,
144             '_password': self.password,
145         }
146         response = self.session.post(
147             'https://publisher.woblink.com/login_check',
148             data=data,
149         )
150
151     def get_isbn(self, meta, errors=None):
152         if not meta.isbn_epub:
153             if errors is not None:
154                 errors.append(NoIsbn())
155         return meta.isbn_epub
156
157     def get_authors_data(self, meta, errors=None):
158         authors = []
159         for role, items, obligatory in [
160                 (self.ROLE_AUTHOR, meta.authors, True),
161                 (self.ROLE_TRANSLATOR, meta.translators, False)
162         ]:
163             for person_literal in items:
164                 if person_literal.lang != 'pl':
165                     if errors is not None:
166                         if obligatory:
167                              errors.append(AuthorLiteralForeign(person_literal))
168                         else:
169                             errors.append(AuthorLiteralForeignWarning(person_literal))
170                     continue
171                 aobj = Author.get_by_literal(str(person_literal))
172                 if aobj is None:
173                     if errors is not None:
174                         if obligatory:
175                              errors.append(AuthorNotInCatalogue(person_literal))
176                         else:
177                             errors.append(AuthorNotInCatalogueWarning(person_literal))
178                     continue
179                 if not aobj.woblink:
180                     if errors is not None:
181                         if obligatory:
182                              errors.append(AuthorNoWoblink(aobj))
183                         else:
184                             errors.append(AuthorNoWoblinkWarning(aobj))
185                     continue
186                 authors.append((role, aobj.woblink))
187         return authors
188
189     def get_genres(self, meta, errors=None):
190         thema_codes = []
191         if meta.thema_main:
192             thema_codes.append(meta.thema_main)
193         else:
194             if errors is not None:
195                 errors.append(NoMainThemaWarning())
196         thema_codes.extend(meta.thema)
197         if not thema_codes:
198             if errors is not None:
199                 errors.append(NoThema())
200         category_ids = []
201         for code in thema_codes:
202             try:
203                 thema = Thema.objects.get(code=code)
204             except Thema.DoesNotExist:
205                 if errors is not None:
206                     errors.append(UnknownThema(code))
207             else:
208                 if thema.woblink_category is None:
209                     if errors is not None:
210                         errors.append(ThemaNoWoblink(thema))
211                 elif thema.woblink_category not in WOBLINK_CATEGORIES:
212                     if errors is not None:
213                         errors.append(ThemaUnknownWoblink(thema))
214                 elif thema.woblink_category not in category_ids:
215                     category_ids.append(thema.woblink_category)
216         if not category_ids:
217             if errors is not None:
218                 errors.append(NoWoblinkCategory())
219         return category_ids
220
221     def get_series(self, meta, errors=None):
222         pass
223
224     def get_abstract(self, wldoc, errors=None, description_add=None):
225         description = self.get_description(wldoc, description_add)
226         parts = description.split('\n', 1)
227         if len(parts) == 1 or len(parts[0]) > 200:
228             p1 = description[:200].rsplit(' ', 1)[0]
229             p2 = description[len(p1):]
230             p1 += '…'
231             p2 = '…' + p2
232             parts = [p1, p2]
233
234         m = re.search(r'<[^>]+$', parts[0])
235         if m is not None:
236             parts[0] = parts[:-len(m.group(0))]
237             parts[1] = m.group(0) + parts[1]
238
239         opened = []
240         for tag in re.findall(r'<[^>]+[^/>]>', parts[0]):
241             if tag[1] == '/':
242                 opened.pop()
243             else:
244                 opened.append(tag)
245         for tag in reversed(opened):
246             parts[0] += '</' + tag[1:-1].split()[0] + '>'
247             parts[1] = tag + parts[1]
248         return {
249             'header': parts[0],
250             'rest': parts[1],
251         }
252
253     def get_lang2code(self, meta, errors=None):
254         return lang_code_3to2(meta.language)
255
256     def get_price(self, shop, wldoc, errors=None):
257         stats = wldoc.get_statistics()['total']
258         words = stats['words_with_fn']
259         pages = stats['chars_with_fn'] / 1800
260         price = shop.get_price(words, pages)
261         if price is None:
262             if errors:
263                 errors.append(NoPrice(shop))
264             return 0
265
266         return price
267
268     def can_publish(self, shop, book):
269         wldoc = book.wldocument(librarian2=True)
270         d = {
271             'warnings': [],
272             'errors': [],
273         }
274         errors = []
275         book_data = self.get_book_data(shop, wldoc, errors)
276         for error in errors:
277             if not isinstance(error, Warning):
278                 errlist = d['errors']
279             else:
280                 errlist = d['warnings']
281             errlist.append(error.as_html())
282
283         if book_data.get('genres'):
284             d['comment'] = format_html(
285                 'W kategoriach: {cat} ({price} zł)',
286                 cat=', '.join(self.describe_category(g) for g in book_data['genres']),
287                 price=book_data['price']
288             )
289
290         return d
291
292     def describe_category(self, category):
293         t = []
294         while category:
295             c = WOBLINK_CATEGORIES[category]
296             t.append(c['name'])
297             category = c.get('parent')
298         return ' / '.join(reversed(t))
299
300     def create_book(self, isbn):
301         isbn = ''.join(c for c in isbn if c.isdigit())
302         assert len(isbn) == 13
303         response = self.session.post(
304             self.ADD_URL,
305             data={
306                 'AddPublication[pubType]': 'ebook',
307                 'AddPublication[pubHasIsbn]': '1',
308                 'AddPublication[pubIsbn]': isbn,
309                  ##AddPubation[save]
310             }
311         )
312         m = re.search(r'/(\d+)$', response.url)
313         if m is not None:
314             return m.group(1)
315
316     def send_book(self, shop, book, changes=None):
317         wldoc = book.wldocument(librarian2=True, changes=changes, publishable=False) # TODO pub
318         meta = wldoc.meta
319
320         book_data = self.get_book_data(shop, wldoc)
321
322         if not book.woblink_id:
323             #book.woblink_id = 2959868
324             woblink_id = self.create_book(book_data['isbn'])
325             assert woblink_id
326             book.woblink_id = woblink_id
327             book.save(update_fields=['woblink_id'])
328
329         self.edit_step1(book.woblink_id, book_data)
330         self.edit_step2(book.woblink_id, book_data)
331         self.edit_step3(book.woblink_id, book_data)
332         self.send_cover(book.woblink_id, wldoc)
333         texts = shop.get_texts()
334         self.send_epub(
335             book.woblink_id, wldoc, book.gallery_path(),
336             fundraising=texts
337         )
338         self.send_mobi(
339             book.woblink_id, wldoc, book.gallery_path(),
340             fundraising=texts
341         )
342
343     def get_book_data(self, shop, wldoc, errors=None):
344         return {
345             "title": wldoc.meta.title,
346             "isbn": self.get_isbn(wldoc.meta, errors=errors),
347             "authors": self.get_authors_data(wldoc.meta, errors=errors),
348             "abstract": self.get_abstract(
349                 wldoc, errors=errors, description_add=shop.description_add
350             ),
351             "lang2code": self.get_lang2code(wldoc.meta, errors=errors),
352             "genres": self.get_genres(wldoc.meta, errors=errors),
353             "price": self.get_price(shop, wldoc, errors=errors),
354         }
355
356     def with_form_name(self, data, name):
357         return {
358             f"{name}[{k}]": v
359             for (k, v) in data.items()
360         }
361
362     def edit_step1(self, woblink_id, book_data):
363         data = book_data
364
365         authors_data = [
366             {
367                 "AhpPubId": woblink_id,
368                 "AhpAutId": author_id,
369                 "AhpType": author_type,
370             }
371             for (author_type, author_id) in data['authors']
372         ]
373
374         d = {
375             'pubTitle': book_data['title'],
376             'npwAuthorHasPublications': json.dumps(authors_data),
377             'pubShortNote': data['abstract']['header'],
378             'pubNote': data['abstract']['rest'],
379             'pubCulture': data['lang2code'],
380             'npwPublicationHasAwards': '[]',
381             'npwPublicationHasSeriess': '[]', # TODO
382                 # "[{\"Id\":6153,\"PublicationId\":73876,\"SeriesId\":1615,\"Tome\":null}]"
383         }
384         d = self.with_form_name(d, 'EditPublicationStep1')
385         d['roles'] = [author_type for (author_type, author_id) in data['authors']]
386         r = self.session.post(self.STEP1_URL % woblink_id, data=d)
387         return r
388
389
390     def edit_step2(self, woblink_id, book_data):
391         gd = {}
392         legacy = None
393         for i, g in enumerate(book_data['genres']):
394             gdata = WOBLINK_CATEGORIES[g]
395             if legacy is None:
396                 legacy = gdata.get('legacy')
397             if p := gdata.get('parent'):
398                 gd.setdefault(p, {'isMain': False})
399                 gd[p].setdefault('children', [])
400                 gd[p]['children'].append(str(g))
401                 gd[p].setdefault('mainChild', str(g))
402                 if legacy is None:
403                     legacy = WOBLINK_CATEGORIES[p].get('legacy')
404             else:
405                 gd.setdefault(p, {})
406                 ds[p]['isMain'] = True
407         gd = [
408             {
409                 "pubId": woblink_id,
410                 "category": str(k),
411                 **v
412             }
413             for k, v in gd.items()
414         ]
415
416         data = {
417             'npwPublicationHasNewGenres': json.dumps(gd),
418             'genre': legacy or '',
419         }
420         data = self.with_form_name(data, 'AddPublicationStep2')
421         return self.session.post(self.STEP2_URL % woblink_id, data=data)
422
423     def edit_step3(self, woblink_id, book_data):
424         d = {
425             'pubBasePrice': book_data['price'],
426             'pubPremiereDate': '2023-08-09', #date.today().isoformat(),
427             'pubIsLicenseIndefinite': '1',
428             'pubFileFormat': 'epub+mobi',
429             'pubIsAcs': '0',
430             'pubPublisherIndex': '',
431         }
432         d = self.with_form_name(d, 'EditPublicationStep3')
433         return self.session.post(self.STEP3_URL % woblink_id, data=d)
434
435     def wait_for_job(self, job_id):
436         while True:
437             response = self.session.post(
438                 self.JOB_STATUS_URL,
439                 data={'ids[]': job_id}
440             )
441             data = response.json()[job_id]
442             if data['ready']:
443                 assert data['successful']
444                 return
445             sleep(2)
446
447     def upload_file(self, woblink_id, filename, content, form_name, field_name, mime_type):
448         data = {
449             'pubId': woblink_id,
450         }
451         files = {
452             field_name: (filename, content, mime_type)
453         }
454         response = self.session.post(
455             self.UPLOAD_URL % field_name,
456             data=self.with_form_name(data, form_name),
457             files=self.with_form_name(files, form_name),
458         )
459         resp_data = response.json()
460         assert resp_data['success'] is True
461         if 'jobId' in resp_data:
462             self.wait_for_job(resp_data['jobId'])
463
464     def generate_demo(self, woblink_id, file_format, check=True):
465         percent = 10
466         while True:
467             job_id = self.session.get(
468                 self.GENERATE_DEMO_URL % (file_format, woblink_id, percent),
469             ).json()['jobId']
470             try:
471                 self.wait_for_job(job_id)
472             except AssertionError:
473                 if percent < 50:
474                     percent += 10
475                 else:
476                     raise
477             else:
478                 break
479
480         if check:
481             self.wait_for_job(
482                 self.session.get(
483                     self.CHECK_DEMO_URL % (file_format, woblink_id)
484                 ).json()['jobId']
485             )
486
487     def send_epub(self, woblink_id, doc, gallery_path, fundraising=None):
488         from librarian.builders import EpubBuilder
489         content = EpubBuilder(
490             base_url='file://' + gallery_path + '/',
491             fundraising=fundraising or [],
492         ).build(doc).get_file()
493         self.upload_file(
494             woblink_id,
495             doc.meta.url.slug + '.epub',
496             content,
497             'UploadEpub',
498             'epub',
499             'application/epub+zip'
500         )
501         self.generate_demo(woblink_id, 'epub')
502
503     def send_mobi(self, woblink_id, doc, gallery_path, fundraising=None):
504         from librarian.builders import MobiBuilder
505         content = MobiBuilder(
506             base_url='file://' + gallery_path + '/',
507             fundraising=fundraising or [],
508         ).build(doc).get_file()
509         self.upload_file(
510             woblink_id,
511             doc.meta.url.slug + '.mobi',
512             content,
513             'UploadMobi',
514             'mobi',
515             'application/x-mobipocket-ebook'
516         )
517         self.generate_demo(woblink_id, 'mobi', check=False)
518
519     def send_cover(self, woblink_id, doc):
520         from librarian.cover import make_cover
521         # TODO Labe
522         # A5 @ 300ppi.
523         cover = make_cover(doc.meta, cover_class='m-label', width=1748, height=2480)
524         content = io.BytesIO()
525         cover.final_image().save(content, cover.format)
526         content.seek(0)
527         self.upload_file(
528             woblink_id,
529             doc.meta.url.slug + '.jpeg',
530             content,
531             'UploadCover',
532             'cover',
533             cover.mime_type()
534         )