src/catalogue/utils.py

   1 # This file is part of Wolne Lektury, licensed under GNU Affero GPLv3 or later.
   2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
   3 #
   4 import hashlib
   5 import os.path
   6 import random
   7 import re
   8 import time
   9 from base64 import urlsafe_b64encode
  10 from collections import defaultdict
  11 from errno import EEXIST, ENOENT
  12 from fcntl import flock, LOCK_EX
  13 from os import mkdir, path, unlink
  14 from urllib.parse import urljoin
  15 from zipfile import ZipFile
  16
  17 from django.apps import apps
  18 from django.conf import settings
  19 from django.core.files.storage import DefaultStorage
  20 from django.core.files.uploadedfile import UploadedFile
  21 from django.http import HttpResponse
  22 from django.utils.encoding import force_str
  23
  24 from reporting.utils import read_chunks
  25
  26 # Use the system (hardware-based) random number generator if it exists.
  27 if hasattr(random, 'SystemRandom'):
  28     randrange = random.SystemRandom().randrange
  29 else:
  30     randrange = random.randrange
  31 MAX_SESSION_KEY = 18446744073709551616     # 2 << 63
  32
  33
  34 def get_random_hash(seed):
  35     sha_digest = hashlib.sha1((
  36         '%s%s%s%s' % (
  37             randrange(0, MAX_SESSION_KEY),
  38             time.time(),
  39             str(seed).encode('utf-8', 'replace'),
  40             settings.SECRET_KEY
  41         )
  42     ).encode('utf-8')).digest()
  43     return urlsafe_b64encode(sha_digest).decode('latin1').replace('=', '').replace('_', '-').lower()
  44
  45
  46 def split_tags(*tag_lists):
  47     if len(tag_lists) == 1:
  48         result = defaultdict(list)
  49         for tag in tag_lists[0]:
  50             result[tag.category].append(tag)
  51     else:
  52         result = defaultdict(dict)
  53         for tag_list in tag_lists:
  54             for tag in tag_list:
  55                 try:
  56                     result[tag.category][tag.pk].count += tag.count
  57                 except KeyError:
  58                     result[tag.category][tag.pk] = tag
  59         for k, v in result.items():
  60             result[k] = sorted(v.values(), key=lambda tag: tag.sort_key)
  61     return result
  62
  63
  64 class ExistingFile(UploadedFile):
  65
  66     def __init__(self, path, *args, **kwargs):
  67         self.path = path
  68         super(ExistingFile, self).__init__(*args, **kwargs)
  69
  70     def temporary_file_path(self):
  71         return self.path
  72
  73     def close(self):
  74         pass
  75
  76
  77 class LockFile(object):
  78     """
  79     A file lock monitor class; createas an ${objname}.lock
  80     file in directory dir, and locks it exclusively.
  81     To be used in 'with' construct.
  82     """
  83     def __init__(self, dir, objname):
  84         self.lockname = path.join(dir, objname + ".lock")
  85
  86     def __enter__(self):
  87         self.lock = open(self.lockname, 'w')
  88         flock(self.lock, LOCK_EX)
  89
  90     def __exit__(self, *err):
  91         try:
  92             unlink(self.lockname)
  93         except OSError as oe:
  94             if oe.errno != ENOENT:
  95                 raise oe
  96         self.lock.close()
  97
  98
  99 # @task
 100 def create_zip(paths, zip_slug, file_contents=None):
 101     """
 102     Creates a zip in MEDIA_ROOT/zip directory containing files from path.
 103     Resulting archive filename is ${zip_slug}.zip
 104     Returns it's path relative to MEDIA_ROOT (no initial slash)
 105     """
 106     # directory to store zip files
 107     zip_path = path.join(settings.MEDIA_ROOT, 'zip')
 108
 109     try:
 110         mkdir(zip_path)
 111     except OSError as oe:
 112         if oe.errno != EEXIST:
 113             raise oe
 114     zip_filename = zip_slug + ".zip"
 115
 116     with LockFile(zip_path, zip_slug):
 117         if not path.exists(path.join(zip_path, zip_filename)):
 118             zipf = ZipFile(path.join(zip_path, zip_filename), 'w')
 119             try:
 120                 for arcname, p in paths:
 121                     if arcname is None:
 122                         arcname = path.basename(p)
 123                     zipf.write(p, arcname)
 124                 if file_contents:
 125                     for arcname, content in file_contents.items():
 126                         zipf.writestr(arcname, content)
 127             finally:
 128                 zipf.close()
 129
 130         return 'zip/' + zip_filename
 131
 132
 133 def remove_zip(zip_slug):
 134     """
 135     removes the ${zip_slug}.zip file from zip store.
 136     """
 137     zip_file = path.join(settings.MEDIA_ROOT, 'zip', zip_slug + '.zip')
 138     try:
 139         unlink(zip_file)
 140     except OSError as oe:
 141         if oe.errno != ENOENT:
 142             raise oe
 143
 144
 145 class AttachmentHttpResponse(HttpResponse):
 146     """Response serving a file to be downloaded.
 147     """
 148     def __init__(self, file_path, file_name, mimetype):
 149         super(AttachmentHttpResponse, self).__init__(mimetype=mimetype)
 150         self['Content-Disposition'] = 'attachment; filename=%s' % file_name
 151         self.file_path = file_path
 152         self.file_name = file_name
 153
 154         with open(DefaultStorage().path(self.file_path)) as f:
 155             for chunk in read_chunks(f):
 156                 self.write(chunk)
 157
 158
 159 class MultiQuerySet(object):
 160     def __init__(self, *args, **kwargs):
 161         self.querysets = args
 162         self._count = None
 163
 164     def count(self):
 165         if not self._count:
 166             self._count = sum(len(qs) for qs in self.querysets)
 167         return self._count
 168
 169     def __len__(self):
 170         return self.count()
 171
 172     def __getitem__(self, item):
 173         try:
 174             (offset, stop, step) = item.indices(self.count())
 175         except AttributeError:
 176             # it's not a slice - make it one
 177             return self[item:item + 1][0]
 178         items = []
 179         total_len = stop - offset
 180         for qs in self.querysets:
 181             if len(qs) < offset:
 182                 offset -= len(qs)
 183             else:
 184                 items += list(qs[offset:stop])
 185                 if len(items) >= total_len:
 186                     return items
 187                 else:
 188                     offset = 0
 189                     stop = total_len - len(items)
 190                     continue
 191
 192
 193 def truncate_html_words(s, num, end_text='...'):
 194     """Truncates HTML to a certain number of words (not counting tags and
 195     comments). Closes opened tags if they were correctly closed in the given
 196     html. Takes an optional argument of what should be used to notify that the
 197     string has been truncated, defaulting to ellipsis (...).
 198
 199     Newlines in the HTML are preserved.
 200
 201     This is just a version of django.utils.text.truncate_html_words with no space before the end_text.
 202     """
 203     s = force_str(s)
 204     length = int(num)
 205     if length <= 0:
 206         return ''
 207     html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area', 'hr', 'input')
 208     # Set up regular expressions
 209     re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U)
 210     re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>')
 211     # Count non-HTML words and keep note of open tags
 212     pos = 0
 213     end_text_pos = 0
 214     words = 0
 215     open_tags = []
 216     while words <= length:
 217         m = re_words.search(s, pos)
 218         if not m:
 219             # Checked through whole string
 220             break
 221         pos = m.end(0)
 222         if m.group(1):
 223             # It's an actual non-HTML word
 224             words += 1
 225             if words == length:
 226                 end_text_pos = pos
 227             continue
 228         # Check for tag
 229         tag = re_tag.match(m.group(0))
 230         if not tag or end_text_pos:
 231             # Don't worry about non tags or tags after our truncate point
 232             continue
 233         closing_tag, tagname, self_closing = tag.groups()
 234         tagname = tagname.lower()  # Element names are always case-insensitive
 235         if self_closing or tagname in html4_singlets:
 236             pass
 237         elif closing_tag:
 238             # Check for match in open tags list
 239             try:
 240                 i = open_tags.index(tagname)
 241             except ValueError:
 242                 pass
 243             else:
 244                 # SGML: An end tag closes, back to the matching start tag,
 245                 # all unclosed intervening start tags with omitted end tags
 246                 open_tags = open_tags[i+1:]
 247         else:
 248             # Add it to the start of the open tags list
 249             open_tags.insert(0, tagname)
 250     if words <= length:
 251         # Don't try to close tags if we don't need to truncate
 252         return s
 253     out = s[:end_text_pos]
 254     if end_text:
 255         out += end_text
 256     # Close any tags still open
 257     for tag in open_tags:
 258         out += '</%s>' % tag
 259     # Return string
 260     return out
 261
 262
 263 def customizations_hash(customizations):
 264     customizations.sort()
 265     return hash(tuple(customizations))
 266
 267
 268 def get_customized_pdf_path(book, customizations):
 269     """
 270     Returns a MEDIA_ROOT relative path for a customized pdf. The name will contain a hash of customization options.
 271     """
 272     h = customizations_hash(customizations)
 273     return 'book/%s/%s-custom-%s.pdf' % (book.slug, book.slug, h)
 274
 275
 276 def clear_custom_pdf(book):
 277     """
 278     Returns a list of paths to generated customized pdf of a book
 279     """
 280     from waiter.utils import clear_cache
 281     clear_cache('book/%s' % book.slug)
 282
 283
 284 class AppSettings(object):
 285     """Allows specyfying custom settings for an app, with default values.
 286
 287     Just subclass, set some properties and instantiate with a prefix.
 288     Getting a SETTING from an instance will check for prefix_SETTING
 289     in project settings if set, else take the default. The value will be
 290     then filtered through _more_SETTING method, if there is one.
 291
 292     """
 293     def __init__(self, prefix):
 294         self._prefix = prefix
 295
 296     def __getattribute__(self, name):
 297         if name.startswith('_'):
 298             return object.__getattribute__(self, name)
 299         value = getattr(settings, "%s_%s" % (self._prefix, name), object.__getattribute__(self, name))
 300         more = "_more_%s" % name
 301         if hasattr(self, more):
 302             value = getattr(self, more)(value)
 303         return value
 304
 305
 306 def delete_from_cache_by_language(cache, key_template):
 307     cache.delete_many([key_template % lc for lc, ln in settings.LANGUAGES])
 308
 309
 310 def gallery_path(slug):
 311     return os.path.join(settings.MEDIA_ROOT, settings.IMAGE_DIR, slug) + '/'
 312
 313
 314 def gallery_url(slug):
 315     return '%s%s%s/' % (settings.MEDIA_URL, settings.IMAGE_DIR, slug)
 316
 317
 318 def absolute_url(url):
 319     Site = apps.get_model('sites', 'Site')
 320     site = Site.objects.get_current()
 321     base_url = '%s://%s' % (
 322         'https' if settings.SESSION_COOKIE_SECURE else 'http',
 323         site.domain
 324     )
 325     return urljoin(base_url, url)
 326
 327
 328 def get_mp3_length(path):
 329     from mutagen.mp3 import MP3
 330     return int(MP3(path).info.length)
 331
 332
 333 def set_file_permissions(self, fieldfile):
 334     if fieldfile.instance.preview:
 335         fieldfile.set_readable(False)
 336