src/catalogue/utils.py

   1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   3 #
   4 import hashlib
   5 import os.path
   6 import random
   7 import re
   8 import time
   9 from base64 import urlsafe_b64encode
  10 from collections import defaultdict
  11 from errno import EEXIST, ENOENT
  12 from fcntl import flock, LOCK_EX
  13 from os import mkdir, path, unlink
  14 from urllib.parse import urljoin
  15 from zipfile import ZipFile
  16
  17 from django.apps import apps
  18 from django.conf import settings
  19 from django.core.files.storage import DefaultStorage
  20 from django.core.files.uploadedfile import UploadedFile
  21 from django.http import HttpResponse
  22 from django.utils.encoding import force_text
  23
  24 from reporting.utils import read_chunks
  25
  26 # Use the system (hardware-based) random number generator if it exists.
  27 if hasattr(random, 'SystemRandom'):
  28     randrange = random.SystemRandom().randrange
  29 else:
  30     randrange = random.randrange
  31 MAX_SESSION_KEY = 18446744073709551616     # 2 << 63
  32
  33
  34 def get_random_hash(seed):
  35     sha_digest = hashlib.sha1((
  36         '%s%s%s%s' % (
  37             randrange(0, MAX_SESSION_KEY),
  38             time.time(),
  39             str(seed).encode('utf-8', 'replace'),
  40             settings.SECRET_KEY
  41         )
  42     ).encode('utf-8')).digest()
  43     return urlsafe_b64encode(sha_digest).decode('latin1').replace('=', '').replace('_', '-').lower()
  44
  45
  46 def split_tags(*tag_lists):
  47     if len(tag_lists) == 1:
  48         result = defaultdict(list)
  49         for tag in tag_lists[0]:
  50             result[tag.category].append(tag)
  51     else:
  52         result = defaultdict(dict)
  53         for tag_list in tag_lists:
  54             for tag in tag_list:
  55                 try:
  56                     result[tag.category][tag.pk].count += tag.count
  57                 except KeyError:
  58                     result[tag.category][tag.pk] = tag
  59         for k, v in result.items():
  60             result[k] = sorted(v.values(), key=lambda tag: tag.sort_key)
  61     return result
  62
  63
  64 class ExistingFile(UploadedFile):
  65
  66     def __init__(self, path, *args, **kwargs):
  67         self.path = path
  68         super(ExistingFile, self).__init__(*args, **kwargs)
  69
  70     def temporary_file_path(self):
  71         return self.path
  72
  73     def close(self):
  74         pass
  75
  76
  77 class LockFile(object):
  78     """
  79     A file lock monitor class; createas an ${objname}.lock
  80     file in directory dir, and locks it exclusively.
  81     To be used in 'with' construct.
  82     """
  83     def __init__(self, dir, objname):
  84         self.lockname = path.join(dir, objname + ".lock")
  85
  86     def __enter__(self):
  87         self.lock = open(self.lockname, 'w')
  88         flock(self.lock, LOCK_EX)
  89
  90     def __exit__(self, *err):
  91         try:
  92             unlink(self.lockname)
  93         except OSError as oe:
  94             if oe.errno != ENOENT:
  95                 raise oe
  96         self.lock.close()
  97
  98
  99 # @task
 100 def create_zip(paths, zip_slug):
 101     """
 102     Creates a zip in MEDIA_ROOT/zip directory containing files from path.
 103     Resulting archive filename is ${zip_slug}.zip
 104     Returns it's path relative to MEDIA_ROOT (no initial slash)
 105     """
 106     # directory to store zip files
 107     zip_path = path.join(settings.MEDIA_ROOT, 'zip')
 108
 109     try:
 110         mkdir(zip_path)
 111     except OSError as oe:
 112         if oe.errno != EEXIST:
 113             raise oe
 114     zip_filename = zip_slug + ".zip"
 115
 116     with LockFile(zip_path, zip_slug):
 117         if not path.exists(path.join(zip_path, zip_filename)):
 118             zipf = ZipFile(path.join(zip_path, zip_filename), 'w')
 119             try:
 120                 for arcname, p in paths:
 121                     if arcname is None:
 122                         arcname = path.basename(p)
 123                     zipf.write(p, arcname)
 124             finally:
 125                 zipf.close()
 126
 127         return 'zip/' + zip_filename
 128
 129
 130 def remove_zip(zip_slug):
 131     """
 132     removes the ${zip_slug}.zip file from zip store.
 133     """
 134     zip_file = path.join(settings.MEDIA_ROOT, 'zip', zip_slug + '.zip')
 135     try:
 136         unlink(zip_file)
 137     except OSError as oe:
 138         if oe.errno != ENOENT:
 139             raise oe
 140
 141
 142 class AttachmentHttpResponse(HttpResponse):
 143     """Response serving a file to be downloaded.
 144     """
 145     def __init__(self, file_path, file_name, mimetype):
 146         super(AttachmentHttpResponse, self).__init__(mimetype=mimetype)
 147         self['Content-Disposition'] = 'attachment; filename=%s' % file_name
 148         self.file_path = file_path
 149         self.file_name = file_name
 150
 151         with open(DefaultStorage().path(self.file_path)) as f:
 152             for chunk in read_chunks(f):
 153                 self.write(chunk)
 154
 155
 156 class MultiQuerySet(object):
 157     def __init__(self, *args, **kwargs):
 158         self.querysets = args
 159         self._count = None
 160
 161     def count(self):
 162         if not self._count:
 163             self._count = sum(len(qs) for qs in self.querysets)
 164         return self._count
 165
 166     def __len__(self):
 167         return self.count()
 168
 169     def __getitem__(self, item):
 170         try:
 171             (offset, stop, step) = item.indices(self.count())
 172         except AttributeError:
 173             # it's not a slice - make it one
 174             return self[item:item + 1][0]
 175         items = []
 176         total_len = stop - offset
 177         for qs in self.querysets:
 178             if len(qs) < offset:
 179                 offset -= len(qs)
 180             else:
 181                 items += list(qs[offset:stop])
 182                 if len(items) >= total_len:
 183                     return items
 184                 else:
 185                     offset = 0
 186                     stop = total_len - len(items)
 187                     continue
 188
 189
def truncate_html_words(s, num, end_text='...'):
    """Truncates HTML to a certain number of words (not counting tags,
    comments and character entities). Closes opened tags if they were
    correctly closed in the given html. Takes an optional argument of what
    should be used to notify that the string has been truncated, defaulting
    to ellipsis (...).

    Newlines in the HTML are preserved.

    This is just a version of django.utils.text.truncate_html_words with no space before the end_text.

    Arguments:
        s: the HTML to truncate; converted to text with force_text.
        num: the maximum number of words to keep; converted with int().
        end_text: text appended right after the last kept word when the
            input had to be truncated; nothing is appended if it is empty.

    Returns an empty string when num is zero or negative, the input text
    unchanged when it has at most num words, and otherwise the truncated
    text followed by end_text and closing tags for the tags left open.
    """
    s = force_text(s)
    length = int(num)
    if length <= 0:
        return ''
    # Tags that never get a closing tag, so they are not tracked as open.
    html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area', 'hr', 'input')
    # Set up regular expressions
    # re_words matches an entity, a tag, or a word; only words fill group 1.
    re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U)
    # re_tag groups: closing slash, tag name, self-closing slash.
    re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>')
    # Count non-HTML words and keep note of open tags
    pos = 0
    end_text_pos = 0
    words = 0
    open_tags = []
    # The loop runs until one word past the limit has been seen, which is
    # how the code below knows that truncation is actually needed.
    while words <= length:
        m = re_words.search(s, pos)
        if not m:
            # Checked through whole string
            break
        pos = m.end(0)
        if m.group(1):
            # It's an actual non-HTML word
            words += 1
            if words == length:
                # Remember where the last kept word ends.
                end_text_pos = pos
            continue
        # Check for tag
        tag = re_tag.match(m.group(0))
        if not tag or end_text_pos:
            # Don't worry about non tags or tags after our truncate point
            continue
        closing_tag, tagname, self_closing = tag.groups()
        tagname = tagname.lower()  # Element names are always case-insensitive
        if self_closing or tagname in html4_singlets:
            pass
        elif closing_tag:
            # Check for match in open tags list
            try:
                i = open_tags.index(tagname)
            except ValueError:
                pass
            else:
                # SGML: An end tag closes, back to the matching start tag,
                # all unclosed intervening start tags with omitted end tags
                open_tags = open_tags[i+1:]
        else:
            # Add it to the start of the open tags list
            # (most recently opened first, so closing below goes inside-out)
            open_tags.insert(0, tagname)
    if words <= length:
        # Don't try to close tags if we don't need to truncate
        return s
    out = s[:end_text_pos]
    if end_text:
        out += end_text
    # Close any tags still open
    for tag in open_tags:
        out += '</%s>' % tag
    # Return string
    return out
 258
 259
 260 def customizations_hash(customizations):
 261     customizations.sort()
 262     return hash(tuple(customizations))
 263
 264
 265 def get_customized_pdf_path(book, customizations):
 266     """
 267     Returns a MEDIA_ROOT relative path for a customized pdf. The name will contain a hash of customization options.
 268     """
 269     h = customizations_hash(customizations)
 270     return 'book/%s/%s-custom-%s.pdf' % (book.slug, book.slug, h)
 271
 272
 273 def clear_custom_pdf(book):
 274     """
 275     Returns a list of paths to generated customized pdf of a book
 276     """
 277     from waiter.utils import clear_cache
 278     clear_cache('book/%s' % book.slug)
 279
 280
 281 class AppSettings(object):
 282     """Allows specyfying custom settings for an app, with default values.
 283
 284     Just subclass, set some properties and instantiate with a prefix.
 285     Getting a SETTING from an instance will check for prefix_SETTING
 286     in project settings if set, else take the default. The value will be
 287     then filtered through _more_SETTING method, if there is one.
 288
 289     """
 290     def __init__(self, prefix):
 291         self._prefix = prefix
 292
 293     def __getattribute__(self, name):
 294         if name.startswith('_'):
 295             return object.__getattribute__(self, name)
 296         value = getattr(settings, "%s_%s" % (self._prefix, name), object.__getattribute__(self, name))
 297         more = "_more_%s" % name
 298         if hasattr(self, more):
 299             value = getattr(self, more)(value)
 300         return value
 301
 302
 303 def delete_from_cache_by_language(cache, key_template):
 304     cache.delete_many([key_template % lc for lc, ln in settings.LANGUAGES])
 305
 306
 307 def gallery_path(slug):
 308     return os.path.join(settings.MEDIA_ROOT, settings.IMAGE_DIR, slug) + '/'
 309
 310
 311 def gallery_url(slug):
 312     return '%s%s%s/' % (settings.MEDIA_URL, settings.IMAGE_DIR, slug)
 313
 314
 315 def absolute_url(url):
 316     Site = apps.get_model('sites', 'Site')
 317     site = Site.objects.get_current()
 318     base_url = '%s://%s' % (
 319         'https' if settings.SESSION_COOKIE_SECURE else 'http',
 320         site.domain
 321     )
 322     return urljoin(base_url, url)
 323
 324
 325 def get_mp3_length(path):
 326     from mutagen.mp3 import MP3
 327     return int(MP3(path).info.length)
 328
 329
 330 def set_file_permissions(self, fieldfile):
 331     if fieldfile.instance.preview:
 332         fieldfile.set_readable(False)
 333