src/catalogue/utils.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   4 #
   5 import hashlib
   6 import os.path
   7 import random
   8 import re
   9 import time
  10 from base64 import urlsafe_b64encode
  11 from collections import defaultdict
  12 from errno import EEXIST, ENOENT
  13 from fcntl import flock, LOCK_EX
  14 from os import mkdir, path, unlink
  15 from zipfile import ZipFile
  16
  17 from django.conf import settings
  18 from django.core.files.storage import DefaultStorage
  19 from django.core.files.uploadedfile import UploadedFile
  20 from django.http import HttpResponse
  21 from django.utils.encoding import force_text
  22
  23 from reporting.utils import read_chunks
  24
  25 # Use the system (hardware-based) random number generator if it exists.
  26 if hasattr(random, 'SystemRandom'):
  27     randrange = random.SystemRandom().randrange
  28 else:
  29     randrange = random.randrange
  30 MAX_SESSION_KEY = 18446744073709551616     # 2 << 63
  31
  32
  33 def get_random_hash(seed):
  34     sha_digest = hashlib.sha1((
  35         '%s%s%s%s' % (
  36             randrange(0, MAX_SESSION_KEY),
  37             time.time(),
  38             str(seed).encode('utf-8', 'replace'),
  39             settings.SECRET_KEY
  40         )
  41     ).encode('utf-8')).digest()
  42     return urlsafe_b64encode(sha_digest).decode('latin1').replace('=', '').replace('_', '-').lower()
  43
  44
  45 def split_tags(*tag_lists):
  46     if len(tag_lists) == 1:
  47         result = defaultdict(list)
  48         for tag in tag_lists[0]:
  49             result[tag.category].append(tag)
  50     else:
  51         result = defaultdict(dict)
  52         for tag_list in tag_lists:
  53             for tag in tag_list:
  54                 try:
  55                     result[tag.category][tag.pk].count += tag.count
  56                 except KeyError:
  57                     result[tag.category][tag.pk] = tag
  58         for k, v in result.items():
  59             result[k] = sorted(v.values(), key=lambda tag: tag.sort_key)
  60     return result
  61
  62
  63 class ExistingFile(UploadedFile):
  64
  65     def __init__(self, path, *args, **kwargs):
  66         self.path = path
  67         super(ExistingFile, self).__init__(*args, **kwargs)
  68
  69     def temporary_file_path(self):
  70         return self.path
  71
  72     def close(self):
  73         pass
  74
  75
  76 class LockFile(object):
  77     """
  78     A file lock monitor class; createas an ${objname}.lock
  79     file in directory dir, and locks it exclusively.
  80     To be used in 'with' construct.
  81     """
  82     def __init__(self, dir, objname):
  83         self.lockname = path.join(dir, objname + ".lock")
  84
  85     def __enter__(self):
  86         self.lock = open(self.lockname, 'w')
  87         flock(self.lock, LOCK_EX)
  88
  89     def __exit__(self, *err):
  90         try:
  91             unlink(self.lockname)
  92         except OSError as oe:
  93             if oe.errno != ENOENT:
  94                 raise oe
  95         self.lock.close()
  96
  97
  98 # @task
  99 def create_zip(paths, zip_slug):
 100     """
 101     Creates a zip in MEDIA_ROOT/zip directory containing files from path.
 102     Resulting archive filename is ${zip_slug}.zip
 103     Returns it's path relative to MEDIA_ROOT (no initial slash)
 104     """
 105     # directory to store zip files
 106     zip_path = path.join(settings.MEDIA_ROOT, 'zip')
 107
 108     try:
 109         mkdir(zip_path)
 110     except OSError as oe:
 111         if oe.errno != EEXIST:
 112             raise oe
 113     zip_filename = zip_slug + ".zip"
 114
 115     with LockFile(zip_path, zip_slug):
 116         if not path.exists(path.join(zip_path, zip_filename)):
 117             zipf = ZipFile(path.join(zip_path, zip_filename), 'w')
 118             try:
 119                 for arcname, p in paths:
 120                     if arcname is None:
 121                         arcname = path.basename(p)
 122                     zipf.write(p, arcname)
 123             finally:
 124                 zipf.close()
 125
 126         return 'zip/' + zip_filename
 127
 128
 129 def remove_zip(zip_slug):
 130     """
 131     removes the ${zip_slug}.zip file from zip store.
 132     """
 133     zip_file = path.join(settings.MEDIA_ROOT, 'zip', zip_slug + '.zip')
 134     try:
 135         unlink(zip_file)
 136     except OSError as oe:
 137         if oe.errno != ENOENT:
 138             raise oe
 139
 140
 141 class AttachmentHttpResponse(HttpResponse):
 142     """Response serving a file to be downloaded.
 143     """
 144     def __init__(self, file_path, file_name, mimetype):
 145         super(AttachmentHttpResponse, self).__init__(mimetype=mimetype)
 146         self['Content-Disposition'] = 'attachment; filename=%s' % file_name
 147         self.file_path = file_path
 148         self.file_name = file_name
 149
 150         with open(DefaultStorage().path(self.file_path)) as f:
 151             for chunk in read_chunks(f):
 152                 self.write(chunk)
 153
 154
 155 class MultiQuerySet(object):
 156     def __init__(self, *args, **kwargs):
 157         self.querysets = args
 158         self._count = None
 159
 160     def count(self):
 161         if not self._count:
 162             self._count = sum(len(qs) for qs in self.querysets)
 163         return self._count
 164
 165     def __len__(self):
 166         return self.count()
 167
 168     def __getitem__(self, item):
 169         try:
 170             (offset, stop, step) = item.indices(self.count())
 171         except AttributeError:
 172             # it's not a slice - make it one
 173             return self[item:item + 1][0]
 174         items = []
 175         total_len = stop - offset
 176         for qs in self.querysets:
 177             if len(qs) < offset:
 178                 offset -= len(qs)
 179             else:
 180                 items += list(qs[offset:stop])
 181                 if len(items) >= total_len:
 182                     return items
 183                 else:
 184                     offset = 0
 185                     stop = total_len - len(items)
 186                     continue
 187
 188
 189 class SortedMultiQuerySet(MultiQuerySet):
 190     def __init__(self, *args, **kwargs):
 191         self.order_by = kwargs.pop('order_by', None)
 192         self.sortfn = kwargs.pop('sortfn', None)
 193         if self.order_by is not None:
 194             self.sortfn = lambda a, b: cmp((getattr(a, f) for f in self.order_by),
 195                                            (getattr(b, f) for f in self.order_by))
 196         super(SortedMultiQuerySet, self).__init__(*args, **kwargs)
 197
 198     def __getitem__(self, item):
 199         sort_heads = [0] * len(self.querysets)
 200         try:
 201             (offset, stop, step) = item.indices(self.count())
 202         except AttributeError:
 203             # it's not a slice - make it one
 204             return self[item:item + 1][0]
 205         items = []
 206         total_len = stop - offset
 207         skipped = 0
 208         i_s = range(len(sort_heads))
 209
 210         while len(items) < total_len:
 211             candidate = None
 212             candidate_i = None
 213             for i in i_s:
 214                 def get_next():
 215                     return self.querysets[i][sort_heads[i]]
 216                 try:
 217                     if candidate is None:
 218                         candidate = get_next()
 219                         candidate_i = i
 220                     else:
 221                         competitor = get_next()
 222                         if self.sortfn(candidate, competitor) > 0:
 223                             candidate = competitor
 224                             candidate_i = i
 225                 except IndexError:
 226                     continue  # continue next sort_head
 227             # we have no more elements:
 228             if candidate is None:
 229                 break
 230             sort_heads[candidate_i] += 1
 231             if skipped < offset:
 232                 skipped += 1
 233                 continue  # continue next item
 234             items.append(candidate)
 235
 236         return items
 237
 238
 239 def truncate_html_words(s, num, end_text='...'):
 240     """Truncates HTML to a certain number of words (not counting tags and
 241     comments). Closes opened tags if they were correctly closed in the given
 242     html. Takes an optional argument of what should be used to notify that the
 243     string has been truncated, defaulting to ellipsis (...).
 244
 245     Newlines in the HTML are preserved.
 246
 247     This is just a version of django.utils.text.truncate_html_words with no space before the end_text.
 248     """
 249     s = force_text(s)
 250     length = int(num)
 251     if length <= 0:
 252         return u''
 253     html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area', 'hr', 'input')
 254     # Set up regular expressions
 255     re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U)
 256     re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>')
 257     # Count non-HTML words and keep note of open tags
 258     pos = 0
 259     end_text_pos = 0
 260     words = 0
 261     open_tags = []
 262     while words <= length:
 263         m = re_words.search(s, pos)
 264         if not m:
 265             # Checked through whole string
 266             break
 267         pos = m.end(0)
 268         if m.group(1):
 269             # It's an actual non-HTML word
 270             words += 1
 271             if words == length:
 272                 end_text_pos = pos
 273             continue
 274         # Check for tag
 275         tag = re_tag.match(m.group(0))
 276         if not tag or end_text_pos:
 277             # Don't worry about non tags or tags after our truncate point
 278             continue
 279         closing_tag, tagname, self_closing = tag.groups()
 280         tagname = tagname.lower()  # Element names are always case-insensitive
 281         if self_closing or tagname in html4_singlets:
 282             pass
 283         elif closing_tag:
 284             # Check for match in open tags list
 285             try:
 286                 i = open_tags.index(tagname)
 287             except ValueError:
 288                 pass
 289             else:
 290                 # SGML: An end tag closes, back to the matching start tag,
 291                 # all unclosed intervening start tags with omitted end tags
 292                 open_tags = open_tags[i+1:]
 293         else:
 294             # Add it to the start of the open tags list
 295             open_tags.insert(0, tagname)
 296     if words <= length:
 297         # Don't try to close tags if we don't need to truncate
 298         return s
 299     out = s[:end_text_pos]
 300     if end_text:
 301         out += end_text
 302     # Close any tags still open
 303     for tag in open_tags:
 304         out += '</%s>' % tag
 305     # Return string
 306     return out
 307
 308
 309 def customizations_hash(customizations):
 310     customizations.sort()
 311     return hash(tuple(customizations))
 312
 313
 314 def get_customized_pdf_path(book, customizations):
 315     """
 316     Returns a MEDIA_ROOT relative path for a customized pdf. The name will contain a hash of customization options.
 317     """
 318     h = customizations_hash(customizations)
 319     return 'book/%s/%s-custom-%s.pdf' % (book.slug, book.slug, h)
 320
 321
 322 def clear_custom_pdf(book):
 323     """
 324     Returns a list of paths to generated customized pdf of a book
 325     """
 326     from waiter.utils import clear_cache
 327     clear_cache('book/%s' % book.slug)
 328
 329
 330 class AppSettings(object):
 331     """Allows specyfying custom settings for an app, with default values.
 332
 333     Just subclass, set some properties and instantiate with a prefix.
 334     Getting a SETTING from an instance will check for prefix_SETTING
 335     in project settings if set, else take the default. The value will be
 336     then filtered through _more_SETTING method, if there is one.
 337
 338     """
 339     def __init__(self, prefix):
 340         self._prefix = prefix
 341
 342     def __getattribute__(self, name):
 343         if name.startswith('_'):
 344             return object.__getattribute__(self, name)
 345         value = getattr(settings, "%s_%s" % (self._prefix, name), object.__getattribute__(self, name))
 346         more = "_more_%s" % name
 347         if hasattr(self, more):
 348             value = getattr(self, more)(value)
 349         return value
 350
 351
 352 def delete_from_cache_by_language(cache, key_template):
 353     cache.delete_many([key_template % lc for lc, ln in settings.LANGUAGES])
 354
 355
 356 def gallery_path(slug):
 357     return os.path.join(settings.MEDIA_ROOT, settings.IMAGE_DIR, slug)
 358
 359
 360 def gallery_url(slug):
 361     return '%s%s%s/' % (settings.MEDIA_URL, settings.IMAGE_DIR, slug)
 362
 363
 364 def get_mp3_length(path):
 365     from mutagen.mp3 import MP3
 366     return int(MP3(path).info.length)