src/catalogue/utils.py

   1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   3 #
   4 import hashlib
   5 import os.path
   6 import random
   7 import re
   8 import time
   9 from base64 import urlsafe_b64encode
  10 from collections import defaultdict
  11 from errno import EEXIST, ENOENT
  12 from fcntl import flock, LOCK_EX
  13 from os import mkdir, path, unlink
  14 from zipfile import ZipFile
  15
  16 from django.conf import settings
  17 from django.core.files.storage import DefaultStorage
  18 from django.core.files.uploadedfile import UploadedFile
  19 from django.http import HttpResponse
  20 from django.utils.encoding import force_text
  21
  22 from reporting.utils import read_chunks
  23
  24 # Use the system (hardware-based) random number generator if it exists.
  25 if hasattr(random, 'SystemRandom'):
  26     randrange = random.SystemRandom().randrange
  27 else:
  28     randrange = random.randrange
  29 MAX_SESSION_KEY = 18446744073709551616     # 2 << 63
  30
  31
  32 def get_random_hash(seed):
  33     sha_digest = hashlib.sha1((
  34         '%s%s%s%s' % (
  35             randrange(0, MAX_SESSION_KEY),
  36             time.time(),
  37             str(seed).encode('utf-8', 'replace'),
  38             settings.SECRET_KEY
  39         )
  40     ).encode('utf-8')).digest()
  41     return urlsafe_b64encode(sha_digest).decode('latin1').replace('=', '').replace('_', '-').lower()
  42
  43
  44 def split_tags(*tag_lists):
  45     if len(tag_lists) == 1:
  46         result = defaultdict(list)
  47         for tag in tag_lists[0]:
  48             result[tag.category].append(tag)
  49     else:
  50         result = defaultdict(dict)
  51         for tag_list in tag_lists:
  52             for tag in tag_list:
  53                 try:
  54                     result[tag.category][tag.pk].count += tag.count
  55                 except KeyError:
  56                     result[tag.category][tag.pk] = tag
  57         for k, v in result.items():
  58             result[k] = sorted(v.values(), key=lambda tag: tag.sort_key)
  59     return result
  60
  61
  62 class ExistingFile(UploadedFile):
  63
  64     def __init__(self, path, *args, **kwargs):
  65         self.path = path
  66         super(ExistingFile, self).__init__(*args, **kwargs)
  67
  68     def temporary_file_path(self):
  69         return self.path
  70
  71     def close(self):
  72         pass
  73
  74
  75 class LockFile(object):
  76     """
  77     A file lock monitor class; createas an ${objname}.lock
  78     file in directory dir, and locks it exclusively.
  79     To be used in 'with' construct.
  80     """
  81     def __init__(self, dir, objname):
  82         self.lockname = path.join(dir, objname + ".lock")
  83
  84     def __enter__(self):
  85         self.lock = open(self.lockname, 'w')
  86         flock(self.lock, LOCK_EX)
  87
  88     def __exit__(self, *err):
  89         try:
  90             unlink(self.lockname)
  91         except OSError as oe:
  92             if oe.errno != ENOENT:
  93                 raise oe
  94         self.lock.close()
  95
  96
  97 # @task
  98 def create_zip(paths, zip_slug):
  99     """
 100     Creates a zip in MEDIA_ROOT/zip directory containing files from path.
 101     Resulting archive filename is ${zip_slug}.zip
 102     Returns it's path relative to MEDIA_ROOT (no initial slash)
 103     """
 104     # directory to store zip files
 105     zip_path = path.join(settings.MEDIA_ROOT, 'zip')
 106
 107     try:
 108         mkdir(zip_path)
 109     except OSError as oe:
 110         if oe.errno != EEXIST:
 111             raise oe
 112     zip_filename = zip_slug + ".zip"
 113
 114     with LockFile(zip_path, zip_slug):
 115         if not path.exists(path.join(zip_path, zip_filename)):
 116             zipf = ZipFile(path.join(zip_path, zip_filename), 'w')
 117             try:
 118                 for arcname, p in paths:
 119                     if arcname is None:
 120                         arcname = path.basename(p)
 121                     zipf.write(p, arcname)
 122             finally:
 123                 zipf.close()
 124
 125         return 'zip/' + zip_filename
 126
 127
 128 def remove_zip(zip_slug):
 129     """
 130     removes the ${zip_slug}.zip file from zip store.
 131     """
 132     zip_file = path.join(settings.MEDIA_ROOT, 'zip', zip_slug + '.zip')
 133     try:
 134         unlink(zip_file)
 135     except OSError as oe:
 136         if oe.errno != ENOENT:
 137             raise oe
 138
 139
 140 class AttachmentHttpResponse(HttpResponse):
 141     """Response serving a file to be downloaded.
 142     """
 143     def __init__(self, file_path, file_name, mimetype):
 144         super(AttachmentHttpResponse, self).__init__(mimetype=mimetype)
 145         self['Content-Disposition'] = 'attachment; filename=%s' % file_name
 146         self.file_path = file_path
 147         self.file_name = file_name
 148
 149         with open(DefaultStorage().path(self.file_path)) as f:
 150             for chunk in read_chunks(f):
 151                 self.write(chunk)
 152
 153
 154 class MultiQuerySet(object):
 155     def __init__(self, *args, **kwargs):
 156         self.querysets = args
 157         self._count = None
 158
 159     def count(self):
 160         if not self._count:
 161             self._count = sum(len(qs) for qs in self.querysets)
 162         return self._count
 163
 164     def __len__(self):
 165         return self.count()
 166
 167     def __getitem__(self, item):
 168         try:
 169             (offset, stop, step) = item.indices(self.count())
 170         except AttributeError:
 171             # it's not a slice - make it one
 172             return self[item:item + 1][0]
 173         items = []
 174         total_len = stop - offset
 175         for qs in self.querysets:
 176             if len(qs) < offset:
 177                 offset -= len(qs)
 178             else:
 179                 items += list(qs[offset:stop])
 180                 if len(items) >= total_len:
 181                     return items
 182                 else:
 183                     offset = 0
 184                     stop = total_len - len(items)
 185                     continue
 186
 187
 188 class SortedMultiQuerySet(MultiQuerySet):
 189     def __init__(self, *args, **kwargs):
 190         self.order_by = kwargs.pop('order_by', None)
 191         self.sortfn = kwargs.pop('sortfn', None)
 192         if self.order_by is not None:
 193             self.sortfn = lambda a, b: cmp((getattr(a, f) for f in self.order_by),
 194                                            (getattr(b, f) for f in self.order_by))
 195         super(SortedMultiQuerySet, self).__init__(*args, **kwargs)
 196
 197     def __getitem__(self, item):
 198         sort_heads = [0] * len(self.querysets)
 199         try:
 200             (offset, stop, step) = item.indices(self.count())
 201         except AttributeError:
 202             # it's not a slice - make it one
 203             return self[item:item + 1][0]
 204         items = []
 205         total_len = stop - offset
 206         skipped = 0
 207         i_s = range(len(sort_heads))
 208
 209         while len(items) < total_len:
 210             candidate = None
 211             candidate_i = None
 212             for i in i_s:
 213                 def get_next():
 214                     return self.querysets[i][sort_heads[i]]
 215                 try:
 216                     if candidate is None:
 217                         candidate = get_next()
 218                         candidate_i = i
 219                     else:
 220                         competitor = get_next()
 221                         if self.sortfn(candidate, competitor) > 0:
 222                             candidate = competitor
 223                             candidate_i = i
 224                 except IndexError:
 225                     continue  # continue next sort_head
 226             # we have no more elements:
 227             if candidate is None:
 228                 break
 229             sort_heads[candidate_i] += 1
 230             if skipped < offset:
 231                 skipped += 1
 232                 continue  # continue next item
 233             items.append(candidate)
 234
 235         return items
 236
 237
 238 def truncate_html_words(s, num, end_text='...'):
 239     """Truncates HTML to a certain number of words (not counting tags and
 240     comments). Closes opened tags if they were correctly closed in the given
 241     html. Takes an optional argument of what should be used to notify that the
 242     string has been truncated, defaulting to ellipsis (...).
 243
 244     Newlines in the HTML are preserved.
 245
 246     This is just a version of django.utils.text.truncate_html_words with no space before the end_text.
 247     """
 248     s = force_text(s)
 249     length = int(num)
 250     if length <= 0:
 251         return ''
 252     html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area', 'hr', 'input')
 253     # Set up regular expressions
 254     re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U)
 255     re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>')
 256     # Count non-HTML words and keep note of open tags
 257     pos = 0
 258     end_text_pos = 0
 259     words = 0
 260     open_tags = []
 261     while words <= length:
 262         m = re_words.search(s, pos)
 263         if not m:
 264             # Checked through whole string
 265             break
 266         pos = m.end(0)
 267         if m.group(1):
 268             # It's an actual non-HTML word
 269             words += 1
 270             if words == length:
 271                 end_text_pos = pos
 272             continue
 273         # Check for tag
 274         tag = re_tag.match(m.group(0))
 275         if not tag or end_text_pos:
 276             # Don't worry about non tags or tags after our truncate point
 277             continue
 278         closing_tag, tagname, self_closing = tag.groups()
 279         tagname = tagname.lower()  # Element names are always case-insensitive
 280         if self_closing or tagname in html4_singlets:
 281             pass
 282         elif closing_tag:
 283             # Check for match in open tags list
 284             try:
 285                 i = open_tags.index(tagname)
 286             except ValueError:
 287                 pass
 288             else:
 289                 # SGML: An end tag closes, back to the matching start tag,
 290                 # all unclosed intervening start tags with omitted end tags
 291                 open_tags = open_tags[i+1:]
 292         else:
 293             # Add it to the start of the open tags list
 294             open_tags.insert(0, tagname)
 295     if words <= length:
 296         # Don't try to close tags if we don't need to truncate
 297         return s
 298     out = s[:end_text_pos]
 299     if end_text:
 300         out += end_text
 301     # Close any tags still open
 302     for tag in open_tags:
 303         out += '</%s>' % tag
 304     # Return string
 305     return out
 306
 307
 308 def customizations_hash(customizations):
 309     customizations.sort()
 310     return hash(tuple(customizations))
 311
 312
 313 def get_customized_pdf_path(book, customizations):
 314     """
 315     Returns a MEDIA_ROOT relative path for a customized pdf. The name will contain a hash of customization options.
 316     """
 317     h = customizations_hash(customizations)
 318     return 'book/%s/%s-custom-%s.pdf' % (book.slug, book.slug, h)
 319
 320
 321 def clear_custom_pdf(book):
 322     """
 323     Returns a list of paths to generated customized pdf of a book
 324     """
 325     from waiter.utils import clear_cache
 326     clear_cache('book/%s' % book.slug)
 327
 328
 329 class AppSettings(object):
 330     """Allows specyfying custom settings for an app, with default values.
 331
 332     Just subclass, set some properties and instantiate with a prefix.
 333     Getting a SETTING from an instance will check for prefix_SETTING
 334     in project settings if set, else take the default. The value will be
 335     then filtered through _more_SETTING method, if there is one.
 336
 337     """
 338     def __init__(self, prefix):
 339         self._prefix = prefix
 340
 341     def __getattribute__(self, name):
 342         if name.startswith('_'):
 343             return object.__getattribute__(self, name)
 344         value = getattr(settings, "%s_%s" % (self._prefix, name), object.__getattribute__(self, name))
 345         more = "_more_%s" % name
 346         if hasattr(self, more):
 347             value = getattr(self, more)(value)
 348         return value
 349
 350
 351 def delete_from_cache_by_language(cache, key_template):
 352     cache.delete_many([key_template % lc for lc, ln in settings.LANGUAGES])
 353
 354
 355 def gallery_path(slug):
 356     return os.path.join(settings.MEDIA_ROOT, settings.IMAGE_DIR, slug)
 357
 358
 359 def gallery_url(slug):
 360     return '%s%s%s/' % (settings.MEDIA_URL, settings.IMAGE_DIR, slug)
 361
 362
 363 def get_mp3_length(path):
 364     from mutagen.mp3 import MP3
 365     return int(MP3(path).info.length)