src/catalogue/utils.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   4 #
   5 from collections import defaultdict
   6 import hashlib
   7 import random
   8 import re
   9 import time
  10 from base64 import urlsafe_b64encode
  11
  12 from django.http import HttpResponse
  13 from django.core.files.uploadedfile import UploadedFile
  14 from django.core.files.storage import DefaultStorage
  15 from django.utils.encoding import force_unicode
  16 from django.conf import settings
  17 from os import mkdir, path, unlink
  18 from errno import EEXIST, ENOENT
  19 from fcntl import flock, LOCK_EX
  20 from zipfile import ZipFile
  21
  22 from reporting.utils import read_chunks
  23
  24 # Use the system (hardware-based) random number generator if it exists.
  25 if hasattr(random, 'SystemRandom'):
  26     randrange = random.SystemRandom().randrange
  27 else:
  28     randrange = random.randrange
  29 MAX_SESSION_KEY = 18446744073709551616L     # 2 << 63
  30
  31
  32 def get_random_hash(seed):
  33     sha_digest = hashlib.sha1('%s%s%s%s' % (
  34         randrange(0, MAX_SESSION_KEY), time.time(), unicode(seed).encode('utf-8', 'replace'), settings.SECRET_KEY)
  35     ).digest()
  36     return urlsafe_b64encode(sha_digest).replace('=', '').replace('_', '-').lower()
  37
  38
  39 def split_tags(*tag_lists):
  40     if len(tag_lists) == 1:
  41         result = defaultdict(list)
  42         for tag in tag_lists[0]:
  43             result[tag.category].append(tag)
  44     else:
  45         result = defaultdict(dict)
  46         for tag_list in tag_lists:
  47             for tag in tag_list:
  48                 try:
  49                     result[tag.category][tag.pk].count += tag.count
  50                 except KeyError:
  51                     result[tag.category][tag.pk] = tag
  52         for k, v in result.items():
  53             result[k] = sorted(v.values(), key=lambda tag: tag.sort_key)
  54     return result
  55
  56
class ExistingFile(UploadedFile):
    """An ``UploadedFile`` that stands for a file already present at ``path``.

    ``temporary_file_path()`` reports the stored path and ``close()`` is a
    no-op.
    """

    def __init__(self, path, *args, **kwargs):
        # ``path`` (which shadows the module-level ``os.path`` import inside
        # this method) is stored before the remaining arguments are handed
        # to the UploadedFile constructor.
        self.path = path
        super(ExistingFile, self).__init__(*args, **kwargs)

    def temporary_file_path(self):
        # Return the path given to the constructor.
        return self.path

    def close(self):
        # Intentionally does nothing.
        pass
  68
  69
  70 class LockFile(object):
  71     """
  72     A file lock monitor class; createas an ${objname}.lock
  73     file in directory dir, and locks it exclusively.
  74     To be used in 'with' construct.
  75     """
  76     def __init__(self, dir, objname):
  77         self.lockname = path.join(dir, objname + ".lock")
  78
  79     def __enter__(self):
  80         self.lock = open(self.lockname, 'w')
  81         flock(self.lock, LOCK_EX)
  82
  83     def __exit__(self, *err):
  84         try:
  85             unlink(self.lockname)
  86         except OSError as oe:
  87             if oe.errno != EEXIST:
  88                 raise oe
  89         self.lock.close()
  90
  91
  92 # @task
  93 def create_zip(paths, zip_slug):
  94     """
  95     Creates a zip in MEDIA_ROOT/zip directory containing files from path.
  96     Resulting archive filename is ${zip_slug}.zip
  97     Returns it's path relative to MEDIA_ROOT (no initial slash)
  98     """
  99     # directory to store zip files
 100     zip_path = path.join(settings.MEDIA_ROOT, 'zip')
 101
 102     try:
 103         mkdir(zip_path)
 104     except OSError as oe:
 105         if oe.errno != EEXIST:
 106             raise oe
 107     zip_filename = zip_slug + ".zip"
 108
 109     with LockFile(zip_path, zip_slug):
 110         if not path.exists(path.join(zip_path, zip_filename)):
 111             zipf = ZipFile(path.join(zip_path, zip_filename), 'w')
 112             try:
 113                 for arcname, p in paths:
 114                     if arcname is None:
 115                         arcname = path.basename(p)
 116                     zipf.write(p, arcname)
 117             finally:
 118                 zipf.close()
 119
 120         return 'zip/' + zip_filename
 121
 122
 123 def remove_zip(zip_slug):
 124     """
 125     removes the ${zip_slug}.zip file from zip store.
 126     """
 127     zip_file = path.join(settings.MEDIA_ROOT, 'zip', zip_slug + '.zip')
 128     try:
 129         unlink(zip_file)
 130     except OSError as oe:
 131         if oe.errno != ENOENT:
 132             raise oe
 133
 134
 135 class AttachmentHttpResponse(HttpResponse):
 136     """Response serving a file to be downloaded.
 137     """
 138     def __init__(self, file_path, file_name, mimetype):
 139         super(AttachmentHttpResponse, self).__init__(mimetype=mimetype)
 140         self['Content-Disposition'] = 'attachment; filename=%s' % file_name
 141         self.file_path = file_path
 142         self.file_name = file_name
 143
 144         with open(DefaultStorage().path(self.file_path)) as f:
 145             for chunk in read_chunks(f):
 146                 self.write(chunk)
 147
 148
 149 class MultiQuerySet(object):
 150     def __init__(self, *args, **kwargs):
 151         self.querysets = args
 152         self._count = None
 153
 154     def count(self):
 155         if not self._count:
 156             self._count = sum(len(qs) for qs in self.querysets)
 157         return self._count
 158
 159     def __len__(self):
 160         return self.count()
 161
 162     def __getitem__(self, item):
 163         try:
 164             (offset, stop, step) = item.indices(self.count())
 165         except AttributeError:
 166             # it's not a slice - make it one
 167             return self[item:item + 1][0]
 168         items = []
 169         total_len = stop - offset
 170         for qs in self.querysets:
 171             if len(qs) < offset:
 172                 offset -= len(qs)
 173             else:
 174                 items += list(qs[offset:stop])
 175                 if len(items) >= total_len:
 176                     return items
 177                 else:
 178                     offset = 0
 179                     stop = total_len - len(items)
 180                     continue
 181
 182
 183 class SortedMultiQuerySet(MultiQuerySet):
 184     def __init__(self, *args, **kwargs):
 185         self.order_by = kwargs.pop('order_by', None)
 186         self.sortfn = kwargs.pop('sortfn', None)
 187         if self.order_by is not None:
 188             self.sortfn = lambda a, b: cmp((getattr(a, f) for f in self.order_by),
 189                                            (getattr(b, f) for f in self.order_by))
 190         super(SortedMultiQuerySet, self).__init__(*args, **kwargs)
 191
 192     def __getitem__(self, item):
 193         sort_heads = [0] * len(self.querysets)
 194         try:
 195             (offset, stop, step) = item.indices(self.count())
 196         except AttributeError:
 197             # it's not a slice - make it one
 198             return self[item:item + 1][0]
 199         items = []
 200         total_len = stop - offset
 201         skipped = 0
 202         i_s = range(len(sort_heads))
 203
 204         while len(items) < total_len:
 205             candidate = None
 206             candidate_i = None
 207             for i in i_s:
 208                 def get_next():
 209                     return self.querysets[i][sort_heads[i]]
 210                 try:
 211                     if candidate is None:
 212                         candidate = get_next()
 213                         candidate_i = i
 214                     else:
 215                         competitor = get_next()
 216                         if self.sortfn(candidate, competitor) > 0:
 217                             candidate = competitor
 218                             candidate_i = i
 219                 except IndexError:
 220                     continue  # continue next sort_head
 221             # we have no more elements:
 222             if candidate is None:
 223                 break
 224             sort_heads[candidate_i] += 1
 225             if skipped < offset:
 226                 skipped += 1
 227                 continue  # continue next item
 228             items.append(candidate)
 229
 230         return items
 231
 232
def truncate_html_words(s, num, end_text='...'):
    """Truncates HTML to a certain number of words (not counting tags and
    comments). Closes opened tags if they were correctly closed in the given
    html. Takes an optional argument of what should be used to notify that the
    string has been truncated, defaulting to ellipsis (...).

    Newlines in the HTML are preserved.

    This is just a version of django.utils.text.truncate_html_words with no space before the end_text.

    ``s`` is converted with ``force_unicode`` and ``num`` with ``int``.
    Returns an empty unicode string when ``num`` is not positive, and the
    whole converted string when it contains at most ``num`` words.
    """
    s = force_unicode(s)
    length = int(num)
    if length <= 0:
        return u''
    # Tag names that never get a closing tag and so are never tracked as open.
    html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area', 'hr', 'input')
    # Set up regular expressions
    # re_words matches an entity (&...;), a tag (<...>) or a word; only a
    # word fills group 1.
    re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U)
    # re_tag groups: (1) '/' of a closing tag, (2) tag name,
    # (3) '/' of a self-closing tag.
    re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>')
    # Count non-HTML words and keep note of open tags
    pos = 0
    end_text_pos = 0
    words = 0
    open_tags = []
    # Scan until one word more than ``length`` has been seen; that extra
    # word is what proves truncation is needed.
    while words <= length:
        m = re_words.search(s, pos)
        if not m:
            # Checked through whole string
            break
        pos = m.end(0)
        if m.group(1):
            # It's an actual non-HTML word
            words += 1
            if words == length:
                # Remember where the last kept word ends.
                end_text_pos = pos
            continue
        # Check for tag
        tag = re_tag.match(m.group(0))
        if not tag or end_text_pos:
            # Don't worry about non tags or tags after our truncate point
            continue
        closing_tag, tagname, self_closing = tag.groups()
        tagname = tagname.lower()  # Element names are always case-insensitive
        if self_closing or tagname in html4_singlets:
            pass
        elif closing_tag:
            # Check for match in open tags list
            try:
                i = open_tags.index(tagname)
            except ValueError:
                pass
            else:
                # SGML: An end tag closes, back to the matching start tag,
                # all unclosed intervening start tags with omitted end tags
                open_tags = open_tags[i+1:]
        else:
            # Add it to the start of the open tags list
            # (so the list is ordered most recently opened first)
            open_tags.insert(0, tagname)
    if words <= length:
        # Don't try to close tags if we don't need to truncate
        return s
    out = s[:end_text_pos]
    if end_text:
        out += end_text
    # Close any tags still open
    for tag in open_tags:
        out += '</%s>' % tag
    # Return string
    return out
 301
 302
 303 def customizations_hash(customizations):
 304     customizations.sort()
 305     return hash(tuple(customizations))
 306
 307
 308 def get_customized_pdf_path(book, customizations):
 309     """
 310     Returns a MEDIA_ROOT relative path for a customized pdf. The name will contain a hash of customization options.
 311     """
 312     h = customizations_hash(customizations)
 313     return 'book/%s/%s-custom-%s.pdf' % (book.slug, book.slug, h)
 314
 315
 316 def clear_custom_pdf(book):
 317     """
 318     Returns a list of paths to generated customized pdf of a book
 319     """
 320     from waiter.utils import clear_cache
 321     clear_cache('book/%s' % book.slug)
 322
 323
 324 class AppSettings(object):
 325     """Allows specyfying custom settings for an app, with default values.
 326
 327     Just subclass, set some properties and instantiate with a prefix.
 328     Getting a SETTING from an instance will check for prefix_SETTING
 329     in project settings if set, else take the default. The value will be
 330     then filtered through _more_SETTING method, if there is one.
 331
 332     """
 333     def __init__(self, prefix):
 334         self._prefix = prefix
 335
 336     def __getattribute__(self, name):
 337         if name.startswith('_'):
 338             return object.__getattribute__(self, name)
 339         value = getattr(settings, "%s_%s" % (self._prefix, name), object.__getattribute__(self, name))
 340         more = "_more_%s" % name
 341         if hasattr(self, more):
 342             value = getattr(self, more)(value)
 343         return value
 344
 345
 346 def delete_from_cache_by_language(cache, key_template):
 347     cache.delete_many([key_template % lc for lc, ln in settings.LANGUAGES])