src/catalogue/utils.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   4 #
   5 import hashlib
   6 import os.path
   7 import random
   8 import re
   9 import time
  10 from base64 import urlsafe_b64encode
  11 from collections import defaultdict
  12 from errno import EEXIST, ENOENT
  13 from fcntl import flock, LOCK_EX
  14 from os import mkdir, path, unlink
  15 from zipfile import ZipFile
  16
  17 from django.conf import settings
  18 from django.core.files.storage import DefaultStorage
  19 from django.core.files.uploadedfile import UploadedFile
  20 from django.http import HttpResponse
  21 from django.utils.encoding import force_unicode
  22
  23 from reporting.utils import read_chunks
  24
  25 # Use the system (hardware-based) random number generator if it exists.
  26 if hasattr(random, 'SystemRandom'):
  27     randrange = random.SystemRandom().randrange
  28 else:
  29     randrange = random.randrange
  30 MAX_SESSION_KEY = 18446744073709551616L     # 2 << 63
  31
  32
  33 def get_random_hash(seed):
  34     sha_digest = hashlib.sha1('%s%s%s%s' % (
  35         randrange(0, MAX_SESSION_KEY), time.time(), unicode(seed).encode('utf-8', 'replace'), settings.SECRET_KEY)
  36     ).digest()
  37     return urlsafe_b64encode(sha_digest).replace('=', '').replace('_', '-').lower()
  38
  39
  40 def split_tags(*tag_lists):
  41     if len(tag_lists) == 1:
  42         result = defaultdict(list)
  43         for tag in tag_lists[0]:
  44             result[tag.category].append(tag)
  45     else:
  46         result = defaultdict(dict)
  47         for tag_list in tag_lists:
  48             for tag in tag_list:
  49                 try:
  50                     result[tag.category][tag.pk].count += tag.count
  51                 except KeyError:
  52                     result[tag.category][tag.pk] = tag
  53         for k, v in result.items():
  54             result[k] = sorted(v.values(), key=lambda tag: tag.sort_key)
  55     return result
  56
  57
  58 class ExistingFile(UploadedFile):
  59
  60     def __init__(self, path, *args, **kwargs):
  61         self.path = path
  62         super(ExistingFile, self).__init__(*args, **kwargs)
  63
  64     def temporary_file_path(self):
  65         return self.path
  66
  67     def close(self):
  68         pass
  69
  70
  71 class LockFile(object):
  72     """
  73     A file lock monitor class; createas an ${objname}.lock
  74     file in directory dir, and locks it exclusively.
  75     To be used in 'with' construct.
  76     """
  77     def __init__(self, dir, objname):
  78         self.lockname = path.join(dir, objname + ".lock")
  79
  80     def __enter__(self):
  81         self.lock = open(self.lockname, 'w')
  82         flock(self.lock, LOCK_EX)
  83
  84     def __exit__(self, *err):
  85         try:
  86             unlink(self.lockname)
  87         except OSError as oe:
  88             if oe.errno != EEXIST:
  89                 raise oe
  90         self.lock.close()
  91
  92
  93 # @task
  94 def create_zip(paths, zip_slug):
  95     """
  96     Creates a zip in MEDIA_ROOT/zip directory containing files from path.
  97     Resulting archive filename is ${zip_slug}.zip
  98     Returns it's path relative to MEDIA_ROOT (no initial slash)
  99     """
 100     # directory to store zip files
 101     zip_path = path.join(settings.MEDIA_ROOT, 'zip')
 102
 103     try:
 104         mkdir(zip_path)
 105     except OSError as oe:
 106         if oe.errno != EEXIST:
 107             raise oe
 108     zip_filename = zip_slug + ".zip"
 109
 110     with LockFile(zip_path, zip_slug):
 111         if not path.exists(path.join(zip_path, zip_filename)):
 112             zipf = ZipFile(path.join(zip_path, zip_filename), 'w')
 113             try:
 114                 for arcname, p in paths:
 115                     if arcname is None:
 116                         arcname = path.basename(p)
 117                     zipf.write(p, arcname)
 118             finally:
 119                 zipf.close()
 120
 121         return 'zip/' + zip_filename
 122
 123
 124 def remove_zip(zip_slug):
 125     """
 126     removes the ${zip_slug}.zip file from zip store.
 127     """
 128     zip_file = path.join(settings.MEDIA_ROOT, 'zip', zip_slug + '.zip')
 129     try:
 130         unlink(zip_file)
 131     except OSError as oe:
 132         if oe.errno != ENOENT:
 133             raise oe
 134
 135
 136 class AttachmentHttpResponse(HttpResponse):
 137     """Response serving a file to be downloaded.
 138     """
 139     def __init__(self, file_path, file_name, mimetype):
 140         super(AttachmentHttpResponse, self).__init__(mimetype=mimetype)
 141         self['Content-Disposition'] = 'attachment; filename=%s' % file_name
 142         self.file_path = file_path
 143         self.file_name = file_name
 144
 145         with open(DefaultStorage().path(self.file_path)) as f:
 146             for chunk in read_chunks(f):
 147                 self.write(chunk)
 148
 149
 150 class MultiQuerySet(object):
 151     def __init__(self, *args, **kwargs):
 152         self.querysets = args
 153         self._count = None
 154
 155     def count(self):
 156         if not self._count:
 157             self._count = sum(len(qs) for qs in self.querysets)
 158         return self._count
 159
 160     def __len__(self):
 161         return self.count()
 162
 163     def __getitem__(self, item):
 164         try:
 165             (offset, stop, step) = item.indices(self.count())
 166         except AttributeError:
 167             # it's not a slice - make it one
 168             return self[item:item + 1][0]
 169         items = []
 170         total_len = stop - offset
 171         for qs in self.querysets:
 172             if len(qs) < offset:
 173                 offset -= len(qs)
 174             else:
 175                 items += list(qs[offset:stop])
 176                 if len(items) >= total_len:
 177                     return items
 178                 else:
 179                     offset = 0
 180                     stop = total_len - len(items)
 181                     continue
 182
 183
 184 class SortedMultiQuerySet(MultiQuerySet):
 185     def __init__(self, *args, **kwargs):
 186         self.order_by = kwargs.pop('order_by', None)
 187         self.sortfn = kwargs.pop('sortfn', None)
 188         if self.order_by is not None:
 189             self.sortfn = lambda a, b: cmp((getattr(a, f) for f in self.order_by),
 190                                            (getattr(b, f) for f in self.order_by))
 191         super(SortedMultiQuerySet, self).__init__(*args, **kwargs)
 192
 193     def __getitem__(self, item):
 194         sort_heads = [0] * len(self.querysets)
 195         try:
 196             (offset, stop, step) = item.indices(self.count())
 197         except AttributeError:
 198             # it's not a slice - make it one
 199             return self[item:item + 1][0]
 200         items = []
 201         total_len = stop - offset
 202         skipped = 0
 203         i_s = range(len(sort_heads))
 204
 205         while len(items) < total_len:
 206             candidate = None
 207             candidate_i = None
 208             for i in i_s:
 209                 def get_next():
 210                     return self.querysets[i][sort_heads[i]]
 211                 try:
 212                     if candidate is None:
 213                         candidate = get_next()
 214                         candidate_i = i
 215                     else:
 216                         competitor = get_next()
 217                         if self.sortfn(candidate, competitor) > 0:
 218                             candidate = competitor
 219                             candidate_i = i
 220                 except IndexError:
 221                     continue  # continue next sort_head
 222             # we have no more elements:
 223             if candidate is None:
 224                 break
 225             sort_heads[candidate_i] += 1
 226             if skipped < offset:
 227                 skipped += 1
 228                 continue  # continue next item
 229             items.append(candidate)
 230
 231         return items
 232
 233
def truncate_html_words(s, num, end_text='...'):
    """Truncates HTML to a certain number of words (not counting tags and
    comments). Closes opened tags if they were correctly closed in the given
    html. Takes an optional argument of what should be used to notify that the
    string has been truncated, defaulting to ellipsis (...).

    Newlines in the HTML are preserved.

    This is just a version of django.utils.text.truncate_html_words with no space before the end_text.

    Arguments:
      s        -- the HTML; converted with force_unicode before processing.
      num      -- maximum number of words to keep; converted with int().
      end_text -- appended directly after the last kept word when the text
                  is truncated; nothing is appended if it is empty.

    Returns an empty unicode string when num <= 0, the converted input
    unchanged when it has at most num words, and otherwise the truncated
    text followed by end_text and closing tags for the tags left open.
    """
    s = force_unicode(s)
    length = int(num)
    if length <= 0:
        return u''
    # Tags that never take a closing tag, so they are not tracked as open.
    html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area', 'hr', 'input')
    # Set up regular expressions
    # re_words matches an entity, a tag, or a word; only a word fills group 1.
    re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U)
    # re_tag groups: closing slash, tag name, self-closing slash.
    re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>')
    # Count non-HTML words and keep note of open tags
    pos = 0
    end_text_pos = 0  # end of the last kept word; 0 until that word is seen
    words = 0
    open_tags = []  # most recently opened tag first
    # The loop runs until one word more than the limit has been seen, which
    # is what tells us the text really needs truncating.
    while words <= length:
        m = re_words.search(s, pos)
        if not m:
            # Checked through whole string
            break
        pos = m.end(0)
        if m.group(1):
            # It's an actual non-HTML word
            words += 1
            if words == length:
                end_text_pos = pos
            continue
        # Check for tag
        tag = re_tag.match(m.group(0))
        if not tag or end_text_pos:
            # Don't worry about non tags or tags after our truncate point
            continue
        closing_tag, tagname, self_closing = tag.groups()
        tagname = tagname.lower()  # Element names are always case-insensitive
        if self_closing or tagname in html4_singlets:
            pass
        elif closing_tag:
            # Check for match in open tags list
            try:
                i = open_tags.index(tagname)
            except ValueError:
                # Closing tag without a matching open tag: ignore it.
                pass
            else:
                # SGML: An end tag closes, back to the matching start tag,
                # all unclosed intervening start tags with omitted end tags
                open_tags = open_tags[i+1:]
        else:
            # Add it to the start of the open tags list
            open_tags.insert(0, tagname)
    if words <= length:
        # Don't try to close tags if we don't need to truncate
        return s
    out = s[:end_text_pos]
    if end_text:
        out += end_text
    # Close any tags still open
    # (innermost first, because open_tags is ordered most recent first)
    for tag in open_tags:
        out += '</%s>' % tag
    # Return string
    return out
 302
 303
 304 def customizations_hash(customizations):
 305     customizations.sort()
 306     return hash(tuple(customizations))
 307
 308
 309 def get_customized_pdf_path(book, customizations):
 310     """
 311     Returns a MEDIA_ROOT relative path for a customized pdf. The name will contain a hash of customization options.
 312     """
 313     h = customizations_hash(customizations)
 314     return 'book/%s/%s-custom-%s.pdf' % (book.slug, book.slug, h)
 315
 316
 317 def clear_custom_pdf(book):
 318     """
 319     Returns a list of paths to generated customized pdf of a book
 320     """
 321     from waiter.utils import clear_cache
 322     clear_cache('book/%s' % book.slug)
 323
 324
 325 class AppSettings(object):
 326     """Allows specyfying custom settings for an app, with default values.
 327
 328     Just subclass, set some properties and instantiate with a prefix.
 329     Getting a SETTING from an instance will check for prefix_SETTING
 330     in project settings if set, else take the default. The value will be
 331     then filtered through _more_SETTING method, if there is one.
 332
 333     """
 334     def __init__(self, prefix):
 335         self._prefix = prefix
 336
 337     def __getattribute__(self, name):
 338         if name.startswith('_'):
 339             return object.__getattribute__(self, name)
 340         value = getattr(settings, "%s_%s" % (self._prefix, name), object.__getattribute__(self, name))
 341         more = "_more_%s" % name
 342         if hasattr(self, more):
 343             value = getattr(self, more)(value)
 344         return value
 345
 346
 347 def delete_from_cache_by_language(cache, key_template):
 348     cache.delete_many([key_template % lc for lc, ln in settings.LANGUAGES])
 349
 350
 351 def gallery_path(slug):
 352     return os.path.join(settings.MEDIA_ROOT, settings.IMAGE_DIR, slug)
 353
 354
 355 def gallery_url(slug):
 356     return '%s%s%s/' % (settings.MEDIA_URL, settings.IMAGE_DIR, slug)