# -*- coding: utf-8 -*-
# Reconstructed from a whitespace-collapsed patch to src/wolnelektury/utils.py
# (wolnelektury.git, 111104c8e3e9..d2b872dc0941).  Only the definitions that
# the patch adds are reproduced here; the unchanged context of the patch (the
# module's pre-existing imports and the tail of send_noreply_mail) was only
# partially visible and is not repeated.
#
# This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
import codecs
import csv
import re

try:
    import cStringIO  # Python 2: the csv module works on byte strings
except ImportError:
    import io  # Python 3: the csv module works on text
    _new_row_buffer = io.StringIO
    _BUFFER_HOLDS_BYTES = False
else:
    _new_row_buffer = cStringIO.StringIO
    _BUFFER_HOLDS_BYTES = True

# ``unicode`` on Python 2, ``str`` on Python 3.
_TEXT_TYPE = type(u"")


def _to_text(value):
    """Return *value* as a text string suitable for a CSV cell.

    ``None`` becomes an empty string (the same convention the stdlib
    ``csv.writer`` uses), byte strings are decoded as UTF-8, and any other
    non-text object is converted with the text type's constructor.
    """
    if value is None:
        return u""
    if isinstance(value, _TEXT_TYPE):
        return value
    if isinstance(value, bytes):
        return value.decode("utf-8")
    return _TEXT_TYPE(value)


# source: https://docs.python.org/2/library/csv.html#examples
class UnicodeCSVWriter(object):
    """
    A CSV writer which will write rows to CSV file "f",
    which is encoded in the given encoding.

    "f" only needs a ``write`` method accepting byte strings.  Extra keyword
    arguments are passed through to ``csv.writer``.  Cells may be text,
    UTF-8 byte strings, ``None`` or any object convertible to text.
    """

    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
        # Rows are first formatted into an in-memory queue, then re-encoded
        # and forwarded to the real stream.
        self.queue = _new_row_buffer()
        self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
        self.stream = f
        # Incremental, so that stateful encodings (e.g. a UTF-16 BOM) are
        # handled once per stream rather than once per row.
        self.encoder = codecs.getincrementalencoder(encoding)()

    def writerow(self, row):
        """Format one row as CSV and write it, encoded, to the stream."""
        cells = [_to_text(cell) for cell in row]
        if _BUFFER_HOLDS_BYTES:
            # The Python 2 csv module cannot handle unicode objects directly.
            cells = [cell.encode("utf-8") for cell in cells]
        self.writer.writerow(cells)
        # Fetch the formatted row from the queue ...
        data = self.queue.getvalue()
        if _BUFFER_HOLDS_BYTES:
            data = data.decode("utf-8")
        # ... reencode it into the target encoding and write it out
        self.stream.write(self.encoder.encode(data))
        # Empty the queue.  Rewind first: io.StringIO.truncate() does not
        # move the position, which would pad the next row with NULs.
        self.queue.seek(0)
        self.queue.truncate(0)

    def writerows(self, rows):
        """Write every row of the iterable *rows* using ``writerow``."""
        for row in rows:
            self.writerow(row)


_RE_SPECIAL_CHARS = re.compile(r"[(){}\[\].*?|^$\\+-]")


# the original re.escape messes with unicode
def re_escape(s):
    """Return *s* with regex metacharacters backslash-escaped.

    Only the characters ``( ) { } [ ] . * ? | ^ $ \\ + -`` are escaped;
    everything else, including non-ASCII letters, is left untouched.
    """
    return _RE_SPECIAL_CHARS.sub(r"\\\g<0>", s)


# Lower-case fragments whose presence in a User-Agent marks it as a bot.
BOT_BITS = ['bot', 'slurp', 'spider', 'facebook', 'crawler', 'parser', 'http']


def is_crawler(request):
    """Tell whether *request* appears to come from an automated client.

    *request* must expose a ``META`` mapping (as Django's ``HttpRequest``
    does).  A missing or empty ``HTTP_USER_AGENT`` counts as a crawler;
    otherwise the lower-cased user agent is searched for any of BOT_BITS.
    """
    user_agent = request.META.get('HTTP_USER_AGENT')
    if not user_agent:
        return True
    user_agent = user_agent.lower()
    return any(bot_bit in user_agent for bot_bit in BOT_BITS)