import random
from django.conf import settings
+from django.http.response import HttpResponseForbidden
from django.template import RequestContext
from django.template.loader import render_to_string
from django.shortcuts import render_to_response, get_object_or_404, render, redirect
from catalogue.models import Book, Collection, Tag, Fragment
from catalogue.utils import split_tags
from catalogue.models.tag import prefetch_relations
+from wolnelektury.utils import is_crawler
def _user_is_staff(user):
    """Tell whether ``user`` has its ``is_staff`` attribute set to a truthy value."""
    return user.is_staff


# Decorator produced by ``user_passes_test`` using the staff predicate above.
staff_required = user_passes_test(_user_is_staff)
except ResponseInstead as e:
return e.response
+ if is_crawler(request) and len(tags) > 1:
+ return HttpResponseForbidden('address removed from crawling. check robots.txt')
+
if list_type == 'gallery' and any(tag.category == 'set' for tag in tags):
raise Http404
def re_escape(s):
    """Return ``s`` with regex metacharacters prefixed by a backslash.

    Used instead of the standard ``re.escape`` because, per the original
    author's note, that function messes with unicode.  Only the characters
    ``( ) { } [ ] . * ? | ^ $ \\ + -`` are escaped; everything else,
    including non-ASCII text, is returned unchanged.
    """
    metacharacters = re.compile(r"[(){}\[\].*?|^$\\+-]")
    # The template emits a literal backslash followed by the matched character.
    escaped = metacharacters.sub(r"\\\g<0>", s)
    return escaped
+
+
# Lowercase substrings whose presence in a User-Agent header marks the
# client as an automated crawler.
BOT_BITS = ['bot', 'slurp', 'spider', 'facebook', 'crawler', 'parser', 'http']


def is_crawler(request):
    """Return True when ``request`` appears to come from a crawler.

    The decision uses only ``request.META['HTTP_USER_AGENT']``.  A missing,
    empty or whitespace-only header counts as a crawler; otherwise the
    header is lowercased and the result is True when it contains any of
    the ``BOT_BITS`` substrings.

    Args:
        request: object exposing a ``META`` mapping of request headers.

    Returns:
        bool: True for a presumed crawler, False otherwise.
    """
    # Strip first so a blank header is treated the same as a missing one.
    user_agent = (request.META.get('HTTP_USER_AGENT') or '').strip()
    if not user_agent:
        return True
    user_agent = user_agent.lower()
    return any(bot_bit in user_agent for bot_bit in BOT_BITS)