From: Jan Szejko
Date: Thu, 14 Dec 2017 11:34:29 +0000 (+0100)
Subject: escape user-provided strings used in regular expressions
X-Git-Url: https://git.mdrn.pl/wolnelektury.git/commitdiff_plain/5105e426102784140df92c6a6ce1ffdf70866a4e?hp=63f861861578b33416a0b2d801252b27443fabde

escape user-provided strings used in regular expressions
---

diff --git a/src/api/handlers.py b/src/api/handlers.py
index eb56e2014..88106260a 100644
--- a/src/api/handlers.py
+++ b/src/api/handlers.py
@@ -19,6 +19,7 @@ from picture.models import Picture
 from picture.forms import PictureImportForm
 
 from stats.utils import piwik_track
+from wolnelektury.utils import re_escape
 
 from . import emitters # Register our emitters
 
@@ -334,6 +335,7 @@ class FilterBooksHandler(AnonymousBooksHandler):
         if (search_string is not None) and len(search_string) < 3:
             search_string = None
         if search_string:
+            search_string = re_escape(search_string)
             books_author = books.filter(cached_author__iregex='\m' + search_string)
             books_title = books.filter(title__iregex='\m' + search_string)
             books_title = books_title.exclude(id__in=list(books_author.values_list('id', flat=True)))
diff --git a/src/search/views.py b/src/search/views.py
index 70a216e3b..b0f064119 100644
--- a/src/search/views.py
+++ b/src/search/views.py
@@ -17,6 +17,8 @@ from suggest.forms import PublishingSuggestForm
 import re
 import json
 
+from wolnelektury.utils import re_escape
+
 
 def match_word_re(word):
     if 'sqlite' in settings.DATABASES['default']['ENGINE']:
@@ -29,7 +31,7 @@ query_syntax_chars = re.compile(r"[\\/*:(){}]")
 
 
 def remove_query_syntax_chars(query, replace=' '):
-    return query_syntax_chars.sub(' ', query)
+    return query_syntax_chars.sub(replace, query)
 
 
 def did_you_mean(query, tokens):
@@ -64,7 +66,7 @@ def hint(request):
     if len(prefix) < 2:
         return JsonResponse([], safe=False)
 
-    prefix = remove_query_syntax_chars(prefix)
+    prefix = re_escape(' '.join(remove_query_syntax_chars(prefix).split()))
 
     try:
         limit = int(request.GET.get('max', ''))
@@ -81,7 +83,7 @@ def hint(request):
             'id': author.id,
             'url': author.get_absolute_url(),
         }
-        for author in Tag.objects.filter(category='author', name__iregex='\m' + prefix)[:10]
+        for author in Tag.objects.filter(category='author', name__iregex=u'\m' + prefix)[:10]
     ]
     if len(data) < limit:
         data += [
diff --git a/src/wolnelektury/utils.py b/src/wolnelektury/utils.py
index 8c5ead612..d20039cd2 100644
--- a/src/wolnelektury/utils.py
+++ b/src/wolnelektury/utils.py
@@ -12,6 +12,7 @@ from functools import wraps
 
 import pytz
 from inspect import getargspec
+import re
 from django.core.mail import send_mail
 from django.http import HttpResponse
 from django.template import RequestContext
@@ -149,3 +150,8 @@ class UnicodeCSVWriter(object):
     def writerows(self, rows):
         for row in rows:
             self.writerow(row)
+
+
+# the original re.escape messes with unicode
+def re_escape(s):
+    return re.sub(r"[(){}\[\].*?|^$\\+-]", r"\\\g<0>", s)