escape user-provided strings used in regular expressions

author Jan Szejko <janek37@gmail.com>

Thu, 14 Dec 2017 11:34:29 +0000 (12:34 +0100)

committer Jan Szejko <janek37@gmail.com>

Thu, 14 Dec 2017 11:34:29 +0000 (12:34 +0100)
author Jan Szejko <janek37@gmail.com>
Thu, 14 Dec 2017 11:34:29 +0000 (12:34 +0100)
committer Jan Szejko <janek37@gmail.com>
Thu, 14 Dec 2017 11:34:29 +0000 (12:34 +0100)
diff --git a/src/api/handlers.py b/src/api/handlers.py

index eb56e20..8810626 100644 (file)
--- a/src/api/handlers.py
+++ b/src/api/handlers.py
@@ -19,6 +19,7 @@ from picture.models import Picture
  from picture.forms import PictureImportForm
  
  from stats.utils import piwik_track
+from wolnelektury.utils import re_escape
  
  from . import emitters  # Register our emitters
  
@@ -334,6 +335,7 @@ class FilterBooksHandler(AnonymousBooksHandler):
          if (search_string is not None) and len(search_string) < 3:
              search_string = None
          if search_string:
+            search_string = re_escape(search_string)
              books_author = books.filter(cached_author__iregex='\m' + search_string)
              books_title = books.filter(title__iregex='\m' + search_string)
              books_title = books_title.exclude(id__in=list(books_author.values_list('id', flat=True)))
diff --git a/src/search/views.py b/src/search/views.py

index 70a216e..b0f0641 100644 (file)
--- a/src/search/views.py
+++ b/src/search/views.py
@@ -17,6 +17,8 @@ from suggest.forms import PublishingSuggestForm
  import re
  import json
  
+from wolnelektury.utils import re_escape
+
  
  def match_word_re(word):
      if 'sqlite' in settings.DATABASES['default']['ENGINE']:
@@ -29,7 +31,7 @@ query_syntax_chars = re.compile(r"[\\/*:(){}]")
  
  
  def remove_query_syntax_chars(query, replace=' '):
-    return query_syntax_chars.sub(' ', query)
+    return query_syntax_chars.sub(replace, query)
  
  
  def did_you_mean(query, tokens):
@@ -64,7 +66,7 @@ def hint(request):
      if len(prefix) < 2:
          return JsonResponse([], safe=False)
  
-    prefix = remove_query_syntax_chars(prefix)
+    prefix = re_escape(' '.join(remove_query_syntax_chars(prefix).split()))
  
      try:
          limit = int(request.GET.get('max', ''))
@@ -81,7 +83,7 @@ def hint(request):
              'id': author.id,
              'url': author.get_absolute_url(),
          }
-        for author in Tag.objects.filter(category='author', name__iregex='\m' + prefix)[:10]
+        for author in Tag.objects.filter(category='author', name__iregex=u'\m' + prefix)[:10]
      ]
      if len(data) < limit:
          data += [
diff --git a/src/wolnelektury/utils.py b/src/wolnelektury/utils.py

index 8c5ead6..d20039c 100644 (file)
--- a/src/wolnelektury/utils.py
+++ b/src/wolnelektury/utils.py
@@ -12,6 +12,7 @@ from functools import wraps
  import pytz
  from inspect import getargspec
  
+import re
  from django.core.mail import send_mail
  from django.http import HttpResponse
  from django.template import RequestContext
@@ -149,3 +150,8 @@ class UnicodeCSVWriter(object):
      def writerows(self, rows):
          for row in rows:
              self.writerow(row)
+
+
+# the stdlib re.escape backslash-escapes every non-alphanumeric character, non-ASCII letters included, which messes with unicode
+def re_escape(s):
+    return re.sub(r"[(){}\[\].*?|^$\\+-]", r"\\\g<0>", s)
author	Jan Szejko <janek37@gmail.com>
	Thu, 14 Dec 2017 11:34:29 +0000 (12:34 +0100)
committer	Jan Szejko <janek37@gmail.com>
	Thu, 14 Dec 2017 11:34:29 +0000 (12:34 +0100)
src/api/handlers.py		patch | blob | history
src/search/views.py		patch | blob | history
src/wolnelektury/utils.py		patch | blob | history