From: Radek Czajka Date: Mon, 17 May 2010 12:10:31 +0000 (+0200) Subject: allow search without diacritics (#287) X-Git-Url: https://git.mdrn.pl/wolnelektury.git/commitdiff_plain/0962bb50bef94a051060dc90fd4bc2bbbf5938ff?ds=inline allow search without diacritics (#287) --- diff --git a/apps/catalogue/views.py b/apps/catalogue/views.py index 0a048af9b..b32a310b5 100644 --- a/apps/catalogue/views.py +++ b/apps/catalogue/views.py @@ -7,6 +7,7 @@ import zipfile import sys import pprint import traceback +import re from django.conf import settings from django.template import RequestContext @@ -171,16 +172,24 @@ def book_text(request, slug): # ========== # = Search = # ========== + +def _no_diacritics_regexp(query): + """ returns a regexp for searching for a query without diacritics + + should be locale-aware """ + names = {'a':u'ą', 'c':u'ć', 'e':u'ę', 'l': u'ł', 'n':u'ń', 'o':u'ó', 's':u'ś', 'z':u'ź|ż'} + def repl(m): + l = m.group() + return "(%s|%s)" % (l, names[l]) + return re.sub('[%s]'%(''.join(names.keys())), repl, query) + def _word_starts_with(name, prefix): """returns a Q object getting models having `name` contain a word starting with `prefix` """ kwargs = {} if settings.DATABASE_ENGINE in ('mysql', 'postgresql_psycopg2', 'postgresql'): - # we must escape `prefix` so that it only matches literally - for special in r'\^$.*+?|(){}[]': - prefix = prefix.replace(special, '\\' + special) - + prefix = _no_diacritics_regexp(re.escape(prefix)) # we could use a [[:<:]] (word start), # but we want both `xy` and `(xy` to catch `(xyz)` kwargs['%s__iregex' % name] = u"(^|[^[:alpha:]])%s" % prefix diff --git a/wolnelektury/static/js/jquery.autocomplete.js b/wolnelektury/static/js/jquery.autocomplete.js index 7cdb359d5..4e425e8bd 100644 --- a/wolnelektury/static/js/jquery.autocomplete.js +++ b/wolnelektury/static/js/jquery.autocomplete.js @@ -430,8 +430,22 @@ $.Autocompleter.defaults = { width: 0, multiple: false, multipleSeparator: ", ", + regex_escape: function(term) { + term = term.replace(/([\^\$\(\)\[\]\{\}\*\.\+\?\|\\])/gi, "\\$1"); + /* no polish diacritics; should be more locale-aware */ + term = term.replace(/a/g, '[aą]') + .replace(/c/g, '[cć]') + .replace(/e/g, '[eę]') + .replace(/l/g, '[lł]') + .replace(/n/g, '[nń]') + .replace(/o/g, '[oó]') + .replace(/s/g, '[sś]') + .replace(/z/g, '[zźż]'); + return term; + }, highlight: function(value, term) { - return value.replace(new RegExp("(?![^&;]+;)(?!<[^<>]*)(" + term.replace(/([\^\$\(\)\[\]\{\}\*\.\+\?\|\\])/gi, "\\$1") + ")(?![^<>]*>)(?![^&;]+;)", "gi"), "$1"); + term = $.Autocompleter.defaults.regex_escape(term); + return value.replace(new RegExp("(?![^&;]+;)(?!<[^<>]*)(" + term + ")(?![^<>]*>)(?![^&;]+;)", "gi"), "$1"); }, scroll: true, scrollHeight: 180 @@ -447,7 +461,8 @@ $.Autocompleter.Cache = function(options) { s = s.toLowerCase(); var i = s.indexOf(sub); if (options.matchContains == "word"){ - i = s.toLowerCase().search("\\b" + sub.replace(/([\^\$\(\)\[\]\{\}\*\.\+\?\|\\])/gi, "\\$1").toLowerCase()); + query = $.Autocompleter.defaults.regex_escape(sub.toLowerCase()); + i = s.toLowerCase().search("\\b" + query); } if (i == -1) return false; return i == 0 || options.matchContains;