X-Git-Url: https://git.mdrn.pl/wolnelektury.git/blobdiff_plain/db833ba4517084f61a64907c6d15606e7c881edd..6ab5e576c0bccd88cc5d4e6924373a5e393b0fa1:/apps/djangosphinx/manager.py?ds=sidebyside

diff --git a/apps/djangosphinx/manager.py b/apps/djangosphinx/manager.py
deleted file mode 100644
index 3fbcc9657..000000000
--- a/apps/djangosphinx/manager.py
+++ /dev/null
@@ -1,648 +0,0 @@
-import select
-import socket
-import time
-import struct
-import warnings
-import operator
-import apis.current as sphinxapi
-
-try:
-    import decimal
-except ImportError:
-    from django.utils import _decimal as decimal # for Python 2.3
-
-from django.db.models.query import QuerySet, Q
-from django.conf import settings
-
-__all__ = ('SearchError', 'ConnectionError', 'SphinxSearch', 'SphinxRelation')
-
-from django.contrib.contenttypes.models import ContentType
-from datetime import datetime, date
-
-# server settings
-SPHINX_SERVER = getattr(settings, 'SPHINX_SERVER', 'localhost')
-SPHINX_PORT = int(getattr(settings, 'SPHINX_PORT', 3312))
-
-# These require search API 275 (Sphinx 0.9.8)
-SPHINX_RETRIES = int(getattr(settings, 'SPHINX_RETRIES', 0))
-SPHINX_RETRIES_DELAY = int(getattr(settings, 'SPHINX_RETRIES_DELAY', 5))
-
-MAX_INT = int(2**31-1)
-
-class SearchError(Exception): pass
-class ConnectionError(Exception): pass
-
-class SphinxProxy(object):
-    """
-    Acts exactly like a normal instance of an object except that
-    it will handle any special sphinx attributes in a _sphinx class.
-    """
-    __slots__ = ('__dict__', '__instance__', '_sphinx')
-
-    def __init__(self, instance, attributes):
-        object.__setattr__(self, '__instance__', instance)
-        object.__setattr__(self, '_sphinx', attributes)
-
-    def _get_current_object(self):
-        """
-        Return the current object. This is useful if you want the real object
-        behind the proxy at a time for performance reasons or because you want
-        to pass the object into a different context.
-        """
-        return self.__instance__
-    __current_object = property(_get_current_object)
-
-    def __dict__(self):
-        try:
-            return self.__current_object.__dict__
-        except RuntimeError:
-            return AttributeError('__dict__')
-    __dict__ = property(__dict__)
-
-    def __repr__(self):
-        try:
-            obj = self.__current_object
-        except RuntimeError:
-            return '<%s unbound>' % self.__class__.__name__
-        return repr(obj)
-
-    def __nonzero__(self):
-        try:
-            return bool(self.__current_object)
-        except RuntimeError:
-            return False
-
-    def __unicode__(self):
-        try:
-            return unicode(self.__current_oject)
-        except RuntimeError:
-            return repr(self)
-
-    def __dir__(self):
-        try:
-            return dir(self.__current_object)
-        except RuntimeError:
-            return []
-
-    def __getattr__(self, name, value=None):
-        if name == '__members__':
-            return dir(self.__current_object)
-        elif name == '_sphinx':
-            return object.__getattr__(self, '_sphinx', value)
-        return getattr(self.__current_object, name)
-
-    def __setattr__(self, name, value):
-        if name == '_sphinx':
-            return object.__setattr__(self, '_sphinx', value)
-        return setattr(self.__current_object, name, value)
-
-    def __setitem__(self, key, value):
-        self.__current_object[key] = value
-
-    def __delitem__(self, key):
-        del self.__current_object[key]
-
-    def __setslice__(self, i, j, seq):
-        self.__current_object[i:j] = seq
-
-    def __delslice__(self, i, j):
-        del self.__current_object[i:j]
-
-    __delattr__ = lambda x, n: delattr(x.__current_object, n)
-    __str__ = lambda x: str(x.__current_object)
-    __unicode__ = lambda x: unicode(x.__current_object)
-    __lt__ = lambda x, o: x.__current_object < o
-    __le__ = lambda x, o: x.__current_object <= o
-    __eq__ = lambda x, o: x.__current_object == o
-    __ne__ = lambda x, o: x.__current_object != o
-    __gt__ = lambda x, o: x.__current_object > o
-    __ge__ = lambda x, o: x.__current_object >= o
-    __cmp__ = lambda x, o: cmp(x.__current_object, o)
-    __hash__ = lambda x: hash(x.__current_object)
-    # attributes are currently not callable
-    # __call__ = lambda x, *a, **kw: x.__current_object(*a, **kw)
-    __len__ = lambda x: len(x.__current_object)
-    __getitem__ = lambda x, i: x.__current_object[i]
-    __iter__ = lambda x: iter(x.__current_object)
-    __contains__ = lambda x, i: i in x.__current_object
-    __getslice__ = lambda x, i, j: x.__current_object[i:j]
-    __add__ = lambda x, o: x.__current_object + o
-    __sub__ = lambda x, o: x.__current_object - o
-    __mul__ = lambda x, o: x.__current_object * o
-    __floordiv__ = lambda x, o: x.__current_object // o
-    __mod__ = lambda x, o: x.__current_object % o
-    __divmod__ = lambda x, o: x.__current_object.__divmod__(o)
-    __pow__ = lambda x, o: x.__current_object ** o
-    __lshift__ = lambda x, o: x.__current_object << o
-    __rshift__ = lambda x, o: x.__current_object >> o
-    __and__ = lambda x, o: x.__current_object & o
-    __xor__ = lambda x, o: x.__current_object ^ o
-    __or__ = lambda x, o: x.__current_object | o
-    __div__ = lambda x, o: x.__current_object.__div__(o)
-    __truediv__ = lambda x, o: x.__current_object.__truediv__(o)
-    __neg__ = lambda x: -(x.__current_object)
-    __pos__ = lambda x: +(x.__current_object)
-    __abs__ = lambda x: abs(x.__current_object)
-    __invert__ = lambda x: ~(x.__current_object)
-    __complex__ = lambda x: complex(x.__current_object)
-    __int__ = lambda x: int(x.__current_object)
-    __long__ = lambda x: long(x.__current_object)
-    __float__ = lambda x: float(x.__current_object)
-    __oct__ = lambda x: oct(x.__current_object)
-    __hex__ = lambda x: hex(x.__current_object)
-    __index__ = lambda x: x.__current_object.__index__()
-    __coerce__ = lambda x, o: x.__coerce__(x, o)
-    __enter__ = lambda x: x.__enter__()
-    __exit__ = lambda x, *a, **kw: x.__exit__(*a, **kw)
-
-def to_sphinx(value):
-    "Convert a value into a sphinx query value"
-    if isinstance(value, date) or isinstance(value, datetime):
-        return int(time.mktime(value.timetuple()))
-    elif isinstance(value, decimal.Decimal) or isinstance(value, float):
-        return float(value)
-    return int(value)
-
-class SphinxQuerySet(object):
-    available_kwargs = ('rankmode', 'mode', 'weights', 'maxmatches')
-
-    def __init__(self, model=None, **kwargs):
-        self._select_related = False
-        self._select_related_args = {}
-        self._select_related_fields = []
-        self._filters = {}
-        self._excludes = {}
-        self._extra = {}
-        self._query = ''
-        self.__metadata = None
-        self._offset = 0
-        self._limit = 20
-
-        self._groupby = None
-        self._sort = None
-        self._weights = [1, 100]
-
-        self._maxmatches = 1000
-        self._result_cache = None
-        self._mode = sphinxapi.SPH_MATCH_ALL
-        self._rankmode = getattr(sphinxapi, 'SPH_RANK_PROXIMITY_BM25', None)
-        self._model = model
-        self._anchor = {}
-        self.__metadata = {}
-
-        self.set_options(**kwargs)
-
-        if model:
-            self._index = kwargs.get('index', model._meta.db_table)
-        else:
-            self._index = kwargs.get('index')
-
-    def __repr__(self):
-        if self._result_cache is not None:
-            return repr(self._get_data())
-        else:
-            return '<%s instance>' % (self.__class__.__name__,)
-
-    def __len__(self):
-        return len(self._get_data())
-
-    def __iter__(self):
-        return iter(self._get_data())
-
-    def __getitem__(self, k):
-        if not isinstance(k, (slice, int, long)):
-            raise TypeError
-        assert (not isinstance(k, slice) and (k >= 0)) \
-            or (isinstance(k, slice) and (k.start is None or k.start >= 0) and (k.stop is None or k.stop >= 0)), \
-            "Negative indexing is not supported."
-        if type(k) == slice:
-            if self._offset < k.start or k.stop-k.start > self._limit:
-                self._result_cache = None
-        else:
-            if k not in range(self._offset, self._limit+self._offset):
-                self._result_cache = None
-        if self._result_cache is None:
-            if type(k) == slice:
-                self._offset = k.start
-                self._limit = k.stop-k.start
-                return self._get_results()
-            else:
-                self._offset = k
-                self._limit = 1
-                return self._get_results()[0]
-        else:
-            return self._result_cache[k]
-
-    def set_options(self, **kwargs):
-        if 'rankmode' in kwargs:
-            if kwargs.get('rankmode') is None:
-                kwargs['rankmode'] = sphinxapi.SPH_RANK_NONE
-        for key in self.available_kwargs:
-            if key in kwargs:
-                setattr(self, '_%s' % (key,), kwargs[key])
-
-    def query(self, string):
-        return self._clone(_query=unicode(string).encode('utf-8'))
-
-    def group_by(self, attribute, func, groupsort='@group desc'):
-        return self._clone(_groupby=attribute, _groupfunc=func, _groupsort=groupsort)
-
-    def rank_none(self):
-        warnings.warn('`rank_none()` is deprecated. Use `set_options(rankmode=None)` instead.', DeprecationWarning)
-        return self._clone(_rankmode=sphinxapi.SPH_RANK_NONE)
-
-    def mode(self, mode):
-        warnings.warn('`mode()` is deprecated. Use `set_options(mode='')` instead.', DeprecationWarning)
-        return self._clone(_mode=mode)
-
-    def weights(self, weights):
-        warnings.warn('`mode()` is deprecated. Use `set_options(weights=[])` instead.', DeprecationWarning)
-        return self._clone(_weights=weights)
-
-    def on_index(self, index):
-        warnings.warn('`mode()` is deprecated. Use `set_options(on_index=foo)` instead.', DeprecationWarning)
-        return self._clone(_index=index)
-
-    # only works on attributes
-    def filter(self, **kwargs):
-        filters = self._filters.copy()
-        for k,v in kwargs.iteritems():
-            if hasattr(v, 'next'):
-                v = list(v)
-            elif not (isinstance(v, list) or isinstance(v, tuple)):
-                v = [v,]
-            filters.setdefault(k, []).extend(map(to_sphinx, v))
-        return self._clone(_filters=filters)
-
-    def geoanchor(self, lat_attr, lng_attr, lat, lng):
-        assert(sphinxapi.VER_COMMAND_SEARCH >= 0x113, "You must upgrade sphinxapi to version 0.98 to use Geo Anchoring.")
-        return self._clone(_anchor=(lat_attr, lng_attr, float(lat), float(lng)))
-
-    # this actually does nothing, its just a passthru to
-    # keep things looking/working generally the same
-    def all(self):
-        return self
-
-    # only works on attributes
-    def exclude(self, **kwargs):
-        filters = self._excludes.copy()
-        for k,v in kwargs.iteritems():
-            if hasattr(v, 'next'):
-                v = list(v)
-            elif not (isinstance(v, list) or isinstance(v, tuple)):
-                v = [v,]
-            filters.setdefault(k, []).extend(map(to_sphinx, v))
-        return self._clone(_excludes=filters)
-
-    # you cannot order by @weight (it always orders in descending)
-    # keywords are @id, @weight, @rank, and @relevance
-    def order_by(self, *args):
-        sort_by = []
-        for arg in args:
-            sort = 'ASC'
-            if arg[0] == '-':
-                arg = arg[1:]
-                sort = 'DESC'
-            if arg == 'id':
-                arg = '@id'
-            sort_by.append('%s %s' % (arg, sort))
-        if sort_by:
-            return self._clone(_sort=(sphinxapi.SPH_SORT_EXTENDED, ', '.join(sort_by)))
-        return self
-
-    # pass these thru on the queryset and let django handle it
-    def select_related(self, *args, **kwargs):
-        _args = self._select_related_fields[:]
-        _args.extend(args)
-        _kwargs = self._select_related_args.copy()
-        _kwargs.update(kwargs)
-
-        return self._clone(
-            _select_related=True,
-            _select_related_fields=_args,
-            _select_related_args=_kwargs,
-        )
-
-    def extra(self, **kwargs):
-        extra = self._extra.copy()
-        extra.update(kwargs)
-        return self._clone(_extra=extra)
-
-    def count(self):
-        return min(self._sphinx.get('total_found', 0), self._maxmatches)
-
-    def reset(self):
-        return self.__class__(self._model, self._index)
-
-    # Internal methods
-    def _clone(self, **kwargs):
-        # Clones the queryset passing any changed args
-        c = self.__class__()
-        c.__dict__.update(self.__dict__)
-        c.__dict__.update(kwargs)
-        return c
-
-    def _sphinx(self):
-        if not self.__metadata:
-            # We have to force execution if this is accessed beforehand
-            self._get_data()
-        return self.__metadata
-    _sphinx = property(_sphinx)
-
-    def _get_data(self):
-        assert(self._index)
-        # need to find a way to make this work yet
-        if self._result_cache is None:
-            self._result_cache = list(self._get_results())
-        return self._result_cache
-
-    def _get_sphinx_results(self):
-        assert(self._offset + self._limit <= self._maxmatches)
-
-        client = sphinxapi.SphinxClient()
-        client.SetServer(SPHINX_SERVER, SPHINX_PORT)
-
-        if self._sort:
-            client.SetSortMode(*self._sort)
-
-        if isinstance(self._weights, dict):
-            client.SetFieldWeights(self._weights)
-        else:
-            # assume its a list
-            client.SetWeights(map(int, self._weights))
-
-        client.SetMatchMode(self._mode)
-
-        # 0.97 requires you to reset it
-        if hasattr(client, 'ResetFilters'):
-            client.ResetFilters()
-        if hasattr(client, 'ResetGroupBy'):
-            client.ResetGroupBy()
-
-        def _handle_filters(filter_list, exclude=False):
-            for name, values in filter_list.iteritems():
-                parts = len(name.split('__'))
-                if parts > 2:
-                    raise NotImplementedError, 'Related object and/or multiple field lookups not supported'
-                elif parts == 2:
-                    # The float handling for __gt and __lt is kind of ugly..
-                    name, lookup = name.split('__', 1)
-                    is_float = isinstance(values[0], float)
-                    if lookup == 'gt':
-                        value = is_float and values[0] + (1.0/MAX_INT) or values[0] - 1
-                        args = (name, value, MAX_INT, exclude)
-                    elif lookup == 'gte':
-                        args = (name, values[0], MAX_INT, exclude)
-                    elif lookup == 'lt':
-                        value = is_float and values[0] - (1.0/MAX_INT) or values[0] - 1
-                        args = (name, -MAX_INT, value, exclude)
-                    elif lookup == 'lte':
-                        args = (name, -MAX_INT, values[0], exclude)
-                    elif lookup == 'range':
-                        args = (name, values[0], values[1], exclude)
-                    else:
-                        raise NotImplementedError, 'Related object and/or field lookup "%s" not supported' % lookup
-                    if is_float:
-                        client.SetFilterFloatRange(*args)
-                    elif not exclude and self._model and name == self._model._meta.pk.column:
-                        client.SetIDRange(*args[1:3])
-                    else:
-                        client.SetFilterRange(*args)
-
-                else:
-                    client.SetFilter(name, values, exclude)
-
-        # Include filters
-        if self._filters:
-            _handle_filters(self._filters)
-
-        # Exclude filters
-        if self._excludes:
-            _handle_filters(self._excludes, True)
-
-        if self._groupby:
-            client.SetGroupBy(self._groupby, self._groupfunc, self._groupsort)
-
-        if self._anchor:
-            client.SetGeoAnchor(*self._anchor)
-
-        if self._rankmode:
-            client.SetRankingMode(self._rankmode)
-
-        if not self._limit > 0:
-            # Fix for Sphinx throwing an assertion error when you pass it an empty limiter
-            return []
-
-
-        if sphinxapi.VER_COMMAND_SEARCH >= 0x113:
-            client.SetRetries(SPHINX_RETRIES, SPHINX_RETRIES_DELAY)
-
-        client.SetLimits(int(self._offset), int(self._limit), int(self._maxmatches))
-
-        results = client.Query(self._query, self._index)
-
-        # The Sphinx API doesn't raise exceptions
-        if not results:
-            if client.GetLastError():
-                raise SearchError, client.GetLastError()
-            elif client.GetLastWarning():
-                raise SearchError, client.GetLastWarning()
-        return results
-
-    def _get_results(self):
-        results = self._get_sphinx_results()
-        if not results or not results['matches']:
-            results = []
-        elif self._model:
-            queryset = self._model.objects.all()
-            if self._select_related:
-                queryset = queryset.select_related(*self._select_related_fields, **self._select_related_args)
-            if self._extra:
-                queryset = queryset.extra(**self._extra)
-            pks = getattr(self._model._meta, 'pks', None)
-            if pks is None or len(pks) == 1:
-                queryset = queryset.filter(pk__in=[r['id'] for r in results['matches']])
-                queryset = dict([(o.pk, o) for o in queryset])
-            else:
-                for r in results['matches']:
-                    r['id'] = ', '.join([unicode(r['attrs'][p.column]) for p in pks])
-                q = reduce(operator.or_, [reduce(operator.and_, [Q(**{p.name: r['attrs'][p.column]}) for p in pks]) for r in results['matches']])
-                if q:
-                    queryset = queryset.filter(q)
-                    queryset = dict([(', '.join([unicode(p) for p in o.pks]), o) for o in queryset])
-                else:
-                    queryset = None
-
-            if queryset:
-                self.__metadata = {
-                    'total': results['total'],
-                    'total_found': results['total_found'],
-                    'words': results['words'],
-                }
-                results = [SphinxProxy(queryset[r['id']], r) for r in results['matches'] if r['id'] in queryset]
-            else:
-                results = []
-        else:
-            "We did a query without a model, lets see if there's a content_type"
-            results['attrs'] = dict(results['attrs'])
-            if 'content_type' in results['attrs']:
-                "Now we have to do one query per content_type"
-                objcache = {}
-                for r in results['matches']:
-                    ct = r['attrs']['content_type']
-                    if ct not in objcache:
-                        objcache[ct] = {}
-                    objcache[ct][r['id']] = None
-                for ct in objcache:
-                    queryset = ContentType.objects.get(pk=ct).model_class().objects.filter(pk__in=objcache[ct])
-                    for o in queryset:
-                        objcache[ct][o.id] = o
-                results = [objcache[r['attrs']['content_type']][r['id']] for r in results['matches']]
-            else:
-                results = results['matches']
-        self._result_cache = results
-        return results
-
-class SphinxModelManager(object):
-    def __init__(self, model, **kwargs):
-        self._model = model
-        self._index = kwargs.pop('index', model._meta.db_table)
-        self._kwargs = kwargs
-
-    def _get_query_set(self):
-        return SphinxQuerySet(self._model, index=self._index, **self._kwargs)
-
-    def get_index(self):
-        return self._index
-
-    def all(self):
-        return self._get_query_set()
-
-    def filter(self, **kwargs):
-        return self._get_query_set().filter(**kwargs)
-
-    def query(self, *args, **kwargs):
-        return self._get_query_set().query(*args, **kwargs)
-
-    def on_index(self, *args, **kwargs):
-        return self._get_query_set().on_index(*args, **kwargs)
-
-    def geoanchor(self, *args, **kwargs):
-        return self._get_query_set().geoanchor(*args, **kwargs)
-
-class SphinxInstanceManager(object):
-    """Collection of tools useful for objects which are in a Sphinx index."""
-    def __init__(self, instance, index):
-        self._instance = instance
-        self._index = index
-
-    def update(self, **kwargs):
-        assert(sphinxapi.VER_COMMAND_SEARCH >= 0x113, "You must upgrade sphinxapi to version 0.98 to use Geo Anchoring.")
-        sphinxapi.UpdateAttributes(index, kwargs.keys(), dict(self.instance.pk, map(to_sphinx, kwargs.values())))
-
-
-class SphinxSearch(object):
-    def __init__(self, index=None, **kwargs):
-        self._kwargs = kwargs
-        self._sphinx = None
-        self._index = index
-        self.model = None
-
-    def __call__(self, index, **kwargs):
-        warnings.warn('For non-model searches use a SphinxQuerySet instance.', DeprecationWarning)
-        return SphinxQuerySet(index=index, **kwargs)
-
-    def __get__(self, instance, model, **kwargs):
-        if instance:
-            return SphinxInstanceManager(instance, index)
-        return self._sphinx
-
-    def contribute_to_class(self, model, name, **kwargs):
-        if self._index is None:
-            self._index = model._meta.db_table
-        self._sphinx = SphinxModelManager(model, index=self._index, **self._kwargs)
-        self.model = model
-        if getattr(model, '__sphinx_indexes__', None) is None:
-            setattr(model, '__sphinx_indexes__', [self._index])
-        else:
-            model.__sphinx_indexes__.append(self._index)
-        setattr(model, name, self._sphinx)
-
-class SphinxRelationProxy(SphinxProxy):
-    def count(self):
-        return min(self._sphinx['attrs']['@count'], self._maxmatches)
-
-class SphinxRelation(SphinxSearch):
-    """
-    Adds "related model" support to django-sphinx --
-    http://code.google.com/p/django-sphinx/
-    http://www.sphinxsearch.com/
-
-    Example --
-
-    class MySearch(SphinxSearch):
-        myrelatedobject = SphinxRelation(RelatedModel)
-        anotherone = SphinxRelation(AnotherModel)
-        ...
-
-    class MyModel(models.Model):
-        search = MySearch('index')
-
-    """
-    def __init__(self, model=None, attr=None, sort='@count desc', **kwargs):
-        if model:
-            self._related_model = model
-            self._related_attr = attr or model.__name__.lower()
-            self._related_sort = sort
-        super(SphinxRelation, self).__init__(**kwargs)
-
-    def __get__(self, instance, instance_model, **kwargs):
-        self._mode = instance._mode
-        self._rankmode = instance._rankmode
-        self._index = instance._index
-        self._query = instance._query
-        self._filters = instance._filters
-        self._excludes = instance._excludes
-        self._model = self._related_model
-        self._groupby = self._related_attr
-        self._groupsort = self._related_sort
-        self._groupfunc = sphinxapi.SPH_GROUPBY_ATTR
-        return self
-
-    def _get_results(self):
-        results = self._get_sphinx_results()
-        if not results: return []
-        if results['matches'] and self._model:
-            ids = []
-            for r in results['matches']:
-                value = r['attrs']['@groupby']
-                if isinstance(value, (int, long)):
-                    ids.append(value)
-                else:
-                    ids.extend()
-            qs = self._model.objects.filter(pk__in=set(ids))
-            if self._select_related:
-                qs = qs.select_related(*self._select_related_fields,
-                                       **self._select_related_args)
-            if self._extra:
-                qs = qs.extra(**self._extra)
-            queryset = dict([(o.id, o) for o in qs])
-            self.__metadata = {
-                'total': results['total'],
-                'total_found': results['total_found'],
-                'words': results['words'],
-            }
-            results = [ SphinxRelationProxy(queryset[k['attrs']['@groupby']], k) \
-                        for k in results['matches'] \
-                        if k['attrs']['@groupby'] in queryset ]
-        else:
-            results = []
-        self._result_cache = results
-        return results
-
-    def _sphinx(self):
-        if not self.__metadata:
-            # We have to force execution if this is accessed beforehand
-            self._get_data()
-        return self.__metadata
-    _sphinx = property(_sphinx)
\ No newline at end of file