-import select
-import socket
-import time
-import struct
-import warnings
-import operator
-import apis.current as sphinxapi
-
-try:
- import decimal
-except ImportError:
- from django.utils import _decimal as decimal # for Python 2.3
-
-from django.db.models.query import QuerySet, Q
-from django.conf import settings
-
-__all__ = ('SearchError', 'ConnectionError', 'SphinxSearch', 'SphinxRelation')
-
-from django.contrib.contenttypes.models import ContentType
-from datetime import datetime, date
-
# Connection settings for searchd (overridable via Django settings).
SPHINX_SERVER = getattr(settings, 'SPHINX_SERVER', 'localhost')
SPHINX_PORT = int(getattr(settings, 'SPHINX_PORT', 3312))

# Retry behaviour; these require search API 275 (Sphinx 0.9.8).
SPHINX_RETRIES = int(getattr(settings, 'SPHINX_RETRIES', 0))
SPHINX_RETRIES_DELAY = int(getattr(settings, 'SPHINX_RETRIES_DELAY', 5))

# Largest signed 32-bit integer; used as the open end of range filters.
MAX_INT = int(2 ** 31 - 1)

class SearchError(Exception):
    """Raised when searchd reports an error or warning for a query."""

class ConnectionError(Exception):
    """Raised on failures connecting to searchd."""
-
class SphinxProxy(object):
    """
    Acts exactly like a normal instance of an object except that
    it will handle any special sphinx attributes in a _sphinx class.

    The proxied object lives in `__instance__`; per-match Sphinx metadata
    (weight, attrs, ...) lives in `_sphinx`.  Every dunder is forwarded to
    the proxied object so the proxy is interchangeable with it.
    """
    __slots__ = ('__dict__', '__instance__', '_sphinx')

    def __init__(self, instance, attributes):
        # Bypass our own __setattr__ so these land on the proxy itself
        # rather than being forwarded to the proxied object.
        object.__setattr__(self, '__instance__', instance)
        object.__setattr__(self, '_sphinx', attributes)

    def _get_current_object(self):
        """
        Return the current object. This is useful if you want the real object
        behind the proxy at a time for performance reasons or because you want
        to pass the object into a different context.
        """
        return self.__instance__
    __current_object = property(_get_current_object)

    def __dict__(self):
        try:
            return self.__current_object.__dict__
        except RuntimeError:
            # BUGFIX: this used to *return* the AttributeError instead of
            # raising it, so attribute-protocol callers saw an exception
            # object as the __dict__ value.
            raise AttributeError('__dict__')
    __dict__ = property(__dict__)

    def __repr__(self):
        try:
            obj = self.__current_object
        except RuntimeError:
            return '<%s unbound>' % self.__class__.__name__
        return repr(obj)

    def __nonzero__(self):
        try:
            return bool(self.__current_object)
        except RuntimeError:
            return False

    def __unicode__(self):
        try:
            # BUGFIX: was `self.__current_oject` (typo) -- a guaranteed
            # AttributeError.  Note this guarded version was also being
            # silently shadowed by a duplicate lambda further down, which
            # has been removed.
            return unicode(self.__current_object)
        except RuntimeError:
            return repr(self)

    def __dir__(self):
        try:
            return dir(self.__current_object)
        except RuntimeError:
            return []

    def __getattr__(self, name, value=None):
        if name == '__members__':
            return dir(self.__current_object)
        elif name == '_sphinx':
            # BUGFIX: `object` has no `__getattr__`; use __getattribute__.
            return object.__getattribute__(self, '_sphinx')
        return getattr(self.__current_object, name)

    def __setattr__(self, name, value):
        if name == '_sphinx':
            return object.__setattr__(self, '_sphinx', value)
        return setattr(self.__current_object, name, value)

    def __setitem__(self, key, value):
        self.__current_object[key] = value

    def __delitem__(self, key):
        del self.__current_object[key]

    def __setslice__(self, i, j, seq):
        self.__current_object[i:j] = seq

    def __delslice__(self, i, j):
        del self.__current_object[i:j]

    __delattr__ = lambda x, n: delattr(x.__current_object, n)
    __str__ = lambda x: str(x.__current_object)
    __lt__ = lambda x, o: x.__current_object < o
    __le__ = lambda x, o: x.__current_object <= o
    __eq__ = lambda x, o: x.__current_object == o
    __ne__ = lambda x, o: x.__current_object != o
    __gt__ = lambda x, o: x.__current_object > o
    __ge__ = lambda x, o: x.__current_object >= o
    __cmp__ = lambda x, o: cmp(x.__current_object, o)
    __hash__ = lambda x: hash(x.__current_object)
    # attributes are currently not callable
    # __call__ = lambda x, *a, **kw: x.__current_object(*a, **kw)
    __len__ = lambda x: len(x.__current_object)
    __getitem__ = lambda x, i: x.__current_object[i]
    __iter__ = lambda x: iter(x.__current_object)
    __contains__ = lambda x, i: i in x.__current_object
    __getslice__ = lambda x, i, j: x.__current_object[i:j]
    __add__ = lambda x, o: x.__current_object + o
    __sub__ = lambda x, o: x.__current_object - o
    __mul__ = lambda x, o: x.__current_object * o
    __floordiv__ = lambda x, o: x.__current_object // o
    __mod__ = lambda x, o: x.__current_object % o
    __divmod__ = lambda x, o: x.__current_object.__divmod__(o)
    __pow__ = lambda x, o: x.__current_object ** o
    __lshift__ = lambda x, o: x.__current_object << o
    __rshift__ = lambda x, o: x.__current_object >> o
    __and__ = lambda x, o: x.__current_object & o
    __xor__ = lambda x, o: x.__current_object ^ o
    __or__ = lambda x, o: x.__current_object | o
    __div__ = lambda x, o: x.__current_object.__div__(o)
    __truediv__ = lambda x, o: x.__current_object.__truediv__(o)
    __neg__ = lambda x: -(x.__current_object)
    __pos__ = lambda x: +(x.__current_object)
    __abs__ = lambda x: abs(x.__current_object)
    __invert__ = lambda x: ~(x.__current_object)
    __complex__ = lambda x: complex(x.__current_object)
    __int__ = lambda x: int(x.__current_object)
    __long__ = lambda x: long(x.__current_object)
    __float__ = lambda x: float(x.__current_object)
    __oct__ = lambda x: oct(x.__current_object)
    __hex__ = lambda x: hex(x.__current_object)
    __index__ = lambda x: x.__current_object.__index__()
    # BUGFIX: the next three used to call themselves (x.__coerce__(x, o),
    # x.__enter__(), x.__exit__(...)) -- infinite recursion.  Delegate to
    # the proxied object like every other dunder.
    __coerce__ = lambda x, o: x.__current_object.__coerce__(o)
    __enter__ = lambda x: x.__current_object.__enter__()
    __exit__ = lambda x, *a, **kw: x.__current_object.__exit__(*a, **kw)
-
def to_sphinx(value):
    """Coerce *value* into a type the Sphinx API understands."""
    # datetime is a subclass of date, so one tuple check covers both;
    # Sphinx stores timestamps as unix epoch integers.
    if isinstance(value, (date, datetime)):
        return int(time.mktime(value.timetuple()))
    if isinstance(value, (decimal.Decimal, float)):
        return float(value)
    return int(value)
-
class SphinxQuerySet(object):
    """
    Lazily-evaluated, chainable Sphinx query.

    Mirrors a small subset of the Django QuerySet API (filter / exclude /
    order_by / extra / select_related) but executes against searchd, and --
    when a model is bound -- maps the raw matches back onto model rows
    wrapped in SphinxProxy objects.
    """
    # Options that set_options() will copy onto the instance as `_<name>`.
    available_kwargs = ('rankmode', 'mode', 'weights', 'maxmatches')

    def __init__(self, model=None, **kwargs):
        """
        model  -- optional Django model; its db_table is the default index.
        kwargs -- any of `available_kwargs`, plus `index`.
        """
        self._select_related = False
        self._select_related_args = {}
        self._select_related_fields = []
        self._filters = {}
        self._excludes = {}
        self._extra = {}
        self._query = ''
        self._offset = 0
        self._limit = 20

        self._groupby = None
        self._sort = None
        self._weights = [1, 100]

        self._maxmatches = 1000
        self._result_cache = None
        self._mode = sphinxapi.SPH_MATCH_ALL
        self._rankmode = getattr(sphinxapi, 'SPH_RANK_PROXIMITY_BM25', None)
        self._model = model
        self._anchor = {}
        self.__metadata = {}

        self.set_options(**kwargs)

        if model:
            self._index = kwargs.get('index', model._meta.db_table)
        else:
            self._index = kwargs.get('index')

    def __repr__(self):
        if self._result_cache is not None:
            return repr(self._get_data())
        return '<%s instance>' % (self.__class__.__name__,)

    def __len__(self):
        return len(self._get_data())

    def __iter__(self):
        return iter(self._get_data())

    def __getitem__(self, k):
        """
        Integer indexing and non-negative slicing.  A request outside the
        currently cached window re-queries searchd with new limits.
        NOTE(review): an open-ended stop (e.g. qs[2:]) is not supported and
        raises TypeError, as it always has -- confirm no caller relies on it.
        """
        if not isinstance(k, (slice, int, long)):
            raise TypeError
        assert (not isinstance(k, slice) and (k >= 0)) \
            or (isinstance(k, slice) and (k.start is None or k.start >= 0)
                and (k.stop is None or k.stop >= 0)), \
            "Negative indexing is not supported."
        if isinstance(k, slice):
            # BUGFIX: a slice with an open start (qs[:n]) used to crash on
            # `k.stop - k.start` with None; treat a missing start as 0.
            start = k.start or 0
            if self._offset < start or k.stop - start > self._limit:
                self._result_cache = None
        else:
            if not (self._offset <= k < self._offset + self._limit):
                self._result_cache = None
        if self._result_cache is None:
            if isinstance(k, slice):
                self._offset = k.start or 0
                self._limit = k.stop - (k.start or 0)
                return self._get_results()
            self._offset = k
            self._limit = 1
            return self._get_results()[0]
        return self._result_cache[k]

    def set_options(self, **kwargs):
        """Copy recognised options onto this instance (in place)."""
        if 'rankmode' in kwargs:
            if kwargs.get('rankmode') is None:
                kwargs['rankmode'] = sphinxapi.SPH_RANK_NONE
        for key in self.available_kwargs:
            if key in kwargs:
                setattr(self, '_%s' % (key,), kwargs[key])

    def query(self, string):
        """Set the full-text query string (returns a clone)."""
        return self._clone(_query=unicode(string).encode('utf-8'))

    def group_by(self, attribute, func, groupsort='@group desc'):
        return self._clone(_groupby=attribute, _groupfunc=func, _groupsort=groupsort)

    def rank_none(self):
        warnings.warn('`rank_none()` is deprecated. Use `set_options(rankmode=None)` instead.', DeprecationWarning)
        return self._clone(_rankmode=sphinxapi.SPH_RANK_NONE)

    def mode(self, mode):
        # BUGFIX: the old message relied on accidental adjacent-string
        # concatenation around the nested quotes and rendered as
        # "set_options(mode=)".
        warnings.warn("`mode()` is deprecated. Use `set_options(mode='')` instead.", DeprecationWarning)
        return self._clone(_mode=mode)

    def weights(self, weights):
        # BUGFIX: message used to name `mode()` instead of `weights()`.
        warnings.warn('`weights()` is deprecated. Use `set_options(weights=[])` instead.', DeprecationWarning)
        return self._clone(_weights=weights)

    def on_index(self, index):
        # BUGFIX: message used to name `mode()` instead of `on_index()`.
        warnings.warn('`on_index()` is deprecated. Use `set_options(on_index=foo)` instead.', DeprecationWarning)
        return self._clone(_index=index)

    def _merge_filters(self, current, kwargs):
        # Shared by filter()/exclude(): copy `current` and fold the new
        # lookups in, coercing every value with to_sphinx().  Generators
        # (anything with .next) and scalars are normalised to lists first.
        filters = current.copy()
        for k, v in kwargs.items():
            if hasattr(v, 'next'):
                v = list(v)
            elif not isinstance(v, (list, tuple)):
                v = [v]
            filters.setdefault(k, []).extend(map(to_sphinx, v))
        return filters

    # only works on attributes
    def filter(self, **kwargs):
        return self._clone(_filters=self._merge_filters(self._filters, kwargs))

    def geoanchor(self, lat_attr, lng_attr, lat, lng):
        # BUGFIX: `assert(cond, msg)` asserted a non-empty tuple, which is
        # always true; assert the condition itself.
        assert sphinxapi.VER_COMMAND_SEARCH >= 0x113, \
            "You must upgrade sphinxapi to version 0.98 to use Geo Anchoring."
        return self._clone(_anchor=(lat_attr, lng_attr, float(lat), float(lng)))

    # this actually does nothing, its just a passthru to
    # keep things looking/working generally the same
    def all(self):
        return self

    # only works on attributes
    def exclude(self, **kwargs):
        return self._clone(_excludes=self._merge_filters(self._excludes, kwargs))

    # you cannot order by @weight (it always orders in descending)
    # keywords are @id, @weight, @rank, and @relevance
    def order_by(self, *args):
        sort_by = []
        for arg in args:
            sort = 'ASC'
            if arg[0] == '-':
                arg = arg[1:]
                sort = 'DESC'
            if arg == 'id':
                arg = '@id'
            sort_by.append('%s %s' % (arg, sort))
        if sort_by:
            return self._clone(_sort=(sphinxapi.SPH_SORT_EXTENDED, ', '.join(sort_by)))
        return self

    # pass these thru on the queryset and let django handle it
    def select_related(self, *args, **kwargs):
        _args = self._select_related_fields[:]
        _args.extend(args)
        _kwargs = self._select_related_args.copy()
        _kwargs.update(kwargs)
        return self._clone(
            _select_related=True,
            _select_related_fields=_args,
            _select_related_args=_kwargs,
        )

    def extra(self, **kwargs):
        extra = self._extra.copy()
        extra.update(kwargs)
        return self._clone(_extra=extra)

    def count(self):
        """Number of matches, capped at maxmatches."""
        return min(self._sphinx.get('total_found', 0), self._maxmatches)

    def reset(self):
        # BUGFIX: the index was passed positionally, but __init__ only
        # accepts it as the `index` keyword.
        return self.__class__(self._model, index=self._index)

    # Internal methods
    def _clone(self, **kwargs):
        # Clones the queryset passing any changed args
        c = self.__class__()
        c.__dict__.update(self.__dict__)
        c.__dict__.update(kwargs)
        return c

    def _sphinx(self):
        if not self.__metadata:
            # We have to force execution if this is accessed beforehand
            self._get_data()
        return self.__metadata
    _sphinx = property(_sphinx)

    def _get_data(self):
        assert self._index
        # need to find a way to make this work yet
        if self._result_cache is None:
            self._result_cache = list(self._get_results())
        return self._result_cache

    def _get_sphinx_results(self):
        """Build a SphinxClient from the accumulated state and run the query."""
        assert self._offset + self._limit <= self._maxmatches

        client = sphinxapi.SphinxClient()
        client.SetServer(SPHINX_SERVER, SPHINX_PORT)

        if self._sort:
            client.SetSortMode(*self._sort)

        if isinstance(self._weights, dict):
            client.SetFieldWeights(self._weights)
        else:
            # assume it is a list
            client.SetWeights([int(w) for w in self._weights])

        client.SetMatchMode(self._mode)

        # Sphinx 0.9.7 requires filters/group-by to be reset between queries
        if hasattr(client, 'ResetFilters'):
            client.ResetFilters()
        if hasattr(client, 'ResetGroupBy'):
            client.ResetGroupBy()

        def _handle_filters(filter_list, exclude=False):
            # Translate our dict of `name[__lookup] -> values` into
            # SetFilter/SetFilterRange/SetIDRange calls on the client.
            for name, values in filter_list.items():
                parts = len(name.split('__'))
                if parts > 2:
                    raise NotImplementedError('Related object and/or multiple field lookups not supported')
                elif parts == 2:
                    # Range lookups; floats need an epsilon nudge for gt/lt
                    # because Sphinx ranges are inclusive.
                    name, lookup = name.split('__', 1)
                    is_float = isinstance(values[0], float)
                    if lookup == 'gt':
                        # BUGFIX: the integer branch used to subtract 1,
                        # which turned `gt` into `>= value - 1`.
                        if is_float:
                            value = values[0] + (1.0 / MAX_INT)
                        else:
                            value = values[0] + 1
                        args = (name, value, MAX_INT, exclude)
                    elif lookup == 'gte':
                        args = (name, values[0], MAX_INT, exclude)
                    elif lookup == 'lt':
                        if is_float:
                            value = values[0] - (1.0 / MAX_INT)
                        else:
                            value = values[0] - 1
                        args = (name, -MAX_INT, value, exclude)
                    elif lookup == 'lte':
                        args = (name, -MAX_INT, values[0], exclude)
                    elif lookup == 'range':
                        args = (name, values[0], values[1], exclude)
                    else:
                        raise NotImplementedError('Related object and/or field lookup "%s" not supported' % lookup)
                    if is_float:
                        client.SetFilterFloatRange(*args)
                    elif not exclude and self._model and name == self._model._meta.pk.column:
                        client.SetIDRange(*args[1:3])
                    else:
                        client.SetFilterRange(*args)
                else:
                    client.SetFilter(name, values, exclude)

        # Include filters
        if self._filters:
            _handle_filters(self._filters)

        # Exclude filters
        if self._excludes:
            _handle_filters(self._excludes, True)

        if self._groupby:
            client.SetGroupBy(self._groupby, self._groupfunc, self._groupsort)

        if self._anchor:
            client.SetGeoAnchor(*self._anchor)

        if self._rankmode:
            client.SetRankingMode(self._rankmode)

        if not self._limit > 0:
            # Fix for Sphinx throwing an assertion error when you pass it an
            # empty limiter
            return []

        if sphinxapi.VER_COMMAND_SEARCH >= 0x113:
            client.SetRetries(SPHINX_RETRIES, SPHINX_RETRIES_DELAY)

        client.SetLimits(int(self._offset), int(self._limit), int(self._maxmatches))

        results = client.Query(self._query, self._index)

        # The Sphinx API signals failure via return value, not exceptions
        if not results:
            if client.GetLastError():
                raise SearchError(client.GetLastError())
            elif client.GetLastWarning():
                raise SearchError(client.GetLastWarning())
        return results

    def _get_results(self):
        """Run the query and map raw matches onto model instances (if any)."""
        results = self._get_sphinx_results()
        if not results or not results['matches']:
            results = []
        elif self._model:
            queryset = self._model.objects.all()
            if self._select_related:
                queryset = queryset.select_related(*self._select_related_fields, **self._select_related_args)
            if self._extra:
                queryset = queryset.extra(**self._extra)
            # `pks` is a non-standard multi-column-pk hook; a stock Django
            # model only has a single pk.
            pks = getattr(self._model._meta, 'pks', None)
            if pks is None or len(pks) == 1:
                queryset = queryset.filter(pk__in=[r['id'] for r in results['matches']])
                queryset = dict([(o.pk, o) for o in queryset])
            else:
                # Composite key: rewrite each match id to a joined string and
                # OR together one AND-group of Q objects per match.
                for r in results['matches']:
                    r['id'] = ', '.join([unicode(r['attrs'][p.column]) for p in pks])
                q = reduce(operator.or_, [reduce(operator.and_, [Q(**{p.name: r['attrs'][p.column]}) for p in pks]) for r in results['matches']])
                if q:
                    queryset = queryset.filter(q)
                    queryset = dict([(', '.join([unicode(p) for p in o.pks]), o) for o in queryset])
                else:
                    queryset = None

            if queryset:
                self.__metadata = {
                    'total': results['total'],
                    'total_found': results['total_found'],
                    'words': results['words'],
                }
                results = [SphinxProxy(queryset[r['id']], r) for r in results['matches'] if r['id'] in queryset]
            else:
                results = []
        else:
            # We did a query without a model; see if there's a content_type
            # attribute so we can fan out one query per content type.
            results['attrs'] = dict(results['attrs'])
            if 'content_type' in results['attrs']:
                objcache = {}
                for r in results['matches']:
                    ct = r['attrs']['content_type']
                    if ct not in objcache:
                        objcache[ct] = {}
                    objcache[ct][r['id']] = None
                for ct in objcache:
                    queryset = ContentType.objects.get(pk=ct).model_class().objects.filter(pk__in=objcache[ct])
                    for o in queryset:
                        objcache[ct][o.id] = o
                results = [objcache[r['attrs']['content_type']][r['id']] for r in results['matches']]
            else:
                results = results['matches']
        self._result_cache = results
        return results
-
class SphinxModelManager(object):
    """Per-model manager that hands out fresh SphinxQuerySets for one index."""

    def __init__(self, model, **kwargs):
        self._model = model
        # The model's table name doubles as the default index name.
        default_index = model._meta.db_table
        self._index = kwargs.pop('index', default_index)
        self._kwargs = kwargs

    def _get_query_set(self):
        # Every public entry point starts from a brand-new queryset.
        return SphinxQuerySet(self._model, index=self._index, **self._kwargs)

    def get_index(self):
        """Name of the Sphinx index this manager queries."""
        return self._index

    def all(self):
        return self._get_query_set()

    def filter(self, **kwargs):
        return self._get_query_set().filter(**kwargs)

    def query(self, *args, **kwargs):
        return self._get_query_set().query(*args, **kwargs)

    def on_index(self, *args, **kwargs):
        return self._get_query_set().on_index(*args, **kwargs)

    def geoanchor(self, *args, **kwargs):
        return self._get_query_set().geoanchor(*args, **kwargs)
-
class SphinxInstanceManager(object):
    """Collection of tools useful for objects which are in a Sphinx index."""

    def __init__(self, instance, index):
        self._instance = instance
        self._index = index

    def update(self, **kwargs):
        """
        Update this document's numeric attributes in the index in-place.
        kwargs maps attribute names to their new values.
        """
        # BUGFIX: the original asserted a (cond, msg) tuple -- always true --
        # and its message was copy-pasted from the Geo Anchoring check.
        assert sphinxapi.VER_COMMAND_SEARCH >= 0x113, \
            "You must upgrade sphinxapi to version 0.98 to use UpdateAttributes."
        # BUGFIX: referenced undefined `index` / `self.instance` and built a
        # malformed dict(pk, map(...)); UpdateAttributes wants
        # {docid: [values]}.
        # NOTE(review): assumes the sphinxapi wrapper exposes a module-level
        # UpdateAttributes(index, attrs, values) -- confirm against the API
        # version in apis.current.
        sphinxapi.UpdateAttributes(
            self._index,
            list(kwargs.keys()),
            {self._instance.pk: [to_sphinx(v) for v in kwargs.values()]},
        )
-
-
class SphinxSearch(object):
    """
    Descriptor that attaches Sphinx search support to a Django model.

    Class access yields a SphinxModelManager; instance access yields a
    SphinxInstanceManager bound to that row.
    """

    def __init__(self, index=None, **kwargs):
        """index -- Sphinx index name; defaults to the model's db_table."""
        self._kwargs = kwargs
        self._sphinx = None
        self._index = index
        self.model = None

    def __call__(self, index, **kwargs):
        warnings.warn('For non-model searches use a SphinxQuerySet instance.', DeprecationWarning)
        return SphinxQuerySet(index=index, **kwargs)

    def __get__(self, instance, model, **kwargs):
        if instance:
            # BUGFIX: referenced bare `index` (undefined NameError); the
            # descriptor's own index is what the instance manager needs.
            return SphinxInstanceManager(instance, self._index)
        return self._sphinx

    def contribute_to_class(self, model, name, **kwargs):
        # Called by Django's Model metaclass when the descriptor is assigned
        # on a model class body.
        if self._index is None:
            self._index = model._meta.db_table
        self._sphinx = SphinxModelManager(model, index=self._index, **self._kwargs)
        self.model = model
        # Track every index registered on the model (e.g. for reindexing).
        if getattr(model, '__sphinx_indexes__', None) is None:
            setattr(model, '__sphinx_indexes__', [self._index])
        else:
            model.__sphinx_indexes__.append(self._index)
        setattr(model, name, self._sphinx)
-
class SphinxRelationProxy(SphinxProxy):
    """Proxy for grouped matches; the count comes from the @count attribute."""

    def count(self):
        # Cap the grouped count the same way a plain queryset caps results.
        total = self._sphinx['attrs']['@count']
        return min(total, self._maxmatches)
-
class SphinxRelation(SphinxSearch):
    """
    Adds "related model" support to django-sphinx --
    http://code.google.com/p/django-sphinx/
    http://www.sphinxsearch.com/

    Example --

    class MySearch(SphinxSearch):
        myrelatedobject = SphinxRelation(RelatedModel)
        anotherone = SphinxRelation(AnotherModel)
        ...

    class MyModel(models.Model):
        search = MySearch('index')

    """

    def __init__(self, model=None, attr=None, sort='@count desc', **kwargs):
        """
        model -- related model whose rows the grouped matches map onto.
        attr  -- grouping attribute; defaults to the lowercased model name.
        sort  -- Sphinx group-sort clause.
        """
        if model:
            self._related_model = model
            self._related_attr = attr or model.__name__.lower()
            self._related_sort = sort
        super(SphinxRelation, self).__init__(**kwargs)

    def __get__(self, instance, instance_model, **kwargs):
        # Borrow the current query state from the owning search instance,
        # then group results by the related attribute.
        self._mode = instance._mode
        self._rankmode = instance._rankmode
        self._index = instance._index
        self._query = instance._query
        self._filters = instance._filters
        self._excludes = instance._excludes
        self._model = self._related_model
        self._groupby = self._related_attr
        self._groupsort = self._related_sort
        self._groupfunc = sphinxapi.SPH_GROUPBY_ATTR
        return self

    def _get_results(self):
        """Fetch grouped matches and wrap them in SphinxRelationProxy."""
        results = self._get_sphinx_results()
        if not results:
            return []
        if results['matches'] and self._model:
            ids = []
            for r in results['matches']:
                value = r['attrs']['@groupby']
                if isinstance(value, (int, long)):
                    ids.append(value)
                else:
                    # BUGFIX: `ids.extend()` was called with no argument,
                    # which is a TypeError; extend with the (iterable)
                    # group key instead.
                    ids.extend(value)
            qs = self._model.objects.filter(pk__in=set(ids))
            if self._select_related:
                qs = qs.select_related(*self._select_related_fields,
                                       **self._select_related_args)
            if self._extra:
                qs = qs.extra(**self._extra)
            queryset = dict([(o.id, o) for o in qs])
            self.__metadata = {
                'total': results['total'],
                'total_found': results['total_found'],
                'words': results['words'],
            }
            results = [SphinxRelationProxy(queryset[k['attrs']['@groupby']], k)
                       for k in results['matches']
                       if k['attrs']['@groupby'] in queryset]
        else:
            results = []
        self._result_cache = results
        return results

    def _sphinx(self):
        # NOTE(review): SphinxSearch (the base class) defines no _get_data()
        # or _select_related; this class appears to rely on _get_results()
        # having run via other machinery first -- confirm before relying on
        # accessing this property cold.
        if not self.__metadata:
            # We have to force execution if this is accessed beforehand
            self._get_data()
        return self.__metadata
    _sphinx = property(_sphinx)
\ No newline at end of file