Added branch 1.0.
authorMarek Stępniowski <marek@stepniowski.com>
Mon, 6 Oct 2008 22:37:34 +0000 (00:37 +0200)
committerMarek Stępniowski <marek@stepniowski.com>
Mon, 6 Oct 2008 22:37:34 +0000 (00:37 +0200)
21 files changed:
apps/catalogue/models.py
apps/djangosphinx/__init__.py [new file with mode: 0644]
apps/djangosphinx/apis/__init__.py [new file with mode: 0644]
apps/djangosphinx/apis/api263/__init__.py [new file with mode: 0644]
apps/djangosphinx/apis/api275/__init__.py [new file with mode: 0644]
apps/djangosphinx/apis/api275/templates/source-multiple.conf [new file with mode: 0644]
apps/djangosphinx/apis/api275/templates/source.conf [new file with mode: 0644]
apps/djangosphinx/apis/current.py [new file with mode: 0644]
apps/djangosphinx/constants.py [new file with mode: 0644]
apps/djangosphinx/management/__init__.py [new file with mode: 0644]
apps/djangosphinx/management/commands/__init__.py [new file with mode: 0644]
apps/djangosphinx/management/commands/generate_sphinx_config.py [new file with mode: 0644]
apps/djangosphinx/manager.py [new file with mode: 0644]
apps/djangosphinx/templates/index-multiple.conf [new file with mode: 0644]
apps/djangosphinx/templates/index.conf [new file with mode: 0644]
apps/djangosphinx/templates/source-multiple.conf [new file with mode: 0644]
apps/djangosphinx/templates/source.conf [new file with mode: 0644]
apps/djangosphinx/utils/__init__.py [new file with mode: 0644]
apps/djangosphinx/utils/config.py [new file with mode: 0644]
wolnelektury/settings.py
wolnelektury/sphinx.conf [new file with mode: 0644]

index 60d2785..9d072b4 100644 (file)
@@ -10,6 +10,7 @@ from django.core.urlresolvers import reverse
 
 from newtagging.models import TagBase
 from newtagging import managers
+import djangosphinx
 
 from librarian import html, dcparser
 
@@ -45,6 +46,8 @@ class Tag(TagBase):
     user = models.ForeignKey(User, blank=True, null=True)
     book_count = models.IntegerField(_('book count'), default=0, blank=False, null=False)
     
+    search = djangosphinx.SphinxSearch()
+    
     def has_description(self):
         return len(self.description) > 0
     has_description.short_description = _('description')
@@ -97,6 +100,8 @@ class Book(models.Model):
     objects = models.Manager()
     tagged = managers.ModelTaggedItemManager(Tag)
     tags = managers.TagDescriptor(Tag)
+
+    search = djangosphinx.SphinxSearch()
     
     @property
     def name(self):
@@ -259,6 +264,8 @@ class Fragment(models.Model):
     tagged = managers.ModelTaggedItemManager(Tag)
     tags = managers.TagDescriptor(Tag)
     
+    search = djangosphinx.SphinxSearch()
+    
     def short_html(self):
         if len(self._short_html):
             return mark_safe(self._short_html)
diff --git a/apps/djangosphinx/__init__.py b/apps/djangosphinx/__init__.py
new file mode 100644 (file)
index 0000000..176befc
--- /dev/null
@@ -0,0 +1,35 @@
+"""
+Sphinx Search Engine ORM for Django models
+http://www.sphinxsearch.com/
+Developed and maintained by David Cramer <dcramer@gmail.com>
+
+To add a search manager to your model:
+<code>
+    search = SphinxSearch([index=<string>, weight=[<int>,], mode=<string>])
+</code>
+
+To query the engine and retrieve objects:
+<code>
+    MyModel.search.query('my string')
+</code>
+
+To use multiple index support, you need to define a "content_type" field in your SQL
+clause. Each index also needs to have the exact same fields. The rules are almost identical
+to that of an SQL UNION query.
+<code>
+    SELECT id, name, 1 as content_type FROM model_myapp
+    SELECT id, name, 2 as content_type FROM model_myotherapp
+    search_results = SphinxSearch()
+    search_results.on_index('model_myapp model_myotherapp')
+    search_results.query('hello')
+</code>
+
+default settings.py values
+<code>
+    SPHINX_SERVER = 'localhost'
+    SPHINX_PORT = 3312
+</code>
+"""
+
+from manager import SearchError, ConnectionError, SphinxSearch
+from utils import generate_config_for_model, generate_config_for_models
\ No newline at end of file
diff --git a/apps/djangosphinx/apis/__init__.py b/apps/djangosphinx/apis/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/apps/djangosphinx/apis/api263/__init__.py b/apps/djangosphinx/apis/api263/__init__.py
new file mode 100644 (file)
index 0000000..d9a2d43
--- /dev/null
@@ -0,0 +1,577 @@
+#
+# $Id: sphinxapi.py,v 1.7 2007/04/01 21:38:13 shodan Exp $
+#
+# Python version of Sphinx searchd client (Python API)
+#
+# Copyright (c) 2006-2007, Andrew Aksyonoff
+# Copyright (c) 2006, Mike Osadnik
+# All rights reserved
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License. You should have
+# received a copy of the GPL license along with this program; if you
+# did not, you can find it at http://www.gnu.org/
+#
+
+import select
+import socket
+from struct import *
+
+
+# known searchd commands
+SEARCHD_COMMAND_SEARCH = 0
+SEARCHD_COMMAND_EXCERPT        = 1
+
+# current client-side command implementation versions
+VER_COMMAND_SEARCH             = 0x107
+VER_COMMAND_EXCERPT            = 0x100
+
+# known searchd status codes
+SEARCHD_OK                             = 0
+SEARCHD_ERROR                  = 1
+SEARCHD_RETRY                  = 2
+SEARCHD_WARNING                        = 3
+
+# known match modes
+SPH_MATCH_ALL                  = 0
+SPH_MATCH_ANY                  = 1
+SPH_MATCH_PHRASE               = 2
+SPH_MATCH_BOOLEAN              = 3
+SPH_MATCH_EXTENDED             = 4
+
+# known sort modes
+SPH_SORT_RELEVANCE             = 0
+SPH_SORT_ATTR_DESC             = 1
+SPH_SORT_ATTR_ASC              = 2
+SPH_SORT_TIME_SEGMENTS = 3
+SPH_SORT_EXTENDED              = 4
+
+# known attribute types
+SPH_ATTR_INTEGER               = 1
+SPH_ATTR_TIMESTAMP             = 2
+
+# known grouping functions
+SPH_GROUPBY_DAY                        = 0
+SPH_GROUPBY_WEEK               = 1
+SPH_GROUPBY_MONTH              = 2
+SPH_GROUPBY_YEAR               = 3
+SPH_GROUPBY_ATTR               = 4
+
+class SphinxClient:
+       _host           = 'localhost'                   # searchd host (default is "localhost")
+       _port           = 3312                                  # searchd port (default is 3312)
+       _offset         = 0                                             # how much records to seek from result-set start (default is 0)
+       _limit          = 20                                    # how much records to return from result-set starting at offset (default is 20)
+       _mode           = SPH_MATCH_ALL                 # query matching mode (default is SPH_MATCH_ALL)
+       _weights        = []                                    # per-field weights (default is 1 for all fields)
+       _sort           = SPH_SORT_RELEVANCE    # match sorting mode (default is SPH_SORT_RELEVANCE)
+       _sortby         = ''                                    # attribute to sort by (default is "")
+       _min_id         = 0                                             # min ID to match (default is 0)
+       _max_id         = 0xFFFFFFFF                    # max ID to match (default is UINT_MAX)
+       _filters        = []                                    # search filters
+       _groupby        = ''                                    # group-by attribute name
+       _groupfunc      = SPH_GROUPBY_DAY               # group-by function (to pre-process group-by attribute value with)
+       _groupsort      = '@group desc'                 # group-by sorting clause (to sort groups in result set with)
+       _maxmatches     = 1000                                  # max matches to retrieve
+       _error          = ''                                    # last error message
+       _warning        = ''                                    # last warning message
+
+
+       def __init__ (self):
+               """
+               create a new client object with its own (unshared) filter list
+               """
+               self._filters = []      # class-level list would be shared by all clients
+
+
+       def GetLastError (self):
+               """
+               get last error message (string)
+               """
+               return self._error
+
+
+       def GetLastWarning (self):
+               """
+               get last warning message (string)
+               """
+               return self._warning
+
+
+       def SetServer (self, host, port):
+               """
+               set searchd server
+               """
+               assert(isinstance(host, str))
+               assert(isinstance(port, int))
+
+               self._host = host
+               self._port = port
+
+
+       def _Connect (self):
+               """
+               connect to searchd server; returns the socket, or 0 on failure
+               """
+               sock = None     # stays None if socket.socket() itself raises
+               try:
+                       sock = socket.socket ( socket.AF_INET, socket.SOCK_STREAM )
+                       sock.connect ( ( self._host, self._port ) )
+               except socket.error, msg:
+                       if sock:
+                               sock.close()
+                       self._error = 'connection to %s:%s failed (%s)' % ( self._host, self._port, msg )
+                       return 0
+
+               v = unpack('>L', sock.recv(4))[0]       # unpack() returns a 1-tuple
+               if v<1:
+                       sock.close()
+                       self._error = 'expected searchd protocol version, got %s' % v
+                       return 0
+
+               sock.sendall(pack('>L', 1))     # all ok, send my version
+               return sock
+
+
+       def _GetResponse (self, sock, client_ver):
+               """
+               get and check response packet from searchd server
+               """
+               (status, ver, length) = unpack('>2HL', sock.recv(8))
+               response = ''
+               left = length
+               while left>0:
+                       chunk = sock.recv(left)
+                       if chunk:
+                               response += chunk
+                               left -= len(chunk)
+                       else:
+                               break
+
+               sock.close()
+
+               # check response
+               read = len(response)
+               if not response or read!=length:
+                       if length:
+                               self._error = 'failed to read searchd response (status=%s, ver=%s, len=%s, read=%s)' \
+                                       % (status, ver, length, read)
+                       else:
+                               self._error = 'received zero-sized searchd response'
+                       return None
+
+               # check status
+               if status==SEARCHD_WARNING:
+                       wend = 4 + unpack ( '>L', response[0:4] )[0]
+                       self._warning = response[4:wend]
+                       return response[wend:]
+
+               if status==SEARCHD_ERROR:
+                       self._error = 'searchd error: '+response[4:]
+                       return None
+
+               if status==SEARCHD_RETRY:
+                       self._error = 'temporary searchd error: '+response[4:]
+                       return None
+
+               if status!=SEARCHD_OK:
+                       self._error = 'unknown status code %d' % status
+                       return None
+
+               # check version
+               if ver<client_ver:
+                       self._warning = 'searchd command v.%d.%d older than client\'s v.%d.%d, some options might not work' \
+                               % (ver>>8, ver&0xff, client_ver>>8, client_ver&0xff)
+
+               return response
+
+
+       def SetLimits (self, offset, limit, maxmatches=0):
+               """
+               set match offset, count, and max number to retrieve
+               """
+               assert(isinstance(offset, int) and offset>=0)
+               assert(isinstance(limit, int) and limit>0)
+               assert(maxmatches>=0)
+               self._offset = offset
+               self._limit = limit
+               if maxmatches>0:
+                       self._maxmatches = maxmatches
+
+
+       def SetMatchMode (self, mode):
+               """
+               set match mode
+               """
+               assert(mode in [SPH_MATCH_ALL, SPH_MATCH_ANY, SPH_MATCH_PHRASE, SPH_MATCH_BOOLEAN, SPH_MATCH_EXTENDED])
+               self._mode = mode
+
+
+       def SetSortMode ( self, mode, clause='' ):
+               """
+               set sort mode
+               """
+               assert ( mode in [SPH_SORT_RELEVANCE, SPH_SORT_ATTR_DESC, SPH_SORT_ATTR_ASC, SPH_SORT_TIME_SEGMENTS, SPH_SORT_EXTENDED] )
+               assert ( isinstance ( clause, str ) )
+               self._sort = mode
+               self._sortby = clause
+
+
+       def SetWeights (self, weights): 
+               """
+               set per-field weights
+               """
+               assert(isinstance(weights, list))
+               for w in weights:
+                       assert(isinstance(w, int))
+               self._weights = weights
+
+
+       def SetIDRange (self, minid, maxid):
+               """
+               set IDs range to match
+               only match those records where document ID
+               is between minid and maxid (including minid and maxid)
+               """
+               assert(isinstance(minid, int))
+               assert(isinstance(maxid, int))
+               assert(minid<=maxid)
+               self._min_id = minid
+               self._max_id = maxid
+
+
+       def SetFilter ( self, attribute, values, exclude=0 ):
+               """
+               set values filter
+               only match those records where $attribute column values
+               are in specified set
+               """
+               assert(isinstance(attribute, str))
+               assert(isinstance(values, list))
+               assert(values)
+
+               values = map(int, values)
+
+               self._filters.append ( { 'attr':attribute, 'exclude':exclude, 'values':values } )
+
+
+       def SetFilterRange (self, attribute, min_, max_, exclude=0 ):
+               """
+               set range filter
+               only match those records where $attribute column value
+               is between $min and $max (including $min and $max)
+               """
+               assert(isinstance(attribute, str))
+               assert(isinstance(min_, int))
+               assert(isinstance(max_, int))
+               assert(min_<=max_)
+
+               self._filters.append ( { 'attr':attribute, 'exclude':exclude, 'min':min_, 'max':max_ } )
+
+
+       def SetGroupBy ( self, attribute, func, groupsort='@group desc' ):
+               """
+               set grouping attribute and function
+
+               in grouping mode, all matches are assigned to different groups
+               based on grouping function value.
+
+               each group keeps track of the total match count, and the best match
+               (in this group) according to current sorting function.
+
+               the final result set contains one best match per group, with
+               grouping function value and matches count attached.
+
+               groups in result set could be sorted by any sorting clause,
+               including both document attributes and the following special
+               internal Sphinx attributes:
+
+               - @id - match document ID;
+               - @weight, @rank, @relevance -  match weight;
+               - @group - groupby function value;
+               - @count - amount of matches in group.
+
+               the default mode is to sort by groupby value in descending order,
+               ie. by "@group desc".
+
+               "total_found" would contain total amount of matching groups over
+               the whole index.
+
+               WARNING: grouping is done in fixed memory and thus its results
+               are only approximate; so there might be more groups reported
+               in total_found than actually present. @count might also
+               be underestimated. 
+
+               for example, if sorting by relevance and grouping by "published"
+               attribute with SPH_GROUPBY_DAY function, then the result set will
+               contain one most relevant match per each day when there were any
+               matches published, with day number and per-day match count attached,
+               and sorted by day number in descending order (ie. recent days first).
+               """
+               assert(isinstance(attribute, str))
+               assert(func in [SPH_GROUPBY_DAY, SPH_GROUPBY_WEEK, SPH_GROUPBY_MONTH, SPH_GROUPBY_YEAR, SPH_GROUPBY_ATTR] )
+               assert(isinstance(groupsort, str))
+
+               self._groupby = attribute
+               self._groupfunc = func
+               self._groupsort = groupsort
+
+
+       def Query (self, query, index='*'):
+               """
+               connect to searchd server and run given search query
+
+               "query" is query string
+               "index" is index name to query, default is "*" which means to query all indexes
+
+               returns false on failure
+               returns hash which has the following keys on success:
+                       "matches"
+                               an array of found matches represented as ( "id", "weight", "attrs" ) hashes
+                       "total"
+                               total amount of matches retrieved (up to SPH_MAX_MATCHES, see sphinx.h)
+                       "total_found"
+                               total amount of matching documents in index
+                       "time"
+                               search time
+                       "words"
+                               an array of ( "word", "docs", "hits" ) hashes which contains
+                               docs and hits count for stemmed (!) query words
+               """
+               sock = self._Connect()
+               if not sock:
+                       return {}
+
+               # build request
+               req = [pack('>4L', self._offset, self._limit, self._mode, self._sort)]
+
+               req.append(pack('>L', len(self._sortby)))
+               req.append(self._sortby)
+
+               req.append(pack('>L', len(query)))
+               req.append(query)
+
+               req.append(pack('>L', len(self._weights)))
+               for w in self._weights:
+                       req.append(pack('>L', w))
+
+               req.append(pack('>L', len(index)))
+               req.append(index)
+               req.append(pack('>L', self._min_id))
+               req.append(pack('>L', self._max_id))
+
+               # filters
+               req.append ( pack ( '>L', len(self._filters) ) )
+               for f in self._filters:
+                       req.append ( pack ( '>L', len(f['attr']) ) )
+                       req.append ( f['attr'] )
+                       if ( 'values' in f ):
+                               req.append ( pack ( '>L', len(f['values']) ) )
+                               for v in f['values']:
+                                       req.append ( pack ( '>L', v ) )
+                       else:
+                               req.append ( pack ( '>3L', 0, f['min'], f['max'] ) )
+                       req.append ( pack ( '>L', f['exclude'] ) )
+
+               # group-by, max-matches, group-sort
+               req.append ( pack ( '>2L', self._groupfunc, len(self._groupby) ) )
+               req.append ( self._groupby )
+               req.append ( pack ( '>2L', self._maxmatches, len(self._groupsort) ) )
+               req.append ( self._groupsort )
+
+               # send query, get response
+               req = ''.join(req)
+
+               length = len(req)
+               req = pack('>2HL', SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, length)+req
+               sock.send(req)
+               response = self._GetResponse(sock, VER_COMMAND_SEARCH)
+               if not response:
+                       return {}
+
+               # parse response
+               result = {}
+               max_ = len(response)
+
+               # read schema
+               p = 0
+               fields = []
+               attrs = []
+
+               nfields = unpack('>L', response[p:p+4])[0]
+               p += 4
+               while nfields>0 and p<max_:
+                       nfields -= 1
+                       length = unpack('>L', response[p:p+4])[0]
+                       p += 4
+                       fields.append(response[p:p+length])
+                       p += length
+
+               result['fields'] = fields
+
+               nattrs = unpack('>L', response[p:p+4])[0]
+               p += 4
+               while nattrs>0 and p<max_:
+                       nattrs -= 1
+                       length = unpack('>L', response[p:p+4])[0]
+                       p += 4
+                       attr = response[p:p+length]
+                       p += length
+                       type_ = unpack('>L', response[p:p+4])[0]
+                       p += 4
+                       attrs.append([attr,type_])
+
+               result['attrs'] = attrs
+
+               # read match count
+               count = unpack('>L', response[p:p+4])[0]
+               p += 4
+
+               # read matches
+               result['matches'] = []
+               while count>0 and p<max_:
+                       count -= 1
+                       doc, weight = unpack('>2L', response[p:p+8])
+                       p += 8
+
+                       match = { 'id':doc, 'weight':weight, 'attrs':{} }
+                       for i in range(len(attrs)):
+                               match['attrs'][attrs[i][0]] = unpack('>L', response[p:p+4])[0]
+                               p += 4
+
+                       result['matches'].append ( match )
+
+               result['total'], result['total_found'], result['time'], words = \
+                       unpack('>4L', response[p:p+16])
+
+               result['time'] = '%.3f' % (result['time']/1000.0)
+               p += 16
+
+               result['words'] = []
+               while words>0:
+                       words -= 1
+                       length = unpack('>L', response[p:p+4])[0]
+                       p += 4
+                       word = response[p:p+length]
+                       p += length
+                       docs, hits = unpack('>2L', response[p:p+8])
+                       p += 8
+
+                       result['words'].append({'word':word, 'docs':docs, 'hits':hits})
+
+               sock.close()
+
+               return result   
+
+
+       def BuildExcerpts (self, docs, index, words, opts=None):
+               """
+               connect to searchd server and generate excerpts from given documents
+
+               "docs" is an array of strings which represent the documents' contents
+               "index" is a string specifying the index which settings will be used
+                       for stemming, lexing and case folding
+               "words" is a string which contains the words to highlight
+               "opts" is a hash which contains additional optional highlighting parameters:
+                       "before_match"
+                               a string to insert before a set of matching words, default is "<b>"
+                       "after_match"
+                               a string to insert after a set of matching words, default is "</b>"
+                       "chunk_separator"
+                               a string to insert between excerpts chunks, default is " ... "
+                       "limit"
+                               max excerpt size in symbols (codepoints), default is 256
+                       "around"
+                               how much words to highlight around each match, default is 5
+
+               returns false on failure
+               returns an array of string excerpts on success
+               """
+               if not opts:
+                       opts = {}
+
+               assert(isinstance(docs, list))
+               assert(isinstance(index, str))
+               assert(isinstance(words, str))
+               assert(isinstance(opts, dict))
+
+               sock = self._Connect()
+
+               if not sock:
+                       return []
+
+               # fixup options
+               opts.setdefault('before_match', '<b>')
+               opts.setdefault('after_match', '</b>')
+               opts.setdefault('chunk_separator', ' ... ')
+               opts.setdefault('limit', 256)
+               opts.setdefault('around', 5)
+
+               # build request
+               # v.1.0 req
+
+               # mode=0, flags=1 (remove spaces)
+               req = [pack('>2L', 0, 1)]
+
+               # req index
+               req.append(pack('>L', len(index)))
+               req.append(index)
+
+               # req words
+               req.append(pack('>L', len(words)))
+               req.append(words)
+
+               # options
+               req.append(pack('>L', len(opts['before_match'])))
+               req.append(opts['before_match'])
+
+               req.append(pack('>L', len(opts['after_match'])))
+               req.append(opts['after_match'])
+
+               req.append(pack('>L', len(opts['chunk_separator'])))
+               req.append(opts['chunk_separator'])
+
+               req.append(pack('>L', int(opts['limit'])))
+               req.append(pack('>L', int(opts['around'])))
+
+               # documents
+               req.append(pack('>L', len(docs)))
+               for doc in docs:
+                       assert(isinstance(doc, str))
+                       req.append(pack('>L', len(doc)))
+                       req.append(doc)
+
+               req = ''.join(req)
+
+               # send query, get response
+               length = len(req)
+
+               # add header
+               req = pack('>2HL', SEARCHD_COMMAND_EXCERPT, VER_COMMAND_EXCERPT, length)+req
+               wrote = sock.send(req)
+
+               response = self._GetResponse(sock, VER_COMMAND_EXCERPT )
+               if not response:
+                       return []
+
+               # parse response
+               pos = 0
+               res = []
+               rlen = len(response)
+
+               for i in range(len(docs)):
+                       length = unpack('>L', response[pos:pos+4])[0]
+                       pos += 4
+
+                       if pos+length > rlen:
+                               self._error = 'incomplete reply'
+                               return []
+
+                       res.append(response[pos:pos+length])
+                       pos += length
+
+               return res
+
+#
+# $Id: sphinxapi.py,v 1.7 2007/04/01 21:38:13 shodan Exp $
+#
diff --git a/apps/djangosphinx/apis/api275/__init__.py b/apps/djangosphinx/apis/api275/__init__.py
new file mode 100644 (file)
index 0000000..236a5a2
--- /dev/null
@@ -0,0 +1,855 @@
+#
+# $Id: sphinxapi.py 1216 2008-03-14 23:25:39Z shodan $
+#
+# Python version of Sphinx searchd client (Python API)
+#
+# Copyright (c) 2006-2008, Andrew Aksyonoff
+# Copyright (c) 2006, Mike Osadnik
+# All rights reserved
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License. You should have
+# received a copy of the GPL license along with this program; if you
+# did not, you can find it at http://www.gnu.org/
+#
+
+import sys
+import select
+import socket
+from struct import *
+
+
+# known searchd commands
+# (sent as the first 16-bit field of every request header)
+SEARCHD_COMMAND_SEARCH = 0
+SEARCHD_COMMAND_EXCERPT        = 1
+SEARCHD_COMMAND_UPDATE = 2
+SEARCHD_COMMAND_KEYWORDS= 3
+
+# current client-side command implementation versions
+# (major version in the high byte, minor version in the low byte)
+VER_COMMAND_SEARCH             = 0x113
+VER_COMMAND_EXCERPT            = 0x100
+VER_COMMAND_UPDATE             = 0x101
+VER_COMMAND_KEYWORDS   = 0x100
+
+# known searchd status codes
+# (first 16-bit field of the response header; also the per-query status
+# inside a search response)
+SEARCHD_OK                             = 0
+SEARCHD_ERROR                  = 1
+SEARCHD_RETRY                  = 2
+SEARCHD_WARNING                        = 3
+
+# known match modes
+SPH_MATCH_ALL                  = 0
+SPH_MATCH_ANY                  = 1
+SPH_MATCH_PHRASE               = 2
+SPH_MATCH_BOOLEAN              = 3
+SPH_MATCH_EXTENDED             = 4
+SPH_MATCH_FULLSCAN             = 5
+SPH_MATCH_EXTENDED2            = 6
+
+# known ranking modes (extended2 mode only)
+SPH_RANK_PROXIMITY_BM25        = 0 # default mode, phrase proximity major factor and BM25 minor one
+SPH_RANK_BM25                  = 1 # statistical mode, BM25 ranking only (faster but worse quality)
+SPH_RANK_NONE                  = 2 # no ranking, all matches get a weight of 1
+SPH_RANK_WORDCOUNT             = 3 # simple word-count weighting, rank is a weighted sum of per-field keyword occurrence counts
+
+# known sort modes
+SPH_SORT_RELEVANCE             = 0
+SPH_SORT_ATTR_DESC             = 1
+SPH_SORT_ATTR_ASC              = 2
+SPH_SORT_TIME_SEGMENTS = 3
+SPH_SORT_EXTENDED              = 4
+SPH_SORT_EXPR                  = 5
+
+# known filter types
+SPH_FILTER_VALUES              = 0
+SPH_FILTER_RANGE               = 1
+SPH_FILTER_FLOATRANGE  = 2
+
+# known attribute types
+SPH_ATTR_NONE                  = 0
+SPH_ATTR_INTEGER               = 1
+SPH_ATTR_TIMESTAMP             = 2
+SPH_ATTR_ORDINAL               = 3
+SPH_ATTR_BOOL                  = 4
+SPH_ATTR_FLOAT                 = 5
+# flag bit, OR-ed with a base attribute type (e.g. SPH_ATTR_MULTI | SPH_ATTR_INTEGER)
+SPH_ATTR_MULTI                 = 0X40000000L
+
+# known grouping functions
+SPH_GROUPBY_DAY                        = 0
+SPH_GROUPBY_WEEK               = 1
+SPH_GROUPBY_MONTH              = 2
+SPH_GROUPBY_YEAR               = 3
+SPH_GROUPBY_ATTR               = 4
+
+
+class SphinxClient:
+       def __init__ (self):
+               """
+               Create a new client object, and fill defaults.
+
+               No connection is made here; every attribute below is plain state that
+               the Set*() methods overwrite and that AddQuery() serializes into a request.
+               """
+               self._host                      = 'localhost'                                   # searchd host (default is "localhost")
+               self._port                      = 3312                                                  # searchd port (default is 3312)
+               self._offset            = 0                                                             # how many records to skip from result-set start (default is 0)
+               self._limit                     = 20                                                    # how many records to return from result-set starting at offset (default is 20)
+               self._mode                      = SPH_MATCH_ALL                                 # query matching mode (default is SPH_MATCH_ALL)
+               self._weights           = []                                                    # per-field weights (default is 1 for all fields)
+               self._sort                      = SPH_SORT_RELEVANCE                    # match sorting mode (default is SPH_SORT_RELEVANCE)
+               self._sortby            = ''                                                    # attribute to sort by (default is "")
+               self._min_id            = 0                                                             # min ID to match (default is 0)
+               self._max_id            = 0xFFFFFFFF                                    # max ID to match (default is UINT_MAX)
+               self._filters           = []                                                    # search filters
+               self._groupby           = ''                                                    # group-by attribute name
+               self._groupfunc         = SPH_GROUPBY_DAY                               # group-by function (to pre-process group-by attribute value with)
+               self._groupsort         = '@group desc'                                 # group-by sorting clause (to sort groups in result set with)
+               self._groupdistinct     = ''                                                    # group-by count-distinct attribute
+               self._maxmatches        = 1000                                                  # max matches to retrieve
+               self._cutoff            = 0                                                             # cutoff to stop searching at
+               self._retrycount        = 0                                                             # distributed retry count
+               self._retrydelay        = 0                                                             # distributed retry delay
+               self._anchor            = {}                                                    # geographical anchor point
+               self._indexweights      = {}                                                    # per-index weights
+               self._ranker            = SPH_RANK_PROXIMITY_BM25               # ranking mode
+               self._maxquerytime      = 0                                                             # max query time, milliseconds (default is 0, do not limit)
+               self._fieldweights      = {}                                                    # per-field-name weights
+               self._error                     = ''                                                    # last error message
+               self._warning           = ''                                                    # last warning message
+               self._reqs                      = []                                                    # requests array for multi-query
+               return
+
+
+       def GetLastError (self):
+               """
+               Get last error message (string).
+               """
+               return self._error
+
+
+       def GetLastWarning (self):
+               """
+               Get last warning message (string).
+               """
+               return self._warning
+
+
+       def SetServer (self, host, port):
+               """
+               Set searchd server host and port.
+               """
+               assert(isinstance(host, str))
+               assert(isinstance(port, int))
+               self._host = host
+               self._port = port
+
+
+       def _Connect (self):
+               """
+               INTERNAL METHOD, DO NOT CALL. Connects to searchd server.
+               """
+               try:
+                       sock = socket.socket ( socket.AF_INET, socket.SOCK_STREAM )
+                       sock.connect ( ( self._host, self._port ) )
+               except socket.error, msg:
+                       if sock:
+                               sock.close()
+                       self._error = 'connection to %s:%s failed (%s)' % ( self._host, self._port, msg )
+                       return 0
+
+               v = unpack('>L', sock.recv(4))
+               if v<1:
+                       sock.close()
+                       self._error = 'expected searchd protocol version, got %s' % v
+                       return 0
+
+               # all ok, send my version
+               sock.send(pack('>L', 1))
+               return sock
+
+
+       def _GetResponse (self, sock, client_ver):
+               """
+               INTERNAL METHOD, DO NOT CALL. Gets and checks response packet from searchd server.
+               """
+               (status, ver, length) = unpack('>2HL', sock.recv(8))
+               response = ''
+               left = length
+               while left>0:
+                       chunk = sock.recv(left)
+                       if chunk:
+                               response += chunk
+                               left -= len(chunk)
+                       else:
+                               break
+
+               sock.close()
+
+               # check response
+               read = len(response)
+               if not response or read!=length:
+                       if length:
+                               self._error = 'failed to read searchd response (status=%s, ver=%s, len=%s, read=%s)' \
+                                       % (status, ver, length, read)
+                       else:
+                               self._error = 'received zero-sized searchd response'
+                       return None
+
+               # check status
+               if status==SEARCHD_WARNING:
+                       wend = 4 + unpack ( '>L', response[0:4] )[0]
+                       self._warning = response[4:wend]
+                       return response[wend:]
+
+               if status==SEARCHD_ERROR:
+                       self._error = 'searchd error: '+response[4:]
+                       return None
+
+               if status==SEARCHD_RETRY:
+                       self._error = 'temporary searchd error: '+response[4:]
+                       return None
+
+               if status!=SEARCHD_OK:
+                       self._error = 'unknown status code %d' % status
+                       return None
+
+               # check version
+               if ver<client_ver:
+                       self._warning = 'searchd command v.%d.%d older than client\'s v.%d.%d, some options might not work' \
+                               % (ver>>8, ver&0xff, client_ver>>8, client_ver&0xff)
+
+               return response
+
+
+       def SetLimits (self, offset, limit, maxmatches=0, cutoff=0):
+               """
+               Set offset and count into result set, and optionally set max-matches and cutoff limits.
+               """
+               assert(isinstance(offset, int) and offset>=0)
+               assert(isinstance(limit, int) and limit>0)
+               assert(maxmatches>=0)
+               self._offset = offset
+               self._limit = limit
+               if maxmatches>0:
+                       self._maxmatches = maxmatches
+               if cutoff>=0:
+                       self._cutoff = cutoff
+
+
+       def SetMaxQueryTime (self, maxquerytime):
+               """
+               Set maximum query time, in milliseconds, per-index. 0 means 'do not limit'.
+               """
+               assert(isinstance(maxquerytime,int) and maxquerytime>0)
+               self._maxquerytime = maxquerytime
+
+
+       def SetMatchMode (self, mode):
+               """
+               Set matching mode.
+               """
+               assert(mode in [SPH_MATCH_ALL, SPH_MATCH_ANY, SPH_MATCH_PHRASE, SPH_MATCH_BOOLEAN, SPH_MATCH_EXTENDED, SPH_MATCH_FULLSCAN, SPH_MATCH_EXTENDED2])
+               self._mode = mode
+
+
+       def SetRankingMode (self, ranker):
+               """
+               Set ranking mode.
+               """
+               assert(ranker in [SPH_RANK_PROXIMITY_BM25, SPH_RANK_BM25, SPH_RANK_NONE, SPH_RANK_WORDCOUNT])
+               self._ranker = ranker
+
+
+       def SetSortMode ( self, mode, clause='' ):
+               """
+               Set sorting mode.
+               """
+               assert ( mode in [SPH_SORT_RELEVANCE, SPH_SORT_ATTR_DESC, SPH_SORT_ATTR_ASC, SPH_SORT_TIME_SEGMENTS, SPH_SORT_EXTENDED, SPH_SORT_EXPR] )
+               assert ( isinstance ( clause, str ) )
+               self._sort = mode
+               self._sortby = clause
+
+
+       def SetWeights (self, weights): 
+               """
+               Set per-field weights.
+               WARNING, DEPRECATED; do not use it! use SetFieldWeights() instead
+               """
+               assert(isinstance(weights, list))
+               for w in weights:
+                       assert(isinstance(w, int))
+               self._weights = weights
+
+
+       def SetFieldWeights (self, weights):
+               """
+               Bind per-field weights by name; expects (name,field_weight) dictionary as argument.
+               """
+               assert(isinstance(weights,dict))
+               for key,val in weights.items():
+                       assert(isinstance(key,str))
+                       assert(isinstance(val,int))
+               self._fieldweights = weights
+
+
+       def SetIndexWeights (self, weights):
+               """
+               Bind per-index weights by name; expects (name,index_weight) dictionary as argument.
+               """
+               assert(isinstance(weights,dict))
+               for key,val in weights.items():
+                       assert(isinstance(key,str))
+                       assert(isinstance(val,int))
+               self._indexweights = weights
+
+
+       def SetIDRange (self, minid, maxid):
+               """
+               Set IDs range to match.
+               Only match records if document ID is beetwen $min and $max (inclusive).
+               """
+               assert(isinstance(minid, int))
+               assert(isinstance(maxid, int))
+               assert(minid<=maxid)
+               self._min_id = minid
+               self._max_id = maxid
+
+
+       def SetFilter ( self, attribute, values, exclude=0 ):
+               """
+               Set values set filter.
+               Only match records where 'attribute' value is in given 'values' set.
+               """
+               assert(isinstance(attribute, str))
+               assert(isinstance(values, list))
+               assert(values)
+
+               for value in values:
+                       assert(isinstance(value, int))
+
+               self._filters.append ( { 'type':SPH_FILTER_VALUES, 'attr':attribute, 'exclude':exclude, 'values':values } )
+
+
+       def SetFilterRange (self, attribute, min_, max_, exclude=0 ):
+               """
+               Set range filter.
+               Only match records if 'attribute' value is beetwen 'min_' and 'max_' (inclusive).
+               """
+               assert(isinstance(attribute, str))
+               assert(isinstance(min_, int))
+               assert(isinstance(max_, int))
+               assert(min_<=max_)
+
+               self._filters.append ( { 'type':SPH_FILTER_RANGE, 'attr':attribute, 'exclude':exclude, 'min':min_, 'max':max_ } )
+
+
+       def SetFilterFloatRange (self, attribute, min_, max_, exclude=0 ):
+               assert(isinstance(attribute,str))
+               assert(isinstance(min_,float))
+               assert(isinstance(max_,float))
+               assert(min_ <= max_)
+               self._filters.append ( {'type':SPH_FILTER_FLOATRANGE, 'attr':attribute, 'exclude':exclude, 'min':min_, 'max':max_} ) 
+
+
+       def SetGeoAnchor (self, attrlat, attrlong, latitude, longitude):
+               assert(isinstance(attrlat,str))
+               assert(isinstance(attrlong,str))
+               assert(isinstance(latitude,float))
+               assert(isinstance(longitude,float))
+               self._anchor['attrlat'] = attrlat
+               self._anchor['attrlong'] = attrlong
+               self._anchor['lat'] = latitude
+               self._anchor['long'] = longitude
+
+
+       def SetGroupBy ( self, attribute, func, groupsort='@group desc' ):
+               """
+               Set grouping attribute and function.
+               """
+               assert(isinstance(attribute, str))
+               assert(func in [SPH_GROUPBY_DAY, SPH_GROUPBY_WEEK, SPH_GROUPBY_MONTH, SPH_GROUPBY_YEAR, SPH_GROUPBY_ATTR] )
+               assert(isinstance(groupsort, str))
+
+               self._groupby = attribute
+               self._groupfunc = func
+               self._groupsort = groupsort
+
+
+       def SetGroupDistinct (self, attribute):
+               assert(isinstance(attribute,str))
+               self._groupdistinct = attribute
+
+
+       def SetRetries (self, count, delay=0):
+               assert(isinstance(count,int) and count>=0)
+               assert(isinstance(delay,int) and delay>=0)
+               self._retrycount = count
+               self._retrydelay = delay
+
+
+       def ResetFilters (self):
+               """
+               Clear all filters (for multi-queries).
+               """
+               self._filters = []
+               self._anchor = {}
+
+
+       def ResetGroupBy (self):
+               """
+               Clear groupby settings (for multi-queries).
+               """
+               self._groupby = ''
+               self._groupfunc = SPH_GROUPBY_DAY
+               self._groupsort = '@group desc'
+               self._groupdistinct = ''
+
+
+       def Query (self, query, index='*', comment=''):
+               """
+               Connect to searchd server and run given search query.
+               Returns None on failure; result set hash on success (see documentation for details).
+               """
+               assert(len(self._reqs)==0)
+               self.AddQuery(query,index,comment)
+               results = self.RunQueries()
+
+               if not results or len(results)==0:
+                       return None
+               self._error = results[0]['error']
+               self._warning = results[0]['warning']
+               if results[0]['status'] == SEARCHD_ERROR:
+                       return None
+               return results[0]
+
+
+       def AddQuery (self, query, index='*', comment=''):
+               """
+               Add query to batch.
+
+               Serializes the current client settings together with 'query', 'index'
+               and 'comment' into one binary request and appends it to self._reqs;
+               nothing is sent until RunQueries() is called.
+               Integers are packed as big-endian unsigned 32-bit values and strings
+               as a 32-bit length followed by the raw bytes.
+               A unicode 'query' is encoded as UTF-8 first.
+               """
+               # build request
+               req = [pack('>5L', self._offset, self._limit, self._mode, self._ranker, self._sort)]
+               req.append(pack('>L', len(self._sortby)))
+               req.append(self._sortby)
+
+               if isinstance(query,unicode):
+                       query = query.encode('utf-8')
+               assert(isinstance(query,str))
+
+               req.append(pack('>L', len(query)))
+               req.append(query)
+
+               # deprecated positional per-field weights
+               req.append(pack('>L', len(self._weights)))
+               for w in self._weights:
+                       req.append(pack('>L', w))
+               # NOTE(review): unlike 'query', 'index' and 'comment' are not encoded here;
+               # presumably callers must pass byte strings -- confirm
+               req.append(pack('>L', len(index)))
+               req.append(index)
+               req.append(pack('>L',0)) # id64 range marker FIXME! IMPLEMENT!
+               req.append(pack('>L', self._min_id))
+               req.append(pack('>L', self._max_id))
+               
+               # filters
+               req.append ( pack ( '>L', len(self._filters) ) )
+               for f in self._filters:
+                       req.append ( pack ( '>L', len(f['attr'])) + f['attr'])
+                       filtertype = f['type']
+                       req.append ( pack ( '>L', filtertype))
+                       # the payload depends on the filter type: value list, int range or float range
+                       if filtertype == SPH_FILTER_VALUES:
+                               req.append ( pack ('>L', len(f['values'])))
+                               for val in f['values']:
+                                       req.append ( pack ('>L', val))
+                       elif filtertype == SPH_FILTER_RANGE:
+                               req.append ( pack ('>2L', f['min'], f['max']))
+                       elif filtertype == SPH_FILTER_FLOATRANGE:
+                               req.append ( pack ('>2f', f['min'], f['max']))
+                       req.append ( pack ( '>L', f['exclude'] ) )
+
+               # group-by, max-matches, group-sort
+               req.append ( pack ( '>2L', self._groupfunc, len(self._groupby) ) )
+               req.append ( self._groupby )
+               req.append ( pack ( '>2L', self._maxmatches, len(self._groupsort) ) )
+               req.append ( self._groupsort )
+               req.append ( pack ( '>LLL', self._cutoff, self._retrycount, self._retrydelay)) 
+               req.append ( pack ( '>L', len(self._groupdistinct)))
+               req.append ( self._groupdistinct)
+
+               # anchor point (a 0/1 presence flag, then names and coordinates when set)
+               if len(self._anchor) == 0:
+                       req.append ( pack ('>L', 0))
+               else:
+                       attrlat, attrlong = self._anchor['attrlat'], self._anchor['attrlong']
+                       latitude, longitude = self._anchor['lat'], self._anchor['long']
+                       req.append ( pack ('>L', 1))
+                       req.append ( pack ('>L', len(attrlat)) + attrlat)
+                       req.append ( pack ('>L', len(attrlong)) + attrlong)
+                       req.append ( pack ('>f', latitude) + pack ('>f', longitude))
+
+               # per-index weights
+               req.append ( pack ('>L',len(self._indexweights)))
+               for indx,weight in self._indexweights.items():
+                       req.append ( pack ('>L',len(indx)) + indx + pack ('>L',weight))
+
+               # max query time
+               req.append ( pack ('>L', self._maxquerytime) ) 
+
+               # per-field weights
+               req.append ( pack ('>L',len(self._fieldweights) ) )
+               for field,weight in self._fieldweights.items():
+                       req.append ( pack ('>L',len(field)) + field + pack ('>L',weight) )
+
+               # comment
+               req.append ( pack('>L',len(comment)) + comment )
+
+               # queue the serialized request; RunQueries() sends the whole batch
+               req = ''.join(req)
+
+               self._reqs.append(req)
+               return
+
+
+       def RunQueries (self):
+               """
+               Run queries batch.
+               Returns None on network IO failure; or an array of result set hashes on success.
+               """
+               if len(self._reqs)==0:
+                       self._error = 'no queries defined, issue AddQuery() first'
+                       return None
+
+               sock = self._Connect()
+               if not sock:
+                       return None
+
+               req = ''.join(self._reqs)
+               length = len(req)+4
+               req = pack('>HHLL', SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, length, len(self._reqs))+req
+               sock.send(req)
+
+               response = self._GetResponse(sock, VER_COMMAND_SEARCH)
+               if not response:
+                       return None
+
+               nreqs = len(self._reqs)
+
+               # parse response
+               max_ = len(response)
+               p = 0
+
+               results = []
+               for i in range(0,nreqs,1):
+                       result = {}
+                       result['error'] = ''
+                       result['warning'] = ''
+                       status = unpack('>L', response[p:p+4])[0]
+                       p += 4
+                       result['status'] = status
+                       if status != SEARCHD_OK:
+                               length = unpack('>L', response[p:p+4])[0]
+                               p += 4
+                               message = response[p:p+length]
+                               p += length
+
+                               if status == SEARCHD_WARNING:
+                                       result['warning'] = message
+                               else:
+                                       result['error'] = message
+                                       continue
+
+                       # read schema
+                       fields = []
+                       attrs = []
+
+                       nfields = unpack('>L', response[p:p+4])[0]
+                       p += 4
+                       while nfields>0 and p<max_:
+                               nfields -= 1
+                               length = unpack('>L', response[p:p+4])[0]
+                               p += 4
+                               fields.append(response[p:p+length])
+                               p += length
+
+                       result['fields'] = fields
+
+                       nattrs = unpack('>L', response[p:p+4])[0]
+                       p += 4
+                       while nattrs>0 and p<max_:
+                               nattrs -= 1
+                               length = unpack('>L', response[p:p+4])[0]
+                               p += 4
+                               attr = response[p:p+length]
+                               p += length
+                               type_ = unpack('>L', response[p:p+4])[0]
+                               p += 4
+                               attrs.append([attr,type_])
+
+                       result['attrs'] = attrs
+
+                       # read match count
+                       count = unpack('>L', response[p:p+4])[0]
+                       p += 4
+                       id64 = unpack('>L', response[p:p+4])[0]
+                       p += 4
+               
+                       # read matches
+                       result['matches'] = []
+                       while count>0 and p<max_:
+                               count -= 1
+                               if id64:
+                                       dochi, doc, weight = unpack('>3L', response[p:p+12])
+                                       doc += (dochi<<32)
+                                       p += 12
+                               else:
+                                       doc, weight = unpack('>2L', response[p:p+8])
+                                       p += 8
+
+                               match = { 'id':doc, 'weight':weight, 'attrs':{} }
+                               for i in range(len(attrs)):
+                                       if attrs[i][1] == SPH_ATTR_FLOAT:
+                                               match['attrs'][attrs[i][0]] = unpack('>f', response[p:p+4])[0]
+                                       elif attrs[i][1] == (SPH_ATTR_MULTI | SPH_ATTR_INTEGER):
+                                               match['attrs'][attrs[i][0]] = []
+                                               nvals = unpack('>L', response[p:p+4])[0]
+                                               p += 4
+                                               for n in range(0,nvals,1):
+                                                       match['attrs'][attrs[i][0]].append(unpack('>L', response[p:p+4])[0])
+                                                       p += 4
+                                               p -= 4
+                                       else:
+                                               match['attrs'][attrs[i][0]] = unpack('>L', response[p:p+4])[0]
+                                       p += 4
+
+                               result['matches'].append ( match )
+
+                       result['total'], result['total_found'], result['time'], words = unpack('>4L', response[p:p+16])
+
+                       result['time'] = '%.3f' % (result['time']/1000.0)
+                       p += 16
+
+                       result['words'] = []
+                       while words>0:
+                               words -= 1
+                               length = unpack('>L', response[p:p+4])[0]
+                               p += 4
+                               word = response[p:p+length]
+                               p += length
+                               docs, hits = unpack('>2L', response[p:p+8])
+                               p += 8
+
+                               result['words'].append({'word':word, 'docs':docs, 'hits':hits})
+
+                       results.append(result)
+
+               self._reqs = []
+               sock.close()
+               return results
+       
+
+       def BuildExcerpts (self, docs, index, words, opts=None):
+               """
+               Connect to searchd server and generate exceprts from given documents.
+               """
+               if not opts:
+                       opts = {}
+               if isinstance(words,unicode):
+                       words = words.encode('utf-8')
+
+               assert(isinstance(docs, list))
+               assert(isinstance(index, str))
+               assert(isinstance(words, str))
+               assert(isinstance(opts, dict))
+
+               sock = self._Connect()
+
+               if not sock:
+                       return None
+
+               # fixup options
+               opts.setdefault('before_match', '<b>')
+               opts.setdefault('after_match', '</b>')
+               opts.setdefault('chunk_separator', ' ... ')
+               opts.setdefault('limit', 256)
+               opts.setdefault('around', 5)
+
+               # build request
+               # v.1.0 req
+
+               # mode=0, flags=1 (remove spaces)
+               req = [pack('>2L', 0, 1)]
+
+               # req index
+               req.append(pack('>L', len(index)))
+               req.append(index)
+
+               # req words
+               req.append(pack('>L', len(words)))
+               req.append(words)
+
+               # options
+               req.append(pack('>L', len(opts['before_match'])))
+               req.append(opts['before_match'])
+
+               req.append(pack('>L', len(opts['after_match'])))
+               req.append(opts['after_match'])
+
+               req.append(pack('>L', len(opts['chunk_separator'])))
+               req.append(opts['chunk_separator'])
+
+               req.append(pack('>L', int(opts['limit'])))
+               req.append(pack('>L', int(opts['around'])))
+
+               # documents
+               req.append(pack('>L', len(docs)))
+               for doc in docs:
+                       if isinstance(doc,unicode):
+                               doc = doc.encode('utf-8')
+                       assert(isinstance(doc, str))
+                       req.append(pack('>L', len(doc)))
+                       req.append(doc)
+
+               req = ''.join(req)
+
+               # send query, get response
+               length = len(req)
+
+               # add header
+               req = pack('>2HL', SEARCHD_COMMAND_EXCERPT, VER_COMMAND_EXCERPT, length)+req
+               wrote = sock.send(req)
+
+               response = self._GetResponse(sock, VER_COMMAND_EXCERPT )
+               if not response:
+                       return []
+
+               # parse response
+               pos = 0
+               res = []
+               rlen = len(response)
+
+               for i in range(len(docs)):
+                       length = unpack('>L', response[pos:pos+4])[0]
+                       pos += 4
+
+                       if pos+length > rlen:
+                               self._error = 'incomplete reply'
+                               return []
+
+                       res.append(response[pos:pos+length])
+                       pos += length
+
+               return res
+
+
+       def UpdateAttributes ( self, index, attrs, values ):
+               """
+               Update given attribute values on given documents in given indexes.
+               Returns amount of updated documents (0 or more) on success, or -1 on failure.
+
+               'attrs' must be a list of strings.
+               'values' must be a dict with int key (document ID) and list of int values (new attribute values).
+
+               Example:
+                       res = cl.UpdateAttributes ( 'test1', [ 'group_id', 'date_added' ], { 2:[123,1000000000], 4:[456,1234567890] } )
+               """
+               assert ( isinstance ( index, str ) )
+               assert ( isinstance ( attrs, list ) )
+               assert ( isinstance ( values, dict ) )
+               for attr in attrs:
+                       assert ( isinstance ( attr, str ) )
+               for docid, entry in values.items():
+                       assert ( isinstance ( docid, int ) )
+                       assert ( isinstance ( entry, list ) )
+                       assert ( len(attrs)==len(entry) )
+                       for val in entry:
+                               assert ( isinstance ( val, int ) )
+
+               # build request
+               req = [ pack('>L',len(index)), index ]
+
+               req.append ( pack('>L',len(attrs)) )
+               for attr in attrs:
+                       req.append ( pack('>L',len(attr)) + attr )
+
+               req.append ( pack('>L',len(values)) )
+               for docid, entry in values.items():
+                       req.append ( pack('>q',docid) )
+                       for val in entry:
+                               req.append ( pack('>L',val) )
+
+               # connect, send query, get response
+               sock = self._Connect()
+               if not sock:
+                       return None
+
+               req = ''.join(req)
+               length = len(req)
+               req = pack ( '>2HL', SEARCHD_COMMAND_UPDATE, VER_COMMAND_UPDATE, length ) + req
+               wrote = sock.send ( req )
+
+               response = self._GetResponse ( sock, VER_COMMAND_UPDATE )
+               if not response:
+                       return -1
+
+               # parse response
+               updated = unpack ( '>L', response[0:4] )[0]
+               return updated
+
+
+       def BuildKeywords ( self, query, index, hits ):
+               """
+               Connect to searchd server, and generate keywords list for a given query.
+               Returns None on failure, or a list of keywords on success.
+               """
+               assert ( isinstance ( query, str ) )
+               assert ( isinstance ( index, str ) )
+               assert ( isinstance ( hits, int ) )
+
+               # build request
+               req = [ pack ( '>L', len(query) ) + query ]
+               req.append ( pack ( '>L', len(index) ) + index )
+               req.append ( pack ( '>L', hits ) )
+
+               # connect, send query, get response
+               sock = self._Connect()
+               if not sock:
+                       return None
+
+               req = ''.join(req)
+               length = len(req)
+               req = pack ( '>2HL', SEARCHD_COMMAND_KEYWORDS, VER_COMMAND_KEYWORDS, length ) + req
+               wrote = sock.send ( req )
+
+               response = self._GetResponse ( sock, VER_COMMAND_KEYWORDS )
+               if not response:
+                       return None
+
+               # parse response
+               res = []
+
+               nwords = unpack ( '>L', response[0:4] )[0]
+               p = 4
+               max_ = len(response)
+
+               while nwords>0 and p<max_:
+                       nwords -= 1
+
+                       length = unpack ( '>L', response[p:p+4] )[0]
+                       p += 4
+                       tokenized = response[p:p+length]
+                       p += length
+
+                       length = unpack ( '>L', response[p:p+4] )[0]
+                       p += 4
+                       normalized = response[p:p+length]
+                       p += length
+
+                       entry = { 'tokenized':tokenized, 'normalized':normalized }
+                       if hits:
+                               entry['docs'], entry['hits'] = unpack ( '>2L', response[p:p+8] )
+                               p += 8
+
+                       res.append ( entry )
+
+               if nwords>0 or p>max_:
+                       self._error = 'incomplete reply'
+                       return None
+
+               return res
+#
+# $Id: sphinxapi.py 1216 2008-03-14 23:25:39Z shodan $
+#
\ No newline at end of file
diff --git a/apps/djangosphinx/apis/api275/templates/source-multiple.conf b/apps/djangosphinx/apis/api275/templates/source-multiple.conf
new file mode 100644 (file)
index 0000000..506e6f1
--- /dev/null
@@ -0,0 +1,36 @@
+source {{ source_name }}
+{
+    type                = {{ database_engine }}
+    strip_html          = 0
+    index_html_attrs    =
+    sql_host            = {{ database_host }}
+    sql_user            = {{ database_user }}
+    sql_pass            = {{ database_password }}
+    sql_db              = {{ database_name }}
+    sql_port            = {{ database_port }}
+    log                 = {{ log_file }}
+
+    sql_query_pre       =
+    sql_query_post      =
+    # One SELECT per table, chained with UNION; the extra content_type column
+    # records which table a row came from. No UNION follows the last SELECT.
+    sql_query           = \
+{% for table_name, content_type in tables %}
+        SELECT {{ field_names|join:", " }}, {{ content_type.id }} as content_type \
+        FROM `{{ table_name }}`{% if not forloop.last %} UNION \{% endif %}
+{% endfor %}
+{% if group_columns %}
+    # ForeignKey's
+{% for field_name in group_columns %}    sql_attr_uint       = {{ field_name }}
+{% endfor %}{% endif %}
+{% if date_columns %}
+    # DateField's and DateTimeField's
+{% for field_name in date_columns %}    sql_attr_timestamp   = {{ field_name }}
+{% endfor %}{% endif %}
+{% if bool_columns %}
+    # BooleanField's
+{% for field_name in bool_columns %}    sql_attr_bool        = {{ field_name }}
+{% endfor %}{% endif %}
+{% if float_columns %}
+    # FloatField's and DecimalField's
+{% for field_name in float_columns %}    sql_attr_float       = {{ field_name }}
+{% endfor %}{% endif %}
+}
\ No newline at end of file
diff --git a/apps/djangosphinx/apis/api275/templates/source.conf b/apps/djangosphinx/apis/api275/templates/source.conf
new file mode 100644 (file)
index 0000000..9108aee
--- /dev/null
@@ -0,0 +1,32 @@
+# Sphinx "source" block for a single table; every {{ ... }} placeholder is
+# filled in from the template context when the configuration is rendered.
+source {{ source_name }}
+{
+    type                = {{ database_engine }}
+    sql_host            = {{ database_host }}
+    sql_user            = {{ database_user }}
+    sql_pass            = {{ database_password }}
+    sql_db              = {{ database_name }}
+    sql_port            = {{ database_port }}
+
+    sql_query_pre       =
+    sql_query_post      =
+    # Main indexing query: selects the listed fields from the table.
+    sql_query           = \
+        SELECT {{ field_names|join:", " }} \
+        FROM {{ table_name }}
+    # Looks up a single row by primary key ($id is left as-is for Sphinx).
+    sql_query_info      = SELECT * FROM `{{ table_name }}` WHERE `{{ primary_key }}` = $id
+{% if group_columns %}
+    # ForeignKey's
+{% for field_name in group_columns %}    sql_attr_uint       = {{ field_name }}
+{% endfor %}{% endif %}
+{% if date_columns %}
+    # DateField's and DateTimeField's
+{% for field_name in date_columns %}    sql_attr_timestamp   = {{ field_name }}
+{% endfor %}{% endif %}
+{% if bool_columns %}
+    # BooleanField's
+{% for field_name in bool_columns %}    sql_attr_bool        = {{ field_name }}
+{% endfor %}{% endif %}
+{% if float_columns %}
+    # FloatField's and DecimalField's
+{% for field_name in float_columns %}    sql_attr_float       = {{ field_name }}
+{% endfor %}{% endif %}
+}
\ No newline at end of file
diff --git a/apps/djangosphinx/apis/current.py b/apps/djangosphinx/apis/current.py
new file mode 100644 (file)
index 0000000..e85f4ec
--- /dev/null
@@ -0,0 +1,11 @@
+from djangosphinx.constants import *
+
+try:
+    from sphinxapi import *
+except ImportError, exc:
+    name = 'djangosphinx.apis.api%d' % (SPHINX_API_VERSION,)
+    sphinxapi = __import__(name)
+    for name in name.split('.')[1:]:
+        sphinxapi = getattr(sphinxapi, name)
+    for attr in dir(sphinxapi):
+        globals()[attr] = getattr(sphinxapi, attr)
diff --git a/apps/djangosphinx/constants.py b/apps/djangosphinx/constants.py
new file mode 100644 (file)
index 0000000..976d48d
--- /dev/null
@@ -0,0 +1,7 @@
+from django.conf import settings
+
+__all__ = ('SPHINX_API_VERSION',)
+
+# Sphinx client API version; override it with settings.SPHINX_API_VERSION.
+# djangosphinx.apis.current formats this number in decimal to pick the bundled
+# client package when no standalone sphinxapi module can be imported
+# (0x113 == 275 -> api275, 0x107 == 263 -> api263).
+# 0x113 = 1.19
+# 0x107 = 1.17
+SPHINX_API_VERSION = getattr(settings, 'SPHINX_API_VERSION', 0x107)
\ No newline at end of file
diff --git a/apps/djangosphinx/management/__init__.py b/apps/djangosphinx/management/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/apps/djangosphinx/management/commands/__init__.py b/apps/djangosphinx/management/commands/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/apps/djangosphinx/management/commands/generate_sphinx_config.py b/apps/djangosphinx/management/commands/generate_sphinx_config.py
new file mode 100644 (file)
index 0000000..3320ce6
--- /dev/null
@@ -0,0 +1,22 @@
+from django.core.management.base import AppCommand
+from django.db import models
+
+from djangosphinx.manager import SphinxModelManager
+
+class Command(AppCommand):
+    help = "Prints generic configuration for any models which use a standard SphinxSearch manager."
+
+    output_transaction = True
+
+    def handle_app(self, app, **options):
+        from djangosphinx.utils.config import generate_config_for_model
+        model_classes = [getattr(app, n) for n in dir(app) if hasattr(getattr(app, n), '_meta')]
+        found = 0
+        for model in model_classes:
+            indexes = getattr(model, '__sphinx_indexes__', [])
+            for index in indexes:
+                found += 1
+                print generate_config_for_model(model, index)
+        if found == 0:
+            print "Unable to find any models in application which use standard SphinxSearch configuration."
+        #return u'\n'.join(sql_create(app, self.style)).encode('utf-8')
diff --git a/apps/djangosphinx/manager.py b/apps/djangosphinx/manager.py
new file mode 100644 (file)
index 0000000..3fbcc96
--- /dev/null
@@ -0,0 +1,648 @@
+import select
+import socket
+import time
+import struct
+import warnings
+import operator
+import apis.current as sphinxapi
+
+try:
+    import decimal
+except ImportError:
+    from django.utils import _decimal as decimal # for Python 2.3
+
+from django.db.models.query import QuerySet, Q
+from django.conf import settings
+
+__all__ = ('SearchError', 'ConnectionError', 'SphinxSearch', 'SphinxRelation')
+
+from django.contrib.contenttypes.models import ContentType
+from datetime import datetime, date
+
+# server settings
+# Host and port handed to SphinxClient.SetServer(); both can be overridden in
+# the Django settings module.
+SPHINX_SERVER           = getattr(settings, 'SPHINX_SERVER', 'localhost')
+SPHINX_PORT             = int(getattr(settings, 'SPHINX_PORT', 3312))
+
+# These require search API 275 (Sphinx 0.9.8)
+# Passed to SphinxClient.SetRetries() only when VER_COMMAND_SEARCH >= 0x113.
+SPHINX_RETRIES          = int(getattr(settings, 'SPHINX_RETRIES', 0))
+SPHINX_RETRIES_DELAY    = int(getattr(settings, 'SPHINX_RETRIES_DELAY', 5))
+
+# Largest signed 32-bit integer; used as the open end of range filters.
+MAX_INT = int(2**31-1)
+
+# Raised when a Sphinx query returns nothing and the client reports an error
+# or a warning.
+class SearchError(Exception): pass
+# Exported through __all__; nothing in this module's visible code raises it.
+class ConnectionError(Exception): pass
+
+class SphinxProxy(object):
+    """
+    Acts exactly like a normal instance of an object except that
+    it will handle any special sphinx attributes in a _sphinx class.
+    """
+    __slots__ = ('__dict__', '__instance__', '_sphinx')
+
+    def __init__(self, instance, attributes):
+        object.__setattr__(self, '__instance__', instance)
+        object.__setattr__(self, '_sphinx', attributes)
+
+    def _get_current_object(self):
+        """
+        Return the current object.  This is useful if you want the real object
+        behind the proxy at a time for performance reasons or because you want
+        to pass the object into a different context.
+        """
+        return self.__instance__
+    __current_object = property(_get_current_object)
+
+    def __dict__(self):
+        try:
+            return self.__current_object.__dict__
+        except RuntimeError:
+            return AttributeError('__dict__')
+    __dict__ = property(__dict__)
+
+    def __repr__(self):
+        try:
+            obj = self.__current_object
+        except RuntimeError:
+            return '<%s unbound>' % self.__class__.__name__
+        return repr(obj)
+
+    def __nonzero__(self):
+        try:
+            return bool(self.__current_object)
+        except RuntimeError:
+            return False
+
+    def __unicode__(self):
+        try:
+            return unicode(self.__current_oject)
+        except RuntimeError:
+            return repr(self)
+
+    def __dir__(self):
+        try:
+            return dir(self.__current_object)
+        except RuntimeError:
+            return []
+
+    def __getattr__(self, name, value=None):
+        if name == '__members__':
+            return dir(self.__current_object)
+        elif name == '_sphinx':
+            return object.__getattr__(self, '_sphinx', value)
+        return getattr(self.__current_object, name)
+
+    def __setattr__(self, name, value):
+        if name == '_sphinx':
+            return object.__setattr__(self, '_sphinx', value)
+        return setattr(self.__current_object, name, value)
+
+    def __setitem__(self, key, value):
+        self.__current_object[key] = value
+
+    def __delitem__(self, key):
+        del self.__current_object[key]
+
+    def __setslice__(self, i, j, seq):
+        self.__current_object[i:j] = seq
+
+    def __delslice__(self, i, j):
+        del self.__current_object[i:j]
+
+    __delattr__ = lambda x, n: delattr(x.__current_object, n)
+    __str__ = lambda x: str(x.__current_object)
+    __unicode__ = lambda x: unicode(x.__current_object)
+    __lt__ = lambda x, o: x.__current_object < o
+    __le__ = lambda x, o: x.__current_object <= o
+    __eq__ = lambda x, o: x.__current_object == o
+    __ne__ = lambda x, o: x.__current_object != o
+    __gt__ = lambda x, o: x.__current_object > o
+    __ge__ = lambda x, o: x.__current_object >= o
+    __cmp__ = lambda x, o: cmp(x.__current_object, o)
+    __hash__ = lambda x: hash(x.__current_object)
+    # attributes are currently not callable
+    # __call__ = lambda x, *a, **kw: x.__current_object(*a, **kw)
+    __len__ = lambda x: len(x.__current_object)
+    __getitem__ = lambda x, i: x.__current_object[i]
+    __iter__ = lambda x: iter(x.__current_object)
+    __contains__ = lambda x, i: i in x.__current_object
+    __getslice__ = lambda x, i, j: x.__current_object[i:j]
+    __add__ = lambda x, o: x.__current_object + o
+    __sub__ = lambda x, o: x.__current_object - o
+    __mul__ = lambda x, o: x.__current_object * o
+    __floordiv__ = lambda x, o: x.__current_object // o
+    __mod__ = lambda x, o: x.__current_object % o
+    __divmod__ = lambda x, o: x.__current_object.__divmod__(o)
+    __pow__ = lambda x, o: x.__current_object ** o
+    __lshift__ = lambda x, o: x.__current_object << o
+    __rshift__ = lambda x, o: x.__current_object >> o
+    __and__ = lambda x, o: x.__current_object & o
+    __xor__ = lambda x, o: x.__current_object ^ o
+    __or__ = lambda x, o: x.__current_object | o
+    __div__ = lambda x, o: x.__current_object.__div__(o)
+    __truediv__ = lambda x, o: x.__current_object.__truediv__(o)
+    __neg__ = lambda x: -(x.__current_object)
+    __pos__ = lambda x: +(x.__current_object)
+    __abs__ = lambda x: abs(x.__current_object)
+    __invert__ = lambda x: ~(x.__current_object)
+    __complex__ = lambda x: complex(x.__current_object)
+    __int__ = lambda x: int(x.__current_object)
+    __long__ = lambda x: long(x.__current_object)
+    __float__ = lambda x: float(x.__current_object)
+    __oct__ = lambda x: oct(x.__current_object)
+    __hex__ = lambda x: hex(x.__current_object)
+    __index__ = lambda x: x.__current_object.__index__()
+    __coerce__ = lambda x, o: x.__coerce__(x, o)
+    __enter__ = lambda x: x.__enter__()
+    __exit__ = lambda x, *a, **kw: x.__exit__(*a, **kw)
+
+def to_sphinx(value):
+    "Convert a value into a sphinx query value"
+    if isinstance(value, date) or isinstance(value, datetime):
+        return int(time.mktime(value.timetuple()))
+    elif isinstance(value, decimal.Decimal) or isinstance(value, float):
+        return float(value)
+    return int(value)
+
+class SphinxQuerySet(object):
+    available_kwargs = ('rankmode', 'mode', 'weights', 'maxmatches')
+    
+    def __init__(self, model=None, **kwargs):
+        self._select_related        = False
+        self._select_related_args   = {}
+        self._select_related_fields = []
+        self._filters               = {}
+        self._excludes              = {}
+        self._extra                 = {}
+        self._query                 = ''
+        self.__metadata             = None
+        self._offset                = 0
+        self._limit                 = 20
+
+        self._groupby               = None
+        self._sort                  = None
+        self._weights               = [1, 100]
+
+        self._maxmatches            = 1000
+        self._result_cache          = None
+        self._mode                  = sphinxapi.SPH_MATCH_ALL
+        self._rankmode              = getattr(sphinxapi, 'SPH_RANK_PROXIMITY_BM25', None)
+        self._model                 = model
+        self._anchor                = {}
+        self.__metadata             = {}
+        
+        self.set_options(**kwargs)
+
+        if model:
+            self._index             = kwargs.get('index', model._meta.db_table)
+        else:
+            self._index             = kwargs.get('index')
+
+    def __repr__(self):
+        if self._result_cache is not None:
+            return repr(self._get_data())
+        else:
+            return '<%s instance>' % (self.__class__.__name__,)
+
+    def __len__(self):
+        return len(self._get_data())
+        
+    def __iter__(self):
+        return iter(self._get_data())
+    
+    def __getitem__(self, k):
+        if not isinstance(k, (slice, int, long)):
+            raise TypeError
+        assert (not isinstance(k, slice) and (k >= 0)) \
+            or (isinstance(k, slice) and (k.start is None or k.start >= 0) and (k.stop is None or k.stop >= 0)), \
+            "Negative indexing is not supported."
+        if type(k) == slice:
+            if self._offset < k.start or k.stop-k.start > self._limit:
+                self._result_cache = None
+        else:
+            if k not in range(self._offset, self._limit+self._offset):
+                self._result_cache = None
+        if self._result_cache is None:
+            if type(k) == slice:
+                self._offset = k.start
+                self._limit = k.stop-k.start
+                return self._get_results()
+            else:
+                self._offset = k
+                self._limit = 1
+                return self._get_results()[0]
+        else:
+            return self._result_cache[k]
+
+    def set_options(self, **kwargs):
+        if 'rankmode' in kwargs:
+            if kwargs.get('rankmode') is None:
+                kwargs['rankmode'] = sphinxapi.SPH_RANK_NONE
+        for key in self.available_kwargs:
+            if key in kwargs:
+                setattr(self, '_%s' % (key,), kwargs[key])
+
+    def query(self, string):
+        return self._clone(_query=unicode(string).encode('utf-8'))
+
+    def group_by(self, attribute, func, groupsort='@group desc'):
+        return self._clone(_groupby=attribute, _groupfunc=func, _groupsort=groupsort)
+
+    def rank_none(self):
+        warnings.warn('`rank_none()` is deprecated. Use `set_options(rankmode=None)` instead.', DeprecationWarning)
+        return self._clone(_rankmode=sphinxapi.SPH_RANK_NONE)
+
+    def mode(self, mode):
+        warnings.warn('`mode()` is deprecated. Use `set_options(mode='')` instead.', DeprecationWarning)
+        return self._clone(_mode=mode)
+
+    def weights(self, weights):
+        warnings.warn('`mode()` is deprecated. Use `set_options(weights=[])` instead.', DeprecationWarning)
+        return self._clone(_weights=weights)
+
+    def on_index(self, index):
+        warnings.warn('`mode()` is deprecated. Use `set_options(on_index=foo)` instead.', DeprecationWarning)
+        return self._clone(_index=index)
+
+    # only works on attributes
+    def filter(self, **kwargs):
+        filters = self._filters.copy()
+        for k,v in kwargs.iteritems():
+            if hasattr(v, 'next'):
+                v = list(v)
+            elif not (isinstance(v, list) or isinstance(v, tuple)):
+                 v = [v,]
+            filters.setdefault(k, []).extend(map(to_sphinx, v))
+        return self._clone(_filters=filters)
+
+    def geoanchor(self, lat_attr, lng_attr, lat, lng):
+        assert(sphinxapi.VER_COMMAND_SEARCH >= 0x113, "You must upgrade sphinxapi to version 0.98 to use Geo Anchoring.")
+        return self._clone(_anchor=(lat_attr, lng_attr, float(lat), float(lng)))
+
+    # this actually does nothing, its just a passthru to
+    # keep things looking/working generally the same
+    def all(self):
+        return self
+
+    # only works on attributes
+    def exclude(self, **kwargs):
+        filters = self._excludes.copy()
+        for k,v in kwargs.iteritems():
+            if hasattr(v, 'next'):
+                v = list(v)
+            elif not (isinstance(v, list) or isinstance(v, tuple)):
+                 v = [v,]
+            filters.setdefault(k, []).extend(map(to_sphinx, v))
+        return self._clone(_excludes=filters)
+
+    # you cannot order by @weight (it always orders in descending)
+    # keywords are @id, @weight, @rank, and @relevance
+    def order_by(self, *args):
+        sort_by = []
+        for arg in args:
+            sort = 'ASC'
+            if arg[0] == '-':
+                arg = arg[1:]
+                sort = 'DESC'
+            if arg == 'id':
+                arg = '@id'
+            sort_by.append('%s %s' % (arg, sort))
+        if sort_by:
+            return self._clone(_sort=(sphinxapi.SPH_SORT_EXTENDED, ', '.join(sort_by)))
+        return self
+                    
+    # pass these thru on the queryset and let django handle it
+    def select_related(self, *args, **kwargs):
+        _args = self._select_related_fields[:]
+        _args.extend(args)
+        _kwargs = self._select_related_args.copy()
+        _kwargs.update(kwargs)
+        
+        return self._clone(
+            _select_related=True,
+            _select_related_fields=_args,
+            _select_related_args=_kwargs,
+        )
+    
+    def extra(self, **kwargs):
+        extra = self._extra.copy()
+        extra.update(kwargs)
+        return self._clone(_extra=extra)
+
+    def count(self):
+        return min(self._sphinx.get('total_found', 0), self._maxmatches)
+
+    def reset(self):
+        return self.__class__(self._model, self._index)
+
+    # Internal methods
+    def _clone(self, **kwargs):
+        # Clones the queryset passing any changed args
+        c = self.__class__()
+        c.__dict__.update(self.__dict__)
+        c.__dict__.update(kwargs)
+        return c
+    
+    def _sphinx(self):
+        if not self.__metadata:
+            # We have to force execution if this is accessed beforehand
+            self._get_data()
+        return self.__metadata
+    _sphinx = property(_sphinx)
+
+    def _get_data(self):
+        assert(self._index)
+        # need to find a way to make this work yet
+        if self._result_cache is None:
+            self._result_cache = list(self._get_results())
+        return self._result_cache
+
+    def _get_sphinx_results(self):
+        assert(self._offset + self._limit <= self._maxmatches)
+
+        client = sphinxapi.SphinxClient()
+        client.SetServer(SPHINX_SERVER, SPHINX_PORT)
+
+        if self._sort:
+            client.SetSortMode(*self._sort)
+        
+        if isinstance(self._weights, dict):
+            client.SetFieldWeights(self._weights)
+        else:
+            # assume its a list
+            client.SetWeights(map(int, self._weights))
+        
+        client.SetMatchMode(self._mode)
+
+        # 0.97 requires you to reset it
+        if hasattr(client, 'ResetFilters'):
+             client.ResetFilters()
+        if hasattr(client, 'ResetGroupBy'):
+             client.ResetGroupBy()
+        
+        def _handle_filters(filter_list, exclude=False):
+            for name, values in filter_list.iteritems():
+                parts = len(name.split('__'))
+                if parts > 2:
+                    raise NotImplementedError, 'Related object and/or multiple field lookups not supported'
+                elif parts == 2:
+                    # The float handling for __gt and __lt is kind of ugly..
+                    name, lookup = name.split('__', 1)
+                    is_float = isinstance(values[0], float)
+                    if lookup == 'gt':
+                        value = is_float and values[0] + (1.0/MAX_INT) or values[0] - 1
+                        args = (name, value, MAX_INT, exclude)
+                    elif lookup == 'gte':
+                        args = (name, values[0], MAX_INT, exclude)
+                    elif lookup == 'lt':
+                        value = is_float and values[0] - (1.0/MAX_INT) or values[0] - 1
+                        args = (name, -MAX_INT, value, exclude)
+                    elif lookup == 'lte':
+                        args = (name, -MAX_INT, values[0], exclude)
+                    elif lookup == 'range':
+                        args = (name, values[0], values[1], exclude)
+                    else:
+                        raise NotImplementedError, 'Related object and/or field lookup "%s" not supported' % lookup
+                    if is_float:
+                        client.SetFilterFloatRange(*args)
+                    elif not exclude and self._model and name == self._model._meta.pk.column:
+                        client.SetIDRange(*args[1:3])
+                    else:
+                        client.SetFilterRange(*args)
+
+                else:
+                    client.SetFilter(name, values, exclude)
+
+        # Include filters
+        if self._filters:
+            _handle_filters(self._filters)
+
+        # Exclude filters
+        if self._excludes:
+            _handle_filters(self._excludes, True)
+        
+        if self._groupby:
+            client.SetGroupBy(self._groupby, self._groupfunc, self._groupsort)
+
+        if self._anchor:
+            client.SetGeoAnchor(*self._anchor)
+
+        if self._rankmode:
+            client.SetRankingMode(self._rankmode)
+
+        if not self._limit > 0:
+            # Fix for Sphinx throwing an assertion error when you pass it an empty limiter
+            return []
+        
+
+        if sphinxapi.VER_COMMAND_SEARCH >= 0x113:
+            client.SetRetries(SPHINX_RETRIES, SPHINX_RETRIES_DELAY)
+        
+        client.SetLimits(int(self._offset), int(self._limit), int(self._maxmatches))
+        
+        results = client.Query(self._query, self._index)
+        
+        # The Sphinx API doesn't raise exceptions
+        if not results:
+            if client.GetLastError():
+                raise SearchError, client.GetLastError()
+            elif client.GetLastWarning():
+                raise SearchError, client.GetLastWarning()
+        return results
+
+    def _get_results(self):
+        results = self._get_sphinx_results()
+        if not results or not results['matches']:
+            results = []
+        elif self._model:
+            queryset = self._model.objects.all()
+            if self._select_related:
+                queryset = queryset.select_related(*self._select_related_fields, **self._select_related_args)
+            if self._extra:
+                queryset = queryset.extra(**self._extra)
+            pks = getattr(self._model._meta, 'pks', None)
+            if pks is None or len(pks) == 1:
+                queryset = queryset.filter(pk__in=[r['id'] for r in results['matches']])
+                queryset = dict([(o.pk, o) for o in queryset])
+            else:
+                for r in results['matches']:
+                    r['id'] = ', '.join([unicode(r['attrs'][p.column]) for p in pks])
+                q = reduce(operator.or_, [reduce(operator.and_, [Q(**{p.name: r['attrs'][p.column]}) for p in pks]) for r in results['matches']])
+                if q:
+                    queryset = queryset.filter(q)
+                    queryset = dict([(', '.join([unicode(p) for p in o.pks]), o) for o in queryset])
+                else:
+                    queryset = None
+        
+            if queryset:
+                self.__metadata = {
+                    'total': results['total'],
+                    'total_found': results['total_found'],
+                    'words': results['words'],
+                }
+                results = [SphinxProxy(queryset[r['id']], r) for r in results['matches'] if r['id'] in queryset]
+            else:
+                results = []
+        else:
+            "We did a query without a model, lets see if there's a content_type"
+            results['attrs'] = dict(results['attrs'])
+            if 'content_type' in results['attrs']:
+                "Now we have to do one query per content_type"
+                objcache = {}
+                for r in results['matches']:
+                    ct = r['attrs']['content_type']
+                    if ct not in objcache:
+                        objcache[ct] = {}
+                    objcache[ct][r['id']] = None
+                for ct in objcache:
+                    queryset = ContentType.objects.get(pk=ct).model_class().objects.filter(pk__in=objcache[ct])
+                    for o in queryset:
+                        objcache[ct][o.id] = o
+                results = [objcache[r['attrs']['content_type']][r['id']] for r in results['matches']]
+            else:
+                results = results['matches']
+        self._result_cache = results
+        return results
+
+class SphinxModelManager(object):
+    def __init__(self, model, **kwargs):
+        self._model = model
+        self._index = kwargs.pop('index', model._meta.db_table)
+        self._kwargs = kwargs
+    
+    def _get_query_set(self):
+        return SphinxQuerySet(self._model, index=self._index, **self._kwargs)
+    
+    def get_index(self):
+        return self._index
+    
+    def all(self):
+        return self._get_query_set()
+    
+    def filter(self, **kwargs):
+        return self._get_query_set().filter(**kwargs)
+    
+    def query(self, *args, **kwargs):
+        return self._get_query_set().query(*args, **kwargs)
+
+    def on_index(self, *args, **kwargs):
+        return self._get_query_set().on_index(*args, **kwargs)
+
+    def geoanchor(self, *args, **kwargs):
+        return self._get_query_set().geoanchor(*args, **kwargs)
+
+class SphinxInstanceManager(object):
+    """Collection of tools useful for objects which are in a Sphinx index."""
+    def __init__(self, instance, index):
+        self._instance = instance
+        self._index = index
+        
+    def update(self, **kwargs):
+        assert(sphinxapi.VER_COMMAND_SEARCH >= 0x113, "You must upgrade sphinxapi to version 0.98 to use Geo Anchoring.")
+        sphinxapi.UpdateAttributes(index, kwargs.keys(), dict(self.instance.pk, map(to_sphinx, kwargs.values())))
+
+
+class SphinxSearch(object):
+    def __init__(self, index=None, **kwargs):
+        self._kwargs = kwargs
+        self._sphinx = None
+        self._index = index
+        self.model = None
+        
+    def __call__(self, index, **kwargs):
+        warnings.warn('For non-model searches use a SphinxQuerySet instance.', DeprecationWarning)
+        return SphinxQuerySet(index=index, **kwargs)
+        
+    def __get__(self, instance, model, **kwargs):
+        if instance:
+            return SphinxInstanceManager(instance, index)
+        return self._sphinx
+
+    def contribute_to_class(self, model, name, **kwargs):
+        if self._index is None:
+            self._index = model._meta.db_table
+        self._sphinx = SphinxModelManager(model, index=self._index, **self._kwargs)
+        self.model = model
+        if getattr(model, '__sphinx_indexes__', None) is None:
+            setattr(model, '__sphinx_indexes__', [self._index])
+        else:
+            model.__sphinx_indexes__.append(self._index)
+        setattr(model, name, self._sphinx)
+
+class SphinxRelationProxy(SphinxProxy):
+    def count(self):
+        return min(self._sphinx['attrs']['@count'], self._maxmatches)
+    
+class SphinxRelation(SphinxSearch):
+    """
+    Adds "related model" support to django-sphinx --
+    http://code.google.com/p/django-sphinx/
+    http://www.sphinxsearch.com/
+    
+    Example --
+    
+    class MySearch(SphinxSearch):
+        myrelatedobject = SphinxRelation(RelatedModel)
+        anotherone = SphinxRelation(AnotherModel)
+        ...
+    
+    class MyModel(models.Model):
+        search = MySearch('index')
+    
+    """
+    def __init__(self, model=None, attr=None, sort='@count desc', **kwargs):
+        if model:
+            self._related_model = model
+            self._related_attr = attr or model.__name__.lower()
+            self._related_sort = sort
+        super(SphinxRelation, self).__init__(**kwargs)
+        
+    def __get__(self, instance, instance_model, **kwargs):
+        self._mode = instance._mode
+        self._rankmode = instance._rankmode
+        self._index = instance._index
+        self._query = instance._query
+        self._filters = instance._filters
+        self._excludes = instance._excludes
+        self._model = self._related_model
+        self._groupby = self._related_attr
+        self._groupsort = self._related_sort
+        self._groupfunc = sphinxapi.SPH_GROUPBY_ATTR
+        return self
+
+    def _get_results(self):
+        results = self._get_sphinx_results()
+        if not results: return []
+        if results['matches'] and self._model:
+            ids = []
+            for r in results['matches']:
+                value = r['attrs']['@groupby']
+                if isinstance(value, (int, long)):
+                    ids.append(value)
+                else:
+                    ids.extend()
+            qs = self._model.objects.filter(pk__in=set(ids))
+            if self._select_related:
+                qs = qs.select_related(*self._select_related_fields,
+                                       **self._select_related_args)
+            if self._extra:
+                qs = qs.extra(**self._extra)
+            queryset = dict([(o.id, o) for o in qs])
+            self.__metadata = {
+                'total': results['total'],
+                'total_found': results['total_found'],
+                'words': results['words'],
+            }
+            results = [ SphinxRelationProxy(queryset[k['attrs']['@groupby']], k) \
+                        for k in results['matches'] \
+                        if k['attrs']['@groupby'] in queryset ]
+        else:
+            results = []
+        self._result_cache = results
+        return results
+
+    def _sphinx(self):
+        if not self.__metadata:
+            # We have to force execution if this is accessed beforehand
+            self._get_data()
+        return self.__metadata
+    _sphinx = property(_sphinx)
\ No newline at end of file
diff --git a/apps/djangosphinx/templates/index-multiple.conf b/apps/djangosphinx/templates/index-multiple.conf
new file mode 100644 (file)
index 0000000..3516e46
--- /dev/null
@@ -0,0 +1,12 @@
+index {{ index_name }}{# index section for a multi-model source; data_path is supplied by the generator context #}
+{
+    source          = {{ source_name }}
+    path            = {{ data_path }}/{{ index_name }}
+    docinfo         = extern
+    morphology      = none
+    stopwords       =
+    min_word_len    = 2
+    charset_type    = sbcs
+    min_prefix_len  = 0
+    min_infix_len   = 0
+}
\ No newline at end of file
diff --git a/apps/djangosphinx/templates/index.conf b/apps/djangosphinx/templates/index.conf
new file mode 100644 (file)
index 0000000..3516e46
--- /dev/null
@@ -0,0 +1,12 @@
+index {{ index_name }}{# index section for a single model; data_path is supplied by the generator context #}
+{
+    source          = {{ source_name }}
+    path            = {{ data_path }}/{{ index_name }}
+    docinfo         = extern
+    morphology      = none
+    stopwords       =
+    min_word_len    = 2
+    charset_type    = sbcs
+    min_prefix_len  = 0
+    min_infix_len   = 0
+}
\ No newline at end of file
diff --git a/apps/djangosphinx/templates/source-multiple.conf b/apps/djangosphinx/templates/source-multiple.conf
new file mode 100644 (file)
index 0000000..6f525dc
--- /dev/null
@@ -0,0 +1,31 @@
+source {{ source_name }}{# one UNION-ed query over all tables; each row also carries its content type id #}
+{
+    type                = {{ database_engine }}
+    html_strip          = 0
+    html_index_attrs    =
+    sql_host            = {{ database_host }}
+    sql_user            = {{ database_user }}
+    sql_pass            = {{ database_password }}
+    sql_db              = {{ database_name }}
+    sql_port            = {{ database_port }}
+
+    sql_query_pre       =
+    sql_query_post      =
+    sql_query           = \
+{# the for tag shares a line with SELECT so no blank line follows the backslash continuation #}{% for table_name, content_type in tables %}        SELECT {{ field_names|join:", " }}, {{ content_type.id }} as content_type \
+        FROM `{{ table_name }}`{% if not forloop.last %} UNION \{% endif %}
+{% endfor %}
+{% if group_columns %}
+    # ForeignKey's
+{% for field_name in group_columns %}    sql_group_column    = {{ field_name }}
+{% endfor %}{% endif %}
+{% if bool_columns %}
+    # BooleanField's
+{% for field_name in bool_columns %}    sql_group_column    = {{ field_name }}
+{% endfor %}{% endif %}
+{% if date_columns %}
+    # DateField's and DateTimeField's
+{% for field_name in date_columns %}    sql_date_column     = {{ field_name }}
+{% endfor %}{% endif %}
+}
\ No newline at end of file
diff --git a/apps/djangosphinx/templates/source.conf b/apps/djangosphinx/templates/source.conf
new file mode 100644 (file)
index 0000000..a991f64
--- /dev/null
@@ -0,0 +1,31 @@
+source {{ source_name }}{# source section for a single model table #}
+{
+    type                = {{ database_engine }}
+    strip_html          = 0
+    index_html_attrs    =
+    sql_host            = {{ database_host }}
+    sql_user            = {{ database_user }}
+    sql_pass            = {{ database_password }}
+    sql_db              = {{ database_name }}
+    sql_port            = {{ database_port }}
+    log                 = {{ log_file }}
+
+    sql_query_pre       =
+    sql_query_post      =
+    sql_query           = \
+        SELECT {{ field_names|join:", " }} \
+        FROM {{ table_name }}
+    sql_query_info      = SELECT * FROM {{ table_name }} WHERE {{ primary_key }} = $id{# unquoted like sql_query above: backticks are MySQL-only #}
+{% if group_columns %}
+    # ForeignKey's, BooleanField's and IntegerField's
+{% for field_name in group_columns %}    sql_group_column    = {{ field_name }}
+{% endfor %}{% endif %}
+{% if bool_columns %}
+    # BooleanField's
+{% for field_name in bool_columns %}    sql_group_column     = {{ field_name }}
+{% endfor %}{% endif %}
+{% if date_columns %}
+    # DateField's and DateTimeField's
+{% for field_name in date_columns %}    sql_date_column     = {{ field_name }}
+{% endfor %}{% endif %}
+}
\ No newline at end of file
diff --git a/apps/djangosphinx/utils/__init__.py b/apps/djangosphinx/utils/__init__.py
new file mode 100644 (file)
index 0000000..635cf56
--- /dev/null
@@ -0,0 +1 @@
+from config import *
\ No newline at end of file
diff --git a/apps/djangosphinx/utils/config.py b/apps/djangosphinx/utils/config.py
new file mode 100644 (file)
index 0000000..d73abd1
--- /dev/null
@@ -0,0 +1,182 @@
+from django.conf import settings
+from django.template import Template, Context
+
+from django.db import models
+from django.contrib.contenttypes.models import ContentType
+
+import os.path
+
+import djangosphinx.apis.current as sphinxapi
+
+__all__ = ('generate_config_for_model', 'generate_config_for_models')
+
+def _get_database_engine():
+    if settings.DATABASE_ENGINE == 'mysql':
+        return settings.DATABASE_ENGINE
+    elif settings.DATABASE_ENGINE.startswith('postgresql'):
+        return 'pgsql'
+    raise ValueError, "Only MySQL and PostgreSQL engines are supported by Sphinx."
+
+def _get_template(name):
+    paths = (
+        os.path.join(os.path.dirname(__file__), '../apis/api%s/templates/' % (sphinxapi.VER_COMMAND_SEARCH,)),
+        os.path.join(os.path.dirname(__file__), '../templates/'),
+    )
+    for path in paths:
+        try:
+            fp = open(path + name, 'r')
+        except IOError:
+            continue
+        try:
+            t = Template(fp.read())
+            return t
+        finally:
+            fp.close()
+    raise ValueError, "Template matching name does not exist: %s." % (name,)
+
+def _is_sourcable_field(field):
+    # We can use float fields in 0.98
+    if sphinxapi.VER_COMMAND_SEARCH >= 0x113 and (isinstance(field, models.FloatField) or isinstance(field, models.DecimalField)):
+        return True
+    if isinstance(field, models.ForeignKey):
+        return True
+    if isinstance(field, models.IntegerField) and field.choices:
+        return True
+    if not field.rel:
+        return True
+    return False
+
+# Default template context used by every config generator in this module;
+# callers override individual entries through their ``sphinx_params`` argument.
+# The dict is built at import time, so an unsupported DATABASE_ENGINE makes
+# _get_database_engine() raise ValueError while this module is being imported.
+# No trailing slashes on paths
+DEFAULT_SPHINX_PARAMS = {
+    'database_engine': _get_database_engine(),
+    'database_host': settings.DATABASE_HOST,
+    'database_port': settings.DATABASE_PORT,
+    'database_name': settings.DATABASE_NAME,
+    'database_user': settings.DATABASE_USER,
+    'database_password': settings.DATABASE_PASSWORD,
+    'log_file': '/var/log/sphinx/searchd.log',
+    'data_path': '/var/data',
+}
+
+# Generate for single models
+
+def generate_config_for_model(model_class, index=None, sphinx_params={}):
+    """
+    Generates a sample configuration including an index and source for
+    the given model which includes all attributes and date fields.
+    """
+    return generate_source_for_model(model_class, index, sphinx_params) + "\n\n" + generate_index_for_model(model_class, index, sphinx_params)
+
+def generate_index_for_model(model_class, index=None, sphinx_params={}):
+    """Generates a source configmration for a model."""
+    t = _get_template('index.conf')
+    
+    if index is None:
+        index = model_class._meta.db_table
+    
+    params = DEFAULT_SPHINX_PARAMS
+    params.update(sphinx_params)
+    params.update({
+        'index_name': index,
+        'source_name': index,
+    })
+    
+    c = Context(params)
+    
+    return t.render(c)
+    
+
+def generate_source_for_model(model_class, index=None, sphinx_params={}):
+    """Generates a source configmration for a model."""
+    t = _get_template('source.conf')
+    
+    valid_fields = [f for f in model_class._meta.fields if _is_sourcable_field(f)]
+    
+    # Hackish solution for a bug I've introduced into composite pks branch
+    pk = model_class._meta.get_field(model_class._meta.pk.name)
+    
+    if pk not in valid_fields:
+        valid_fields.insert(0, model_class._meta.pk)
+    
+    if index is None:
+        index = model_class._meta.db_table
+    
+    params = DEFAULT_SPHINX_PARAMS
+    params.update(sphinx_params)
+    params.update({
+        'source_name': index,
+        'index_name': index,
+        'table_name': index,
+        'primary_key': pk.column,
+        'field_names': [f.column for f in valid_fields],
+        'group_columns': [f.column for f in valid_fields if (f.rel or isinstance(f, models.BooleanField) or isinstance(f, models.IntegerField)) and not f.primary_key],
+        'date_columns': [f.column for f in valid_fields if isinstance(f, models.DateTimeField) or isinstance(f, models.DateField)],
+        'float_columns': [f.column for f in valid_fields if isinstance(f, models.FloatField) or isinstance(f, models.DecimalField)],
+    })
+    
+    c = Context(params)
+    
+    return t.render(c)
+    
+# Generate for multiple models (search UNIONs)
+
+def generate_config_for_models(model_classes, index=None, sphinx_params={}):
+    """
+    Generates a sample configuration including an index and source for
+    the given model which includes all attributes and date fields.
+    """
+    return generate_source_for_models(model_classes, index, sphinx_params) + "\n\n" + generate_index_for_models(model_classes, index, sphinx_params)
+
+def generate_index_for_models(model_classes, index=None, sphinx_params={}):
+    """Generates a source configmration for a model."""
+    t = _get_template('index-multiple.conf')
+    
+    if index is None:
+        index = '_'.join(m._meta.db_table for m in model_classes)
+    
+    params = DEFAULT_SPHINX_PARAMS
+    params.update(sphinx_params)
+    params.update({
+        'index_name': index,
+        'source_name': index,
+    })
+    
+    c = Context(params)
+    
+    return t.render(c)
+
+def generate_source_for_models(model_classes, index=None, sphinx_params={}):
+    """Generates a source configmration for a model."""
+    t = _get_template('source-multiple.conf')
+    
+    # We need to loop through each model and find only the fields that exist *exactly* the
+    # same across models.
+    def _the_tuple(f):
+        return (f.__class__, f.column, getattr(f.rel, 'to', None), f.choices)
+    
+    valid_fields = [_the_tuple(f) for f in model_classes[0]._meta.fields if _is_sourcable_field(f)]
+    for model_class in model_classes[1:]:
+        valid_fields = [_the_tuple(f) for f in model_class._meta.fields if _the_tuple(f) in valid_fields]
+    
+    tables = []
+    for model_class in model_classes:
+        tables.append((model_class._meta.db_table, ContentType.objects.get_for_model(model_class)))
+    
+    if index is None:
+        index = '_'.join(m._meta.db_table for m in model_classes)
+    
+    params = DEFAULT_SPHINX_PARAMS
+    params.update(sphinx_params)
+    params.update({
+        'tables': tables,
+        'source_name': index,
+        'index_name': index,
+        'field_names': [f[1] for f in valid_fields],
+        'group_columns': [f[1] for f in valid_fields if f[2] or isinstance(f[0], models.BooleanField) or isinstance(f[0], models.IntegerField)],
+        'date_columns': [f[1] for f in valid_fields if issubclass(f[0], models.DateTimeField) or issubclass(f[0], models.DateField)],
+        'float_columns': [f[1] for f in valid_fields if isinstance(f[0], models.FloatField) or isinstance(f[0], models.DecimalField)],
+    })
+    
+    c = Context(params)
+    
+    return t.render(c)
\ No newline at end of file
index bdbc992..2b9d542 100644 (file)
@@ -96,6 +96,7 @@ INSTALLED_APPS = [
     
     # external
     'south',
+    'djangosphinx',
     'newtagging',
     'pagination',
     'chunks',
@@ -135,6 +136,9 @@ COMPRESS_JS = {
 
 COMPRESS_CSS_FILTERS = None
 
+SPHINX_SERVER = 'localhost'
+SPHINX_PORT = 3312
+
 
 # Load localsettings, if they exist
 try:
diff --git a/wolnelektury/sphinx.conf b/wolnelektury/sphinx.conf
new file mode 100644 (file)
index 0000000..80c4112
--- /dev/null
@@ -0,0 +1,127 @@
+searchd {
+       port                                    = 3312
+       log                                             = /Users/zuber/Projekty/wolnelektury.pl-sphinx/wolnelektury/sphinx/searchd.log
+       query_log                               = /Users/zuber/Projekty/wolnelektury.pl-sphinx/wolnelektury/sphinx/query.log
+       read_timeout                    = 5
+       max_children                    = 30
+       pid_file                                = /Users/zuber/Projekty/wolnelektury.pl-sphinx/wolnelektury/sphinx/searchd.pid
+       max_matches                             = 1000
+       seamless_rotate                 = 1
+       preopen_indexes                 = 0
+       unlink_old                              = 1
+}
+
+source catalogue_book
+{
+    type                = mysql
+    strip_html          = 0
+    index_html_attrs    =
+    sql_host            = 
+    sql_user            = root
+    sql_pass            = 
+    sql_db              = wolnelektury
+    sql_port            = 
+
+    sql_query_pre       =
+    sql_query_post      =
+    sql_query           = \
+        SELECT id, title, slug, description, created_at, _short_html, parent_number, xml_file, html_file, pdf_file, odt_file, txt_file, parent_id \
+        FROM catalogue_book
+    sql_query_info      = SELECT * FROM `catalogue_book` WHERE `id` = $id
+
+    # ForeignKey's, BooleanField's and IntegerField's
+    sql_group_column    = parent_number
+    sql_group_column    = parent_id
+
+
+
+    # DateField's and DateTimeField's
+    sql_date_column     = created_at
+
+}
+
+index catalogue_book
+{
+    source          = catalogue_book
+    path            = /Users/zuber/Projekty/wolnelektury.pl-sphinx/wolnelektury/sphinx/catalogue_book
+    docinfo         = extern
+    morphology      = none
+    stopwords       =
+    min_word_len    = 2
+    charset_type    = sbcs
+    min_prefix_len  = 0
+    min_infix_len   = 0
+}
+
+
+source catalogue_fragment
+{
+    type                = mysql
+    strip_html          = 0
+    index_html_attrs    =
+    sql_host            = 
+    sql_user            = root
+    sql_pass            = 
+    sql_db              = wolnelektury
+    sql_port            = 
+
+    sql_query_pre       =
+    sql_query_post      =
+    sql_query           = \
+        SELECT id, text, short_text, _short_html, anchor, book_id \
+        FROM catalogue_fragment
+    sql_query_info      = SELECT * FROM `catalogue_fragment` WHERE `id` = $id
+
+    # ForeignKey's
+    sql_group_column    = book_id
+}
+
+index catalogue_fragment
+{
+    source          = catalogue_fragment
+    path            = /Users/zuber/Projekty/wolnelektury.pl-sphinx/wolnelektury/sphinx/catalogue_fragment
+    docinfo         = extern
+    morphology      = none
+    stopwords       =
+    min_word_len    = 2
+    charset_type    = sbcs
+    min_prefix_len  = 0
+    min_infix_len   = 0
+}
+
+source catalogue_tag
+{
+    type                = mysql
+    strip_html          = 0
+    index_html_attrs    =
+    sql_host            = 
+    sql_user            = root
+    sql_pass            = 
+    sql_db              = wolnelektury
+    sql_port            = 
+
+    sql_query_pre       =
+    sql_query_post      =
+    sql_query           = \
+        SELECT id, name, slug, sort_key, category, description, main_page, user_id, book_count \
+        FROM catalogue_tag
+    sql_query_info      = SELECT * FROM `catalogue_tag` WHERE `id` = $id
+
+    # ForeignKey's, BooleanField's and IntegerField's
+    sql_group_column    = main_page
+    sql_group_column    = user_id
+    sql_group_column    = book_count
+}
+
+index catalogue_tag
+{
+    source          = catalogue_tag
+    path            = /Users/zuber/Projekty/wolnelektury.pl-sphinx/wolnelektury/sphinx/catalogue_tag
+    docinfo         = extern
+    morphology      = none
+    stopwords       =
+    min_word_len    = 2
+    charset_type    = sbcs
+    min_prefix_len  = 0
+    min_infix_len   = 0
+}