From: Marek Stępniowski Date: Mon, 6 Oct 2008 22:49:33 +0000 (+0200) Subject: Cleaned branch 1.0. X-Git-Url: https://git.mdrn.pl/wolnelektury.git/commitdiff_plain/6ab5e576c0bccd88cc5d4e6924373a5e393b0fa1 Cleaned branch 1.0. --- diff --git a/apps/catalogue/models.py b/apps/catalogue/models.py index 9d072b44f..61e78a00f 100644 --- a/apps/catalogue/models.py +++ b/apps/catalogue/models.py @@ -10,7 +10,6 @@ from django.core.urlresolvers import reverse from newtagging.models import TagBase from newtagging import managers -import djangosphinx from librarian import html, dcparser @@ -46,8 +45,6 @@ class Tag(TagBase): user = models.ForeignKey(User, blank=True, null=True) book_count = models.IntegerField(_('book count'), default=0, blank=False, null=False) - search = djangosphinx.SphinxSearch() - def has_description(self): return len(self.description) > 0 has_description.short_description = _('description') @@ -101,7 +98,6 @@ class Book(models.Model): tagged = managers.ModelTaggedItemManager(Tag) tags = managers.TagDescriptor(Tag) - search = djangosphinx.SphinxSearch() @property def name(self): @@ -264,8 +260,6 @@ class Fragment(models.Model): tagged = managers.ModelTaggedItemManager(Tag) tags = managers.TagDescriptor(Tag) - search = djangosphinx.SphinxSearch() - def short_html(self): if len(self._short_html): return mark_safe(self._short_html) diff --git a/apps/djangosphinx/__init__.py b/apps/djangosphinx/__init__.py deleted file mode 100644 index 176befc5a..000000000 --- a/apps/djangosphinx/__init__.py +++ /dev/null @@ -1,35 +0,0 @@ -""" -Sphinx Search Engine ORM for Django models -http://www.sphinxsearch.com/ -Developed and maintained David Cramer - -To add a search manager to your model: - - search = SphinxSearch([index=, weight=[,], mode=]) - - -To query the engine and retrieve objects: - - MyModel.search.query('my string') - - -To use multiple index support, you need to define a "content_type" field in your SQL -clause. Each index also needs to have the exact same field's. The rules are almost identical -to that of an SQL UNION query. - - SELECT id, name, 1 as content_type FROM model_myapp - SELECT id, name, 2 as content_type FROM model_myotherapp - search_results = SphinxSearch() - search_results.on_index('model_myapp model_myotherapp') - search_results.query('hello') - - -default settings.py values - - SPHINX_SERVER = 'localhost' - SPHINX_PORT = 3312 - -""" - -from manager import SearchError, ConnectionError, SphinxSearch -from utils import generate_config_for_model, generate_config_for_models \ No newline at end of file diff --git a/apps/djangosphinx/apis/__init__.py b/apps/djangosphinx/apis/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/apps/djangosphinx/apis/api263/__init__.py b/apps/djangosphinx/apis/api263/__init__.py deleted file mode 100644 index d9a2d43f7..000000000 --- a/apps/djangosphinx/apis/api263/__init__.py +++ /dev/null @@ -1,577 +0,0 @@ -# -# $Id: sphinxapi.py,v 1.7 2007/04/01 21:38:13 shodan Exp $ -# -# Python version of Sphinx searchd client (Python API) -# -# Copyright (c) 2006-2007, Andrew Aksyonoff -# Copyright (c) 2006, Mike Osadnik -# All rights reserved -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License. You should have -# received a copy of the GPL license along with this program; if you -# did not, you can find it at http://www.gnu.org/ -# - -import select -import socket -from struct import * - - -# known searchd commands -SEARCHD_COMMAND_SEARCH = 0 -SEARCHD_COMMAND_EXCERPT = 1 - -# current client-side command implementation versions -VER_COMMAND_SEARCH = 0x107 -VER_COMMAND_EXCERPT = 0x100 - -# known searchd status codes -SEARCHD_OK = 0 -SEARCHD_ERROR = 1 -SEARCHD_RETRY = 2 -SEARCHD_WARNING = 3 - -# known match modes -SPH_MATCH_ALL = 0 -SPH_MATCH_ANY = 1 -SPH_MATCH_PHRASE = 2 -SPH_MATCH_BOOLEAN = 3 -SPH_MATCH_EXTENDED = 4 - -# known sort modes -SPH_SORT_RELEVANCE = 0 -SPH_SORT_ATTR_DESC = 1 -SPH_SORT_ATTR_ASC = 2 -SPH_SORT_TIME_SEGMENTS = 3 -SPH_SORT_EXTENDED = 4 - -# known attribute types -SPH_ATTR_INTEGER = 1 -SPH_ATTR_TIMESTAMP = 2 - -# known grouping functions -SPH_GROUPBY_DAY = 0 -SPH_GROUPBY_WEEK = 1 -SPH_GROUPBY_MONTH = 2 -SPH_GROUPBY_YEAR = 3 -SPH_GROUPBY_ATTR = 4 - -class SphinxClient: - _host = 'localhost' # searchd host (default is "localhost") - _port = 3312 # searchd port (default is 3312) - _offset = 0 # how much records to seek from result-set start (default is 0) - _limit = 20 # how much records to return from result-set starting at offset (default is 20) - _mode = SPH_MATCH_ALL # query matching mode (default is SPH_MATCH_ALL) - _weights = [] # per-field weights (default is 1 for all fields) - _sort = SPH_SORT_RELEVANCE # match sorting mode (default is SPH_SORT_RELEVANCE) - _sortby = '' # attribute to sort by (defualt is "") - _min_id = 0 # min ID to match (default is 0) - _max_id = 0xFFFFFFFF # max ID to match (default is UINT_MAX) - _filters = [] # search filters - _groupby = '' # group-by attribute name - _groupfunc = SPH_GROUPBY_DAY # group-by function (to pre-process group-by attribute value with) - _groupsort = '@group desc' # group-by sorting clause (to sort groups in result set with) - _maxmatches = 1000 # max matches to retrieve - _error = '' # last error message - _warning = '' # last warning message - - - def __init__ (self): - """ - create a new client object and fill defaults - """ - pass - - - def GetLastError (self): - """ - get last error message (string) - """ - return self._error - - - def GetLastWarning (self): - """ - get last warning message (string) - """ - return self._warning - - - def SetServer (self, host, port): - """ - set searchd server - """ - assert(isinstance(host, str)) - assert(isinstance(port, int)) - - self._host = host - self._port = port - - - def _Connect (self): - """ - connect to searchd server - """ - try: - sock = socket.socket ( socket.AF_INET, socket.SOCK_STREAM ) - sock.connect ( ( self._host, self._port ) ) - except socket.error, msg: - if sock: - sock.close() - self._error = 'connection to %s:%s failed (%s)' % ( self._host, self._port, msg ) - return 0 - - v = unpack('>L', sock.recv(4)) - if v<1: - sock.close() - self._error = 'expected searchd protocol version, got %s' % v - return 0 - - # all ok, send my version - sock.send(pack('>L', 1)) - return sock - - - def _GetResponse (self, sock, client_ver): - """ - get and check response packet from searchd server - """ - (status, ver, length) = unpack('>2HL', sock.recv(8)) - response = '' - left = length - while left>0: - chunk = sock.recv(left) - if chunk: - response += chunk - left -= len(chunk) - else: - break - - sock.close() - - # check response - read = len(response) - if not response or read!=length: - if length: - self._error = 'failed to read searchd response (status=%s, ver=%s, len=%s, read=%s)' \ - % (status, ver, length, read) - else: - self._error = 'received zero-sized searchd response' - return None - - # check status - if status==SEARCHD_WARNING: - wend = 4 + unpack ( '>L', response[0:4] )[0] - self._warning = response[4:wend] - return response[wend:] - - if status==SEARCHD_ERROR: - self._error = 'searchd error: '+response[4:] - return None - - if status==SEARCHD_RETRY: - self._error = 'temporary searchd error: '+response[4:] - return None - - if status!=SEARCHD_OK: - self._error = 'unknown status code %d' % status - return None - - # check version - if ver>8, ver&0xff, client_ver>>8, client_ver&0xff) - - return response - - - def SetLimits (self, offset, limit, maxmatches=0): - """ - set match offset, count, and max number to retrieve - """ - assert(isinstance(offset, int) and offset>=0) - assert(isinstance(limit, int) and limit>0) - assert(maxmatches>=0) - self._offset = offset - self._limit = limit - if maxmatches>0: - self._maxmatches = maxmatches - - - def SetMatchMode (self, mode): - """ - set match mode - """ - assert(mode in [SPH_MATCH_ALL, SPH_MATCH_ANY, SPH_MATCH_PHRASE, SPH_MATCH_BOOLEAN, SPH_MATCH_EXTENDED]) - self._mode = mode - - - def SetSortMode ( self, mode, clause='' ): - """ - set sort mode - """ - assert ( mode in [SPH_SORT_RELEVANCE, SPH_SORT_ATTR_DESC, SPH_SORT_ATTR_ASC, SPH_SORT_TIME_SEGMENTS, SPH_SORT_EXTENDED] ) - assert ( isinstance ( clause, str ) ) - self._sort = mode - self._sortby = clause - - - def SetWeights (self, weights): - """ - set per-field weights - """ - assert(isinstance(weights, list)) - for w in weights: - assert(isinstance(w, int)) - self._weights = weights - - - def SetIDRange (self, minid, maxid): - """ - set IDs range to match - only match those records where document ID - is beetwen minid and maxid (including minid and maxid) - """ - assert(isinstance(minid, int)) - assert(isinstance(maxid, int)) - assert(minid<=maxid) - self._min_id = minid - self._max_id = maxid - - - def SetFilter ( self, attribute, values, exclude=0 ): - """ - set values filter - only match those records where $attribute column values - are in specified set - """ - assert(isinstance(attribute, str)) - assert(isinstance(values, list)) - assert(values) - - values = map(int, values) - - self._filters.append ( { 'attr':attribute, 'exclude':exclude, 'values':values } ) - - - def SetFilterRange (self, attribute, min_, max_, exclude=0 ): - """ - set range filter - only match those records where $attribute column value - is beetwen $min and $max (including $min and $max) - """ - assert(isinstance(attribute, str)) - assert(isinstance(min_, int)) - assert(isinstance(max_, int)) - assert(min_<=max_) - - self._filters.append ( { 'attr':attribute, 'exclude':exclude, 'min':min_, 'max':max_ } ) - - - def SetGroupBy ( self, attribute, func, groupsort='@group desc' ): - """ - set grouping attribute and function - - in grouping mode, all matches are assigned to different groups - based on grouping function value. - - each group keeps track of the total match count, and the best match - (in this group) according to current sorting function. - - the final result set contains one best match per group, with - grouping function value and matches count attached. - - groups in result set could be sorted by any sorting clause, - including both document attributes and the following special - internal Sphinx attributes: - - - @id - match document ID; - - @weight, @rank, @relevance - match weight; - - @group - groupby function value; - - @count - amount of matches in group. - - the default mode is to sort by groupby value in descending order, - ie. by "@group desc". - - "total_found" would contain total amount of matching groups over - the whole index. - - WARNING: grouping is done in fixed memory and thus its results - are only approximate; so there might be more groups reported - in total_found than actually present. @count might also - be underestimated. - - for example, if sorting by relevance and grouping by "published" - attribute with SPH_GROUPBY_DAY function, then the result set will - contain one most relevant match per each day when there were any - matches published, with day number and per-day match count attached, - and sorted by day number in descending order (ie. recent days first). - """ - assert(isinstance(attribute, str)) - assert(func in [SPH_GROUPBY_DAY, SPH_GROUPBY_WEEK, SPH_GROUPBY_MONTH, SPH_GROUPBY_YEAR, SPH_GROUPBY_ATTR] ) - assert(isinstance(groupsort, str)) - - self._groupby = attribute - self._groupfunc = func - self._groupsort = groupsort - - - def Query (self, query, index='*'): - """ - connect to searchd server and run given search query - - "query" is query string - "index" is index name to query, default is "*" which means to query all indexes - - returns false on failure - returns hash which has the following keys on success: - "matches" - an array of found matches represented as ( "id", "weight", "attrs" ) hashes - "total" - total amount of matches retrieved (upto SPH_MAX_MATCHES, see sphinx.h) - "total_found" - total amount of matching documents in index - "time" - search time - "words" - an array of ( "word", "docs", "hits" ) hashes which contains - docs and hits count for stemmed (!) query words - """ - sock = self._Connect() - if not sock: - return {} - - # build request - req = [pack('>4L', self._offset, self._limit, self._mode, self._sort)] - - req.append(pack('>L', len(self._sortby))) - req.append(self._sortby) - - req.append(pack('>L', len(query))) - req.append(query) - - req.append(pack('>L', len(self._weights))) - for w in self._weights: - req.append(pack('>L', w)) - - req.append(pack('>L', len(index))) - req.append(index) - req.append(pack('>L', self._min_id)) - req.append(pack('>L', self._max_id)) - - # filters - req.append ( pack ( '>L', len(self._filters) ) ) - for f in self._filters: - req.append ( pack ( '>L', len(f['attr']) ) ) - req.append ( f['attr'] ) - if ( 'values' in f ): - req.append ( pack ( '>L', len(f['values']) ) ) - for v in f['values']: - req.append ( pack ( '>L', v ) ) - else: - req.append ( pack ( '>3L', 0, f['min'], f['max'] ) ) - req.append ( pack ( '>L', f['exclude'] ) ) - - # group-by, max-matches, group-sort - req.append ( pack ( '>2L', self._groupfunc, len(self._groupby) ) ) - req.append ( self._groupby ) - req.append ( pack ( '>2L', self._maxmatches, len(self._groupsort) ) ) - req.append ( self._groupsort ) - - # send query, get response - req = ''.join(req) - - length = len(req) - req = pack('>2HL', SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, length)+req - sock.send(req) - response = self._GetResponse(sock, VER_COMMAND_SEARCH) - if not response: - return {} - - # parse response - result = {} - max_ = len(response) - - # read schema - p = 0 - fields = [] - attrs = [] - - nfields = unpack('>L', response[p:p+4])[0] - p += 4 - while nfields>0 and pL', response[p:p+4])[0] - p += 4 - fields.append(response[p:p+length]) - p += length - - result['fields'] = fields - - nattrs = unpack('>L', response[p:p+4])[0] - p += 4 - while nattrs>0 and pL', response[p:p+4])[0] - p += 4 - attr = response[p:p+length] - p += length - type_ = unpack('>L', response[p:p+4])[0] - p += 4 - attrs.append([attr,type_]) - - result['attrs'] = attrs - - # read match count - count = unpack('>L', response[p:p+4])[0] - p += 4 - - # read matches - result['matches'] = [] - while count>0 and p2L', response[p:p+8]) - p += 8 - - match = { 'id':doc, 'weight':weight, 'attrs':{} } - for i in range(len(attrs)): - match['attrs'][attrs[i][0]] = unpack('>L', response[p:p+4])[0] - p += 4 - - result['matches'].append ( match ) - - result['total'], result['total_found'], result['time'], words = \ - unpack('>4L', response[p:p+16]) - - result['time'] = '%.3f' % (result['time']/1000.0) - p += 16 - - result['words'] = [] - while words>0: - words -= 1 - length = unpack('>L', response[p:p+4])[0] - p += 4 - word = response[p:p+length] - p += length - docs, hits = unpack('>2L', response[p:p+8]) - p += 8 - - result['words'].append({'word':word, 'docs':docs, 'hits':hits}) - - sock.close() - - return result - - - def BuildExcerpts (self, docs, index, words, opts=None): - """ - connect to searchd server and generate exceprts from given documents - - "docs" is an array of strings which represent the documents' contents - "index" is a string specifiying the index which settings will be used - for stemming, lexing and case folding - "words" is a string which contains the words to highlight - "opts" is a hash which contains additional optional highlighting parameters: - "before_match" - a string to insert before a set of matching words, default is "" - "after_match" - a string to insert after a set of matching words, default is "" - "chunk_separator" - a string to insert between excerpts chunks, default is " ... " - "limit" - max excerpt size in symbols (codepoints), default is 256 - "around" - how much words to highlight around each match, default is 5 - - returns false on failure - returns an array of string excerpts on success - """ - if not opts: - opts = {} - - assert(isinstance(docs, list)) - assert(isinstance(index, str)) - assert(isinstance(words, str)) - assert(isinstance(opts, dict)) - - sock = self._Connect() - - if not sock: - return [] - - # fixup options - opts.setdefault('before_match', '') - opts.setdefault('after_match', '') - opts.setdefault('chunk_separator', ' ... ') - opts.setdefault('limit', 256) - opts.setdefault('around', 5) - - # build request - # v.1.0 req - - # mode=0, flags=1 (remove spaces) - req = [pack('>2L', 0, 1)] - - # req index - req.append(pack('>L', len(index))) - req.append(index) - - # req words - req.append(pack('>L', len(words))) - req.append(words) - - # options - req.append(pack('>L', len(opts['before_match']))) - req.append(opts['before_match']) - - req.append(pack('>L', len(opts['after_match']))) - req.append(opts['after_match']) - - req.append(pack('>L', len(opts['chunk_separator']))) - req.append(opts['chunk_separator']) - - req.append(pack('>L', int(opts['limit']))) - req.append(pack('>L', int(opts['around']))) - - # documents - req.append(pack('>L', len(docs))) - for doc in docs: - assert(isinstance(doc, str)) - req.append(pack('>L', len(doc))) - req.append(doc) - - req = ''.join(req) - - # send query, get response - length = len(req) - - # add header - req = pack('>2HL', SEARCHD_COMMAND_EXCERPT, VER_COMMAND_EXCERPT, length)+req - wrote = sock.send(req) - - response = self._GetResponse(sock, VER_COMMAND_EXCERPT ) - if not response: - return [] - - # parse response - pos = 0 - res = [] - rlen = len(response) - - for i in range(len(docs)): - length = unpack('>L', response[pos:pos+4])[0] - pos += 4 - - if pos+length > rlen: - self._error = 'incomplete reply' - return [] - - res.append(response[pos:pos+length]) - pos += length - - return res - -# -# $Id: sphinxapi.py,v 1.7 2007/04/01 21:38:13 shodan Exp $ -# diff --git a/apps/djangosphinx/apis/api275/__init__.py b/apps/djangosphinx/apis/api275/__init__.py deleted file mode 100644 index 236a5a20d..000000000 --- a/apps/djangosphinx/apis/api275/__init__.py +++ /dev/null @@ -1,855 +0,0 @@ -# -# $Id: sphinxapi.py 1216 2008-03-14 23:25:39Z shodan $ -# -# Python version of Sphinx searchd client (Python API) -# -# Copyright (c) 2006-2008, Andrew Aksyonoff -# Copyright (c) 2006, Mike Osadnik -# All rights reserved -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License. You should have -# received a copy of the GPL license along with this program; if you -# did not, you can find it at http://www.gnu.org/ -# - -import sys -import select -import socket -from struct import * - - -# known searchd commands -SEARCHD_COMMAND_SEARCH = 0 -SEARCHD_COMMAND_EXCERPT = 1 -SEARCHD_COMMAND_UPDATE = 2 -SEARCHD_COMMAND_KEYWORDS= 3 - -# current client-side command implementation versions -VER_COMMAND_SEARCH = 0x113 -VER_COMMAND_EXCERPT = 0x100 -VER_COMMAND_UPDATE = 0x101 -VER_COMMAND_KEYWORDS = 0x100 - -# known searchd status codes -SEARCHD_OK = 0 -SEARCHD_ERROR = 1 -SEARCHD_RETRY = 2 -SEARCHD_WARNING = 3 - -# known match modes -SPH_MATCH_ALL = 0 -SPH_MATCH_ANY = 1 -SPH_MATCH_PHRASE = 2 -SPH_MATCH_BOOLEAN = 3 -SPH_MATCH_EXTENDED = 4 -SPH_MATCH_FULLSCAN = 5 -SPH_MATCH_EXTENDED2 = 6 - -# known ranking modes (extended2 mode only) -SPH_RANK_PROXIMITY_BM25 = 0 # default mode, phrase proximity major factor and BM25 minor one -SPH_RANK_BM25 = 1 # statistical mode, BM25 ranking only (faster but worse quality) -SPH_RANK_NONE = 2 # no ranking, all matches get a weight of 1 -SPH_RANK_WORDCOUNT = 3 # simple word-count weighting, rank is a weighted sum of per-field keyword occurence counts - -# known sort modes -SPH_SORT_RELEVANCE = 0 -SPH_SORT_ATTR_DESC = 1 -SPH_SORT_ATTR_ASC = 2 -SPH_SORT_TIME_SEGMENTS = 3 -SPH_SORT_EXTENDED = 4 -SPH_SORT_EXPR = 5 - -# known filter types -SPH_FILTER_VALUES = 0 -SPH_FILTER_RANGE = 1 -SPH_FILTER_FLOATRANGE = 2 - -# known attribute types -SPH_ATTR_NONE = 0 -SPH_ATTR_INTEGER = 1 -SPH_ATTR_TIMESTAMP = 2 -SPH_ATTR_ORDINAL = 3 -SPH_ATTR_BOOL = 4 -SPH_ATTR_FLOAT = 5 -SPH_ATTR_MULTI = 0X40000000L - -# known grouping functions -SPH_GROUPBY_DAY = 0 -SPH_GROUPBY_WEEK = 1 -SPH_GROUPBY_MONTH = 2 -SPH_GROUPBY_YEAR = 3 -SPH_GROUPBY_ATTR = 4 - - -class SphinxClient: - def __init__ (self): - """ - Create a new client object, and fill defaults. - """ - self._host = 'localhost' # searchd host (default is "localhost") - self._port = 3312 # searchd port (default is 3312) - self._offset = 0 # how much records to seek from result-set start (default is 0) - self._limit = 20 # how much records to return from result-set starting at offset (default is 20) - self._mode = SPH_MATCH_ALL # query matching mode (default is SPH_MATCH_ALL) - self._weights = [] # per-field weights (default is 1 for all fields) - self._sort = SPH_SORT_RELEVANCE # match sorting mode (default is SPH_SORT_RELEVANCE) - self._sortby = '' # attribute to sort by (defualt is "") - self._min_id = 0 # min ID to match (default is 0) - self._max_id = 0xFFFFFFFF # max ID to match (default is UINT_MAX) - self._filters = [] # search filters - self._groupby = '' # group-by attribute name - self._groupfunc = SPH_GROUPBY_DAY # group-by function (to pre-process group-by attribute value with) - self._groupsort = '@group desc' # group-by sorting clause (to sort groups in result set with) - self._groupdistinct = '' # group-by count-distinct attribute - self._maxmatches = 1000 # max matches to retrieve - self._cutoff = 0 # cutoff to stop searching at - self._retrycount = 0 # distributed retry count - self._retrydelay = 0 # distributed retry delay - self._anchor = {} # geographical anchor point - self._indexweights = {} # per-index weights - self._ranker = SPH_RANK_PROXIMITY_BM25 # ranking mode - self._maxquerytime = 0 # max query time, milliseconds (default is 0, do not limit) - self._fieldweights = {} # per-field-name weights - self._error = '' # last error message - self._warning = '' # last warning message - self._reqs = [] # requests array for multi-query - return - - - def GetLastError (self): - """ - Get last error message (string). - """ - return self._error - - - def GetLastWarning (self): - """ - Get last warning message (string). - """ - return self._warning - - - def SetServer (self, host, port): - """ - Set searchd server host and port. - """ - assert(isinstance(host, str)) - assert(isinstance(port, int)) - self._host = host - self._port = port - - - def _Connect (self): - """ - INTERNAL METHOD, DO NOT CALL. Connects to searchd server. - """ - try: - sock = socket.socket ( socket.AF_INET, socket.SOCK_STREAM ) - sock.connect ( ( self._host, self._port ) ) - except socket.error, msg: - if sock: - sock.close() - self._error = 'connection to %s:%s failed (%s)' % ( self._host, self._port, msg ) - return 0 - - v = unpack('>L', sock.recv(4)) - if v<1: - sock.close() - self._error = 'expected searchd protocol version, got %s' % v - return 0 - - # all ok, send my version - sock.send(pack('>L', 1)) - return sock - - - def _GetResponse (self, sock, client_ver): - """ - INTERNAL METHOD, DO NOT CALL. Gets and checks response packet from searchd server. - """ - (status, ver, length) = unpack('>2HL', sock.recv(8)) - response = '' - left = length - while left>0: - chunk = sock.recv(left) - if chunk: - response += chunk - left -= len(chunk) - else: - break - - sock.close() - - # check response - read = len(response) - if not response or read!=length: - if length: - self._error = 'failed to read searchd response (status=%s, ver=%s, len=%s, read=%s)' \ - % (status, ver, length, read) - else: - self._error = 'received zero-sized searchd response' - return None - - # check status - if status==SEARCHD_WARNING: - wend = 4 + unpack ( '>L', response[0:4] )[0] - self._warning = response[4:wend] - return response[wend:] - - if status==SEARCHD_ERROR: - self._error = 'searchd error: '+response[4:] - return None - - if status==SEARCHD_RETRY: - self._error = 'temporary searchd error: '+response[4:] - return None - - if status!=SEARCHD_OK: - self._error = 'unknown status code %d' % status - return None - - # check version - if ver>8, ver&0xff, client_ver>>8, client_ver&0xff) - - return response - - - def SetLimits (self, offset, limit, maxmatches=0, cutoff=0): - """ - Set offset and count into result set, and optionally set max-matches and cutoff limits. - """ - assert(isinstance(offset, int) and offset>=0) - assert(isinstance(limit, int) and limit>0) - assert(maxmatches>=0) - self._offset = offset - self._limit = limit - if maxmatches>0: - self._maxmatches = maxmatches - if cutoff>=0: - self._cutoff = cutoff - - - def SetMaxQueryTime (self, maxquerytime): - """ - Set maximum query time, in milliseconds, per-index. 0 means 'do not limit'. - """ - assert(isinstance(maxquerytime,int) and maxquerytime>0) - self._maxquerytime = maxquerytime - - - def SetMatchMode (self, mode): - """ - Set matching mode. - """ - assert(mode in [SPH_MATCH_ALL, SPH_MATCH_ANY, SPH_MATCH_PHRASE, SPH_MATCH_BOOLEAN, SPH_MATCH_EXTENDED, SPH_MATCH_FULLSCAN, SPH_MATCH_EXTENDED2]) - self._mode = mode - - - def SetRankingMode (self, ranker): - """ - Set ranking mode. - """ - assert(ranker in [SPH_RANK_PROXIMITY_BM25, SPH_RANK_BM25, SPH_RANK_NONE, SPH_RANK_WORDCOUNT]) - self._ranker = ranker - - - def SetSortMode ( self, mode, clause='' ): - """ - Set sorting mode. - """ - assert ( mode in [SPH_SORT_RELEVANCE, SPH_SORT_ATTR_DESC, SPH_SORT_ATTR_ASC, SPH_SORT_TIME_SEGMENTS, SPH_SORT_EXTENDED, SPH_SORT_EXPR] ) - assert ( isinstance ( clause, str ) ) - self._sort = mode - self._sortby = clause - - - def SetWeights (self, weights): - """ - Set per-field weights. - WARNING, DEPRECATED; do not use it! use SetFieldWeights() instead - """ - assert(isinstance(weights, list)) - for w in weights: - assert(isinstance(w, int)) - self._weights = weights - - - def SetFieldWeights (self, weights): - """ - Bind per-field weights by name; expects (name,field_weight) dictionary as argument. - """ - assert(isinstance(weights,dict)) - for key,val in weights.items(): - assert(isinstance(key,str)) - assert(isinstance(val,int)) - self._fieldweights = weights - - - def SetIndexWeights (self, weights): - """ - Bind per-index weights by name; expects (name,index_weight) dictionary as argument. - """ - assert(isinstance(weights,dict)) - for key,val in weights.items(): - assert(isinstance(key,str)) - assert(isinstance(val,int)) - self._indexweights = weights - - - def SetIDRange (self, minid, maxid): - """ - Set IDs range to match. - Only match records if document ID is beetwen $min and $max (inclusive). - """ - assert(isinstance(minid, int)) - assert(isinstance(maxid, int)) - assert(minid<=maxid) - self._min_id = minid - self._max_id = maxid - - - def SetFilter ( self, attribute, values, exclude=0 ): - """ - Set values set filter. - Only match records where 'attribute' value is in given 'values' set. - """ - assert(isinstance(attribute, str)) - assert(isinstance(values, list)) - assert(values) - - for value in values: - assert(isinstance(value, int)) - - self._filters.append ( { 'type':SPH_FILTER_VALUES, 'attr':attribute, 'exclude':exclude, 'values':values } ) - - - def SetFilterRange (self, attribute, min_, max_, exclude=0 ): - """ - Set range filter. - Only match records if 'attribute' value is beetwen 'min_' and 'max_' (inclusive). - """ - assert(isinstance(attribute, str)) - assert(isinstance(min_, int)) - assert(isinstance(max_, int)) - assert(min_<=max_) - - self._filters.append ( { 'type':SPH_FILTER_RANGE, 'attr':attribute, 'exclude':exclude, 'min':min_, 'max':max_ } ) - - - def SetFilterFloatRange (self, attribute, min_, max_, exclude=0 ): - assert(isinstance(attribute,str)) - assert(isinstance(min_,float)) - assert(isinstance(max_,float)) - assert(min_ <= max_) - self._filters.append ( {'type':SPH_FILTER_FLOATRANGE, 'attr':attribute, 'exclude':exclude, 'min':min_, 'max':max_} ) - - - def SetGeoAnchor (self, attrlat, attrlong, latitude, longitude): - assert(isinstance(attrlat,str)) - assert(isinstance(attrlong,str)) - assert(isinstance(latitude,float)) - assert(isinstance(longitude,float)) - self._anchor['attrlat'] = attrlat - self._anchor['attrlong'] = attrlong - self._anchor['lat'] = latitude - self._anchor['long'] = longitude - - - def SetGroupBy ( self, attribute, func, groupsort='@group desc' ): - """ - Set grouping attribute and function. - """ - assert(isinstance(attribute, str)) - assert(func in [SPH_GROUPBY_DAY, SPH_GROUPBY_WEEK, SPH_GROUPBY_MONTH, SPH_GROUPBY_YEAR, SPH_GROUPBY_ATTR] ) - assert(isinstance(groupsort, str)) - - self._groupby = attribute - self._groupfunc = func - self._groupsort = groupsort - - - def SetGroupDistinct (self, attribute): - assert(isinstance(attribute,str)) - self._groupdistinct = attribute - - - def SetRetries (self, count, delay=0): - assert(isinstance(count,int) and count>=0) - assert(isinstance(delay,int) and delay>=0) - self._retrycount = count - self._retrydelay = delay - - - def ResetFilters (self): - """ - Clear all filters (for multi-queries). - """ - self._filters = [] - self._anchor = {} - - - def ResetGroupBy (self): - """ - Clear groupby settings (for multi-queries). - """ - self._groupby = '' - self._groupfunc = SPH_GROUPBY_DAY - self._groupsort = '@group desc' - self._groupdistinct = '' - - - def Query (self, query, index='*', comment=''): - """ - Connect to searchd server and run given search query. - Returns None on failure; result set hash on success (see documentation for details). - """ - assert(len(self._reqs)==0) - self.AddQuery(query,index,comment) - results = self.RunQueries() - - if not results or len(results)==0: - return None - self._error = results[0]['error'] - self._warning = results[0]['warning'] - if results[0]['status'] == SEARCHD_ERROR: - return None - return results[0] - - - def AddQuery (self, query, index='*', comment=''): - """ - Add query to batch. - """ - # build request - req = [pack('>5L', self._offset, self._limit, self._mode, self._ranker, self._sort)] - req.append(pack('>L', len(self._sortby))) - req.append(self._sortby) - - if isinstance(query,unicode): - query = query.encode('utf-8') - assert(isinstance(query,str)) - - req.append(pack('>L', len(query))) - req.append(query) - - req.append(pack('>L', len(self._weights))) - for w in self._weights: - req.append(pack('>L', w)) - req.append(pack('>L', len(index))) - req.append(index) - req.append(pack('>L',0)) # id64 range marker FIXME! IMPLEMENT! - req.append(pack('>L', self._min_id)) - req.append(pack('>L', self._max_id)) - - # filters - req.append ( pack ( '>L', len(self._filters) ) ) - for f in self._filters: - req.append ( pack ( '>L', len(f['attr'])) + f['attr']) - filtertype = f['type'] - req.append ( pack ( '>L', filtertype)) - if filtertype == SPH_FILTER_VALUES: - req.append ( pack ('>L', len(f['values']))) - for val in f['values']: - req.append ( pack ('>L', val)) - elif filtertype == SPH_FILTER_RANGE: - req.append ( pack ('>2L', f['min'], f['max'])) - elif filtertype == SPH_FILTER_FLOATRANGE: - req.append ( pack ('>2f', f['min'], f['max'])) - req.append ( pack ( '>L', f['exclude'] ) ) - - # group-by, max-matches, group-sort - req.append ( pack ( '>2L', self._groupfunc, len(self._groupby) ) ) - req.append ( self._groupby ) - req.append ( pack ( '>2L', self._maxmatches, len(self._groupsort) ) ) - req.append ( self._groupsort ) - req.append ( pack ( '>LLL', self._cutoff, self._retrycount, self._retrydelay)) - req.append ( pack ( '>L', len(self._groupdistinct))) - req.append ( self._groupdistinct) - - # anchor point - if len(self._anchor) == 0: - req.append ( pack ('>L', 0)) - else: - attrlat, attrlong = self._anchor['attrlat'], self._anchor['attrlong'] - latitude, longitude = self._anchor['lat'], self._anchor['long'] - req.append ( pack ('>L', 1)) - req.append ( pack ('>L', len(attrlat)) + attrlat) - req.append ( pack ('>L', len(attrlong)) + attrlong) - req.append ( pack ('>f', latitude) + pack ('>f', longitude)) - - # per-index weights - req.append ( pack ('>L',len(self._indexweights))) - for indx,weight in self._indexweights.items(): - req.append ( pack ('>L',len(indx)) + indx + pack ('>L',weight)) - - # max query time - req.append ( pack ('>L', self._maxquerytime) ) - - # per-field weights - req.append ( pack ('>L',len(self._fieldweights) ) ) - for field,weight in self._fieldweights.items(): - req.append ( pack ('>L',len(field)) + field + pack ('>L',weight) ) - - # comment - req.append ( pack('>L',len(comment)) + comment ) - - # send query, get response - req = ''.join(req) - - self._reqs.append(req) - return - - - def RunQueries (self): - """ - Run queries batch. - Returns None on network IO failure; or an array of result set hashes on success. - """ - if len(self._reqs)==0: - self._error = 'no queries defined, issue AddQuery() first' - return None - - sock = self._Connect() - if not sock: - return None - - req = ''.join(self._reqs) - length = len(req)+4 - req = pack('>HHLL', SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, length, len(self._reqs))+req - sock.send(req) - - response = self._GetResponse(sock, VER_COMMAND_SEARCH) - if not response: - return None - - nreqs = len(self._reqs) - - # parse response - max_ = len(response) - p = 0 - - results = [] - for i in range(0,nreqs,1): - result = {} - result['error'] = '' - result['warning'] = '' - status = unpack('>L', response[p:p+4])[0] - p += 4 - result['status'] = status - if status != SEARCHD_OK: - length = unpack('>L', response[p:p+4])[0] - p += 4 - message = response[p:p+length] - p += length - - if status == SEARCHD_WARNING: - result['warning'] = message - else: - result['error'] = message - continue - - # read schema - fields = [] - attrs = [] - - nfields = unpack('>L', response[p:p+4])[0] - p += 4 - while nfields>0 and pL', response[p:p+4])[0] - p += 4 - fields.append(response[p:p+length]) - p += length - - result['fields'] = fields - - nattrs = unpack('>L', response[p:p+4])[0] - p += 4 - while nattrs>0 and pL', response[p:p+4])[0] - p += 4 - attr = response[p:p+length] - p += length - type_ = unpack('>L', response[p:p+4])[0] - p += 4 - attrs.append([attr,type_]) - - result['attrs'] = attrs - - # read match count - count = unpack('>L', response[p:p+4])[0] - p += 4 - id64 = unpack('>L', response[p:p+4])[0] - p += 4 - - # read matches - result['matches'] = [] - while count>0 and p3L', response[p:p+12]) - doc += (dochi<<32) - p += 12 - else: - doc, weight = unpack('>2L', response[p:p+8]) - p += 8 - - match = { 'id':doc, 'weight':weight, 'attrs':{} } - for i in range(len(attrs)): - if attrs[i][1] == SPH_ATTR_FLOAT: - match['attrs'][attrs[i][0]] = unpack('>f', response[p:p+4])[0] - elif attrs[i][1] == (SPH_ATTR_MULTI | SPH_ATTR_INTEGER): - match['attrs'][attrs[i][0]] = [] - nvals = unpack('>L', response[p:p+4])[0] - p += 4 - for n in range(0,nvals,1): - match['attrs'][attrs[i][0]].append(unpack('>L', response[p:p+4])[0]) - p += 4 - p -= 4 - else: - match['attrs'][attrs[i][0]] = unpack('>L', response[p:p+4])[0] - p += 4 - - result['matches'].append ( match ) - - result['total'], result['total_found'], result['time'], words = unpack('>4L', response[p:p+16]) - - result['time'] = '%.3f' % (result['time']/1000.0) - p += 16 - - result['words'] = [] - while words>0: - words -= 1 - length = unpack('>L', response[p:p+4])[0] - p += 4 - word = response[p:p+length] - p += length - docs, hits = unpack('>2L', response[p:p+8]) - p += 8 - - result['words'].append({'word':word, 'docs':docs, 'hits':hits}) - - results.append(result) - - self._reqs = [] - sock.close() - return results - - - def BuildExcerpts (self, docs, index, words, opts=None): - """ - Connect to searchd server and generate exceprts from given documents. - """ - if not opts: - opts = {} - if isinstance(words,unicode): - words = words.encode('utf-8') - - assert(isinstance(docs, list)) - assert(isinstance(index, str)) - assert(isinstance(words, str)) - assert(isinstance(opts, dict)) - - sock = self._Connect() - - if not sock: - return None - - # fixup options - opts.setdefault('before_match', '') - opts.setdefault('after_match', '') - opts.setdefault('chunk_separator', ' ... ') - opts.setdefault('limit', 256) - opts.setdefault('around', 5) - - # build request - # v.1.0 req - - # mode=0, flags=1 (remove spaces) - req = [pack('>2L', 0, 1)] - - # req index - req.append(pack('>L', len(index))) - req.append(index) - - # req words - req.append(pack('>L', len(words))) - req.append(words) - - # options - req.append(pack('>L', len(opts['before_match']))) - req.append(opts['before_match']) - - req.append(pack('>L', len(opts['after_match']))) - req.append(opts['after_match']) - - req.append(pack('>L', len(opts['chunk_separator']))) - req.append(opts['chunk_separator']) - - req.append(pack('>L', int(opts['limit']))) - req.append(pack('>L', int(opts['around']))) - - # documents - req.append(pack('>L', len(docs))) - for doc in docs: - if isinstance(doc,unicode): - doc = doc.encode('utf-8') - assert(isinstance(doc, str)) - req.append(pack('>L', len(doc))) - req.append(doc) - - req = ''.join(req) - - # send query, get response - length = len(req) - - # add header - req = pack('>2HL', SEARCHD_COMMAND_EXCERPT, VER_COMMAND_EXCERPT, length)+req - wrote = sock.send(req) - - response = self._GetResponse(sock, VER_COMMAND_EXCERPT ) - if not response: - return [] - - # parse response - pos = 0 - res = [] - rlen = len(response) - - for i in range(len(docs)): - length = unpack('>L', response[pos:pos+4])[0] - pos += 4 - - if pos+length > rlen: - self._error = 'incomplete reply' - return [] - - res.append(response[pos:pos+length]) - pos += length - - return res - - - def UpdateAttributes ( self, index, attrs, values ): - """ - Update given attribute values on given documents in given indexes. - Returns amount of updated documents (0 or more) on success, or -1 on failure. - - 'attrs' must be a list of strings. - 'values' must be a dict with int key (document ID) and list of int values (new attribute values). - - Example: - res = cl.UpdateAttributes ( 'test1', [ 'group_id', 'date_added' ], { 2:[123,1000000000], 4:[456,1234567890] } ) - """ - assert ( isinstance ( index, str ) ) - assert ( isinstance ( attrs, list ) ) - assert ( isinstance ( values, dict ) ) - for attr in attrs: - assert ( isinstance ( attr, str ) ) - for docid, entry in values.items(): - assert ( isinstance ( docid, int ) ) - assert ( isinstance ( entry, list ) ) - assert ( len(attrs)==len(entry) ) - for val in entry: - assert ( isinstance ( val, int ) ) - - # build request - req = [ pack('>L',len(index)), index ] - - req.append ( pack('>L',len(attrs)) ) - for attr in attrs: - req.append ( pack('>L',len(attr)) + attr ) - - req.append ( pack('>L',len(values)) ) - for docid, entry in values.items(): - req.append ( pack('>q',docid) ) - for val in entry: - req.append ( pack('>L',val) ) - - # connect, send query, get response - sock = self._Connect() - if not sock: - return None - - req = ''.join(req) - length = len(req) - req = pack ( '>2HL', SEARCHD_COMMAND_UPDATE, VER_COMMAND_UPDATE, length ) + req - wrote = sock.send ( req ) - - response = self._GetResponse ( sock, VER_COMMAND_UPDATE ) - if not response: - return -1 - - # parse response - updated = unpack ( '>L', response[0:4] )[0] - return updated - - - def BuildKeywords ( self, query, index, hits ): - """ - Connect to searchd server, and generate keywords list for a given query. - Returns None on failure, or a list of keywords on success. - """ - assert ( isinstance ( query, str ) ) - assert ( isinstance ( index, str ) ) - assert ( isinstance ( hits, int ) ) - - # build request - req = [ pack ( '>L', len(query) ) + query ] - req.append ( pack ( '>L', len(index) ) + index ) - req.append ( pack ( '>L', hits ) ) - - # connect, send query, get response - sock = self._Connect() - if not sock: - return None - - req = ''.join(req) - length = len(req) - req = pack ( '>2HL', SEARCHD_COMMAND_KEYWORDS, VER_COMMAND_KEYWORDS, length ) + req - wrote = sock.send ( req ) - - response = self._GetResponse ( sock, VER_COMMAND_KEYWORDS ) - if not response: - return None - - # parse response - res = [] - - nwords = unpack ( '>L', response[0:4] )[0] - p = 4 - max_ = len(response) - - while nwords>0 and pL', response[p:p+4] )[0] - p += 4 - tokenized = response[p:p+length] - p += length - - length = unpack ( '>L', response[p:p+4] )[0] - p += 4 - normalized = response[p:p+length] - p += length - - entry = { 'tokenized':tokenized, 'normalized':normalized } - if hits: - entry['docs'], entry['hits'] = unpack ( '>2L', response[p:p+8] ) - p += 8 - - res.append ( entry ) - - if nwords>0 or p>max_: - self._error = 'incomplete reply' - return None - - return res -# -# $Id: sphinxapi.py 1216 2008-03-14 23:25:39Z shodan $ -# \ No newline at end of file diff --git a/apps/djangosphinx/apis/api275/templates/source-multiple.conf b/apps/djangosphinx/apis/api275/templates/source-multiple.conf deleted file mode 100644 index 506e6f190..000000000 --- a/apps/djangosphinx/apis/api275/templates/source-multiple.conf +++ /dev/null @@ -1,36 +0,0 @@ -source {{ source_name }} -{ - type = {{ database_engine }} - strip_html = 0 - index_html_attrs = - sql_host = {{ database_host }} - sql_user = {{ database_user }} - sql_pass = {{ database_password }} - sql_db = {{ database_name }} - sql_port = {{ database_port }} - log = {{ log_file }} - - sql_query_pre = - sql_query_post = - sql_query = \ -{% for table_name, content_type in tables %} - SELECT {{ field_names|join:", " }}, {{ content_type.id }} as content_type \ - FROM `{{ table_name }}`{% if not loop.last %} UNION \{% endif %} -{% endfor %} -{% if group_columns %} - # ForeignKey's -{% for field_name in group_columns %} sql_attr_uint = {{ field_name }} -{% endfor %}{% endif %} -{% if date_columns %} - # DateField's and DateTimeField's -{% for field_name in date_columns %} sql_attr_timestamp = {{ field_name }} -{% endfor %}{% endif %} -{% if bool_columns %} - # BooleanField's -{% for field_name in bool_columns %} sql_attr_bool = {{ field_name }} -{% endfor %}{% endif %} -{% if float_columns %} - # FloatField's and DecimalField's -{% for field_name in float_columns %} sql_attr_float = {{ field_name }} -{% endfor %}{% endif %} -} \ No newline at end of file diff --git a/apps/djangosphinx/apis/api275/templates/source.conf b/apps/djangosphinx/apis/api275/templates/source.conf deleted file mode 100644 index 9108aeea0..000000000 --- a/apps/djangosphinx/apis/api275/templates/source.conf +++ /dev/null @@ -1,32 +0,0 @@ -source {{ source_name }} -{ - type = {{ database_engine }} - sql_host = {{ database_host }} - sql_user = {{ database_user }} - sql_pass = {{ database_password }} - sql_db = {{ database_name }} - sql_port = {{ database_port }} - - sql_query_pre = - sql_query_post = - sql_query = \ - SELECT {{ field_names|join:", " }} \ - FROM {{ table_name }} - sql_query_info = SELECT * FROM `{{ table_name }}` WHERE `{{ primary_key }}` = $id -{% if group_columns %} - # ForeignKey's -{% for field_name in group_columns %} sql_attr_uint = {{ field_name }} -{% endfor %}{% endif %} -{% if date_columns %} - # DateField's and DateTimeField's -{% for field_name in date_columns %} sql_attr_timestamp = {{ field_name }} -{% endfor %}{% endif %} -{% if bool_columns %} - # BooleanField's -{% for field_name in bool_columns %} sql_attr_bool = {{ field_name }} -{% endfor %}{% endif %} -{% if float_columns %} - # FloatField's and DecimalField's -{% for field_name in float_columns %} sql_attr_float = {{ field_name }} -{% endfor %}{% endif %} -} \ No newline at end of file diff --git a/apps/djangosphinx/apis/current.py b/apps/djangosphinx/apis/current.py deleted file mode 100644 index e85f4ec86..000000000 --- a/apps/djangosphinx/apis/current.py +++ /dev/null @@ -1,11 +0,0 @@ -from djangosphinx.constants import * - -try: - from sphinxapi import * -except ImportError, exc: - name = 'djangosphinx.apis.api%d' % (SPHINX_API_VERSION,) - sphinxapi = __import__(name) - for name in name.split('.')[1:]: - sphinxapi = getattr(sphinxapi, name) - for attr in dir(sphinxapi): - globals()[attr] = getattr(sphinxapi, attr) diff --git a/apps/djangosphinx/constants.py b/apps/djangosphinx/constants.py deleted file mode 100644 index 976d48d41..000000000 --- a/apps/djangosphinx/constants.py +++ /dev/null @@ -1,7 +0,0 @@ -from django.conf import settings - -__all__ = ('SPHINX_API_VERSION',) - -# 0x113 = 1.19 -# 0x107 = 1.17 -SPHINX_API_VERSION = getattr(settings, 'SPHINX_API_VERSION', 0x107) \ No newline at end of file diff --git a/apps/djangosphinx/management/__init__.py b/apps/djangosphinx/management/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/apps/djangosphinx/management/commands/__init__.py b/apps/djangosphinx/management/commands/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/apps/djangosphinx/management/commands/generate_sphinx_config.py b/apps/djangosphinx/management/commands/generate_sphinx_config.py deleted file mode 100644 index 3320ce601..000000000 --- a/apps/djangosphinx/management/commands/generate_sphinx_config.py +++ /dev/null @@ -1,22 +0,0 @@ -from django.core.management.base import AppCommand -from django.db import models - -from djangosphinx.manager import SphinxModelManager - -class Command(AppCommand): - help = "Prints generic configuration for any models which use a standard SphinxSearch manager." - - output_transaction = True - - def handle_app(self, app, **options): - from djangosphinx.utils.config import generate_config_for_model - model_classes = [getattr(app, n) for n in dir(app) if hasattr(getattr(app, n), '_meta')] - found = 0 - for model in model_classes: - indexes = getattr(model, '__sphinx_indexes__', []) - for index in indexes: - found += 1 - print generate_config_for_model(model, index) - if found == 0: - print "Unable to find any models in application which use standard SphinxSearch configuration." - #return u'\n'.join(sql_create(app, self.style)).encode('utf-8') diff --git a/apps/djangosphinx/manager.py b/apps/djangosphinx/manager.py deleted file mode 100644 index 3fbcc9657..000000000 --- a/apps/djangosphinx/manager.py +++ /dev/null @@ -1,648 +0,0 @@ -import select -import socket -import time -import struct -import warnings -import operator -import apis.current as sphinxapi - -try: - import decimal -except ImportError: - from django.utils import _decimal as decimal # for Python 2.3 - -from django.db.models.query import QuerySet, Q -from django.conf import settings - -__all__ = ('SearchError', 'ConnectionError', 'SphinxSearch', 'SphinxRelation') - -from django.contrib.contenttypes.models import ContentType -from datetime import datetime, date - -# server settings -SPHINX_SERVER = getattr(settings, 'SPHINX_SERVER', 'localhost') -SPHINX_PORT = int(getattr(settings, 'SPHINX_PORT', 3312)) - -# These require search API 275 (Sphinx 0.9.8) -SPHINX_RETRIES = int(getattr(settings, 'SPHINX_RETRIES', 0)) -SPHINX_RETRIES_DELAY = int(getattr(settings, 'SPHINX_RETRIES_DELAY', 5)) - -MAX_INT = int(2**31-1) - -class SearchError(Exception): pass -class ConnectionError(Exception): pass - -class SphinxProxy(object): - """ - Acts exactly like a normal instance of an object except that - it will handle any special sphinx attributes in a _sphinx class. - """ - __slots__ = ('__dict__', '__instance__', '_sphinx') - - def __init__(self, instance, attributes): - object.__setattr__(self, '__instance__', instance) - object.__setattr__(self, '_sphinx', attributes) - - def _get_current_object(self): - """ - Return the current object. This is useful if you want the real object - behind the proxy at a time for performance reasons or because you want - to pass the object into a different context. - """ - return self.__instance__ - __current_object = property(_get_current_object) - - def __dict__(self): - try: - return self.__current_object.__dict__ - except RuntimeError: - return AttributeError('__dict__') - __dict__ = property(__dict__) - - def __repr__(self): - try: - obj = self.__current_object - except RuntimeError: - return '<%s unbound>' % self.__class__.__name__ - return repr(obj) - - def __nonzero__(self): - try: - return bool(self.__current_object) - except RuntimeError: - return False - - def __unicode__(self): - try: - return unicode(self.__current_oject) - except RuntimeError: - return repr(self) - - def __dir__(self): - try: - return dir(self.__current_object) - except RuntimeError: - return [] - - def __getattr__(self, name, value=None): - if name == '__members__': - return dir(self.__current_object) - elif name == '_sphinx': - return object.__getattr__(self, '_sphinx', value) - return getattr(self.__current_object, name) - - def __setattr__(self, name, value): - if name == '_sphinx': - return object.__setattr__(self, '_sphinx', value) - return setattr(self.__current_object, name, value) - - def __setitem__(self, key, value): - self.__current_object[key] = value - - def __delitem__(self, key): - del self.__current_object[key] - - def __setslice__(self, i, j, seq): - self.__current_object[i:j] = seq - - def __delslice__(self, i, j): - del self.__current_object[i:j] - - __delattr__ = lambda x, n: delattr(x.__current_object, n) - __str__ = lambda x: str(x.__current_object) - __unicode__ = lambda x: unicode(x.__current_object) - __lt__ = lambda x, o: x.__current_object < o - __le__ = lambda x, o: x.__current_object <= o - __eq__ = lambda x, o: x.__current_object == o - __ne__ = lambda x, o: x.__current_object != o - __gt__ = lambda x, o: x.__current_object > o - __ge__ = lambda x, o: x.__current_object >= o - __cmp__ = lambda x, o: cmp(x.__current_object, o) - __hash__ = lambda x: hash(x.__current_object) - # attributes are currently not callable - # __call__ = lambda x, *a, **kw: x.__current_object(*a, **kw) - __len__ = lambda x: len(x.__current_object) - __getitem__ = lambda x, i: x.__current_object[i] - __iter__ = lambda x: iter(x.__current_object) - __contains__ = lambda x, i: i in x.__current_object - __getslice__ = lambda x, i, j: x.__current_object[i:j] - __add__ = lambda x, o: x.__current_object + o - __sub__ = lambda x, o: x.__current_object - o - __mul__ = lambda x, o: x.__current_object * o - __floordiv__ = lambda x, o: x.__current_object // o - __mod__ = lambda x, o: x.__current_object % o - __divmod__ = lambda x, o: x.__current_object.__divmod__(o) - __pow__ = lambda x, o: x.__current_object ** o - __lshift__ = lambda x, o: x.__current_object << o - __rshift__ = lambda x, o: x.__current_object >> o - __and__ = lambda x, o: x.__current_object & o - __xor__ = lambda x, o: x.__current_object ^ o - __or__ = lambda x, o: x.__current_object | o - __div__ = lambda x, o: x.__current_object.__div__(o) - __truediv__ = lambda x, o: x.__current_object.__truediv__(o) - __neg__ = lambda x: -(x.__current_object) - __pos__ = lambda x: +(x.__current_object) - __abs__ = lambda x: abs(x.__current_object) - __invert__ = lambda x: ~(x.__current_object) - __complex__ = lambda x: complex(x.__current_object) - __int__ = lambda x: int(x.__current_object) - __long__ = lambda x: long(x.__current_object) - __float__ = lambda x: float(x.__current_object) - __oct__ = lambda x: oct(x.__current_object) - __hex__ = lambda x: hex(x.__current_object) - __index__ = lambda x: x.__current_object.__index__() - __coerce__ = lambda x, o: x.__coerce__(x, o) - __enter__ = lambda x: x.__enter__() - __exit__ = lambda x, *a, **kw: x.__exit__(*a, **kw) - -def to_sphinx(value): - "Convert a value into a sphinx query value" - if isinstance(value, date) or isinstance(value, datetime): - return int(time.mktime(value.timetuple())) - elif isinstance(value, decimal.Decimal) or isinstance(value, float): - return float(value) - return int(value) - -class SphinxQuerySet(object): - available_kwargs = ('rankmode', 'mode', 'weights', 'maxmatches') - - def __init__(self, model=None, **kwargs): - self._select_related = False - self._select_related_args = {} - self._select_related_fields = [] - self._filters = {} - self._excludes = {} - self._extra = {} - self._query = '' - self.__metadata = None - self._offset = 0 - self._limit = 20 - - self._groupby = None - self._sort = None - self._weights = [1, 100] - - self._maxmatches = 1000 - self._result_cache = None - self._mode = sphinxapi.SPH_MATCH_ALL - self._rankmode = getattr(sphinxapi, 'SPH_RANK_PROXIMITY_BM25', None) - self._model = model - self._anchor = {} - self.__metadata = {} - - self.set_options(**kwargs) - - if model: - self._index = kwargs.get('index', model._meta.db_table) - else: - self._index = kwargs.get('index') - - def __repr__(self): - if self._result_cache is not None: - return repr(self._get_data()) - else: - return '<%s instance>' % (self.__class__.__name__,) - - def __len__(self): - return len(self._get_data()) - - def __iter__(self): - return iter(self._get_data()) - - def __getitem__(self, k): - if not isinstance(k, (slice, int, long)): - raise TypeError - assert (not isinstance(k, slice) and (k >= 0)) \ - or (isinstance(k, slice) and (k.start is None or k.start >= 0) and (k.stop is None or k.stop >= 0)), \ - "Negative indexing is not supported." - if type(k) == slice: - if self._offset < k.start or k.stop-k.start > self._limit: - self._result_cache = None - else: - if k not in range(self._offset, self._limit+self._offset): - self._result_cache = None - if self._result_cache is None: - if type(k) == slice: - self._offset = k.start - self._limit = k.stop-k.start - return self._get_results() - else: - self._offset = k - self._limit = 1 - return self._get_results()[0] - else: - return self._result_cache[k] - - def set_options(self, **kwargs): - if 'rankmode' in kwargs: - if kwargs.get('rankmode') is None: - kwargs['rankmode'] = sphinxapi.SPH_RANK_NONE - for key in self.available_kwargs: - if key in kwargs: - setattr(self, '_%s' % (key,), kwargs[key]) - - def query(self, string): - return self._clone(_query=unicode(string).encode('utf-8')) - - def group_by(self, attribute, func, groupsort='@group desc'): - return self._clone(_groupby=attribute, _groupfunc=func, _groupsort=groupsort) - - def rank_none(self): - warnings.warn('`rank_none()` is deprecated. Use `set_options(rankmode=None)` instead.', DeprecationWarning) - return self._clone(_rankmode=sphinxapi.SPH_RANK_NONE) - - def mode(self, mode): - warnings.warn('`mode()` is deprecated. Use `set_options(mode='')` instead.', DeprecationWarning) - return self._clone(_mode=mode) - - def weights(self, weights): - warnings.warn('`mode()` is deprecated. Use `set_options(weights=[])` instead.', DeprecationWarning) - return self._clone(_weights=weights) - - def on_index(self, index): - warnings.warn('`mode()` is deprecated. Use `set_options(on_index=foo)` instead.', DeprecationWarning) - return self._clone(_index=index) - - # only works on attributes - def filter(self, **kwargs): - filters = self._filters.copy() - for k,v in kwargs.iteritems(): - if hasattr(v, 'next'): - v = list(v) - elif not (isinstance(v, list) or isinstance(v, tuple)): - v = [v,] - filters.setdefault(k, []).extend(map(to_sphinx, v)) - return self._clone(_filters=filters) - - def geoanchor(self, lat_attr, lng_attr, lat, lng): - assert(sphinxapi.VER_COMMAND_SEARCH >= 0x113, "You must upgrade sphinxapi to version 0.98 to use Geo Anchoring.") - return self._clone(_anchor=(lat_attr, lng_attr, float(lat), float(lng))) - - # this actually does nothing, its just a passthru to - # keep things looking/working generally the same - def all(self): - return self - - # only works on attributes - def exclude(self, **kwargs): - filters = self._excludes.copy() - for k,v in kwargs.iteritems(): - if hasattr(v, 'next'): - v = list(v) - elif not (isinstance(v, list) or isinstance(v, tuple)): - v = [v,] - filters.setdefault(k, []).extend(map(to_sphinx, v)) - return self._clone(_excludes=filters) - - # you cannot order by @weight (it always orders in descending) - # keywords are @id, @weight, @rank, and @relevance - def order_by(self, *args): - sort_by = [] - for arg in args: - sort = 'ASC' - if arg[0] == '-': - arg = arg[1:] - sort = 'DESC' - if arg == 'id': - arg = '@id' - sort_by.append('%s %s' % (arg, sort)) - if sort_by: - return self._clone(_sort=(sphinxapi.SPH_SORT_EXTENDED, ', '.join(sort_by))) - return self - - # pass these thru on the queryset and let django handle it - def select_related(self, *args, **kwargs): - _args = self._select_related_fields[:] - _args.extend(args) - _kwargs = self._select_related_args.copy() - _kwargs.update(kwargs) - - return self._clone( - _select_related=True, - _select_related_fields=_args, - _select_related_args=_kwargs, - ) - - def extra(self, **kwargs): - extra = self._extra.copy() - extra.update(kwargs) - return self._clone(_extra=extra) - - def count(self): - return min(self._sphinx.get('total_found', 0), self._maxmatches) - - def reset(self): - return self.__class__(self._model, self._index) - - # Internal methods - def _clone(self, **kwargs): - # Clones the queryset passing any changed args - c = self.__class__() - c.__dict__.update(self.__dict__) - c.__dict__.update(kwargs) - return c - - def _sphinx(self): - if not self.__metadata: - # We have to force execution if this is accessed beforehand - self._get_data() - return self.__metadata - _sphinx = property(_sphinx) - - def _get_data(self): - assert(self._index) - # need to find a way to make this work yet - if self._result_cache is None: - self._result_cache = list(self._get_results()) - return self._result_cache - - def _get_sphinx_results(self): - assert(self._offset + self._limit <= self._maxmatches) - - client = sphinxapi.SphinxClient() - client.SetServer(SPHINX_SERVER, SPHINX_PORT) - - if self._sort: - client.SetSortMode(*self._sort) - - if isinstance(self._weights, dict): - client.SetFieldWeights(self._weights) - else: - # assume its a list - client.SetWeights(map(int, self._weights)) - - client.SetMatchMode(self._mode) - - # 0.97 requires you to reset it - if hasattr(client, 'ResetFilters'): - client.ResetFilters() - if hasattr(client, 'ResetGroupBy'): - client.ResetGroupBy() - - def _handle_filters(filter_list, exclude=False): - for name, values in filter_list.iteritems(): - parts = len(name.split('__')) - if parts > 2: - raise NotImplementedError, 'Related object and/or multiple field lookups not supported' - elif parts == 2: - # The float handling for __gt and __lt is kind of ugly.. - name, lookup = name.split('__', 1) - is_float = isinstance(values[0], float) - if lookup == 'gt': - value = is_float and values[0] + (1.0/MAX_INT) or values[0] - 1 - args = (name, value, MAX_INT, exclude) - elif lookup == 'gte': - args = (name, values[0], MAX_INT, exclude) - elif lookup == 'lt': - value = is_float and values[0] - (1.0/MAX_INT) or values[0] - 1 - args = (name, -MAX_INT, value, exclude) - elif lookup == 'lte': - args = (name, -MAX_INT, values[0], exclude) - elif lookup == 'range': - args = (name, values[0], values[1], exclude) - else: - raise NotImplementedError, 'Related object and/or field lookup "%s" not supported' % lookup - if is_float: - client.SetFilterFloatRange(*args) - elif not exclude and self._model and name == self._model._meta.pk.column: - client.SetIDRange(*args[1:3]) - else: - client.SetFilterRange(*args) - - else: - client.SetFilter(name, values, exclude) - - # Include filters - if self._filters: - _handle_filters(self._filters) - - # Exclude filters - if self._excludes: - _handle_filters(self._excludes, True) - - if self._groupby: - client.SetGroupBy(self._groupby, self._groupfunc, self._groupsort) - - if self._anchor: - client.SetGeoAnchor(*self._anchor) - - if self._rankmode: - client.SetRankingMode(self._rankmode) - - if not self._limit > 0: - # Fix for Sphinx throwing an assertion error when you pass it an empty limiter - return [] - - - if sphinxapi.VER_COMMAND_SEARCH >= 0x113: - client.SetRetries(SPHINX_RETRIES, SPHINX_RETRIES_DELAY) - - client.SetLimits(int(self._offset), int(self._limit), int(self._maxmatches)) - - results = client.Query(self._query, self._index) - - # The Sphinx API doesn't raise exceptions - if not results: - if client.GetLastError(): - raise SearchError, client.GetLastError() - elif client.GetLastWarning(): - raise SearchError, client.GetLastWarning() - return results - - def _get_results(self): - results = self._get_sphinx_results() - if not results or not results['matches']: - results = [] - elif self._model: - queryset = self._model.objects.all() - if self._select_related: - queryset = queryset.select_related(*self._select_related_fields, **self._select_related_args) - if self._extra: - queryset = queryset.extra(**self._extra) - pks = getattr(self._model._meta, 'pks', None) - if pks is None or len(pks) == 1: - queryset = queryset.filter(pk__in=[r['id'] for r in results['matches']]) - queryset = dict([(o.pk, o) for o in queryset]) - else: - for r in results['matches']: - r['id'] = ', '.join([unicode(r['attrs'][p.column]) for p in pks]) - q = reduce(operator.or_, [reduce(operator.and_, [Q(**{p.name: r['attrs'][p.column]}) for p in pks]) for r in results['matches']]) - if q: - queryset = queryset.filter(q) - queryset = dict([(', '.join([unicode(p) for p in o.pks]), o) for o in queryset]) - else: - queryset = None - - if queryset: - self.__metadata = { - 'total': results['total'], - 'total_found': results['total_found'], - 'words': results['words'], - } - results = [SphinxProxy(queryset[r['id']], r) for r in results['matches'] if r['id'] in queryset] - else: - results = [] - else: - "We did a query without a model, lets see if there's a content_type" - results['attrs'] = dict(results['attrs']) - if 'content_type' in results['attrs']: - "Now we have to do one query per content_type" - objcache = {} - for r in results['matches']: - ct = r['attrs']['content_type'] - if ct not in objcache: - objcache[ct] = {} - objcache[ct][r['id']] = None - for ct in objcache: - queryset = ContentType.objects.get(pk=ct).model_class().objects.filter(pk__in=objcache[ct]) - for o in queryset: - objcache[ct][o.id] = o - results = [objcache[r['attrs']['content_type']][r['id']] for r in results['matches']] - else: - results = results['matches'] - self._result_cache = results - return results - -class SphinxModelManager(object): - def __init__(self, model, **kwargs): - self._model = model - self._index = kwargs.pop('index', model._meta.db_table) - self._kwargs = kwargs - - def _get_query_set(self): - return SphinxQuerySet(self._model, index=self._index, **self._kwargs) - - def get_index(self): - return self._index - - def all(self): - return self._get_query_set() - - def filter(self, **kwargs): - return self._get_query_set().filter(**kwargs) - - def query(self, *args, **kwargs): - return self._get_query_set().query(*args, **kwargs) - - def on_index(self, *args, **kwargs): - return self._get_query_set().on_index(*args, **kwargs) - - def geoanchor(self, *args, **kwargs): - return self._get_query_set().geoanchor(*args, **kwargs) - -class SphinxInstanceManager(object): - """Collection of tools useful for objects which are in a Sphinx index.""" - def __init__(self, instance, index): - self._instance = instance - self._index = index - - def update(self, **kwargs): - assert(sphinxapi.VER_COMMAND_SEARCH >= 0x113, "You must upgrade sphinxapi to version 0.98 to use Geo Anchoring.") - sphinxapi.UpdateAttributes(index, kwargs.keys(), dict(self.instance.pk, map(to_sphinx, kwargs.values()))) - - -class SphinxSearch(object): - def __init__(self, index=None, **kwargs): - self._kwargs = kwargs - self._sphinx = None - self._index = index - self.model = None - - def __call__(self, index, **kwargs): - warnings.warn('For non-model searches use a SphinxQuerySet instance.', DeprecationWarning) - return SphinxQuerySet(index=index, **kwargs) - - def __get__(self, instance, model, **kwargs): - if instance: - return SphinxInstanceManager(instance, index) - return self._sphinx - - def contribute_to_class(self, model, name, **kwargs): - if self._index is None: - self._index = model._meta.db_table - self._sphinx = SphinxModelManager(model, index=self._index, **self._kwargs) - self.model = model - if getattr(model, '__sphinx_indexes__', None) is None: - setattr(model, '__sphinx_indexes__', [self._index]) - else: - model.__sphinx_indexes__.append(self._index) - setattr(model, name, self._sphinx) - -class SphinxRelationProxy(SphinxProxy): - def count(self): - return min(self._sphinx['attrs']['@count'], self._maxmatches) - -class SphinxRelation(SphinxSearch): - """ - Adds "related model" support to django-sphinx -- - http://code.google.com/p/django-sphinx/ - http://www.sphinxsearch.com/ - - Example -- - - class MySearch(SphinxSearch): - myrelatedobject = SphinxRelation(RelatedModel) - anotherone = SphinxRelation(AnotherModel) - ... - - class MyModel(models.Model): - search = MySearch('index') - - """ - def __init__(self, model=None, attr=None, sort='@count desc', **kwargs): - if model: - self._related_model = model - self._related_attr = attr or model.__name__.lower() - self._related_sort = sort - super(SphinxRelation, self).__init__(**kwargs) - - def __get__(self, instance, instance_model, **kwargs): - self._mode = instance._mode - self._rankmode = instance._rankmode - self._index = instance._index - self._query = instance._query - self._filters = instance._filters - self._excludes = instance._excludes - self._model = self._related_model - self._groupby = self._related_attr - self._groupsort = self._related_sort - self._groupfunc = sphinxapi.SPH_GROUPBY_ATTR - return self - - def _get_results(self): - results = self._get_sphinx_results() - if not results: return [] - if results['matches'] and self._model: - ids = [] - for r in results['matches']: - value = r['attrs']['@groupby'] - if isinstance(value, (int, long)): - ids.append(value) - else: - ids.extend() - qs = self._model.objects.filter(pk__in=set(ids)) - if self._select_related: - qs = qs.select_related(*self._select_related_fields, - **self._select_related_args) - if self._extra: - qs = qs.extra(**self._extra) - queryset = dict([(o.id, o) for o in qs]) - self.__metadata = { - 'total': results['total'], - 'total_found': results['total_found'], - 'words': results['words'], - } - results = [ SphinxRelationProxy(queryset[k['attrs']['@groupby']], k) \ - for k in results['matches'] \ - if k['attrs']['@groupby'] in queryset ] - else: - results = [] - self._result_cache = results - return results - - def _sphinx(self): - if not self.__metadata: - # We have to force execution if this is accessed beforehand - self._get_data() - return self.__metadata - _sphinx = property(_sphinx) \ No newline at end of file diff --git a/apps/djangosphinx/templates/index-multiple.conf b/apps/djangosphinx/templates/index-multiple.conf deleted file mode 100644 index 3516e4619..000000000 --- a/apps/djangosphinx/templates/index-multiple.conf +++ /dev/null @@ -1,12 +0,0 @@ -index {{ index_name }} -{ - source = {{ source_name }} - path = /var/data/{{ index_name }} - docinfo = extern - morphology = none - stopwords = - min_word_len = 2 - charset_type = sbcs - min_prefix_len = 0 - min_infix_len = 0 -} \ No newline at end of file diff --git a/apps/djangosphinx/templates/index.conf b/apps/djangosphinx/templates/index.conf deleted file mode 100644 index 3516e4619..000000000 --- a/apps/djangosphinx/templates/index.conf +++ /dev/null @@ -1,12 +0,0 @@ -index {{ index_name }} -{ - source = {{ source_name }} - path = /var/data/{{ index_name }} - docinfo = extern - morphology = none - stopwords = - min_word_len = 2 - charset_type = sbcs - min_prefix_len = 0 - min_infix_len = 0 -} \ No newline at end of file diff --git a/apps/djangosphinx/templates/source-multiple.conf b/apps/djangosphinx/templates/source-multiple.conf deleted file mode 100644 index 6f525dcdb..000000000 --- a/apps/djangosphinx/templates/source-multiple.conf +++ /dev/null @@ -1,31 +0,0 @@ -source {{ source_name }} -{ - type = {{ database_engine }} - html_strip = 0 - html_index_attrs = - sql_host = {{ database_host }} - sql_user = {{ database_user }} - sql_pass = {{ database_password }} - sql_db = {{ database_name }} - sql_port = {{ database_port }} - - sql_query_pre = - sql_query_post = - sql_query = \ -{% for table_name, content_type in tables %} - SELECT {{ field_names|join:", " }}, {{ content_type.id }} as content_type \ - FROM `{{ table_name }}`{% if not loop.last %} UNION \{% endif %} -{% endfor %} -{% if group_columns %} - # ForeignKey's -{% for field_name in group_columns %} sql_group_column = {{ field_name }} -{% endfor %}{% endif %} -{% if bool_columns %} - # BooleanField's -{% for field_name in bool_columns %} sql_group_column = {{ field_name }} -{% endfor %}{% endif %} -{% if date_columns %} - # DateField's and DateTimeField's -{% for field_name in date_columns %} sql_date_column = {{ field_name }} -{% endfor %}{% endif %} -} \ No newline at end of file diff --git a/apps/djangosphinx/templates/source.conf b/apps/djangosphinx/templates/source.conf deleted file mode 100644 index a991f6415..000000000 --- a/apps/djangosphinx/templates/source.conf +++ /dev/null @@ -1,31 +0,0 @@ -source {{ source_name }} -{ - type = {{ database_engine }} - strip_html = 0 - index_html_attrs = - sql_host = {{ database_host }} - sql_user = {{ database_user }} - sql_pass = {{ database_password }} - sql_db = {{ database_name }} - sql_port = {{ database_port }} - log = {{ log_file }} - - sql_query_pre = - sql_query_post = - sql_query = \ - SELECT {{ field_names|join:", " }} \ - FROM {{ table_name }} - sql_query_info = SELECT * FROM `{{ table_name }}` WHERE `{{ primary_key }}` = $id -{% if group_columns %} - # ForeignKey's -{% for field_name in group_columns %} sql_group_column = {{ field_name }} -{% endfor %}{% endif %} -{% if bool_columns %} - # BooleanField's -{% for field_name in bool_columns %} sql_group_column = {{ field_name }} -{% endfor %}{% endif %} -{% if date_columns %} - # DateField's and DateTimeField's -{% for field_name in date_columns %} sql_date_column = {{ field_name }} -{% endfor %}{% endif %} -} \ No newline at end of file diff --git a/apps/djangosphinx/utils/__init__.py b/apps/djangosphinx/utils/__init__.py deleted file mode 100644 index 635cf5678..000000000 --- a/apps/djangosphinx/utils/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from config import * \ No newline at end of file diff --git a/apps/djangosphinx/utils/config.py b/apps/djangosphinx/utils/config.py deleted file mode 100644 index d73abd141..000000000 --- a/apps/djangosphinx/utils/config.py +++ /dev/null @@ -1,182 +0,0 @@ -from django.conf import settings -from django.template import Template, Context - -from django.db import models -from django.contrib.contenttypes.models import ContentType - -import os.path - -import djangosphinx.apis.current as sphinxapi - -__all__ = ('generate_config_for_model', 'generate_config_for_models') - -def _get_database_engine(): - if settings.DATABASE_ENGINE == 'mysql': - return settings.DATABASE_ENGINE - elif settings.DATABASE_ENGINE.startswith('postgresql'): - return 'pgsql' - raise ValueError, "Only MySQL and PostgreSQL engines are supported by Sphinx." - -def _get_template(name): - paths = ( - os.path.join(os.path.dirname(__file__), '../apis/api%s/templates/' % (sphinxapi.VER_COMMAND_SEARCH,)), - os.path.join(os.path.dirname(__file__), '../templates/'), - ) - for path in paths: - try: - fp = open(path + name, 'r') - except IOError: - continue - try: - t = Template(fp.read()) - return t - finally: - fp.close() - raise ValueError, "Template matching name does not exist: %s." % (name,) - -def _is_sourcable_field(field): - # We can use float fields in 0.98 - if sphinxapi.VER_COMMAND_SEARCH >= 0x113 and (isinstance(field, models.FloatField) or isinstance(field, models.DecimalField)): - return True - if isinstance(field, models.ForeignKey): - return True - if isinstance(field, models.IntegerField) and field.choices: - return True - if not field.rel: - return True - return False - -# No trailing slashes on paths -DEFAULT_SPHINX_PARAMS = { - 'database_engine': _get_database_engine(), - 'database_host': settings.DATABASE_HOST, - 'database_port': settings.DATABASE_PORT, - 'database_name': settings.DATABASE_NAME, - 'database_user': settings.DATABASE_USER, - 'database_password': settings.DATABASE_PASSWORD, - 'log_file': '/var/log/sphinx/searchd.log', - 'data_path': '/var/data', -} - -# Generate for single models - -def generate_config_for_model(model_class, index=None, sphinx_params={}): - """ - Generates a sample configuration including an index and source for - the given model which includes all attributes and date fields. - """ - return generate_source_for_model(model_class, index, sphinx_params) + "\n\n" + generate_index_for_model(model_class, index, sphinx_params) - -def generate_index_for_model(model_class, index=None, sphinx_params={}): - """Generates a source configmration for a model.""" - t = _get_template('index.conf') - - if index is None: - index = model_class._meta.db_table - - params = DEFAULT_SPHINX_PARAMS - params.update(sphinx_params) - params.update({ - 'index_name': index, - 'source_name': index, - }) - - c = Context(params) - - return t.render(c) - - -def generate_source_for_model(model_class, index=None, sphinx_params={}): - """Generates a source configmration for a model.""" - t = _get_template('source.conf') - - valid_fields = [f for f in model_class._meta.fields if _is_sourcable_field(f)] - - # Hackish solution for a bug I've introduced into composite pks branch - pk = model_class._meta.get_field(model_class._meta.pk.name) - - if pk not in valid_fields: - valid_fields.insert(0, model_class._meta.pk) - - if index is None: - index = model_class._meta.db_table - - params = DEFAULT_SPHINX_PARAMS - params.update(sphinx_params) - params.update({ - 'source_name': index, - 'index_name': index, - 'table_name': index, - 'primary_key': pk.column, - 'field_names': [f.column for f in valid_fields], - 'group_columns': [f.column for f in valid_fields if (f.rel or isinstance(f, models.BooleanField) or isinstance(f, models.IntegerField)) and not f.primary_key], - 'date_columns': [f.column for f in valid_fields if isinstance(f, models.DateTimeField) or isinstance(f, models.DateField)], - 'float_columns': [f.column for f in valid_fields if isinstance(f, models.FloatField) or isinstance(f, models.DecimalField)], - }) - - c = Context(params) - - return t.render(c) - -# Generate for multiple models (search UNIONs) - -def generate_config_for_models(model_classes, index=None, sphinx_params={}): - """ - Generates a sample configuration including an index and source for - the given model which includes all attributes and date fields. - """ - return generate_source_for_models(model_classes, index, sphinx_params) + "\n\n" + generate_index_for_models(model_classes, index, sphinx_params) - -def generate_index_for_models(model_classes, index=None, sphinx_params={}): - """Generates a source configmration for a model.""" - t = _get_template('index-multiple.conf') - - if index is None: - index = '_'.join(m._meta.db_table for m in model_classes) - - params = DEFAULT_SPHINX_PARAMS - params.update(sphinx_params) - params.update({ - 'index_name': index, - 'source_name': index, - }) - - c = Context(params) - - return t.render(c) - -def generate_source_for_models(model_classes, index=None, sphinx_params={}): - """Generates a source configmration for a model.""" - t = _get_template('source-multiple.conf') - - # We need to loop through each model and find only the fields that exist *exactly* the - # same across models. - def _the_tuple(f): - return (f.__class__, f.column, getattr(f.rel, 'to', None), f.choices) - - valid_fields = [_the_tuple(f) for f in model_classes[0]._meta.fields if _is_sourcable_field(f)] - for model_class in model_classes[1:]: - valid_fields = [_the_tuple(f) for f in model_class._meta.fields if _the_tuple(f) in valid_fields] - - tables = [] - for model_class in model_classes: - tables.append((model_class._meta.db_table, ContentType.objects.get_for_model(model_class))) - - if index is None: - index = '_'.join(m._meta.db_table for m in model_classes) - - params = DEFAULT_SPHINX_PARAMS - params.update(sphinx_params) - params.update({ - 'tables': tables, - 'source_name': index, - 'index_name': index, - 'field_names': [f[1] for f in valid_fields], - 'group_columns': [f[1] for f in valid_fields if f[2] or isinstance(f[0], models.BooleanField) or isinstance(f[0], models.IntegerField)], - 'date_columns': [f[1] for f in valid_fields if issubclass(f[0], models.DateTimeField) or issubclass(f[0], models.DateField)], - 'float_columns': [f[1] for f in valid_fields if isinstance(f[0], models.FloatField) or isinstance(f[0], models.DecimalField)], - }) - - c = Context(params) - - return t.render(c) \ No newline at end of file diff --git a/wolnelektury/settings.py b/wolnelektury/settings.py index 2b9d542d0..bdbc9923b 100644 --- a/wolnelektury/settings.py +++ b/wolnelektury/settings.py @@ -96,7 +96,6 @@ INSTALLED_APPS = [ # external 'south', - 'djangosphinx', 'newtagging', 'pagination', 'chunks', @@ -136,9 +135,6 @@ COMPRESS_JS = { COMPRESS_CSS_FILTERS = None -SPHINX_SERVER = 'localhost' -SPHINX_PORT = 3312 - # Load localsettings, if they exist try: diff --git a/wolnelektury/sphinx.conf b/wolnelektury/sphinx.conf deleted file mode 100644 index 80c411286..000000000 --- a/wolnelektury/sphinx.conf +++ /dev/null @@ -1,127 +0,0 @@ -searchd { - port = 3312 - log = /Users/zuber/Projekty/wolnelektury.pl-sphinx/wolnelektury/sphinx/searchd.log - query_log = /Users/zuber/Projekty/wolnelektury.pl-sphinx/wolnelektury/sphinx/query.log - read_timeout = 5 - max_children = 30 - pid_file = /Users/zuber/Projekty/wolnelektury.pl-sphinx/wolnelektury/sphinx/searchd.pid - max_matches = 1000 - seamless_rotate = 1 - preopen_indexes = 0 - unlink_old = 1 -} - -source catalogue_book -{ - type = mysql - strip_html = 0 - index_html_attrs = - sql_host = - sql_user = root - sql_pass = - sql_db = wolnelektury - sql_port = - - sql_query_pre = - sql_query_post = - sql_query = \ - SELECT id, title, slug, description, created_at, _short_html, parent_number, xml_file, html_file, pdf_file, odt_file, txt_file, parent_id \ - FROM catalogue_book - sql_query_info = SELECT * FROM `catalogue_book` WHERE `id` = $id - - # ForeignKey's - sql_group_column = parent_number - sql_group_column = parent_id - - - - # DateField's and DateTimeField's - sql_date_column = created_at - -} - -index catalogue_book -{ - source = catalogue_book - path = /Users/zuber/Projekty/wolnelektury.pl-sphinx/wolnelektury/sphinx/catalogue_book - docinfo = extern - morphology = none - stopwords = - min_word_len = 2 - charset_type = sbcs - min_prefix_len = 0 - min_infix_len = 0 -} - - -source catalogue_fragment -{ - type = mysql - strip_html = 0 - index_html_attrs = - sql_host = - sql_user = root - sql_pass = - sql_db = wolnelektury - sql_port = - - sql_query_pre = - sql_query_post = - sql_query = \ - SELECT id, text, short_text, _short_html, anchor, book_id \ - FROM catalogue_fragment - sql_query_info = SELECT * FROM `catalogue_fragment` WHERE `id` = $id - - # ForeignKey's - sql_group_column = book_id -} - -index catalogue_fragment -{ - source = catalogue_fragment - path = /Users/zuber/Projekty/wolnelektury.pl-sphinx/wolnelektury/sphinx/catalogue_fragment - docinfo = extern - morphology = none - stopwords = - min_word_len = 2 - charset_type = sbcs - min_prefix_len = 0 - min_infix_len = 0 -} - -source catalogue_tag -{ - type = mysql - strip_html = 0 - index_html_attrs = - sql_host = - sql_user = root - sql_pass = - sql_db = wolnelektury - sql_port = - - sql_query_pre = - sql_query_post = - sql_query = \ - SELECT id, name, slug, sort_key, category, description, main_page, user_id, book_count \ - FROM catalogue_tag - sql_query_info = SELECT * FROM `catalogue_tag` WHERE `id` = $id - - # ForeignKey's - sql_group_column = main_page - sql_group_column = user_id - sql_group_column = book_count -} - -index catalogue_tag -{ - source = catalogue_tag - path = /Users/zuber/Projekty/wolnelektury.pl-sphinx/wolnelektury/sphinx/catalogue_tag - docinfo = extern - morphology = none - stopwords = - min_word_len = 2 - charset_type = sbcs - min_prefix_len = 0 - min_infix_len = 0 -}