2 # $Id: sphinxapi.py 1216 2008-03-14 23:25:39Z shodan $
4 # Python version of Sphinx searchd client (Python API)
6 # Copyright (c) 2006-2008, Andrew Aksyonoff
7 # Copyright (c) 2006, Mike Osadnik
10 # This program is free software; you can redistribute it and/or modify
11 # it under the terms of the GNU General Public License. You should have
12 # received a copy of the GPL license along with this program; if you
13 # did not, you can find it at http://www.gnu.org/
# known searchd commands
SEARCHD_COMMAND_SEARCH		= 0
SEARCHD_COMMAND_EXCERPT		= 1
SEARCHD_COMMAND_UPDATE		= 2
SEARCHD_COMMAND_KEYWORDS	= 3

# current client-side command implementation versions
VER_COMMAND_SEARCH		= 0x113
VER_COMMAND_EXCERPT		= 0x100
VER_COMMAND_UPDATE		= 0x101
VER_COMMAND_KEYWORDS	= 0x100

# known searchd status codes
SEARCHD_OK				= 0
SEARCHD_ERROR			= 1
SEARCHD_RETRY			= 2
SEARCHD_WARNING			= 3

# known match modes
SPH_MATCH_ALL			= 0
SPH_MATCH_ANY			= 1
SPH_MATCH_PHRASE		= 2
SPH_MATCH_BOOLEAN		= 3
SPH_MATCH_EXTENDED		= 4
SPH_MATCH_FULLSCAN		= 5
SPH_MATCH_EXTENDED2		= 6

# known ranking modes (extended2 mode only)
SPH_RANK_PROXIMITY_BM25	= 0 # default mode, phrase proximity major factor and BM25 minor one
SPH_RANK_BM25			= 1 # statistical mode, BM25 ranking only (faster but worse quality)
SPH_RANK_NONE			= 2 # no ranking, all matches get a weight of 1
SPH_RANK_WORDCOUNT		= 3 # simple word-count weighting, rank is a weighted sum of per-field keyword occurence counts

# known sort modes
SPH_SORT_RELEVANCE		= 0
SPH_SORT_ATTR_DESC		= 1
SPH_SORT_ATTR_ASC		= 2
SPH_SORT_TIME_SEGMENTS	= 3
SPH_SORT_EXTENDED		= 4
SPH_SORT_EXPR			= 5

# known filter types
SPH_FILTER_VALUES		= 0
SPH_FILTER_RANGE		= 1
SPH_FILTER_FLOATRANGE	= 2

# known attribute types
SPH_ATTR_INTEGER		= 1
SPH_ATTR_TIMESTAMP		= 2
SPH_ATTR_ORDINAL		= 3
SPH_ATTR_BOOL			= 4
SPH_ATTR_FLOAT			= 5
# high bit flags a multi-value attribute (MVA); 0x40000000 (no py2 'L' suffix needed)
SPH_ATTR_MULTI			= 0x40000000

# known grouping functions
SPH_GROUPBY_DAY			= 0
SPH_GROUPBY_WEEK		= 1
SPH_GROUPBY_MONTH		= 2
SPH_GROUPBY_YEAR		= 3
SPH_GROUPBY_ATTR		= 4
SPH_GROUPBY_ATTRPAIR	= 5
def __init__ (self):
    """
    Create a new client object, and fill defaults.
    """
    self._host			= 'localhost'			# searchd host (default is "localhost")
    self._port			= 3312					# searchd port (default is 3312)
    self._offset		= 0						# how much records to seek from result-set start (default is 0)
    self._limit			= 20					# how much records to return from result-set starting at offset (default is 20)
    self._mode			= SPH_MATCH_ALL			# query matching mode (default is SPH_MATCH_ALL)
    self._weights		= []					# per-field weights (default is 1 for all fields)
    self._sort			= SPH_SORT_RELEVANCE	# match sorting mode (default is SPH_SORT_RELEVANCE)
    self._sortby		= ''					# attribute to sort by (default is "")
    self._min_id		= 0						# min ID to match (default is 0)
    self._max_id		= 0xFFFFFFFF			# max ID to match (default is UINT_MAX)
    self._filters		= []					# search filters
    self._groupby		= ''					# group-by attribute name
    self._groupfunc		= SPH_GROUPBY_DAY		# group-by function (to pre-process group-by attribute value with)
    self._groupsort		= '@group desc'			# group-by sorting clause (to sort groups in result set with)
    self._groupdistinct	= ''					# group-by count-distinct attribute
    self._maxmatches	= 1000					# max matches to retrieve
    self._cutoff		= 0						# cutoff to stop searching at
    self._retrycount	= 0						# distributed retry count
    self._retrydelay	= 0						# distributed retry delay
    self._anchor		= {}					# geographical anchor point
    self._indexweights	= {}					# per-index weights
    self._ranker		= SPH_RANK_PROXIMITY_BM25	# ranking mode
    self._maxquerytime	= 0						# max query time, milliseconds (default is 0, do not limit)
    self._fieldweights	= {}					# per-field-name weights
    self._error			= ''					# last error message
    self._warning		= ''					# last warning message
    self._reqs			= []					# requests array for multi-query
def GetLastError (self):
    """
    Get last error message (string).
    """
    return self._error
def GetLastWarning (self):
    """
    Get last warning message (string).
    """
    return self._warning
def SetServer (self, host, port):
    """
    Set searchd server host and port.
    """
    assert(isinstance(host, str))
    assert(isinstance(port, int))
    self._host = host
    self._port = port
def _Connect (self):
    """
    INTERNAL METHOD, DO NOT CALL. Connects to searchd server.

    Returns a connected socket on success, or 0 on failure (with
    self._error set).
    """
    sock = None
    try:
        sock = socket.socket ( socket.AF_INET, socket.SOCK_STREAM )
        sock.connect ( ( self._host, self._port ) )
    except socket.error as msg:
        # guard: sock stays None if socket() itself failed
        if sock:
            sock.close()
        self._error = 'connection to %s:%s failed (%s)' % ( self._host, self._port, msg )
        return 0

    # check the server's protocol version handshake.
    # NOTE: original code compared the raw unpack() tuple against 1, which
    # could never trigger; take element [0] so the check actually works.
    v = unpack('>L', sock.recv(4))[0]
    if v<1:
        sock.close()
        self._error = 'expected searchd protocol version, got %s' % v
        return 0

    # all ok, send my version
    sock.send(pack('>L', 1))
    return sock
def _GetResponse (self, sock, client_ver):
    """
    INTERNAL METHOD, DO NOT CALL. Gets and checks response packet from searchd server.

    Reads the 8-byte header (status, version, body length), then the body;
    closes the socket. Returns the body (sans warning prefix, if any) on
    success, or None on error (with self._error set).
    """
    (status, ver, length) = unpack('>2HL', sock.recv(8))
    response = ''
    left = length
    # recv() may return short reads; loop until the full body arrives or EOF
    while left>0:
        chunk = sock.recv(left)
        if chunk:
            response += chunk
            left -= len(chunk)
        else:
            break
    sock.close()

    # check response
    read = len(response)
    if not response or read!=length:
        if length:
            self._error = 'failed to read searchd response (status=%s, ver=%s, len=%s, read=%s)' \
                % (status, ver, length, read)
        else:
            self._error = 'received zero-sized searchd response'
        return None

    # check status; warning payload is prefixed by a 4-byte message length
    if status==SEARCHD_WARNING:
        wend = 4 + unpack ( '>L', response[0:4] )[0]
        self._warning = response[4:wend]
        return response[wend:]

    if status==SEARCHD_ERROR:
        self._error = 'searchd error: '+response[4:]
        return None

    if status==SEARCHD_RETRY:
        self._error = 'temporary searchd error: '+response[4:]
        return None

    if status!=SEARCHD_OK:
        self._error = 'unknown status code %d' % status
        return None

    # server may speak an older command version; warn but proceed
    if ver<client_ver:
        self._warning = 'searchd command v.%d.%d older than client\'s v.%d.%d, some options might not work' \
            % (ver>>8, ver&0xff, client_ver>>8, client_ver&0xff)

    return response
def SetLimits (self, offset, limit, maxmatches=0, cutoff=0):
    """
    Set offset and count into result set, and optionally set max-matches and cutoff limits.
    """
    assert(isinstance(offset, int) and offset>=0)
    assert(isinstance(limit, int) and limit>0)
    assert(maxmatches>=0)
    self._offset = offset
    self._limit = limit
    # 0 means "keep current value" for maxmatches
    if maxmatches>0:
        self._maxmatches = maxmatches
    if cutoff>=0:
        self._cutoff = cutoff
def SetMaxQueryTime (self, maxquerytime):
    """
    Set maximum query time, in milliseconds, per-index. 0 means 'do not limit'.
    """
    # the docstring promises that 0 disables the limit, but the original
    # assert demanded maxquerytime>0; relax to >=0 so 0 is accepted
    assert(isinstance(maxquerytime,int) and maxquerytime>=0)
    self._maxquerytime = maxquerytime
def SetMatchMode (self, mode):
    """
    Set matching mode.
    """
    assert(mode in [SPH_MATCH_ALL, SPH_MATCH_ANY, SPH_MATCH_PHRASE, SPH_MATCH_BOOLEAN, SPH_MATCH_EXTENDED, SPH_MATCH_FULLSCAN, SPH_MATCH_EXTENDED2])
    self._mode = mode
def SetRankingMode (self, ranker):
    """
    Set ranking mode (only used in SPH_MATCH_EXTENDED2 matching mode).
    """
    assert(ranker in [SPH_RANK_PROXIMITY_BM25, SPH_RANK_BM25, SPH_RANK_NONE, SPH_RANK_WORDCOUNT])
    self._ranker = ranker
def SetSortMode ( self, mode, clause='' ):
    """
    Set sorting mode; 'clause' is the sort-by attribute/expression string.
    """
    assert ( mode in [SPH_SORT_RELEVANCE, SPH_SORT_ATTR_DESC, SPH_SORT_ATTR_ASC, SPH_SORT_TIME_SEGMENTS, SPH_SORT_EXTENDED, SPH_SORT_EXPR] )
    assert ( isinstance ( clause, str ) )
    self._sort = mode
    self._sortby = clause
def SetWeights (self, weights):
    """
    Set per-field weights.
    WARNING, DEPRECATED; do not use it! use SetFieldWeights() instead
    """
    assert(isinstance(weights, list))
    for w in weights:
        assert(isinstance(w, int))
    self._weights = weights
def SetFieldWeights (self, weights):
    """
    Bind per-field weights by name; expects (name,field_weight) dictionary as argument.
    """
    assert(isinstance(weights,dict))
    for key,val in weights.items():
        assert(isinstance(key,str))
        assert(isinstance(val,int))
    self._fieldweights = weights
def SetIndexWeights (self, weights):
    """
    Bind per-index weights by name; expects (name,index_weight) dictionary as argument.
    """
    assert(isinstance(weights,dict))
    for key,val in weights.items():
        assert(isinstance(key,str))
        assert(isinstance(val,int))
    self._indexweights = weights
def SetIDRange (self, minid, maxid):
    """
    Set IDs range to match.
    Only match records if document ID is between minid and maxid (inclusive).
    """
    assert(isinstance(minid, int))
    assert(isinstance(maxid, int))
    self._min_id = minid
    self._max_id = maxid
def SetFilter ( self, attribute, values, exclude=0 ):
    """
    Set values set filter.
    Only match records where 'attribute' value is in given 'values' set.
    """
    assert(isinstance(attribute, str))
    assert(isinstance(values, list))
    for value in values:
        assert(isinstance(value, int))
    self._filters.append ( { 'type':SPH_FILTER_VALUES, 'attr':attribute, 'exclude':exclude, 'values':values } )
def SetFilterRange (self, attribute, min_, max_, exclude=0 ):
    """
    Set integer range filter.
    Only match records if 'attribute' value is between 'min_' and 'max_' (inclusive).
    """
    assert(isinstance(attribute, str))
    assert(isinstance(min_, int))
    assert(isinstance(max_, int))
    assert(min_<=max_)
    self._filters.append ( { 'type':SPH_FILTER_RANGE, 'attr':attribute, 'exclude':exclude, 'min':min_, 'max':max_ } )
def SetFilterFloatRange (self, attribute, min_, max_, exclude=0 ):
    """
    Set float range filter.
    Only match records if 'attribute' value is between 'min_' and 'max_' (inclusive).
    """
    assert(isinstance(attribute,str))
    assert(isinstance(min_,float))
    assert(isinstance(max_,float))
    assert(min_ <= max_)
    self._filters.append ( {'type':SPH_FILTER_FLOATRANGE, 'attr':attribute, 'exclude':exclude, 'min':min_, 'max':max_} )
def SetGeoAnchor (self, attrlat, attrlong, latitude, longitude):
    """
    Set geographical anchor point; 'attrlat'/'attrlong' name the latitude and
    longitude attributes, 'latitude'/'longitude' give the anchor (in radians).
    """
    assert(isinstance(attrlat,str))
    assert(isinstance(attrlong,str))
    assert(isinstance(latitude,float))
    assert(isinstance(longitude,float))
    self._anchor['attrlat'] = attrlat
    self._anchor['attrlong'] = attrlong
    self._anchor['lat'] = latitude
    self._anchor['long'] = longitude
def SetGroupBy ( self, attribute, func, groupsort='@group desc' ):
    """
    Set grouping attribute and function.
    """
    assert(isinstance(attribute, str))
    assert(func in [SPH_GROUPBY_DAY, SPH_GROUPBY_WEEK, SPH_GROUPBY_MONTH, SPH_GROUPBY_YEAR, SPH_GROUPBY_ATTR] )
    assert(isinstance(groupsort, str))
    self._groupby = attribute
    self._groupfunc = func
    self._groupsort = groupsort
def SetGroupDistinct (self, attribute):
    """
    Set count-distinct attribute for group-by queries.
    """
    assert(isinstance(attribute,str))
    self._groupdistinct = attribute
def SetRetries (self, count, delay=0):
    """
    Set distributed retry count and delay.
    """
    assert(isinstance(count,int) and count>=0)
    assert(isinstance(delay,int) and delay>=0)
    self._retrycount = count
    self._retrydelay = delay
def ResetFilters (self):
    """
    Clear all filters (for multi-queries).
    """
    self._filters = []
    self._anchor = {}
def ResetGroupBy (self):
    """
    Clear groupby settings (for multi-queries).
    """
    self._groupby = ''
    self._groupfunc = SPH_GROUPBY_DAY
    self._groupsort = '@group desc'
    self._groupdistinct = ''
def Query (self, query, index='*', comment=''):
    """
    Connect to searchd server and run given search query.
    Returns None on failure; result set hash on success (see documentation for details).
    """
    # single-query shortcut: must not be mixed with a pending multi-query batch
    assert(len(self._reqs)==0)
    self.AddQuery(query,index,comment)
    results = self.RunQueries()

    if not results or len(results)==0:
        # RunQueries() failed; self._error is already set
        return None
    self._error = results[0]['error']
    self._warning = results[0]['warning']
    if results[0]['status'] == SEARCHD_ERROR:
        return None
    return results[0]
def AddQuery (self, query, index='*', comment=''):
    """
    Add query to batch (for running multiple queries with RunQueries()).
    Returns the index into the returned results array.
    """
    # mode/limits/sorting header
    req = [pack('>5L', self._offset, self._limit, self._mode, self._ranker, self._sort)]
    req.append(pack('>L', len(self._sortby)))
    req.append(self._sortby)

    if isinstance(query,unicode):
        query = query.encode('utf-8')
    assert(isinstance(query,str))

    req.append(pack('>L', len(query)))
    req.append(query)

    req.append(pack('>L', len(self._weights)))
    for w in self._weights:
        req.append(pack('>L', w))
    req.append(pack('>L', len(index)))
    req.append(index)
    req.append(pack('>L',0)) # id64 range marker FIXME! IMPLEMENT!
    req.append(pack('>L', self._min_id))
    req.append(pack('>L', self._max_id))

    # filters
    req.append ( pack ( '>L', len(self._filters) ) )
    for f in self._filters:
        req.append ( pack ( '>L', len(f['attr'])) + f['attr'])
        filtertype = f['type']
        req.append ( pack ( '>L', filtertype))
        if filtertype == SPH_FILTER_VALUES:
            req.append ( pack ('>L', len(f['values'])))
            for val in f['values']:
                req.append ( pack ('>L', val))
        elif filtertype == SPH_FILTER_RANGE:
            req.append ( pack ('>2L', f['min'], f['max']))
        elif filtertype == SPH_FILTER_FLOATRANGE:
            req.append ( pack ('>2f', f['min'], f['max']))
        req.append ( pack ( '>L', f['exclude'] ) )

    # group-by, max-matches, group-sort
    req.append ( pack ( '>2L', self._groupfunc, len(self._groupby) ) )
    req.append ( self._groupby )
    req.append ( pack ( '>2L', self._maxmatches, len(self._groupsort) ) )
    req.append ( self._groupsort )
    req.append ( pack ( '>LLL', self._cutoff, self._retrycount, self._retrydelay))
    req.append ( pack ( '>L', len(self._groupdistinct)))
    req.append ( self._groupdistinct)

    # anchor point (0 entries when unset, 1 entry otherwise)
    if len(self._anchor) == 0:
        req.append ( pack ('>L', 0))
    else:
        attrlat, attrlong = self._anchor['attrlat'], self._anchor['attrlong']
        latitude, longitude = self._anchor['lat'], self._anchor['long']
        req.append ( pack ('>L', 1))
        req.append ( pack ('>L', len(attrlat)) + attrlat)
        req.append ( pack ('>L', len(attrlong)) + attrlong)
        req.append ( pack ('>f', latitude) + pack ('>f', longitude))

    # per-index weights
    req.append ( pack ('>L',len(self._indexweights)))
    for indx,weight in self._indexweights.items():
        req.append ( pack ('>L',len(indx)) + indx + pack ('>L',weight))

    # max query time
    req.append ( pack ('>L', self._maxquerytime) )

    # per-field weights
    req.append ( pack ('>L',len(self._fieldweights) ) )
    for field,weight in self._fieldweights.items():
        req.append ( pack ('>L',len(field)) + field + pack ('>L',weight) )

    # comment
    req.append ( pack('>L',len(comment)) + comment )

    # store request to be sent by RunQueries()
    req = ''.join(req)
    self._reqs.append(req)
    return len(self._reqs)-1
def RunQueries (self):
    """
    Run queries batch.
    Returns None on network IO failure; or an array of result set hashes on success.
    """
    if len(self._reqs)==0:
        self._error = 'no queries defined, issue AddQuery() first'
        return None

    sock = self._Connect()
    if not sock:
        return None

    # build and send the SEARCH packet (header + request count + requests)
    req = ''.join(self._reqs)
    length = len(req)+4
    req = pack('>HHLL', SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, length, len(self._reqs))+req
    sock.send(req)

    response = self._GetResponse(sock, VER_COMMAND_SEARCH)
    if not response:
        return None

    nreqs = len(self._reqs)
    self._reqs = []

    # parse response; p walks the byte offset, max_ bounds all loops
    max_ = len(response)
    p = 0

    results = []
    for i in range(0,nreqs,1):
        result = {}
        result['error'] = ''
        result['warning'] = ''
        status = unpack('>L', response[p:p+4])[0]
        p += 4
        result['status'] = status
        if status != SEARCHD_OK:
            length = unpack('>L', response[p:p+4])[0]
            p += 4
            message = response[p:p+length]
            p += length

            if status == SEARCHD_WARNING:
                result['warning'] = message
            else:
                result['error'] = message
                results.append(result)
                continue

        # read schema: field names, then (attr name, attr type) pairs
        fields = []
        attrs = []

        nfields = unpack('>L', response[p:p+4])[0]
        p += 4
        while nfields>0 and p<max_:
            nfields -= 1
            length = unpack('>L', response[p:p+4])[0]
            p += 4
            fields.append(response[p:p+length])
            p += length

        result['fields'] = fields

        nattrs = unpack('>L', response[p:p+4])[0]
        p += 4
        while nattrs>0 and p<max_:
            nattrs -= 1
            length = unpack('>L', response[p:p+4])[0]
            p += 4
            attr = response[p:p+length]
            p += length
            type_ = unpack('>L', response[p:p+4])[0]
            p += 4
            attrs.append([attr,type_])

        result['attrs'] = attrs

        # read match count and id64 flag
        count = unpack('>L', response[p:p+4])[0]
        p += 4
        id64 = unpack('>L', response[p:p+4])[0]
        p += 4

        # read matches
        result['matches'] = []
        while count>0 and p<max_:
            count -= 1
            if id64:
                # 64-bit doc id: high word, low word, weight
                dochi, doc, weight = unpack('>3L', response[p:p+12])
                doc += (dochi<<32)
                p += 12
            else:
                doc, weight = unpack('>2L', response[p:p+8])
                p += 8

            match = { 'id':doc, 'weight':weight, 'attrs':{} }
            for j in range(len(attrs)):
                if attrs[j][1] == SPH_ATTR_FLOAT:
                    match['attrs'][attrs[j][0]] = unpack('>f', response[p:p+4])[0]
                elif attrs[j][1] == (SPH_ATTR_MULTI | SPH_ATTR_INTEGER):
                    match['attrs'][attrs[j][0]] = []
                    nvals = unpack('>L', response[p:p+4])[0]
                    p += 4
                    for n in range(0,nvals,1):
                        match['attrs'][attrs[j][0]].append(unpack('>L', response[p:p+4])[0])
                        p += 4
                    # compensate for the unconditional p+=4 below
                    p -= 4
                else:
                    match['attrs'][attrs[j][0]] = unpack('>L', response[p:p+4])[0]
                p += 4

            result['matches'].append ( match )

        # totals, elapsed time, per-word statistics
        result['total'], result['total_found'], result['time'], words = unpack('>4L', response[p:p+16])
        result['time'] = '%.3f' % (result['time']/1000.0)
        p += 16

        result['words'] = []
        while words>0:
            words -= 1
            length = unpack('>L', response[p:p+4])[0]
            p += 4
            word = response[p:p+length]
            p += length
            docs, hits = unpack('>2L', response[p:p+8])
            p += 8

            result['words'].append({'word':word, 'docs':docs, 'hits':hits})

        results.append(result)

    return results
def BuildExcerpts (self, docs, index, words, opts=None):
    """
    Connect to searchd server and generate excerpts from given documents.
    Returns None on network failure, [] on protocol failure, or a list of
    excerpt strings (one per input document) on success.
    """
    # opts defaults to None (not {}) to avoid the shared-mutable-default trap
    if not opts:
        opts = {}
    if isinstance(words,unicode):
        words = words.encode('utf-8')

    assert(isinstance(docs, list))
    assert(isinstance(index, str))
    assert(isinstance(words, str))
    assert(isinstance(opts, dict))

    sock = self._Connect()
    if not sock:
        return None

    # fixup options
    opts.setdefault('before_match', '<b>')
    opts.setdefault('after_match', '</b>')
    opts.setdefault('chunk_separator', ' ... ')
    opts.setdefault('limit', 256)
    opts.setdefault('around', 5)

    # build request
    # v.1.0 req

    # mode=0, flags=1 (remove spaces)
    req = [pack('>2L', 0, 1)]

    # req index
    req.append(pack('>L', len(index)))
    req.append(index)

    # req words
    req.append(pack('>L', len(words)))
    req.append(words)

    # options
    req.append(pack('>L', len(opts['before_match'])))
    req.append(opts['before_match'])

    req.append(pack('>L', len(opts['after_match'])))
    req.append(opts['after_match'])

    req.append(pack('>L', len(opts['chunk_separator'])))
    req.append(opts['chunk_separator'])

    req.append(pack('>L', int(opts['limit'])))
    req.append(pack('>L', int(opts['around'])))

    # documents
    req.append(pack('>L', len(docs)))
    for doc in docs:
        if isinstance(doc,unicode):
            doc = doc.encode('utf-8')
        assert(isinstance(doc, str))
        req.append(pack('>L', len(doc)))
        req.append(doc)

    req = ''.join(req)

    # send query, get response
    length = len(req)
    req = pack('>2HL', SEARCHD_COMMAND_EXCERPT, VER_COMMAND_EXCERPT, length)+req
    wrote = sock.send(req)

    response = self._GetResponse(sock, VER_COMMAND_EXCERPT )
    if not response:
        return []

    # parse response: one length-prefixed excerpt per input document
    pos = 0
    res = []
    rlen = len(response)

    for i in range(len(docs)):
        length = unpack('>L', response[pos:pos+4])[0]
        pos += 4

        if pos+length > rlen:
            self._error = 'incomplete reply'
            return []

        res.append(response[pos:pos+length])
        pos += length

    return res
def UpdateAttributes ( self, index, attrs, values ):
    """
    Update given attribute values on given documents in given indexes.
    Returns amount of updated documents (0 or more) on success, or -1 on failure.

    'attrs' must be a list of strings.
    'values' must be a dict with int key (document ID) and list of int values (new attribute values).

    Example:
        res = cl.UpdateAttributes ( 'test1', [ 'group_id', 'date_added' ], { 2:[123,1000000000], 4:[456,1234567890] } )
    """
    assert ( isinstance ( index, str ) )
    assert ( isinstance ( attrs, list ) )
    assert ( isinstance ( values, dict ) )
    for attr in attrs:
        assert ( isinstance ( attr, str ) )
    for docid, entry in values.items():
        assert ( isinstance ( docid, int ) )
        assert ( isinstance ( entry, list ) )
        # each document must supply exactly one value per updated attribute
        assert ( len(attrs)==len(entry) )
        for val in entry:
            assert ( isinstance ( val, int ) )

    # build request
    req = [ pack('>L',len(index)), index ]

    req.append ( pack('>L',len(attrs)) )
    for attr in attrs:
        req.append ( pack('>L',len(attr)) + attr )

    req.append ( pack('>L',len(values)) )
    for docid, entry in values.items():
        req.append ( pack('>q',docid) ) # document ids are 64-bit
        for val in entry:
            req.append ( pack('>L',val) )

    # connect, send query, get response
    sock = self._Connect()
    if not sock:
        return -1

    req = ''.join(req)
    length = len(req)
    req = pack ( '>2HL', SEARCHD_COMMAND_UPDATE, VER_COMMAND_UPDATE, length ) + req
    wrote = sock.send ( req )

    response = self._GetResponse ( sock, VER_COMMAND_UPDATE )
    if not response:
        return -1

    # parse response: 4-byte count of updated documents
    updated = unpack ( '>L', response[0:4] )[0]
    return updated
def BuildKeywords ( self, query, index, hits ):
    """
    Connect to searchd server, and generate keywords list for a given query.
    Returns None on failure, or a list of keywords on success.
    """
    assert ( isinstance ( query, str ) )
    assert ( isinstance ( index, str ) )
    assert ( isinstance ( hits, int ) )

    # build request
    req = [ pack ( '>L', len(query) ) + query ]
    req.append ( pack ( '>L', len(index) ) + index )
    req.append ( pack ( '>L', hits ) ) # non-zero: also return per-keyword stats

    # connect, send query, get response
    sock = self._Connect()
    if not sock:
        return None

    req = ''.join(req)
    length = len(req)
    req = pack ( '>2HL', SEARCHD_COMMAND_KEYWORDS, VER_COMMAND_KEYWORDS, length ) + req
    wrote = sock.send ( req )

    response = self._GetResponse ( sock, VER_COMMAND_KEYWORDS )
    if not response:
        return None

    # parse response
    res = []

    nwords = unpack ( '>L', response[0:4] )[0]
    p = 4
    max_ = len(response)

    while nwords>0 and p<max_:
        nwords -= 1

        length = unpack ( '>L', response[p:p+4] )[0]
        p += 4
        tokenized = response[p:p+length]
        p += length

        length = unpack ( '>L', response[p:p+4] )[0]
        p += 4
        normalized = response[p:p+length]
        p += length

        entry = { 'tokenized':tokenized, 'normalized':normalized }
        if hits:
            entry['docs'], entry['hits'] = unpack ( '>2L', response[p:p+8] )
            p += 8

        res.append ( entry )

    # loop must consume exactly nwords entries within the buffer
    if nwords>0 or p>max_:
        self._error = 'incomplete reply'
        return None

    return res
854 # $Id: sphinxapi.py 1216 2008-03-14 23:25:39Z shodan $