2 from sunburnt import sunburnt
# sunburnt.SolrConnection subclass that adds a request helper for the
# "analysis/field/" endpoint.
# NOTE(review): this listing carries fused original line numbers with gaps
# (20-22, 24-25, 28, 30), so some statements of this class are not visible
# here. The comments below describe only what the visible lines do.
8 class HLSolrConnection(sunburnt.SolrConnection):
# Forward every argument to the parent constructor, then derive the analysis
# endpoint URL by appending "analysis/field/" to self.url.
9 def __init__(self, *args, **kw):
10 super(HLSolrConnection, self).__init__(*args, **kw)
11 self.analysis_url = self.url + "analysis/field/"
# Send `params` to the analysis endpoint via self.request.
#   params: whatever urllib.urlencode accepts; the caller visible in this
#           file passes a list of (key, value) pairs.
#   Raises sunburnt.SolrError(r, c); the condition guarding the raise is not
#   visible in this excerpt (presumably a non-200 status check - confirm).
#   NOTE(review): no return statement is visible, yet the caller uses the
#   result as an XML document - confirm the response body `c` is returned.
13 def highlight(self, params):
# urllib.urlencode is the Python 2 location of this function.
14 qs = urllib.urlencode(params)
15 url = "%s?%s" % (self.analysis_url, qs)
# When the full GET URL exceeds self.max_length_get_url, warn and fall back to
# the bare endpoint URL; per the warning text the query is POSTed instead.
16 if len(url) > self.max_length_get_url:
17 warnings.warn("Long query URL encountered - POSTing instead of "
18 "GETting. This query will not be cached at the HTTP layer")
19 url = self.analysis_url
# Fragment of a call whose opening is not visible; it supplies a form-encoded
# Content-Type header (presumably part of the POST kwargs - confirm).
23 headers={"Content-Type": "application/x-www-form-urlencoded"},
# Request options for the plain GET case (the branch keyword is not visible).
26 kwargs = dict(method="GET")
# self.request returns a pair, unpacked here as r and c.
27 r, c = self.request(url, **kwargs)
29 raise sunburnt.SolrError(r, c)
33 class HLSolrInterface(sunburnt.SolrInterface):
34 # Copied from the parent class, with SolrConnection replaced by HLSolrConnection.
# Build the interface around an HLSolrConnection instead of the stock
# connection class.
#   url, http_connection, retry_timeout, max_length_get_url: passed
#       positionally, in that order, to HLSolrConnection.
#   schemadoc: stored unchanged on self.schemadoc.
#   mode: not referenced in the visible lines (original lines 38 and 40-43
#       are missing from this excerpt; presumably it controls the
#       readable/writeable flags - confirm).
# NOTE(review): the visible lines do not call the parent __init__.
35 def __init__(self, url, schemadoc=None, http_connection=None, mode='', retry_timeout=-1, max_length_get_url=sunburnt.MAX_LENGTH_GET_URL):
36 self.conn = HLSolrConnection(url, http_connection, retry_timeout, max_length_get_url)
37 self.schemadoc = schemadoc
# The condition guarding this assignment is not visible in this excerpt.
39 self.writeable = False
# Run Solr field analysis on a piece of text and return a snippet with the
# matched spans marked.
#   kwargs['field']: field name sent as 'analysis_fieldname' (required).
#   kwargs['text']:  text sent as 'analysis_fieldvalue' (required); also the
#                    text the snippet is cut from.
#   kwargs['margins']: optional, default 30, forwarded to self.substring.
#   kwargs['mark']:    optional, default ("<b>", "</b>"), forwarded likewise.
#   Raises TypeError("This Solr instance is only for writing"); the guarding
#   condition is not visible in this excerpt.
# NOTE(review): original lines 45, 47, 51-52, 54, 58-59 and 63-64 are missing
# here, including the opener of the dict below, the initialisation of
# `matches` and the header of the loop that binds `wrd`.
44 def highlight(self, **kwargs):
46 raise TypeError("This Solr instance is only for writing")
# Entries of a mapping whose opening line is not visible (presumably the
# `args` dict used below - confirm).
48 'analysis_fieldname': kwargs['field'],
49 'analysis_showmatch': True,
50 'analysis_fieldvalue': kwargs['text'],
# Pass the arguments through sunburnt.params_from_dict, then replace every
# underscore in each key with a dot (e.g. 'analysis_fieldname' becomes
# 'analysis.fieldname'). The tuple-parameter lambda is Python 2-only syntax.
53 params = map(lambda (k, v): (k.replace('_', '.'), v), sunburnt.params_from_dict(**args))
55 content = self.conn.highlight(params)
56 doc = etree.fromstring(content)
# Select, under any <lst name="index">, the <lst> children of its last <arr>
# child that contain a <bool> child carrying name="match". Only the presence
# of that <bool> is tested, not its value.
57 analyzed = doc.xpath("//lst[@name='index']/arr[last()]/lst[bool/@name='match']")
# For each selected element, read its integer 'start' and 'end' children and
# record the pair; `matches` supports .add (presumably a set - confirm).
60 start = int(wrd.xpath("int[@name='start']")[0].text)
61 end = int(wrd.xpath("int[@name='end']")[0].text)
62 matches.add((start, end))
# Delegate snippet cutting and marking to self.substring.
65 return self.substring(kwargs['text'], matches,
66 margins=kwargs.get('margins', 30),
67 mark=kwargs.get('mark', ("<b>", "</b>")))
# Cut a snippet out of `text` around the given match spans and wrap each
# match in the `mark` pair.
#   text:    the full source string.
#   matches: iterable of (start, end) offsets into `text`.
#   margins: number of characters of context added on each side of a span.
#   mark:    (opening, closing) strings inserted at each match start/end.
# NOTE(review): original lines 70-72, 75, 78-81 and 86 are missing from this
# excerpt, so the definitions of `totlen` and `off`, the body of the merge
# loop and the return statement are not visible.
69 def substring(self, text, matches, margins=30, mark=("<b>", "</b>")):
# Widen every span by `margins` on both sides, clamped to the range
# [0, totlen] (`totlen` is presumably len(text) - confirm). Tuple-parameter
# lambdas are Python 2-only syntax.
73 matches_margins = map(lambda (s, e): (max(0, s - margins), min(totlen, e + margins)), matches)
# Start from the first widened span. Indexing relies on map() returning a
# list (Python 2) and raises IndexError when `matches` is empty.
74 (start, end) = matches_margins[0]
76 for (s, e) in matches_margins[1:]:
# True when (s, e) lies entirely outside the current [start, end] window;
# what happens in either case is not visible in this excerpt.
77 if end < s or start > e:
82 snip = text[start:end]
# Order matches by descending start offset (Python 2 cmp-style sort),
# presumably so that inserting markers from the right keeps earlier offsets
# valid - confirm.
83 matches = list(matches)
84 matches.sort(lambda a, b: cmp(b[0], a[0]))
85 for (s, e) in matches:
# Insert the closing marker at e + off, then the opening marker at s + off.
# NOTE(review): the left-hand piece is sliced from `text`, not from `snip`.
# If `off` is nonzero (presumably -start), text[:x] and snip[:x] differ, so
# the snippet prefix would be replaced by the head of `text` - confirm
# whether `snip[:...]` was intended.
87 snip = text[:e + off] + mark[1] + snip[e + off:]
88 snip = text[:s + off] + mark[0] + snip[s + off:]
89 # maybe break on word boundaries