class TermVectorOptions(search.Options):
- option_name = "tv"
-
def __init__(self, schema, original=None):
self.schema = schema
if original is None:
def options(self):
opts = {}
- opts['tv'] = 'true'
+ if self.positions or self.fields:
+ opts['tv'] = 'true'
if self.positions:
opts['tv.positions'] = 'true'
if self.fields:
newself.term_vectorer.update(positions, fields)
return newself
setattr(search.SolrSearch, 'term_vector', __term_vector)
-__original__init_common_modules = search.SolrSearch._init_common_modules
def __patched__init_common_modules(self):
__original__init_common_modules(self)
self.term_vectorer = TermVectorOptions(self.schema)
+__original__init_common_modules = search.SolrSearch._init_common_modules
setattr(search.SolrSearch, '_init_common_modules', __patched__init_common_modules)
end = int(wrd.xpath("int[@name='end']")[0].text)
matches.add((start, end))
- print matches
if matches:
return self.substring(kwargs['text'], matches,
margins=kwargs.get('margins', 30),
return None
def analyze(self, **kwargs):
- doc = self._analyze(self, **kwargs)
- terms = doc.xpath("/lst[@name='index']/arr[last()]/lst/str[1]")
+ doc = self._analyze(**kwargs)
+ terms = doc.xpath("//lst[@name='index']/arr[last()]/lst/str[1]")
terms = map(lambda n: unicode(n.text), terms)
return terms
snip = snip[:s + off] + mark[0] + snip[s + off:]
# maybe break on word boundaries
return snip
+
"uid": "tag%d" % tag.id
}
self.index.add(doc)
- print "%s %s" % (doc['tag_name'], doc['tag_category'])
def create_book_doc(self, book):
"""
class SearchResult(object):
- def __init__(self, doc, how_found=None, query=None):
+ def __init__(self, doc, how_found=None, query=None, query_terms=None):
# self.search = search
self.boost = 1.0
self._hits = []
self._processed_hits = None # processed hits
self.snippets = []
+ self.query_terms = query_terms
if 'score' in doc:
self._score = doc['score']
hit = (sec + (header_span,), fragment, self._score, {
'how_found': how_found,
'snippets_pos': snippets_pos,
- 'snippets_revision': snippets_rev
+ 'snippets_revision': snippets_rev,
+ 'themes': doc.get('themes', []),
+ 'themes_pl': doc.get('themes_pl', [])
})
self._hits.append(hit)
def __unicode__(self):
return u"<SR id=%d %d(%d) hits score=%f %d snippets" % \
(self.book_id, len(self._hits), self._processed_hits and len(self._processed_hits) or -1, self._score, len(self.snippets))
-
+
def __str__(self):
return unicode(self).encode('utf-8')
except catalogue.models.Fragment.DoesNotExist:
# stale index
continue
-
+
# Figure out if we were searching for a token matching some word in theme name.
themes = frag.tags.filter(category='theme')
- themes_hit = []
- # if self.searched is not None:
- # tokens = self.search.get_tokens(self.searched, 'POLISH', cached=self.tokens_cache)
- # for theme in themes:
- # name_tokens = self.search.get_tokens(theme.name, 'POLISH')
- # for t in tokens:
- # if t in name_tokens:
- # if not theme in themes_hit:
- # themes_hit.append(theme)
- # break
+ themes_hit = set()
+ if self.query_terms is not None:
+ for i in range(0, len(f[self.OTHER]['themes'])):
+                tms = f[self.OTHER]['themes'][i].split() + f[self.OTHER]['themes_pl'][i].split(' ')
+ tms = map(unicode.lower, tms)
+ for qt in self.query_terms:
+ if qt in tms:
+ themes_hit.add(f[self.OTHER]['themes'][i])
+ break
+
+ def theme_by_name(n):
+ th = filter(lambda t: t.name == n, themes)
+ if th:
+ return th[0]
+ else:
+ return None
+ themes_hit = filter(lambda a: a is not None, map(theme_by_name, themes_hit))
m = {'score': f[self.SCORE],
'fragment': frag,
return [SearchResult(found, how_found=u'search_phrase') for found in res]
def search_some(self, searched, fields, book=True,
- filters=None,
- snippets=True):
+ filters=None, snippets=True, query_terms=None):
assert isinstance(fields, list)
if filters is None: filters = []
if book: filters.append(self.index.Q(is_book=True))
query = self.index.query(query)
query = self.apply_filters(query, filters).field_limit(score=True, all_fields=True)
res = query.execute()
- return [SearchResult(found, how_found='search_some') for found in res]
+ return [SearchResult(found, how_found='search_some', query_terms=query_terms) for found in res]
# def search_perfect_book(self, searched, max_results=20, fuzzy=False, hint=None):
# """
# return books
- def search_everywhere(self, searched):
+ def search_everywhere(self, searched, query_terms=None):
"""
Tries to use search terms to match different fields of book (or its parts).
E.g. one word can be an author survey, another be a part of the title, and the rest
"""
books = []
# content only query : themes x content
-
q = self.make_term_query(searched, 'text')
q_themes = self.make_term_query(searched, 'themes_pl')
res = query.execute()
for found in res:
- books.append(SearchResult(found, how_found='search_everywhere_themesXcontent'))
+ books.append(SearchResult(found, how_found='search_everywhere_themesXcontent', query_terms=query_terms))
# query themes/content x author/title/tags
in_content = self.index.Q()
q = in_content & in_meta
res = self.index.query(q).field_limit(score=True, all_fields=True).execute()
+
for found in res:
- books.append(SearchResult(found, how_found='search_everywhere'))
+ books.append(SearchResult(found, how_found='search_everywhere', query_terms=query_terms))
return books
context_instance=RequestContext(request))
search = Search()
+ theme_terms = search.index.analyze(text=query, field="themes_pl") \
+ + search.index.analyze(text=query, field="themes")
+
# change hints
tags = search.hint_tags(query, pdcounter=True, prefix=False)
tags = split_tags(tags)
# Boost main author/title results with mixed search, and save some of its results for end of list.
# boost author, title results
- author_title_mixed = search.search_some(query, ['authors', 'title', 'tags'])
+ author_title_mixed = search.search_some(query, ['authors', 'title', 'tags'], query_terms=theme_terms)
author_title_rest = []
for b in author_title_mixed:
# Because the query is using only one field.
text_phrase = SearchResult.aggregate(
search.search_phrase(query, 'text', snippets=True, book=False),
- search.search_some(query, ['text'], snippets=True, book=False))
+ search.search_some(query, ['text'], snippets=True, book=False, query_terms=theme_terms))
- everywhere = search.search_everywhere(query)
+ everywhere = search.search_everywhere(query, query_terms=theme_terms)
def already_found(results):
def f(e):