fnp
/
wolnelektury.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
dbb6eb3
)
some fixes to last search commit
author
Marcin Koziej
<marcin@lolownia.org>
Thu, 22 Mar 2012 10:21:22 +0000
(11:21 +0100)
committer
Marcin Koziej
<marcin@lolownia.org>
Thu, 22 Mar 2012 10:21:22 +0000
(11:21 +0100)
apps/search/index.py
patch
|
blob
|
history
diff --git
a/apps/search/index.py
b/apps/search/index.py
index
9d6d598
..
312cf94
100644
(file)
--- a/
apps/search/index.py
+++ b/
apps/search/index.py
@@
-31,9
+31,11
@@
import catalogue.models
from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook
from multiprocessing.pool import ThreadPool
from threading import current_thread
from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook
from multiprocessing.pool import ThreadPool
from threading import current_thread
+from itertools import chain
import atexit
import traceback
import atexit
import traceback
-
+import logging
+log = logging.getLogger('search')
class WLAnalyzer(PerFieldAnalyzerWrapper):
def __init__(self):
class WLAnalyzer(PerFieldAnalyzerWrapper):
def __init__(self):
@@
-147,7
+149,6
@@
class Snippets(object):
if not os.path.exists(self.path):
break
self.revision += 1
if not os.path.exists(self.path):
break
self.revision += 1
- print "using %s" % self.path
self.file = open(self.path, mode)
self.position = 0
self.file = open(self.path, mode)
self.position = 0
@@
-218,7
+219,7
@@
class BaseIndex(IndexStore):
try:
self.index.optimize()
except JavaError, je:
try:
self.index.optimize()
except JavaError, je:
- print "Error during optimize phase, check index: %s" % je
+ log.error("Error during optimize phase, check index: %s" % je)
self.index.close()
self.index = None
self.index.close()
self.index = None
@@
-277,9
+278,9
@@
class Index(BaseIndex):
if not remove_only:
# then add them [all or just one passed]
if not tags:
if not remove_only:
# then add them [all or just one passed]
if not tags:
- tags = catalogue.models.Tag.objects.exclude(category='set') + \
- PDCounterAuthor.objects.all() + \
- PDCounterBook.objects.all()
+ tags = chain(catalogue.models.Tag.objects.exclude(category='set'), \
+ PDCounterAuthor.objects.all(), \
+ PDCounterBook.objects.all())
for tag in tags:
if isinstance(tag, PDCounterAuthor):
for tag in tags:
if isinstance(tag, PDCounterAuthor):
@@
-492,8
+493,6
@@
class Index(BaseIndex):
.setIntValue('header_span' in fields and fields['header_span'] or 1))
doc.add(Field('header_type', fields["header_type"], Field.Store.YES, Field.Index.NOT_ANALYZED))
.setIntValue('header_span' in fields and fields['header_span'] or 1))
doc.add(Field('header_type', fields["header_type"], Field.Store.YES, Field.Index.NOT_ANALYZED))
- print ">>[%s]>%s<<<" % (fields.get('fragment_anchor', ''), fields['content'])
-
doc.add(Field('content', fields["content"], Field.Store.NO, Field.Index.ANALYZED, \
Field.TermVector.WITH_POSITIONS_OFFSETS))
doc.add(Field('content', fields["content"], Field.Store.NO, Field.Index.ANALYZED, \
Field.TermVector.WITH_POSITIONS_OFFSETS))
@@
-623,7
+622,7
@@
def log_exception_wrapper(f):
try:
f(*a)
except Exception, e:
try:
f(*a)
except Exception, e:
- print("Error in indexing thread: %s" % e)
+ log.error("Error in indexing thread: %s" % e)
traceback.print_exc()
raise e
return _wrap
traceback.print_exc()
raise e
return _wrap
@@
-643,7
+642,6
@@
class ReusableIndex(Index):
if ReusableIndex.index:
self.index = ReusableIndex.index
else:
if ReusableIndex.index:
self.index = ReusableIndex.index
else:
- print("opening index")
Index.open(self, analyzer, **kw)
ReusableIndex.index = self.index
atexit.register(ReusableIndex.close_reusable)
Index.open(self, analyzer, **kw)
ReusableIndex.index = self.index
atexit.register(ReusableIndex.close_reusable)
@@
-655,7
+653,6
@@
class ReusableIndex(Index):
@staticmethod
def close_reusable():
if ReusableIndex.index:
@staticmethod
def close_reusable():
if ReusableIndex.index:
- print("closing index")
ReusableIndex.index.optimize()
ReusableIndex.index.close()
ReusableIndex.index = None
ReusableIndex.index.optimize()
ReusableIndex.index.close()
ReusableIndex.index = None
@@
-808,7
+805,7
@@
class SearchResult(object):
# remove fragments with duplicated fid's and duplicated snippets
frags = remove_duplicates(frags, lambda f: f[FRAGMENT], lambda a, b: cmp(a[SCORE], b[SCORE]))
# remove fragments with duplicated fid's and duplicated snippets
frags = remove_duplicates(frags, lambda f: f[FRAGMENT], lambda a, b: cmp(a[SCORE], b[SCORE]))
- frags = remove_duplicates(frags, lambda f: f[OTHER]['snippets'] and f[OTHER]['snippets'][0] or hash(f),
+ frags = remove_duplicates(frags, lambda f: f[OTHER]['snippets'] and f[OTHER]['snippets'][0] or f[FRAGMENT],
lambda a, b: cmp(a[SCORE], b[SCORE]))
# remove duplicate sections
lambda a, b: cmp(a[SCORE], b[SCORE]))
# remove duplicate sections
@@
-874,7
+871,6
@@
class SearchResult(object):
for r in rl:
if r.book_id in books:
books[r.book_id].merge(r)
for r in rl:
if r.book_id in books:
books[r.book_id].merge(r)
- #print(u"already have one with score %f, and this one has score %f" % (books[book.id][0], found.score))
else:
books[r.book_id] = r
return books.values()
else:
books[r.book_id] = r
return books.values()
@@
-1010,9
+1006,8
@@
class Search(IndexStore):
def reopen(self, **unused):
reader = self.searcher.getIndexReader()
rdr = reader.reopen()
def reopen(self, **unused):
reader = self.searcher.getIndexReader()
rdr = reader.reopen()
- print "got signal to reopen index"
if not rdr.equals(reader):
if not rdr.equals(reader):
- print "will reopen index"
+ log.debug('Reopening index')
oldsearch = self.searcher
self.searcher = IndexSearcher(rdr)
oldsearch.close()
oldsearch = self.searcher
self.searcher = IndexSearcher(rdr)
oldsearch.close()
@@
-1081,7
+1076,6
@@
class Search(IndexStore):
fuzzterms = []
while True:
fuzzterms = []
while True:
- # print("fuzz %s" % unicode(fuzzterm.term()).encode('utf-8'))
ft = fuzzterm.term()
if ft:
fuzzterms.append(ft)
ft = fuzzterm.term()
if ft:
fuzzterms.append(ft)
@@
-1252,7
+1246,6
@@
class Search(IndexStore):
topDocs = self.searcher.search(q, only_in, max_results)
for found in topDocs.scoreDocs:
books.append(SearchResult(self, found, how_found='search_everywhere_themesXcontent', searched=searched))
topDocs = self.searcher.search(q, only_in, max_results)
for found in topDocs.scoreDocs:
books.append(SearchResult(self, found, how_found='search_everywhere_themesXcontent', searched=searched))
- print "* %s theme x content: %s" % (searched, books[-1]._hits)
# query themes/content x author/title/tags
q = BooleanQuery()
# query themes/content x author/title/tags
q = BooleanQuery()
@@
-1271,7
+1264,6
@@
class Search(IndexStore):
topDocs = self.searcher.search(q, only_in, max_results)
for found in topDocs.scoreDocs:
books.append(SearchResult(self, found, how_found='search_everywhere', searched=searched))
topDocs = self.searcher.search(q, only_in, max_results)
for found in topDocs.scoreDocs:
books.append(SearchResult(self, found, how_found='search_everywhere', searched=searched))
- print "* %s scatter search: %s" % (searched, books[-1]._hits)
return books
return books
@@
-1332,9
+1324,17
@@
class Search(IndexStore):
return None
revision = stored.get('snippets_revision')
if revision: revision = int(revision)
return None
revision = stored.get('snippets_revision')
if revision: revision = int(revision)
+
# locate content.
book_id = int(stored.get('book_id'))
# locate content.
book_id = int(stored.get('book_id'))
- snippets = Snippets(book_id, revision=revision).open()
+ snippets = Snippets(book_id, revision=revision)
+
+ try:
+ snippets.open()
+ except IOError, e:
+ log.error("Cannot open snippet file for book id = %d [rev=%d], %s" % (book_id, revision, e))
+ return []
+
try:
try:
text = snippets.get((int(position),
try:
try:
text = snippets.get((int(position),
@@
-1371,13
+1371,13
@@
class Search(IndexStore):
if terms:
return JArray('object')(terms, Term)
if terms:
return JArray('object')(terms, Term)
- def search_tags(self, query, filters=None, max_results=40, pdcounter=False):
+ def search_tags(self, query, filter=None, max_results=40, pdcounter=False):
"""
Search for Tag objects using query.
"""
if not pdcounter:
filters = self.chain_filters([filter, self.term_filter(Term('is_pdcounter', 'true'), inverse=True)])
"""
Search for Tag objects using query.
"""
if not pdcounter:
filters = self.chain_filters([filter, self.term_filter(Term('is_pdcounter', 'true'), inverse=True)])
- tops = self.searcher.search(query, filters, max_results)
+ tops = self.searcher.search(query, filter, max_results)
tags = []
for found in tops.scoreDocs:
tags = []
for found in tops.scoreDocs:
@@
-1402,8
+1402,8
@@
class Search(IndexStore):
except PDCounterAuthor.DoesNotExist: pass
except PDCounterBook.DoesNotExist: pass
except PDCounterAuthor.DoesNotExist: pass
except PDCounterBook.DoesNotExist: pass
- # print "%s (%d) -> %f" % (tag, tag.id, found.score)
- print 'returning %s' % tags
+ log.debug('search_tags: %s' % tags)
+
return tags
def search_books(self, query, filter=None, max_results=10):
return tags
def search_books(self, query, filter=None, max_results=10):