self.remove_book(book, remove_snippets=False)
book_doc = self.create_book_doc(book)
- meta_fields = self.extract_metadata(book, book_info)
+ meta_fields = self.extract_metadata(book, book_info, dc_only=['source_name', 'authors', 'title'])
+ # let's not index it - it's only used for extracting publish date
+ del meta_fields['source_name']
+
for f in meta_fields.values():
if isinstance(f, list) or isinstance(f, tuple):
for elem in f:
published_date_re = re.compile("([0-9]+)[\]. ]*$")
- def extract_metadata(self, book, book_info=None):
+ def extract_metadata(self, book, book_info=None, dc_only=None):
"""
Extract metadata from book and returns a map of fields keyed by fieldname
"""
# validator, name
for field in dcparser.BookInfo.FIELDS:
+ if dc_only and field.name not in dc_only:
+ continue
if hasattr(book_info, field.name):
if not getattr(book_info, field.name):
continue
from search import Search, JVM, SearchResult
from lucene import StringReader
from suggest.forms import PublishingSuggestForm
+from time import sleep
import re
import enchant
return query
+
JVM.attachCurrentThread()
-search = Search()
+_search = None
+
+
+def get_search():
+    """
+    Return the module-wide Search instance, creating it on first use.
+
+    The module global `_search` has three states: None (not created yet),
+    False (some caller is currently constructing it) and the Search
+    instance itself once construction has finished.
+
+    Any exception raised by Search() propagates to the caller; the
+    in-progress marker is cleared first so that later calls retry
+    instead of waiting forever.
+    """
+    global _search
+
+    # Another caller is in the middle of constructing the instance:
+    # poll once a second until it either finishes or gives up.
+    while _search is False:
+        sleep(1)
+
+    if _search is None:
+        # Mark construction as in progress so concurrent callers wait above.
+        # NOTE(review): the check and this assignment are not atomic, so two
+        # threads may still both get here and each build a Search.
+        _search = False
+        try:
+            _search = Search()
+        except BaseException:
+            # Without this reset the marker would stay False and every
+            # subsequent call would loop in the wait above indefinitely.
+            _search = None
+            raise
+    return _search
def hint(request):
return JSONResponse([])
JVM.attachCurrentThread()
+ search = get_search()
hint = search.hint()
try:
tags = request.GET.get('tags', '')
return render_to_response('catalogue/search_too_short.html', {'prefix': query},
context_instance=RequestContext(request))
+ search = get_search()
# hint.tags(tag_list)
# if book:
# hint.books(book)