{% block right-column %}
<div class="snippets">
{% for hit in hits %}
- {% if hit.snippets %}
- <div class="snippet-text"><a href="{% url book_text book.slug %}#sec{{hit.section_number}}">{{hit.snippets.0|safe}}</a></div>
+ {% if hit.snippet %}
+ <div class="snippet-text"><a href="{% url book_text book.slug %}#sec{{hit.section_number}}">{{hit.snippet|safe}}</a></div>
{% else %}
{% if hit.fragment %}
<div class="snippet-text">
from basicauth import logged_in_or_basicauth, factory_decorator
from catalogue.models import Book, Tag
-from search.views import get_search, SearchResult, JVM
+from search.views import Search, SearchResult
from lucene import Term, QueryWrapperFilter, TermQuery
import logging
{u"href": reverse("opds_authors"),
u"rel": u"start",
u"type": u"application/atom+xml"})
- handler.addQuickElement(u"link", None,
+ handler.addQuickElement(u"link", None,
{u"href": full_url(os.path.join(settings.STATIC_URL, "opensearch.xml")),
u"rel": u"search",
u"type": u"application/opensearchdescription+xml"})
title = u"Wyniki wyszukiwania"
INLINE_QUERY_RE = re.compile(r"(author:(?P<author>[^ ]+)|title:(?P<title>[^ ]+)|categories:(?P<categories>[^ ]+)|description:(?P<description>[^ ]+))")
-
+
def get_object(self, request):
"""
For OPDS 1.1 we should handle a query for search terms.
OpenSearch defines fields: atom:author, atom:contributor (treated as translator),
atom:title. Inline query provides author, title, categories (treated as book tags),
description (treated as content search terms).
-
+
If search terms are provided, we shall search for books
according to Hint information (from author & contributor & title).
(perhaps for is_book=True)
"""
- JVM.attachCurrentThread()
query = request.GET.get('q', '')
-
+
inline_criteria = re.findall(self.INLINE_QUERY_RE, query)
if inline_criteria:
def get_criteria(criteria, name, position):
translator = request.GET.get('translator', '')
# Our client didn't handle the OPDS placeholders
- if author == '{atom:author}': author = ''
+ if author == '{atom:author}': author = ''
if title == '{atom:title}': title = ''
if translator == '{atom:contributor}': translator = ''
categories = None
fuzzy = False
- srch = get_search()
+ srch = Search()
hint = srch.hint()
# Scenario 1: full search terms provided.
filters = []
if author:
- log.info( "narrow to author %s" % author)
- hint.tags(srch.search_tags(srch.make_phrase(srch.get_tokens(author, field='authors'), field='authors'),
+ log.info("narrow to author %s" % author)
+ hint.tags(srch.search_tags(srch.make_phrase(srch.get_tokens(author, field='authors'), field='authors'),
filt=srch.term_filter(Term('tag_category', 'author'))))
if translator:
- log.info( "filter by translator %s" % translator)
+ log.info("filter by translator %s" % translator)
filters.append(QueryWrapperFilter(
srch.make_phrase(srch.get_tokens(translator, field='translators'),
field='translators')))
flt = srch.chain_filters(filters)
if title:
- log.info( "hint by book title %s" % title)
+ log.info("hint by book title %s" % title)
q = srch.make_phrase(srch.get_tokens(title, field='title'), field='title')
hint.books(*srch.search_books(q, filt=flt))
toks = srch.get_tokens(query)
log.info("tokens for query: %s" % toks)
-
+
results = SearchResult.aggregate(srch.search_perfect_book(toks, fuzzy=fuzzy, hint=hint),
srch.search_perfect_parts(toks, fuzzy=fuzzy, hint=hint),
srch.search_everywhere(toks, fuzzy=fuzzy, hint=hint))
--- /dev/null
+
+from sunburnt import sunburnt
+from lxml import etree
+import urllib
+import warnings
+from sunburnt import search
+import copy
+
+
+class TermVectorOptions(search.Options):
+ option_name = "tv"
+
+ def __init__(self, schema, original=None):
+ self.schema = schema
+ if original is None:
+ self.fields = set()
+ self.positions = False
+ else:
+ self.fields = copy.copy(original.fields)
+ self.positions = copy.copy(original.positions)
+
+ def update(self, positions=False, fields=None):
+ if fields is None:
+ fields = []
+ if isinstance(fields, basestring):
+ fields = [fields]
+ self.schema.check_fields(fields, {"stored": True})
+ self.fields.update(fields)
+ self.positions = positions
+
+ def options(self):
+ opts = {}
+ opts['tv'] = 'true'
+ if self.positions:
+ opts['tv.positions'] = 'true'
+ if self.fields:
+ opts['tv.fl'] = ','.join(sorted(self.fields))
+ return opts
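+ # For instance, with positions=True and fields={'text'}, options() returns
+ # {'tv': 'true', 'tv.positions': 'true', 'tv.fl': 'text'}; sunburnt appends
+ # these as extra request parameters to the Solr query.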
+
+
+class CustomSolrConnection(sunburnt.SolrConnection):
+ def __init__(self, *args, **kw):
+ super(CustomSolrConnection, self).__init__(*args, **kw)
+ self.analysis_url = self.url + "analysis/field/"
+
+ def analyze(self, params):
+ qs = urllib.urlencode(params)
+ url = "%s?%s" % (self.analysis_url, qs)
+ if len(url) > self.max_length_get_url:
+ warnings.warn("Long query URL encountered - POSTing instead of "
+ "GETting. This query will not be cached at the HTTP layer")
+ url = self.analysis_url
+ kwargs = dict(
+ method="POST",
+ body=qs,
+ headers={"Content-Type": "application/x-www-form-urlencoded"},
+ )
+ else:
+ kwargs = dict(method="GET")
+ r, c = self.request(url, **kwargs)
+ if r.status != 200:
+ raise sunburnt.SolrError(r, c)
+ return c
+
+
+# monkey patching sunburnt SolrSearch
+search.SolrSearch.option_modules += ('term_vectorer',)
+
+
+def __term_vector(self, positions=False, fields=None):
+ newself = self.clone()
+ newself.term_vectorer.update(positions, fields)
+ return newself
+setattr(search.SolrSearch, 'term_vector', __term_vector)
+__original__init_common_modules = search.SolrSearch._init_common_modules
+
+
+def __patched__init_common_modules(self):
+ __original__init_common_modules(self)
+ self.term_vectorer = TermVectorOptions(self.schema)
+setattr(search.SolrSearch, '_init_common_modules', __patched__init_common_modules)
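+# A sketch of intended usage (all names as patched above): once SolrSearch is
+# patched, a query can request term vectors, e.g.
+#   interface.query(text=u"kot").term_vector(positions=True, fields=['text'])
+# which clones the query and merges TermVectorOptions into its option modules.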
+
+
+class CustomSolrInterface(sunburnt.SolrInterface):
+ # just copied from parent and SolrConnection -> CustomSolrConnection
+ def __init__(self, url, schemadoc=None, http_connection=None, mode='', retry_timeout=-1, max_length_get_url=sunburnt.MAX_LENGTH_GET_URL):
+ self.conn = CustomSolrConnection(url, http_connection, retry_timeout, max_length_get_url)
+ self.schemadoc = schemadoc
+ if mode == 'r':
+ self.writeable = False
+ elif mode == 'w':
+ self.readable = False
+ self.init_schema()
+
+ def _analyze(self, **kwargs):
+ if not self.readable:
+ raise TypeError("This Solr instance is only for writing")
+ args = {
+ 'analysis_showmatch': True
+ }
+ if 'field' in kwargs: args['analysis_fieldname'] = kwargs['field']
+ if 'text' in kwargs: args['analysis_fieldvalue'] = kwargs['text']
+ if 'q' in kwargs: args['q'] = kwargs['q']
+ if 'query' in kwargs: args['q'] = kwargs['query']
+
+ params = map(lambda (k, v): (k.replace('_', '.'), v), sunburnt.params_from_dict(**args))
+
+ content = self.conn.analyze(params)
+ doc = etree.fromstring(content)
+ return doc
+
+ def highlight(self, **kwargs):
+ doc = self._analyze(**kwargs)
+ analyzed = doc.xpath("//lst[@name='index']/arr[last()]/lst[bool/@name='match']")
+ matches = set()
+ for wrd in analyzed:
+ start = int(wrd.xpath("int[@name='start']")[0].text)
+ end = int(wrd.xpath("int[@name='end']")[0].text)
+ matches.add((start, end))
+
+ if matches:
+ return self.substring(kwargs['text'], matches,
+ margins=kwargs.get('margins', 30),
+ mark=kwargs.get('mark', ("<b>", "</b>")))
+ else:
+ return None
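+ # Hypothetical example: highlight(text=u"Ala ma kota", field='text', q=u"kota")
+ # should return u"Ala ma <b>kota</b>" (the whole text fits in the default margins).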
+
+ def analyze(self, **kwargs):
+ doc = self._analyze(**kwargs)
+ terms = doc.xpath("//lst[@name='index']/arr[last()]/lst/str[1]")
+ terms = map(lambda n: unicode(n.text), terms)
+ return terms
+
+ def substring(self, text, matches, margins=30, mark=("<b>", "</b>")):
+ start = None
+ end = None
+ totlen = len(text)
+ matches_margins = map(lambda (s, e): (max(0, s - margins), min(totlen, e + margins)), matches)
+ (start, end) = matches_margins[0]
+
+ for (s, e) in matches_margins[1:]:
+ if end < s or start > e:
+ continue
+ start = min(start, s)
+ end = max(end, e)
+
+ snip = text[start:end]
+ matches = list(matches)
+ matches.sort(lambda a, b: cmp(b[0], a[0]))
+ for (s, e) in matches:
+ off = - start
+ snip = snip[:e + off] + mark[1] + snip[e + off:]
+ snip = snip[:s + off] + mark[0] + snip[s + off:]
+ # maybe break on word boundaries
+ return snip
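+ # Worked example (assumed inputs): substring("abcdef", set([(2, 4)]), margins=1)
+ # widens the match to text[1:5] == "bcde" and returns "b<b>cd</b>e".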
+++ /dev/null
-
-from sunburnt import sunburnt
-from lxml import etree
-import urllib
-import warnings
-
-
-class HLSolrConnection(sunburnt.SolrConnection):
- def __init__(self, *args, **kw):
- super(HLSolrConnection, self).__init__(*args, **kw)
- self.analysis_url = self.url + "analysis/field/"
-
- def highlight(self, params):
- qs = urllib.urlencode(params)
- url = "%s?%s" % (self.analysis_url, qs)
- if len(url) > self.max_length_get_url:
- warnings.warn("Long query URL encountered - POSTing instead of "
- "GETting. This query will not be cached at the HTTP layer")
- url = self.analysis_url
- kwargs = dict(
- method="POST",
- body=qs,
- headers={"Content-Type": "application/x-www-form-urlencoded"},
- )
- else:
- kwargs = dict(method="GET")
- r, c = self.request(url, **kwargs)
- if r.status != 200:
- raise sunburnt.SolrError(r, c)
- return c
-
-
-class HLSolrInterface(sunburnt.SolrInterface):
- # just copied from parent and SolrConnection -> HLSolrConnection
- def __init__(self, url, schemadoc=None, http_connection=None, mode='', retry_timeout=-1, max_length_get_url=sunburnt.MAX_LENGTH_GET_URL):
- self.conn = HLSolrConnection(url, http_connection, retry_timeout, max_length_get_url)
- self.schemadoc = schemadoc
- if mode == 'r':
- self.writeable = False
- elif mode == 'w':
- self.readable = False
- self.init_schema()
-
- def highlight(self, **kwargs):
- if not self.readable:
- raise TypeError("This Solr instance is only for writing")
- args = {
- 'analysis_fieldname': kwargs['field'],
- 'analysis_showmatch': True,
- 'analysis_fieldvalue': kwargs['text'],
- 'q': kwargs['q']
- }
- params = map(lambda (k, v): (k.replace('_', '.'), v), sunburnt.params_from_dict(**args))
-
- content = self.conn.highlight(params)
- doc = etree.fromstring(content)
- analyzed = doc.xpath("//lst[@name='index']/arr[last()]/lst[bool/@name='match']")
- matches = set()
- for wrd in analyzed:
- start = int(wrd.xpath("int[@name='start']")[0].text)
- end = int(wrd.xpath("int[@name='end']")[0].text)
- matches.add((start, end))
-
- print matches
- return self.substring(kwargs['text'], matches,
- margins=kwargs.get('margins', 30),
- mark=kwargs.get('mark', ("<b>", "</b>")))
-
- def substring(self, text, matches, margins=30, mark=("<b>", "</b>")):
- start = None
- end = None
- totlen = len(text)
- matches_margins = map(lambda (s, e): (max(0, s - margins), min(totlen, e + margins)), matches)
- (start, end) = matches_margins[0]
-
- for (s, e) in matches_margins[1:]:
- if end < s or start > e:
- continue
- start = min(start, s)
- end = max(end, e)
-
- snip = text[start:end]
- matches = list(matches)
- matches.sort(lambda a, b: cmp(b[0], a[0]))
- for (s, e) in matches:
- off = - start
- snip = text[:e + off] + mark[1] + snip[e + off:]
- snip = text[:s + off] + mark[0] + snip[s + off:]
- # maybe break on word boundaries
- return snip
import logging
log = logging.getLogger('search')
import sunburnt
-import highlight
+import custom
+import operator
class SolrIndex(object):
def __init__(self, mode=None):
- self.index = highlight.HLSolrInterface(settings.SOLR, mode=mode)
+ self.index = custom.CustomSolrInterface(settings.SOLR, mode=mode)
class Snippets(object):
break
for res in ids:
uids.add(res['uid'])
- st+=rows
+ st += rows
# print "Will delete %s" % ','.join([x for x in uids])
if uids:
self.index.delete(uids)
"tag_name": tag.name,
"tag_name_pl": tag.name,
"tag_category": 'pd_author',
- "is_pdcounter": True
+ "is_pdcounter": True,
+ "uid": "tag%d_pd_a" % tag.id
}
elif isinstance(tag, PDCounterBook):
doc = {
"tag_name": tag.title,
"tag_name_pl": tag.title,
"tag_category": 'pd_book',
- "is_pdcounter": True
+ "is_pdcounter": True,
+ "uid": "tag%d_pd_b" % tag.id
}
else:
doc = {
"tag_name": tag.name,
"tag_name_pl": tag.name,
"tag_category": tag.category,
- "is_pdcounter": False
+ "is_pdcounter": False,
+ "uid": "tag%d" % tag.id
}
- doc['uid'] = "tag%d" % tag.id
self.index.add(doc)
+ print "%s %s" % (doc['tag_name'], doc['tag_category'])
def create_book_doc(self, book):
"""
book_doc['uid'] = "book%s" % book_doc['book_id']
self.index.add(book_doc)
del book_doc
-
- self.index_content(book, book_fields={
+ book_fields = {
'title': meta_fields['title'],
'authors': meta_fields['authors'],
- 'published_date': meta_fields['published_date']})
+ 'published_date': meta_fields['published_date']
+ }
+ if 'translators' in meta_fields:
+ book_fields['translators'] = meta_fields['translators']
+
+ self.index_content(book, book_fields=book_fields)
master_tags = [
'opowiadanie',
doc['themes'] = fields['themes']
doc['uid'] = "part%s%s%s" % (doc['header_index'],
doc['header_span'],
- doc.get('fragment_anchor',''))
+ doc.get('fragment_anchor', ''))
return doc
def give_me_utf8(s):
snippets.close()
-
class SearchResult(object):
- def __init__(self, search, doc, how_found=None, snippets=None, searched=None, tokens_cache=None):
- if tokens_cache is None: tokens_cache = {}
+ def __init__(self, doc, how_found=None, query=None):
+ # self.search = search
+ self.boost = 1.0
+ self._hits = []
+ self._processed_hits = None # processed hits
+ self.snippets = []
if 'score' in doc:
self._score = doc['score']
else:
self._score = 0
- self.boost = 1.0
-
- self._hits = []
- self._processed_hits = None # processed hits
-
self.book_id = int(doc["book_id"])
- pd = doc["published_date"]
try:
- self.published_date = int(pd)
+ self.published_date = int(doc.get("published_date"))
- except ValueError:
+ except (ValueError, TypeError):
self.published_date = 0
+ # content hits
header_type = doc.get("header_type", None)
# we have a content hit in some header of fragment
if header_type is not None:
sec = (header_type, int(doc["header_index"]))
header_span = doc['header_span']
header_span = header_span is not None and int(header_span) or 1
-
fragment = doc.get("fragment_anchor", None)
+ snippets_pos = (doc['snippets_position'], doc['snippets_length'])
+ snippets_rev = doc.get('snippets_revision', None)
- if snippets:
- snippets = snippets.replace("/\n", "\n")
- hit = (sec + (header_span,), fragment, self._score, {'how_found': how_found, 'snippets': snippets and [snippets] or []})
+ hit = (sec + (header_span,), fragment, self._score, {
+ 'how_found': how_found,
+ 'snippets_pos': snippets_pos,
+ 'snippets_revision': snippets_rev
+ })
self._hits.append(hit)
- self.search = search
- self.searched = searched
- self.tokens_cache = tokens_cache
+ def __unicode__(self):
+ return u"<SR id=%d %d(%d) hits score=%f %d snippets" % \
+ (self.book_id, len(self._hits), self._processed_hits and len(self._processed_hits) or -1, self._score, len(self.snippets))
+
+ def __str__(self):
+ return unicode(self).encode('utf-8')
@property
def score(self):
def get_book(self):
if hasattr(self, '_book'):
return self._book
- return catalogue.models.Book.objects.get(id=self.book_id)
+ self._book = catalogue.models.Book.objects.get(id=self.book_id)
+ return self._book
book = property(get_book)
+ POSITION = 0
+ FRAGMENT = 1
+ POSITION_INDEX = 1
+ POSITION_SPAN = 2
+ SCORE = 2
+ OTHER = 3
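+ # Each element of _hits is a tuple:
+ #   ((header_type, header_index, header_span), fragment_anchor, score, other_dict)
+ # and the constants above index into it (POSITION_* index the inner position tuple).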
+
@property
def hits(self):
if self._processed_hits is not None:
return self._processed_hits
- POSITION = 0
- FRAGMENT = 1
- POSITION_INDEX = 1
- POSITION_SPAN = 2
- SCORE = 2
- OTHER = 3
-
# to sections and fragments
- frags = filter(lambda r: r[FRAGMENT] is not None, self._hits)
+ frags = filter(lambda r: r[self.FRAGMENT] is not None, self._hits)
- sect = filter(lambda r: r[FRAGMENT] is None, self._hits)
+ sect = filter(lambda r: r[self.FRAGMENT] is None, self._hits)
# sections not covered by fragments
sect = filter(lambda s: 0 == len(filter(
- lambda f: s[POSITION][POSITION_INDEX] >= f[POSITION][POSITION_INDEX]
- and s[POSITION][POSITION_INDEX] < f[POSITION][POSITION_INDEX] + f[POSITION][POSITION_SPAN],
+ lambda f: s[self.POSITION][self.POSITION_INDEX] >= f[self.POSITION][self.POSITION_INDEX]
+ and s[self.POSITION][self.POSITION_INDEX] < f[self.POSITION][self.POSITION_INDEX] + f[self.POSITION][self.POSITION_SPAN],
frags)), sect)
hits = []
return els.values()
# remove fragments with duplicated fid's and duplicated snippets
- frags = remove_duplicates(frags, lambda f: f[FRAGMENT], lambda a, b: cmp(a[SCORE], b[SCORE]))
- frags = remove_duplicates(frags, lambda f: f[OTHER]['snippets'] and f[OTHER]['snippets'][0] or f[FRAGMENT],
- lambda a, b: cmp(a[SCORE], b[SCORE]))
+ frags = remove_duplicates(frags, lambda f: f[self.FRAGMENT], lambda a, b: cmp(a[self.SCORE], b[self.SCORE]))
+ # frags = remove_duplicates(frags, lambda f: f[OTHER]['snippet_pos'] and f[OTHER]['snippet_pos'] or f[FRAGMENT],
+ # lambda a, b: cmp(a[SCORE], b[SCORE]))
# remove duplicate sections
sections = {}
for s in sect:
- si = s[POSITION][POSITION_INDEX]
+ si = s[self.POSITION][self.POSITION_INDEX]
# skip existing
if si in sections:
- if sections[si]['score'] >= s[SCORE]:
+ if sections[si]['score'] >= s[self.SCORE]:
continue
- m = {'score': s[SCORE],
- 'section_number': s[POSITION][POSITION_INDEX] + 1,
+ m = {'score': s[self.SCORE],
+ 'section_number': s[self.POSITION][self.POSITION_INDEX] + 1,
}
- m.update(s[OTHER])
+ m.update(s[self.OTHER])
sections[si] = m
hits = sections.values()
for f in frags:
try:
- frag = catalogue.models.Fragment.objects.get(anchor=f[FRAGMENT], book__id=self.book_id)
+ frag = catalogue.models.Fragment.objects.get(anchor=f[self.FRAGMENT], book__id=self.book_id)
except catalogue.models.Fragment.DoesNotExist:
# stale index
continue
# Figure out if we were searching for a token matching some word in theme name.
themes = frag.tags.filter(category='theme')
themes_hit = []
- if self.searched is not None:
- tokens = self.search.get_tokens(self.searched, 'POLISH', cached=self.tokens_cache)
- for theme in themes:
- name_tokens = self.search.get_tokens(theme.name, 'POLISH')
- for t in tokens:
- if t in name_tokens:
- if not theme in themes_hit:
- themes_hit.append(theme)
- break
-
- m = {'score': f[SCORE],
+ # if self.searched is not None:
+ # tokens = self.search.get_tokens(self.searched, 'POLISH', cached=self.tokens_cache)
+ # for theme in themes:
+ # name_tokens = self.search.get_tokens(theme.name, 'POLISH')
+ # for t in tokens:
+ # if t in name_tokens:
+ # if not theme in themes_hit:
+ # themes_hit.append(theme)
+ # break
+
+ m = {'score': f[self.SCORE],
'fragment': frag,
- 'section_number': f[POSITION][POSITION_INDEX] + 1,
+ 'section_number': f[self.POSITION][self.POSITION_INDEX] + 1,
'themes': themes,
'themes_hit': themes_hit
}
- m.update(f[OTHER])
+ m.update(f[self.OTHER])
hits.append(m)
hits.sort(lambda a, b: cmp(a['score'], b['score']), reverse=True)
return hits
- def __unicode__(self):
- return u'SearchResult(book_id=%d, score=%d)' % (self.book_id, self.score)
-
@staticmethod
def aggregate(*result_lists):
books = {}
else:
return c
+ def __len__(self):
+ return len(self.hits)
-class Hint(object):
- """
- Given some hint information (information we already know about)
- our search target - like author, title (specific book), epoch, genre, kind
- we can narrow down search using filters.
- """
- def __init__(self, search):
- """
- Accepts a Searcher instance.
- """
- self.search = search
- self.book_tags = {}
- self.part_tags = []
- self._books = []
+ def snippet_pos(self, idx=0):
+ return self.hits[idx]['snippets_pos']
- def books(self, *books):
- """
- Give a hint that we search these books.
- """
- self._books = books
-
- def tags(self, tags):
- """
- Give a hint that these Tag objects (a list of)
- is necessary.
- """
- for t in tags:
- if t.category in ['author', 'title', 'epoch', 'genre', 'kind']:
- lst = self.book_tags.get(t.category, [])
- lst.append(t)
- self.book_tags[t.category] = lst
- if t.category in ['theme', 'theme_pl']:
- self.part_tags.append(t)
-
- def tag_filter(self, tags, field='tags'):
- """
- Given a lsit of tags and an optional field (but they are normally in tags field)
- returns a filter accepting only books with specific tags.
- """
- q = BooleanQuery()
-
- for tag in tags:
- toks = self.search.get_tokens(tag.name, field=field)
- tag_phrase = PhraseQuery()
- for tok in toks:
- tag_phrase.add(Term(field, tok))
- q.add(BooleanClause(tag_phrase, BooleanClause.Occur.MUST))
-
- return QueryWrapperFilter(q)
-
- def book_filter(self):
- """
- Filters using book tags (all tag kinds except a theme)
- """
- tags = reduce(lambda a, b: a + b, self.book_tags.values(), [])
- if tags:
- return self.tag_filter(tags)
- else:
+ def snippet_revision(self, idx=0):
+ try:
+ return self.hits[idx]['snippets_revision']
+ except (IndexError, KeyError):
return None
- def part_filter(self):
- """
- This filter can be used to look for book parts.
- It filters on book id and/or themes.
- """
- fs = []
- if self.part_tags:
- fs.append(self.tag_filter(self.part_tags, field='themes'))
-
- if self._books != []:
- bf = BooleanFilter()
- for b in self._books:
- id_filter = NumericRangeFilter.newIntRange('book_id', b.id, b.id, True, True)
- bf.add(FilterClause(id_filter, BooleanClause.Occur.SHOULD))
- fs.append(bf)
-
- return Search.chain_filters(fs)
-
- def should_search_for_book(self):
- return self._books == []
-
- def just_search_in(self, all):
- """Holds logic to figure out which indexes should be search, when we have some hinst already"""
- some = []
- for field in all:
- if field == 'authors' and 'author' in self.book_tags:
- continue
- if field == 'title' and self._books != []:
- continue
- if (field == 'themes' or field == 'themes_pl') and self.part_tags:
- continue
- some.append(field)
- return some
-
class Search(SolrIndex):
"""
Search facilities.
"""
def __init__(self, default_field="text"):
- IndexStore.__init__(self)
- self.analyzer = WLAnalyzer() # PolishAnalyzer(Version.LUCENE_34)
- # self.analyzer = WLAnalyzer()
- reader = IndexReader.open(self.store, True)
- self.searcher = IndexSearcher(reader)
- self.parser = QueryParser(Version.LUCENE_34, default_field,
- self.analyzer)
-
- self.parent_filter = TermsFilter()
- self.parent_filter.addTerm(Term("is_book", "true"))
- index_changed.connect(self.reopen)
-
- def close(self):
- reader = self.searcher.getIndexReader()
- self.searcher.close()
- reader.close()
- super(Search, self).close()
- index_changed.disconnect(self.reopen)
-
- def reopen(self, **unused):
- reader = self.searcher.getIndexReader()
- rdr = reader.reopen()
- if not rdr.equals(reader):
- log.debug('Reopening index')
- oldsearch = self.searcher
- self.searcher = IndexSearcher(rdr)
- oldsearch.close()
- reader.close()
-
- def query(self, query):
- """Parse query in default Lucene Syntax. (for humans)
- """
- return self.parser.parse(query)
-
- def simple_search(self, query, max_results=50):
- """Runs a query for books using lucene syntax. (for humans)
- Returns (books, total_hits)
- """
-
- tops = self.searcher.search(self.query(query), max_results)
- bks = []
- for found in tops.scoreDocs:
- doc = self.searcher.doc(found.doc)
- bks.append(catalogue.models.Book.objects.get(id=doc.get("book_id")))
- return (bks, tops.totalHits)
-
- def get_tokens(self, searched, field='text', cached=None):
- """returns tokens analyzed by a proper (for a field) analyzer
- argument can be: StringReader, string/unicode, or tokens. In the last case
- they will just be returned (so we can reuse tokens, if we don't change the analyzer)
- """
- if cached is not None and field in cached:
- return cached[field]
+ super(Search, self).__init__()
- if isinstance(searched, str) or isinstance(searched, unicode):
- searched = StringReader(searched)
- elif isinstance(searched, list):
- return searched
-
- searched.reset()
- tokens = self.analyzer.reusableTokenStream(field, searched)
- toks = []
- while tokens.incrementToken():
- cta = tokens.getAttribute(CharTermAttribute.class_)
- toks.append(cta.toString())
-
- if cached is not None:
- cached[field] = toks
-
- return toks
-
- @staticmethod
- def fuzziness(fuzzy):
- """Helper method to sanitize fuzziness"""
- if not fuzzy:
- return None
- if isinstance(fuzzy, float) and fuzzy > 0.0 and fuzzy <= 1.0:
- return fuzzy
- else:
- return 0.5
-
- def make_phrase(self, tokens, field='text', slop=2, fuzzy=False):
- """
- Return a PhraseQuery with a series of tokens.
- """
- if fuzzy:
- phrase = MultiPhraseQuery()
- for t in tokens:
- term = Term(field, t)
- fuzzterm = FuzzyTermEnum(self.searcher.getIndexReader(), term, self.fuzziness(fuzzy))
- fuzzterms = []
-
- while True:
- ft = fuzzterm.term()
- if ft:
- fuzzterms.append(ft)
- if not fuzzterm.next(): break
- if fuzzterms:
- phrase.add(JArray('object')(fuzzterms, Term))
- else:
- phrase.add(term)
- else:
- phrase = PhraseQuery()
- phrase.setSlop(slop)
- for t in tokens:
- term = Term(field, t)
- phrase.add(term)
- return phrase
-
- @staticmethod
- def make_term_query(tokens, field='text', modal='BooleanClause.Occur.SHOULD XXX', fuzzy=False):
+ # def get_tokens(self, searched, field='text', cached=None):
+ # """returns tokens analyzed by a proper (for a field) analyzer
+ # argument can be: StringReader, string/unicode, or tokens. In the last case
+ # they will just be returned (so we can reuse tokens, if we don't change the analyzer)
+ # """
+ # if cached is not None and field in cached:
+ # return cached[field]
+
+ # if isinstance(searched, str) or isinstance(searched, unicode):
+ # searched = StringReader(searched)
+ # elif isinstance(searched, list):
+ # return searched
+
+ # searched.reset()
+ # tokens = self.analyzer.reusableTokenStream(field, searched)
+ # toks = []
+ # while tokens.incrementToken():
+ # cta = tokens.getAttribute(CharTermAttribute.class_)
+ # toks.append(cta.toString())
+
+ # if cached is not None:
+ # cached[field] = toks
+
+ # return toks
+
+ # @staticmethod
+ # def fuzziness(fuzzy):
+ # """Helper method to sanitize fuzziness"""
+ # if not fuzzy:
+ # return None
+ # if isinstance(fuzzy, float) and fuzzy > 0.0 and fuzzy <= 1.0:
+ # return fuzzy
+ # else:
+ # return 0.5
+
+ # def make_phrase(self, tokens, field='text', slop=2, fuzzy=False):
+ # """
+ # Return a PhraseQuery with a series of tokens.
+ # """
+ # if fuzzy:
+ # phrase = MultiPhraseQuery()
+ # for t in tokens:
+ # term = Term(field, t)
+ # fuzzterm = FuzzyTermEnum(self.searcher.getIndexReader(), term, self.fuzziness(fuzzy))
+ # fuzzterms = []
+
+ # while True:
+ # ft = fuzzterm.term()
+ # if ft:
+ # fuzzterms.append(ft)
+ # if not fuzzterm.next(): break
+ # if fuzzterms:
+ # phrase.add(JArray('object')(fuzzterms, Term))
+ # else:
+ # phrase.add(term)
+ # else:
+ # phrase = PhraseQuery()
+ # phrase.setSlop(slop)
+ # for t in tokens:
+ # term = Term(field, t)
+ # phrase.add(term)
+ # return phrase
+
+ def make_term_query(self, query, field='text', modal=operator.or_):
"""
Returns term queries joined by boolean query.
modal - applies to boolean query
- fuzzy - should the query by fuzzy.
"""
- q = BooleanQuery()
- for t in tokens:
- term = Term(field, t)
- if fuzzy:
- term = FuzzyQuery(term, self.fuzziness(fuzzy))
- else:
- term = TermQuery(term)
- q.add(BooleanClause(term, modal))
+ q = self.index.Q()
+ q = reduce(modal, map(lambda s: self.index.Q(**{field: s}),
+ query.split()), q)
+
return q
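+ # For example, with the default modal, make_term_query(u"ala kota", field='text')
+ # reduces to Q() | Q(text=u'ala') | Q(text=u'kota'), i.e. an OR over the words.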
- def search_phrase(self, searched, field, book=True, max_results=20, fuzzy=False,
- filters=None, tokens_cache=None, boost=None, snippets=False, slop=2):
+ def search_phrase(self, searched, field='text', book=False,
+ filters=None,
+ snippets=False):
if filters is None: filters = []
- if tokens_cache is None: tokens_cache = {}
-
- tokens = self.get_tokens(searched, field, cached=tokens_cache)
-
- query = self.make_phrase(tokens, field=field, fuzzy=fuzzy, slop=slop)
- if book:
- filters.append(self.term_filter(Term('is_book', 'true')))
- top = self.searcher.search(query, self.chain_filters(filters), max_results)
+ if book: filters.append(self.index.Q(is_book=True))
- return [SearchResult(self, found, snippets=(snippets and self.get_snippets(found, query) or None), searched=searched) for found in top.scoreDocs]
+ q = self.index.query(**{field: searched})
+ q = self.apply_filters(q, filters).field_limit(score=True, all_fields=True)
+ res = q.execute()
+ return [SearchResult(found, how_found=u'search_phrase') for found in res]
- def search_some(self, searched, fields, book=True, max_results=20, fuzzy=False,
- filters=None, tokens_cache=None, boost=None, snippets=True):
+ def search_some(self, searched, fields, book=True,
+ filters=None,
+ snippets=True):
+ assert isinstance(fields, list)
if filters is None: filters = []
- if tokens_cache is None: tokens_cache = {}
+ if book: filters.append(self.index.Q(is_book=True))
- if book:
- filters.append(self.term_filter(Term('is_book', 'true')))
-
- query = BooleanQuery()
+ query = self.index.Q()
for fld in fields:
- tokens = self.get_tokens(searched, fld, cached=tokens_cache)
-
- query.add(BooleanClause(self.make_term_query(tokens, field=fld,
- fuzzy=fuzzy), BooleanClause.Occur.SHOULD))
-
- top = self.searcher.search(query, self.chain_filters(filters), max_results)
-
- return [SearchResult(self, found, searched=searched, tokens_cache=tokens_cache,
- snippets=(snippets and self.get_snippets(found, query) or None)) for found in top.scoreDocs]
-
- def search_perfect_book(self, searched, max_results=20, fuzzy=False, hint=None):
- """
- Search for perfect book matches. Just see if the query matches with some author or title,
- taking hints into account.
- """
- fields_to_search = ['authors', 'title']
- only_in = None
- if hint:
- if not hint.should_search_for_book():
- return []
- fields_to_search = hint.just_search_in(fields_to_search)
- only_in = hint.book_filter()
-
- qrys = [self.make_phrase(self.get_tokens(searched, field=fld), field=fld, fuzzy=fuzzy) for fld in fields_to_search]
-
- books = []
- for q in qrys:
- top = self.searcher.search(q,
- self.chain_filters([only_in, self.term_filter(Term('is_book', 'true'))]),
- max_results)
- for found in top.scoreDocs:
- books.append(SearchResult(self, found, how_found="search_perfect_book"))
- return books
-
- def search_book(self, searched, max_results=20, fuzzy=False, hint=None):
- fields_to_search = ['tags', 'authors', 'title']
-
- only_in = None
- if hint:
- if not hint.should_search_for_book():
- return []
- fields_to_search = hint.just_search_in(fields_to_search)
- only_in = hint.book_filter()
-
- tokens = self.get_tokens(searched, field='SIMPLE')
-
- q = BooleanQuery()
-
- for fld in fields_to_search:
- q.add(BooleanClause(self.make_term_query(tokens, field=fld,
- fuzzy=fuzzy), BooleanClause.Occur.SHOULD))
+ query = self.index.Q(query | self.make_term_query(searched, fld))
- books = []
- top = self.searcher.search(q,
- self.chain_filters([only_in, self.term_filter(Term('is_book', 'true'))]),
- max_results)
- for found in top.scoreDocs:
- books.append(SearchResult(self, found, how_found="search_book"))
-
- return books
+ query = self.index.query(query)
+ query = self.apply_filters(query, filters).field_limit(score=True, all_fields=True)
+ res = query.execute()
+ return [SearchResult(found, how_found='search_some') for found in res]
- def search_perfect_parts(self, searched, max_results=20, fuzzy=False, hint=None):
- """
- Search for book parts which contains a phrase perfectly matching (with a slop of 2, default for make_phrase())
- some part/fragment of the book.
- """
- qrys = [self.make_phrase(self.get_tokens(searched), field=fld, fuzzy=fuzzy) for fld in ['text']]
+ # def search_perfect_book(self, searched, max_results=20, fuzzy=False, hint=None):
+ # """
+ # Search for perfect book matches. Just see if the query matches with some author or title,
+ # taking hints into account.
+ # """
+ # fields_to_search = ['authors', 'title']
+ # only_in = None
+ # if hint:
+ # if not hint.should_search_for_book():
+ # return []
+ # fields_to_search = hint.just_search_in(fields_to_search)
+ # only_in = hint.book_filter()
+
+ # qrys = [self.make_phrase(self.get_tokens(searched, field=fld), field=fld, fuzzy=fuzzy) for fld in fields_to_search]
+
+ # books = []
+ # for q in qrys:
+ # top = self.searcher.search(q,
+ # self.chain_filters([only_in, self.term_filter(Term('is_book', 'true'))]),
+ # max_results)
+ # for found in top.scoreDocs:
+ # books.append(SearchResult(self, found, how_found="search_perfect_book"))
+ # return books
+
+ # def search_book(self, searched, max_results=20, fuzzy=False, hint=None):
+ # fields_to_search = ['tags', 'authors', 'title']
+
+ # only_in = None
+ # if hint:
+ # if not hint.should_search_for_book():
+ # return []
+ # fields_to_search = hint.just_search_in(fields_to_search)
+ # only_in = hint.book_filter()
+
+ # tokens = self.get_tokens(searched, field='SIMPLE')
+
+ # q = BooleanQuery()
+
+ # for fld in fields_to_search:
+ # q.add(BooleanClause(self.make_term_query(tokens, field=fld,
+ # fuzzy=fuzzy), BooleanClause.Occur.SHOULD))
+
+ # books = []
+ # top = self.searcher.search(q,
+ # self.chain_filters([only_in, self.term_filter(Term('is_book', 'true'))]),
+ # max_results)
+ # for found in top.scoreDocs:
+ # books.append(SearchResult(self, found, how_found="search_book"))
+
+ # return books
+
+ # def search_perfect_parts(self, searched, max_results=20, fuzzy=False, hint=None):
+ # """
+ # Search for book parts which contains a phrase perfectly matching (with a slop of 2, default for make_phrase())
+ # some part/fragment of the book.
+ # """
+ # qrys = [self.make_phrase(self.get_tokens(searched), field=fld, fuzzy=fuzzy) for fld in ['text']]
- flt = None
- if hint:
- flt = hint.part_filter()
+ # flt = None
+ # if hint:
+ # flt = hint.part_filter()
- books = []
- for q in qrys:
- top = self.searcher.search(q,
- self.chain_filters([self.term_filter(Term('is_book', 'true'), inverse=True),
- flt]),
- max_results)
- for found in top.scoreDocs:
- books.append(SearchResult(self, found, snippets=self.get_snippets(found, q), how_found='search_perfect_parts'))
+ # books = []
+ # for q in qrys:
+ # top = self.searcher.search(q,
+ # self.chain_filters([self.term_filter(Term('is_book', 'true'), inverse=True),
+ # flt]),
+ # max_results)
+ # for found in top.scoreDocs:
+ # books.append(SearchResult(self, found, snippets=self.get_snippets(found, q), how_found='search_perfect_parts'))
- return books
+ # return books
- def search_everywhere(self, searched, max_results=20, fuzzy=False, hint=None, tokens_cache=None):
+ def search_everywhere(self, searched):
"""
Tries to use search terms to match different fields of book (or its parts).
E.g. one word can be an author survey, another be a part of the title, and the rest
are some words from third chapter.
"""
- if tokens_cache is None: tokens_cache = {}
books = []
- only_in = None
-
- if hint:
- only_in = hint.part_filter()
-
# content only query : themes x content
- q = BooleanQuery()
-
- tokens_pl = self.get_tokens(searched, field='text', cached=tokens_cache)
- tokens = self.get_tokens(searched, field='SIMPLE', cached=tokens_cache)
- # only search in themes when we do not already filter by themes
- if hint is None or hint.just_search_in(['themes']) != []:
- q.add(BooleanClause(self.make_term_query(tokens_pl, field='themes_pl',
- fuzzy=fuzzy), BooleanClause.Occur.MUST))
+ q = self.make_term_query(searched, 'text')
+ q_themes = self.make_term_query(searched, 'themes_pl')
- q.add(BooleanClause(self.make_term_query(tokens_pl, field='text',
- fuzzy=fuzzy), BooleanClause.Occur.SHOULD))
+ query = self.index.query(q).query(q_themes).field_limit(score=True, all_fields=True)
+ res = query.execute()
- topDocs = self.searcher.search(q, only_in, max_results)
- for found in topDocs.scoreDocs:
- books.append(SearchResult(self, found, how_found='search_everywhere_themesXcontent', searched=searched))
+ for found in res:
+ books.append(SearchResult(found, how_found='search_everywhere_themesXcontent'))
# query themes/content x author/title/tags
- q = BooleanQuery()
- in_content = BooleanQuery()
- in_meta = BooleanQuery()
+ in_content = self.index.Q()
+ in_meta = self.index.Q()
for fld in ['themes_pl', 'text']:
- in_content.add(BooleanClause(self.make_term_query(tokens_pl, field=fld, fuzzy=False), BooleanClause.Occur.SHOULD))
+ in_content |= self.make_term_query(searched, field=fld)
for fld in ['tags', 'authors', 'title']:
- in_meta.add(BooleanClause(self.make_term_query(tokens, field=fld, fuzzy=False), BooleanClause.Occur.SHOULD))
-
- q.add(BooleanClause(in_content, BooleanClause.Occur.MUST))
- q.add(BooleanClause(in_meta, BooleanClause.Occur.SHOULD))
+ in_meta |= self.make_term_query(searched, field=fld)
- topDocs = self.searcher.search(q, only_in, max_results)
- for found in topDocs.scoreDocs:
- books.append(SearchResult(self, found, how_found='search_everywhere', searched=searched))
+ q = in_content & in_meta
+ res = self.index.query(q).field_limit(score=True, all_fields=True).execute()
+ for found in res:
+ books.append(SearchResult(found, how_found='search_everywhere'))
return books
- # def multisearch(self, query, max_results=50):
- # """
- # Search strategy:
- # - (phrase) OR -> content
- # -> title
- # -> authors
- # - (keywords) -> authors
- # -> motyw
- # -> tags
- # -> content
- # """
- # queryreader = StringReader(query)
- # tokens = self.get_tokens(queryreader)
-
- # top_level = BooleanQuery()
- # Should = BooleanClause.Occur.SHOULD
-
- # phrase_level = BooleanQuery()
- # phrase_level.setBoost(1.3)
-
- # p_content = self.make_phrase(tokens, joined=True)
- # p_title = self.make_phrase(tokens, 'title')
- # p_author = self.make_phrase(tokens, 'author')
-
- # phrase_level.add(BooleanClause(p_content, Should))
- # phrase_level.add(BooleanClause(p_title, Should))
- # phrase_level.add(BooleanClause(p_author, Should))
-
- # kw_level = BooleanQuery()
-
- # kw_level.add(self.make_term_query(tokens, 'author'), Should)
- # j_themes = self.make_term_query(tokens, 'themes', joined=True)
- # kw_level.add(j_themes, Should)
- # kw_level.add(self.make_term_query(tokens, 'tags'), Should)
- # j_con = self.make_term_query(tokens, joined=True)
- # kw_level.add(j_con, Should)
-
- # top_level.add(BooleanClause(phrase_level, Should))
- # top_level.add(BooleanClause(kw_level, Should))
-
- # return None
-
- def get_snippets(self, scoreDoc, query, field='text'):
+ def get_snippets(self, searchresult, query, field='text', num=1):
"""
- Returns a snippet for found scoreDoc.
+ Returns snippets for a given SearchResult.
"""
- htmlFormatter = SimpleHTMLFormatter()
- highlighter = Highlighter(htmlFormatter, QueryScorer(query))
-
- stored = self.searcher.doc(scoreDoc.doc)
-
- position = stored.get('snippets_position')
- length = stored.get('snippets_length')
- if position is None or length is None:
- return None
- revision = stored.get('snippets_revision')
- if revision: revision = int(revision)
-
- # locate content.
- book_id = int(stored.get('book_id'))
+ maxnum = len(searchresult)
+ if num is None or num < 0 or num > maxnum:
+ num = maxnum
+ book_id = searchresult.book_id
+ revision = searchresult.snippet_revision()
snippets = Snippets(book_id, revision=revision)
-
+ snips = [None] * maxnum
try:
snippets.open()
+ idx = 0
+ while idx < maxnum and num > 0:
+ position, length = searchresult.snippet_pos(idx)
+ if position is None or length is None:
+ idx += 1
+ continue
+ text = snippets.get((int(position),
+ int(length)))
+ print "== %s -- %s ==" % (query, text)
+ snip = self.index.highlight(text=text, field=field, q=query)
+ snips[idx] = snip
+ if snip:
+ num -= 1
+ idx += 1
+
except IOError, e:
log.error("Cannot open snippet file for book id = %d [rev=%d], %s" % (book_id, revision, e))
return []
+ finally:
+ snippets.close()
- try:
- try:
- text = snippets.get((int(position),
- int(length)))
- finally:
- snippets.close()
-
- tokenStream = TokenSources.getAnyTokenStream(self.searcher.getIndexReader(), scoreDoc.doc, field, self.analyzer)
- # highlighter.getBestTextFragments(tokenStream, text, False, 10)
- snip = highlighter.getBestFragments(tokenStream, text, 3, "...")
+ # remove verse end markers.
+ snips = map(lambda s: s and s.replace("/\n", "\n"), snips)
- except Exception, e:
- e2 = e
- if hasattr(e, 'getJavaException'):
- e2 = unicode(e.getJavaException())
- raise Exception("Problem fetching snippets for book %d, @%d len=%d" % (book_id, int(position), int(length)),
- e2)
- return snip
+ searchresult.snippets = snips
+ return snips
- @staticmethod
- def enum_to_array(enum):
+ def hint_tags(self, query, pdcounter=True, prefix=True):
"""
- Converts a lucene TermEnum to array of Terms, suitable for
- addition to queries
+ Return auto-complete hints for tags
+ using prefix search.
"""
- terms = []
-
- while True:
- t = enum.term()
- if t:
- terms.append(t)
- if not enum.next(): break
+ q = self.index.Q()
+ query = query.strip()
+ for field in ['tag_name', 'tag_name_pl']:
+ if prefix:
+ q |= self.index.Q(**{field: query + "*"})
+ else:
+ q |= self.make_term_query(query, field=field)
+ qu = self.index.query(q).exclude(tag_category="book")
- if terms:
- return JArray('object')(terms, Term)
+ return self.search_tags(qu, pdcounter=pdcounter)
- def search_tags(self, query, filt=None, max_results=40, pdcounter=False):
+ def search_tags(self, query, filters=None, pdcounter=False):
"""
Search for Tag objects using query.
"""
+ if not filters: filters = []
if not pdcounter:
- filters = self.chain_filters([filt, self.term_filter(Term('is_pdcounter', 'true'), inverse=True)])
- tops = self.searcher.search(query, filt, max_results)
+ filters.append(~self.index.Q(is_pdcounter=True))
+ res = self.apply_filters(query, filters).execute()
tags = []
- for found in tops.scoreDocs:
- doc = self.searcher.doc(found.doc)
- is_pdcounter = doc.get('is_pdcounter')
+ for doc in res:
+ is_pdcounter = doc.get('is_pdcounter', False)
category = doc.get('tag_category')
try:
- if is_pdcounter == 'true':
+ if is_pdcounter:
if category == 'pd_author':
tag = PDCounterAuthor.objects.get(id=doc.get('tag_id'))
elif category == 'pd_book':
return tags
- def search_books(self, query, filt=None, max_results=10):
+ def hint_books(self, query, prefix=True):
+ """
+ Returns auto-complete hints for book titles,
+ because we do not index 'pseudo' title-tags.
+ Uses prefix search.
+ """
+ q = self.index.Q()
+ query = query.strip()
+ if prefix:
+ q |= self.index.Q(title=query + "*")
+ else:
+ q |= self.make_term_query(query, field='title')
+ qu = self.index.query(q)
+ only_books = self.index.Q(is_book=True)
+ return self.search_books(qu, [only_books])
+
+ def search_books(self, query, filters=None, max_results=10):
"""
Searches for Book objects using query
"""
bks = []
- tops = self.searcher.search(query, filt, max_results)
- for found in tops.scoreDocs:
- doc = self.searcher.doc(found.doc)
+ res = self.apply_filters(query, filters).field_limit(['book_id']).execute()
+ for r in res:
try:
- bks.append(catalogue.models.Book.objects.get(id=doc.get("book_id")))
+ bks.append(catalogue.models.Book.objects.get(id=r['book_id']))
except catalogue.models.Book.DoesNotExist: pass
return bks
+
+ # def make_prefix_phrase(self, toks, field):
+ # q = MultiPhraseQuery()
+ # for i in range(len(toks)):
+ # t = Term(field, toks[i])
+ # if i == len(toks) - 1:
+ # pterms = Search.enum_to_array(PrefixTermEnum(self.searcher.getIndexReader(), t))
+ # if pterms:
+ # q.add(pterms)
+ # else:
+ # q.add(t)
+ # else:
+ # q.add(t)
+ # return q
+
+ # @staticmethod
+ # def term_filter(term, inverse=False):
+ # only_term = TermsFilter()
+ # only_term.addTerm(term)
+
+ # if inverse:
+ # neg = BooleanFilter()
+ # neg.add(FilterClause(only_term, BooleanClause.Occur.MUST_NOT))
+ # only_term = neg
+
+ # return only_term
- def make_prefix_phrase(self, toks, field):
- q = MultiPhraseQuery()
- for i in range(len(toks)):
- t = Term(field, toks[i])
- if i == len(toks) - 1:
- pterms = Search.enum_to_array(PrefixTermEnum(self.searcher.getIndexReader(), t))
- if pterms:
- q.add(pterms)
- else:
- q.add(t)
- else:
- q.add(t)
- return q
-
- @staticmethod
- def term_filter(term, inverse=False):
- only_term = TermsFilter()
- only_term.addTerm(term)
-
- if inverse:
- neg = BooleanFilter()
- neg.add(FilterClause(only_term, BooleanClause.Occur.MUST_NOT))
- only_term = neg
- return only_term
-
- def hint_tags(self, string, max_results=50, pdcounter=True, prefix=True, fuzzy=False):
- """
- Return auto-complete hints for tags
- using prefix search.
- """
- toks = self.get_tokens(string, field='SIMPLE')
- top = BooleanQuery()
-
- for field in ['tag_name', 'tag_name_pl']:
- if prefix:
- q = self.make_prefix_phrase(toks, field)
- else:
- q = self.make_term_query(toks, field, fuzzy=fuzzy)
- top.add(BooleanClause(q, BooleanClause.Occur.SHOULD))
-
- no_book_cat = self.term_filter(Term("tag_category", "book"), inverse=True)
-
- return self.search_tags(top, no_book_cat, max_results=max_results, pdcounter=pdcounter)
-
- def hint_books(self, string, max_results=50, prefix=True, fuzzy=False):
- """
- Returns auto-complete hints for book titles
- Because we do not index 'pseudo' title-tags.
- Prefix search.
- """
- toks = self.get_tokens(string, field='SIMPLE')
-
- if prefix:
- q = self.make_prefix_phrase(toks, 'title')
- else:
- q = self.make_term_query(toks, 'title', fuzzy=fuzzy)
-
- return self.search_books(q, self.term_filter(Term("is_book", "true")), max_results=max_results)
@staticmethod
- def chain_filters(filters, op='XXXChainedFilter.AND'):
+ def apply_filters(query, filters):
"""
- Chains a filter list together
+ Apply filters to a query
"""
+ if filters is None: filters = []
filters = filter(lambda x: x is not None, filters)
- if not filters or filters is []:
- return None
- chf = ChainedFilter(JArray('object')(filters, Filter), op)
- return chf
+ for f in filters:
+ query = query.query(f)
+ return query
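+ # e.g. apply_filters(self.index.query(text=u'kot'), [self.index.Q(is_book=True)])
+ # is equivalent to self.index.query(text=u'kot').query(is_book=True).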
- def filtered_categories(self, tags):
- """
- Return a list of tag categories, present in tags list.
- """
- cats = {}
- for t in tags:
- cats[t.category] = True
- return cats.keys()
+ # def filtered_categories(self, tags):
+ # """
+ # Return a list of tag categories, present in tags list.
+ # """
+ # cats = {}
+ # for t in tags:
+ # cats[t.category] = True
+ # return cats.keys()
- def hint(self):
- return Hint(self)
+ # def hint(self):
+ # return Hint(self)
# We don't need hits which lead to sections but do not have
# snippets.
- hits = filter(lambda h: 'fragment' in h or
- h['snippets'], result.hits)[0:5]
-
- for hit in hits:
- hit['snippets'] = map(lambda s: s.replace("\n", "<br />").replace('---', '—'), hit['snippets'])
+ hits = filter(lambda (idx, h):
+ result.snippets[idx] is not None
+ or 'fragment' in h, enumerate(result.hits))
+ print "[tmpl: from %d hits selected %d]" % (len(result.hits), len(hits))
+
+ for (idx, hit) in hits:
+ # currently we generate one snippet per hit though.
+ if 'fragment' in hit:
+ continue
+ snip = result.snippets[idx]
+ # fix some formatting
+ snip = snip.replace("\n", "<br />").replace('---', '—')
+ hit['snippet'] = snip
return {
'related': book.related_info(),
'book': book,
'main_link': book.get_absolute_url(),
'request': context.get('request'),
- 'hits': hits,
+ 'hits': [hit for (idx, hit) in hits],
}
-
from catalogue.models import Book, Tag, Fragment
from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook
from catalogue.views import JSONResponse
-from search import Search, JVM, SearchResult
+from search import Search, SearchResult
from lucene import StringReader
from suggest.forms import PublishingSuggestForm
from time import sleep
#import enchant
import json
-#dictionary = enchant.Dict('en_US')
-
def match_word_re(word):
if 'sqlite' in settings.DATABASES['default']['ENGINE']:
def did_you_mean(query, tokens):
- change = {}
- for t in tokens:
- authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
- if len(authors) > 0:
- continue
-
- if False:
- if not dictionary.check(t):
- try:
- change_to = dictionary.suggest(t)[0].lower()
- if change_to != t.lower():
- change[t] = change_to
- except IndexError:
- pass
-
- if change == {}:
- return None
-
- for frm, to in change.items():
- query = query.replace(frm, to)
-
return query
+ # change = {}
+ # for t in tokens:
+ # authors = Tag.objects.filter(category='author', name__iregex=match_word_re(t))
+ # if len(authors) > 0:
+ # continue
+ # if False:
+ # if not dictionary.check(t):
+ # try:
+ # change_to = dictionary.suggest(t)[0].lower()
+ # if change_to != t.lower():
+ # change[t] = change_to
+ # except IndexError:
+ # pass
-JVM.attachCurrentThread()
-_search = None
-
+ # if change == {}:
+ # return None
-def get_search():
- global _search
+ # for frm, to in change.items():
+ # query = query.replace(frm, to)
- while _search is False:
- sleep(1)
-
- if _search is None:
- _search = False
- _search = Search()
- return _search
+ # return query
def hint(request):
prefix = request.GET.get('term', '')
if len(prefix) < 2:
return JSONResponse([])
- JVM.attachCurrentThread()
-
- search = get_search()
- hint = search.hint()
- try:
- tags = request.GET.get('tags', '')
- hint.tags(Tag.get_tag_list(tags))
- except:
- pass
+ search = Search()
# tags will limit the results here
# but tags can be on the book or on its fragments
# if the tags apply only to the book, the new ones must be within the same book
tags = search.hint_tags(prefix, pdcounter=True)
books = search.hint_books(prefix)
-
def is_dupe(tag):
if isinstance(tag, PDCounterAuthor):
if filter(lambda t: t.slug == tag.slug and t != tag, tags):
content_type="application/json; charset=utf-8")
else:
return JSONResponse(data)
-
def main(request):
results = {}
- JVM.attachCurrentThread() # where to put this?
results = None
query = None
- fuzzy = False #0.8
- query = request.GET.get('q','')
- # book_id = request.GET.get('book', None)
- # book = None
- # if book_id is not None:
- # book = get_object_or_404(Book, id=book_id)
-
- # hint = search.hint()
- # try:
- # tag_list = Tag.get_tag_list(tags)
- # except:
- # tag_list = []
+ query = request.GET.get('q', '')
if len(query) < 2:
- return render_to_response('catalogue/search_too_short.html', {'prefix': query},
- context_instance=RequestContext(request))
-
- search = get_search()
- # hint.tags(tag_list)
- # if book:
- # hint.books(book)
- tags = search.hint_tags(query, pdcounter=True, prefix=False, fuzzy=fuzzy)
- tags = split_tags(tags)
+ return render_to_response('catalogue/search_too_short.html',
+ {'prefix': query},
+ context_instance=RequestContext(request))
+ search = Search()
- toks = StringReader(query)
- tokens_cache = {}
+ # change hints
+ tags = search.hint_tags(query, pdcounter=True, prefix=False)
+ tags = split_tags(tags)
- author_results = search.search_phrase(toks, 'authors', fuzzy=fuzzy, tokens_cache=tokens_cache)
- title_results = search.search_phrase(toks, 'title', fuzzy=fuzzy, tokens_cache=tokens_cache)
+ author_results = search.search_phrase(query, 'authors', book=True)
+ title_results = search.search_phrase(query, 'title', book=True)
# Boost main author/title results with mixed search, and save some of its results for end of list.
# boost author, title results
- author_title_mixed = search.search_some(toks, ['authors', 'title', 'tags'], fuzzy=fuzzy, tokens_cache=tokens_cache)
+ author_title_mixed = search.search_some(query, ['authors', 'title', 'tags'])
author_title_rest = []
+
for b in author_title_mixed:
- bks = filter(lambda ba: ba.book_id == b.book_id, author_results + title_results)
- for b2 in bks:
+ also_in_mixed = filter(lambda ba: ba.book_id == b.book_id, author_results + title_results)
+ for b2 in also_in_mixed:
b2.boost *= 1.1
- if bks is []:
+ if not also_in_mixed:
author_title_rest.append(b)
# Do a phrase search but a term search as well - this can give us better snippets than search_everywhere,
# because the query uses only one field.
text_phrase = SearchResult.aggregate(
- search.search_phrase(toks, 'content', fuzzy=fuzzy, tokens_cache=tokens_cache, snippets=True, book=False, slop=4),
- search.search_some(toks, ['content'], tokens_cache=tokens_cache, snippets=True, book=False))
+ search.search_phrase(query, 'text', snippets=True, book=False),
+ search.search_some(query, ['text'], snippets=True, book=False))
- everywhere = search.search_everywhere(toks, fuzzy=fuzzy, tokens_cache=tokens_cache)
+ everywhere = search.search_everywhere(query)
def already_found(results):
def f(e):
everywhere = SearchResult.aggregate(everywhere, author_title_rest)
- for res in [author_results, title_results, text_phrase, everywhere]:
+ for field, res in [('authors', author_results),
+ ('title', title_results),
+ ('text', text_phrase),
+ ('text', everywhere)]:
res.sort(reverse=True)
+ print "get snips %s, res size %d" % (field, len(res))
for r in res:
- for h in r.hits:
- h['snippets'] = map(lambda s:
- re.subn(r"(^[ \t\n]+|[ \t\n]+$)", u"",
- re.subn(r"[ \t\n]*\n[ \t\n]*", u"\n", s)[0])[0], h['snippets'])
+ print "Get snippets for %s" % r
+ search.get_snippets(r, query, field, 3)
+ # for r in res:
+ # for h in r.hits:
+ # h['snippets'] = map(lambda s:
+ # re.subn(r"(^[ \t\n]+|[ \t\n]+$)", u"",
+ # re.subn(r"[ \t\n]*\n[ \t\n]*", u"\n", s)[0])[0], h['snippets'])
- suggestion = did_you_mean(query, search.get_tokens(toks, field="SIMPLE"))
+ # suggestion = did_you_mean(query, search.get_tokens(toks, field="SIMPLE"))
+ suggestion = u''
def ensure_exists(r):
try:
return render_to_response('catalogue/search_multiple_hits.html',
{'tags': tags,
'prefix': query,
- 'results': { 'author': author_results,
- 'title': title_results,
- 'content': text_phrase,
- 'other': everywhere},
+ 'results': {'author': author_results,
+ 'title': title_results,
+ 'content': text_phrase,
+ 'other': everywhere},
'did_you_mean': suggestion},
context_instance=RequestContext(request))
--- /dev/null
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!--
+ This is the Solr schema file. This file should be named "schema.xml" and
+ should be in the conf directory under the solr home
+ (i.e. ./solr/conf/schema.xml by default)
+ or located where the classloader for the Solr webapp can find it.
+
+ This example schema is the recommended starting point for users.
+ It should be kept correct and concise, usable out-of-the-box.
+
+ For more information, on how to customize this file, please see
+ http://wiki.apache.org/solr/SchemaXml
+
+ PERFORMANCE NOTE: this schema includes many optional features and should not
+ be used for benchmarking. To improve performance one could
+ - set stored="false" for all fields possible (esp large fields) when you
+ only need to search on the field but don't need to return the original
+ value.
+ - set indexed="false" if you don't need to search on the field, but only
+ return the field as a result of searching on other indexed fields.
+ - remove all unneeded copyField statements
+ - for best index size and searching performance, set "index" to false
+ for all general text fields, use copyField to copy them to the
+ catchall "text" field, and use that for searching.
+ - For maximum indexing performance, use the StreamingUpdateSolrServer
+ java client.
+ - Remember to run the JVM in server mode, and use a higher logging level
+ that avoids logging every request
+-->
+
+<schema name="example" version="1.5">
+ <!-- attribute "name" is the name of this schema and is only used for display purposes.
+ version="x.y" is Solr's version number for the schema syntax and semantics. It should
+ not normally be changed by applications.
+ 1.0: multiValued attribute did not exist, all fields are multiValued by nature
+ 1.1: multiValued attribute introduced, false by default
+ 1.2: omitTermFreqAndPositions attribute introduced, true by default except for text fields.
+ 1.3: removed optional field compress feature
+ 1.4: default auto-phrase (QueryParser feature) to off
+ 1.5: omitNorms defaults to true for primitive field types (int, float, boolean, string...)
+ -->
+
+ <fields>
+ <!-- Valid attributes for fields:
+ name: mandatory - the name for the field
+ type: mandatory - the name of a field type from the
+ <types> fieldType section
+ indexed: true if this field should be indexed (searchable or sortable)
+ stored: true if this field should be retrievable
+ multiValued: true if this field may contain multiple values per document
+ omitNorms: (expert) set to true to omit the norms associated with
+ this field (this disables length normalization and index-time
+ boosting for the field, and saves some memory). Only full-text
+ fields or fields that need an index-time boost need norms.
+ Norms are omitted for primitive (non-analyzed) types by default.
+ termVectors: [false] set to true to store the term vector for a
+ given field.
+ When using MoreLikeThis, fields used for similarity should be
+ stored for best performance.
+ termPositions: Store position information with the term vector.
+ This will increase storage costs.
+ termOffsets: Store offset information with the term vector. This
+ will increase storage costs.
+ required: The field is required. It will throw an error if the
+ value does not exist
+ default: a value that should be used if no value is specified
+ when adding a document.
+ -->
+
+ <!-- field names should consist of alphanumeric or underscore characters only and
+ not start with a digit. This is not currently strictly enforced,
+ but other field names will not have first class support from all components
+ and back compatibility is not guaranteed. Names with both leading and
+ trailing underscores (e.g. _version_) are reserved.
+ -->
+
+ <!-- <field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" /> -->
+ <!-- <field name="sku" type="text_en_splitting_tight" indexed="true" stored="true" omitNorms="true"/> -->
+ <!-- <field name="name" type="text_general" indexed="true" stored="true"/> -->
+ <!-- <field name="manu" type="text_general" indexed="true" stored="true" omitNorms="true"/> -->
+ <!-- <field name="cat" type="string" indexed="true" stored="true" multiValued="true"/> -->
+ <!-- <field name="features" type="text_general" indexed="true" stored="true" multiValued="true"/> -->
+ <!-- <field name="includes" type="text_general" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" /> -->
+
+ <!-- <field name="weight" type="float" indexed="true" stored="true"/> -->
+ <!-- <field name="price" type="float" indexed="true" stored="true"/> -->
+ <!-- <field name="popularity" type="int" indexed="true" stored="true" /> -->
+ <!-- <field name="inStock" type="boolean" indexed="true" stored="true" /> -->
+
+ <!-- <field name="store" type="location" indexed="true" stored="true"/> -->
+
+ <!-- Common metadata fields, named specifically to match up with
+ SolrCell metadata when parsing rich documents such as Word, PDF.
+ Some fields are multiValued only because Tika currently may return
+ multiple values for them. Some metadata is parsed from the documents,
+ but there are some which come from the client context:
+ "content_type": From the HTTP headers of incoming stream
+ "resourcename": From SolrCell request param resource.name
+ -->
+ <!-- <field name="title" type="text_general" indexed="true" stored="true" multiValued="true"/> -->
+ <!-- <field name="subject" type="text_general" indexed="true" stored="true"/> -->
+ <!-- <field name="description" type="text_general" indexed="true" stored="true"/> -->
+ <!-- <field name="comments" type="text_general" indexed="true" stored="true"/> -->
+ <!-- <field name="author" type="text_general" indexed="true" stored="true"/> -->
+ <!-- <field name="keywords" type="text_general" indexed="true" stored="true"/> -->
+ <!-- <field name="category" type="text_general" indexed="true" stored="true"/> -->
+ <!-- <field name="resourcename" type="text_general" indexed="true" stored="true"/> -->
+ <!-- <field name="url" type="text_general" indexed="true" stored="true"/> -->
+ <!-- <field name="content_type" type="string" indexed="true" stored="true" multiValued="true"/> -->
+ <!-- <field name="last_modified" type="date" indexed="true" stored="true"/> -->
+ <!-- <field name="links" type="string" indexed="true" stored="true" multiValued="true"/> -->
+
+ <field name="book_id" type="int" indexed="true" stored="true" />
+ <field name="parent_id" type="int" indexed="false" stored="true" />
+ <field name="slug" type="text_general" stored="false" indexed="true" omitNorms="true"/> <!-- no norms -->
+ <field name="tags" type="lowercase" stored="false" indexed="true" multiValued="true"/>
+ <field name="is_book" type="boolean" stored="false" indexed="true"/>
+ <field name="authors" type="text_general" stored="false" indexed="true" multiValued="true"/>
+ <field name="translators" type="text_general" stored="false" indexed="true" multiValued="true"/>
+ <field name="title" type="text_pl" stored="false" indexed="true"/>
+ <field name="title_orig" type="text_general" stored="false" indexed="true"/>
+<!-- <field name="published_date" type="tdate" stored="false" indexed="true"/>-->
+ <field name="published_date" type="string" stored="true" indexed="true"/>
+
+ <field name="themes" type="lowercase" stored="true" intexed="true" termVectors="true" termPositions="true" multiValued="true" />
+ <field name="themes_pl" type="text_pl" stored="true" indexed="true" termVectors="true" termPositions="true" multiValued="true" />
+ <field name="header_index" type="int" stored="true" indexed="true"/>
+ <field name="header_span" type="int" stored="true" indexed="true"/>
+ <field name="header_type" type="lowercase" stored="true" indexed="false"/>
+ <field name="text" type="text_pl" stored="false" indexed="true" termPositions="true" />
+
+ <field name="snippets_position" type="int" stored="true" indexed="false"/>
+ <field name="snippets_length" type="int" stored="true" indexed="false"/>
+ <field name="snippets_revision" type="int" stored="true" indexed="false"/>
+ <field name="fragment_anchor" type="string" stored="true" indexed="false"/>
+
+ <field name="tag_id" type="int" stored="true" indexed="true"/>
+ <field name="tag_name" type="lowercase" stored="true" intexed="true" />
+ <field name="tag_name_pl" type="text_pl" stored="false" indexed="true" multiValued="true"/>
+ <field name="tag_category" type="string" stored="true" indexed="true" />
+ <field name="is_pdcounter" type="boolean" stored="true" indexed="true" />
+
+ <!-- Main body of document extracted by SolrCell.
+ NOTE: This field is not indexed by default, since it is also copied to "text"
+ using copyField below. This is to save space. Use this field for returning and
+ highlighting document content. Use the "text" field to search the content. -->
+ <!-- <field name="content" type="text_general" indexed="false" stored="true" multiValued="true"/> -->
+
+
+ <!-- catchall field, containing all other searchable text fields (implemented
+ via copyField further on in this schema -->
+ <!-- <field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/> -->
+
+ <!-- catchall text field that indexes tokens both normally and in reverse for efficient
+ leading wildcard queries. -->
+ <!-- <field name="text_rev" type="text_general_rev" indexed="true" stored="false" multiValued="true"/> -->
+
+ <!-- non-tokenized version of manufacturer to make it easier to sort or group
+ results by manufacturer. copied from "manu" via copyField -->
+ <!-- <field name="manu_exact" type="string" indexed="true" stored="false"/> -->
+
+ <!-- <field name="payloads" type="payloads" indexed="true" stored="true"/> -->
+
+ <!-- <field name="_version_" type="long" indexed="true" stored="true"/> -->
+
+ <!-- Uncommenting the following will create a "timestamp" field using
+ a default value of "NOW" to indicate when each document was indexed.
+ -->
+ <!--
+ <field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
+ -->
+
+ <!-- Dynamic field definitions allow using convention over configuration
+ for fields via the specification of patterns to match field names.
+ EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i)
+ RESTRICTION: the glob-like pattern in the name attribute must have
+ a "*" only at the start or the end. -->
+
+ <dynamicField name="*_i" type="int" indexed="true" stored="true"/>
+ <dynamicField name="*_is" type="int" indexed="true" stored="true" multiValued="true"/>
+ <dynamicField name="*_s" type="string" indexed="true" stored="true" />
+ <dynamicField name="*_ss" type="string" indexed="true" stored="true" multiValued="true"/>
+ <dynamicField name="*_l" type="long" indexed="true" stored="true"/>
+ <dynamicField name="*_ls" type="long" indexed="true" stored="true" multiValued="true"/>
+ <dynamicField name="*_t" type="text_general" indexed="true" stored="true"/>
+ <dynamicField name="*_txt" type="text_general" indexed="true" stored="true" multiValued="true"/>
+ <dynamicField name="*_en" type="text_en" indexed="true" stored="true" multiValued="true"/>
+ <dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
+ <dynamicField name="*_bs" type="boolean" indexed="true" stored="true" multiValued="true"/>
+ <dynamicField name="*_f" type="float" indexed="true" stored="true"/>
+ <dynamicField name="*_fs" type="float" indexed="true" stored="true" multiValued="true"/>
+ <dynamicField name="*_d" type="double" indexed="true" stored="true"/>
+ <dynamicField name="*_ds" type="double" indexed="true" stored="true" multiValued="true"/>
+
+ <!-- Type used to index the lat and lon components for the "location" FieldType -->
+ <dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false" />
+
+ <dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
+ <dynamicField name="*_dts" type="date" indexed="true" stored="true" multiValued="true"/>
+ <dynamicField name="*_p" type="location" indexed="true" stored="true"/>
+
+ <!-- some trie-coded dynamic fields for faster range queries -->
+ <dynamicField name="*_ti" type="tint" indexed="true" stored="true"/>
+ <dynamicField name="*_tl" type="tlong" indexed="true" stored="true"/>
+ <dynamicField name="*_tf" type="tfloat" indexed="true" stored="true"/>
+ <dynamicField name="*_td" type="tdouble" indexed="true" stored="true"/>
+ <dynamicField name="*_tdt" type="tdate" indexed="true" stored="true"/>
+
+ <dynamicField name="*_pi" type="pint" indexed="true" stored="true"/>
+<!-- <dynamicField name="*_c" type="currency" indexed="true" stored="true"/>-->
+
+ <dynamicField name="ignored_*" type="ignored" multiValued="true"/>
+ <dynamicField name="attr_*" type="text_general" indexed="true" stored="true" multiValued="true"/>
+
+ <dynamicField name="random_*" type="random" />
+
+ <!-- uncomment the following to ignore any fields that don't already match an existing
+ field name or dynamic field, rather than reporting them as an error.
+ alternately, change the type="ignored" to some other type e.g. "text" if you want
+ unknown fields indexed and/or stored by default -->
+ <!--dynamicField name="*" type="ignored" multiValued="true" /-->
+ <field name="uid" type="string" indexed="true" stored="true"/>
+ </fields>
+
+
+ <!-- Field to use to determine and enforce document uniqueness.
+ Unless this field is marked with required="false", it will be a required field
+ -->
+ <uniqueKey>uid</uniqueKey>
+
+ <!-- DEPRECATED: The defaultSearchField is consulted by various query parsers when
+ parsing a query string that isn't explicit about the field. Machine (non-user)
+ generated queries are best made explicit, or they can use the "df" request parameter
+ which takes precedence over this.
+ Note: Un-commenting defaultSearchField will be insufficient if your request handler
+ in solrconfig.xml defines "df", which takes precedence. That would need to be removed.
+ <defaultSearchField>text</defaultSearchField> -->
+
+ <!-- DEPRECATED: The defaultOperator (AND|OR) is consulted by various query parsers
+ when parsing a query string to determine if a clause of the query should be marked as
+ required or optional, assuming the clause isn't already marked by some operator.
+ The default is OR, which is generally assumed so it is not a good idea to change it
+ globally here. The "q.op" request parameter takes precedence over this.
+ <solrQueryParser defaultOperator="OR"/> -->
+
+ <!-- copyField commands copy one field to another at the time a document
+ is added to the index. It's used either to index the same field differently,
+ or to add multiple fields to the same field for easier/faster searching. -->
+
+ <copyField source="themes" dest="themes_pl"/>
+ <copyField source="tag_name" dest="tag_name_pl"/>
+
+<!--
+ <copyField source="cat" dest="text"/>
+ <copyField source="name" dest="text"/>
+ <copyField source="manu" dest="text"/>
+ <copyField source="features" dest="text"/>
+ <copyField source="includes" dest="text"/>
+ <copyField source="manu" dest="manu_exact"/>
+-->
+ <!-- Copy the price into a currency enabled field (default USD) -->
+<!-- <copyField source="price" dest="price_c"/>-->
+
+ <!-- Text fields from SolrCell to search by default in our catch-all field -->
+<!-- <copyField source="title" dest="text"/>
+ <copyField source="author" dest="text"/>
+ <copyField source="description" dest="text"/>
+ <copyField source="keywords" dest="text"/>
+ <copyField source="content" dest="text"/>
+ <copyField source="content_type" dest="text"/>
+ <copyField source="resourcename" dest="text"/>
+ <copyField source="url" dest="text"/>-->
+
+ <!-- Create a string version of author for faceting -->
+<!-- <copyField source="author" dest="author_s"/>-->
+
+ <!-- Above, multiple source fields are copied to the [text] field.
+ Another way to map multiple source fields to the same
+ destination field is to use the dynamic field syntax.
+ copyField also supports a maxChars to copy setting. -->
+
+ <!-- <copyField source="*_t" dest="text" maxChars="3000"/> -->
+
+ <!-- copy name to alphaNameSort, a field designed for sorting by name -->
+ <!-- <copyField source="name" dest="alphaNameSort"/> -->
+
+ <types>
+ <!-- field type definitions. The "name" attribute is
+ just a label to be used by field definitions. The "class"
+ attribute and any other attributes determine the real
+ behavior of the fieldType.
+ Class names starting with "solr" refer to java classes in a
+ standard package such as org.apache.solr.analysis
+ -->
+
+ <!-- The StrField type is not analyzed, but indexed/stored verbatim. -->
+ <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
+
+ <!-- boolean type: "true" or "false" -->
+ <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
+
+    <!-- The optional sortMissingLast and sortMissingFirst attributes
+      are currently supported on types that are sorted internally as strings
+      and on numeric types.
+ This includes "string","boolean", and, as of 3.5 (and 4.x),
+ int, float, long, date, double, including the "Trie" variants.
+ - If sortMissingLast="true", then a sort on this field will cause documents
+ without the field to come after documents with the field,
+ regardless of the requested sort order (asc or desc).
+ - If sortMissingFirst="true", then a sort on this field will cause documents
+ without the field to come before documents with the field,
+ regardless of the requested sort order.
+ - If sortMissingLast="false" and sortMissingFirst="false" (the default),
+ then default lucene sorting will be used which places docs without the
+ field first in an ascending sort and last in a descending sort.
+ -->
+
+ <!--
+ Default numeric field types. For faster range queries, consider the tint/tfloat/tlong/tdouble types.
+ -->
+ <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
+ <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"/>
+ <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
+ <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/>
+
+ <!--
+ Numeric field types that index each value at various levels of precision
+ to accelerate range queries when the number of values between the range
+ endpoints is large. See the javadoc for NumericRangeQuery for internal
+ implementation details.
+
+ Smaller precisionStep values (specified in bits) will lead to more tokens
+ indexed per value, slightly larger index size, and faster range queries.
+ A precisionStep of 0 disables indexing at different precision levels.
+ -->
+ <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/>
+ <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/>
+ <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/>
+ <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>
+
+ <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
+ is a more restricted form of the canonical representation of dateTime
+ http://www.w3.org/TR/xmlschema-2/#dateTime
+ The trailing "Z" designates UTC time and is mandatory.
+ Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
+ All other components are mandatory.
+
+ Expressions can also be used to denote calculations that should be
+ performed relative to "NOW" to determine the value, ie...
+
+ NOW/HOUR
+ ... Round to the start of the current hour
+ NOW-1DAY
+ ... Exactly 1 day prior to now
+ NOW/DAY+6MONTHS+3DAYS
+ ... 6 months and 3 days in the future from the start of
+ the current day
+
+ Consult the DateField javadocs for more information.
+
+ Note: For faster range queries, consider the tdate type
+ -->
+ <fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>
+
+ <!-- A Trie based date field for faster date range queries and date faceting. -->
+ <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>
+
+
+    <!-- Binary data type. The data should be sent/retrieved as Base64 encoded Strings -->
+ <fieldtype name="binary" class="solr.BinaryField"/>
+
+ <!--
+ Note:
+ These should only be used for compatibility with existing indexes (created with lucene or older Solr versions).
+ Use Trie based fields instead. As of Solr 3.5 and 4.x, Trie based fields support sortMissingFirst/Last
+
+ Plain numeric field types that store and index the text
+ value verbatim (and hence don't correctly support range queries, since the
+ lexicographic ordering isn't equal to the numeric ordering)
+ -->
+ <fieldType name="pint" class="solr.IntField"/>
+ <fieldType name="plong" class="solr.LongField"/>
+ <fieldType name="pfloat" class="solr.FloatField"/>
+ <fieldType name="pdouble" class="solr.DoubleField"/>
+ <fieldType name="pdate" class="solr.DateField" sortMissingLast="true"/>
+
+ <!-- The "RandomSortField" is not used to store or search any
+ data. You can declare fields of this type it in your schema
+ to generate pseudo-random orderings of your docs for sorting
+ or function purposes. The ordering is generated based on the field
+ name and the version of the index. As long as the index version
+ remains unchanged, and the same field name is reused,
+ the ordering of the docs will be consistent.
+       If you want different pseudo-random orderings of documents,
+ for the same version of the index, use a dynamicField and
+ change the field name in the request.
+ -->
+ <fieldType name="random" class="solr.RandomSortField" indexed="true" />
+
+ <!-- solr.TextField allows the specification of custom text analyzers
+ specified as a tokenizer and a list of token filters. Different
+ analyzers may be specified for indexing and querying.
+
+ The optional positionIncrementGap puts space between multiple fields of
+ this type on the same document, with the purpose of preventing false phrase
+ matching across fields.
+
+ For more info on customizing your analyzer chain, please see
+ http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
+ -->
+
+ <!-- One can also specify an existing Analyzer class that has a
+ default constructor via the class attribute on the analyzer element.
+ Example:
+ <fieldType name="text_greek" class="solr.TextField">
+ <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
+ </fieldType>
+ -->
+
+ <fieldType name="uuid" class="solr.UUIDField" indexed="true" />
+
+
+ <!-- A text field that only splits on whitespace for exact matching of words -->
+ <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
+ <analyzer>
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ </analyzer>
+ </fieldType>
+
+ <!-- A general text field that has reasonable, generic
+ cross-language defaults: it tokenizes with StandardTokenizer,
+ removes stop words from case-insensitive "stopwords.txt"
+ (empty by default), and down cases. At query time only, it
+ also applies synonyms. -->
+ <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
+ <analyzer type="index">
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
+ <!-- in this example, we will only use synonyms at query time
+ <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
+ -->
+ <filter class="solr.LowerCaseFilterFactory"/>
+ </analyzer>
+ <analyzer type="query">
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ </analyzer>
+ </fieldType>
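+
+    <!-- Illustration only: at index time this chain turns a value such as
+         "The Quick Foxes" into the tokens [the] [quick] [foxes]; "the" would
+         be dropped only if listed in stopwords.txt, which is empty by
+         default. -->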
+
+ <!-- A text field with defaults appropriate for English: it
+ tokenizes with StandardTokenizer, removes English stop words
+ (lang/stopwords_en.txt), down cases, protects words from protwords.txt, and
+ finally applies Porter's stemming. The query time analyzer
+ also applies synonyms from synonyms.txt. -->
+ <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
+ <analyzer type="index">
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <!-- in this example, we will only use synonyms at query time
+ <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
+ -->
+ <!-- Case insensitive stop word removal.
+ add enablePositionIncrements=true in both the index and query
+ analyzers to leave a 'gap' for more accurate phrase queries.
+ -->
+ <filter class="solr.StopFilterFactory"
+ ignoreCase="true"
+ words="lang/stopwords_en.txt"
+ enablePositionIncrements="true"
+ />
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.EnglishPossessiveFilterFactory"/>
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+ <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
+ -->
+ <filter class="solr.PorterStemFilterFactory"/>
+ </analyzer>
+ <analyzer type="query">
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+ <filter class="solr.StopFilterFactory"
+ ignoreCase="true"
+ words="lang/stopwords_en.txt"
+ enablePositionIncrements="true"
+ />
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.EnglishPossessiveFilterFactory"/>
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+ <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
+ -->
+ <filter class="solr.PorterStemFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
+ <!-- A text field with defaults appropriate for English, plus
+ aggressive word-splitting and autophrase features enabled.
+ This field is just like text_en, except it adds
+ WordDelimiterFilter to enable splitting and matching of
+ words on case-change, alpha numeric boundaries, and
+ non-alphanumeric chars. This means certain compound word
+ cases will work, for example query "wi fi" will match
+ document "WiFi" or "wi-fi".
+ -->
+ <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
+ <analyzer type="index">
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <!-- in this example, we will only use synonyms at query time
+ <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
+ -->
+ <!-- Case insensitive stop word removal.
+ add enablePositionIncrements=true in both the index and query
+ analyzers to leave a 'gap' for more accurate phrase queries.
+ -->
+ <filter class="solr.StopFilterFactory"
+ ignoreCase="true"
+ words="lang/stopwords_en.txt"
+ enablePositionIncrements="true"
+ />
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+ <filter class="solr.PorterStemFilterFactory"/>
+ </analyzer>
+ <analyzer type="query">
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+ <filter class="solr.StopFilterFactory"
+ ignoreCase="true"
+ words="lang/stopwords_en.txt"
+ enablePositionIncrements="true"
+ />
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+ <filter class="solr.PorterStemFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
+    <!-- Less flexible matching, but fewer false matches. Probably not ideal for product names,
+ but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
+ <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
+ <analyzer>
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
+ <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
+          possible with WordDelimiterFilter in conjunction with stemming. -->
+ <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
+ <!-- Just like text_general except it reverses the characters of
+ each token, to enable more efficient leading wildcard queries. -->
+ <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
+ <analyzer type="index">
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
+ maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
+ </analyzer>
+ <analyzer type="query">
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
+ <filter class="solr.LowerCaseFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
+ <!-- charFilter + WhitespaceTokenizer -->
+ <!--
+ <fieldType name="text_char_norm" class="solr.TextField" positionIncrementGap="100" >
+ <analyzer>
+ <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ </analyzer>
+ </fieldType>
+ -->
+
+ <!-- This is an example of using the KeywordTokenizer along
+         with various TokenFilterFactories to produce a sortable field
+ that does not include some properties of the source text
+ -->
+ <fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
+ <analyzer>
+ <!-- KeywordTokenizer does no actual tokenizing, so the entire
+ input string is preserved as a single token
+ -->
+ <tokenizer class="solr.KeywordTokenizerFactory"/>
+        <!-- The LowerCase TokenFilter does what you expect, which can be
+             useful when you want your sorting to be case insensitive
+ -->
+ <filter class="solr.LowerCaseFilterFactory" />
+ <!-- The TrimFilter removes any leading or trailing whitespace -->
+ <filter class="solr.TrimFilterFactory" />
+ <!-- The PatternReplaceFilter gives you the flexibility to use
+ Java Regular expression to replace any sequence of characters
+ matching a pattern with an arbitrary replacement string,
+ which may include back references to portions of the original
+ string matched by the pattern.
+
+ See the Java Regular Expression documentation for more
+ information on pattern and replacement string syntax.
+
+ http://java.sun.com/j2se/1.6.0/docs/api/java/util/regex/package-summary.html
+ -->
+ <filter class="solr.PatternReplaceFilterFactory"
+ pattern="([^a-z])" replacement="" replace="all"
+ />
+ </analyzer>
+ </fieldType>
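+
+    <!-- Illustration only: a value such as " Pan Tadeusz 99 " leaves this
+         chain as the single sort key "pantadeusz": one keyword token,
+         lowercased, trimmed, with every character outside a-z removed. -->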
+
+ <fieldtype name="phonetic" stored="false" indexed="true" class="solr.TextField" >
+ <analyzer>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
+ </analyzer>
+ </fieldtype>
+
+ <fieldtype name="payloads" stored="false" indexed="true" class="solr.TextField" >
+ <analyzer>
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <!--
+ The DelimitedPayloadTokenFilter can put payloads on tokens... for example,
+ a token of "foo|1.4" would be indexed as "foo" with a payload of 1.4f
+ Attributes of the DelimitedPayloadTokenFilterFactory :
+ "delimiter" - a one character delimiter. Default is | (pipe)
+ "encoder" - how to encode the following value into a playload
+ float -> org.apache.lucene.analysis.payloads.FloatEncoder,
+ integer -> o.a.l.a.p.IntegerEncoder
+ identity -> o.a.l.a.p.IdentityEncoder
+            Alternatively, a fully qualified class name implementing PayloadEncoder; the encoder must have a no-arg constructor.
+ -->
+ <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
+ </analyzer>
+ </fieldtype>
+
+ <!-- lowercases the entire field value, keeping it as a single token. -->
+ <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
+ <analyzer>
+ <tokenizer class="solr.KeywordTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory" />
+ </analyzer>
+ </fieldType>
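+
+    <!-- Illustration only: "Barok" and "barok" both index as the single token
+         "barok", which is why the exact-match fields above ("tags", "themes",
+         "tag_name", "header_type") use this type. -->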
+
+ <!--
+ Example of using PathHierarchyTokenizerFactory at index time, so
+ queries for paths match documents at that path, or in descendent paths
+ -->
+ <fieldType name="descendent_path" class="solr.TextField">
+ <analyzer type="index">
+ <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
+ </analyzer>
+ <analyzer type="query">
+ <tokenizer class="solr.KeywordTokenizerFactory" />
+ </analyzer>
+ </fieldType>
+ <!--
+ Example of using PathHierarchyTokenizerFactory at query time, so
+ queries for paths match documents at that path, or in ancestor paths
+ -->
+ <fieldType name="ancestor_path" class="solr.TextField">
+ <analyzer type="index">
+ <tokenizer class="solr.KeywordTokenizerFactory" />
+ </analyzer>
+ <analyzer type="query">
+ <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
+ </analyzer>
+ </fieldType>
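+
+    <!-- Illustration only: with descendent_path, indexing "/a/b/c" produces
+         the tokens /a, /a/b and /a/b/c, so a query for the exact path "/a/b"
+         matches that document and everything below it; ancestor_path tokenizes
+         at query time instead, giving the inverse behaviour. -->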
+
+ <!-- since fields of this type are by default not stored or indexed,
+ any data added to them will be ignored outright. -->
+ <fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
+
+ <!-- This point type indexes the coordinates as separate fields (subFields)
+ If subFieldType is defined, it references a type, and a dynamic field
+ definition is created matching *___<typename>. Alternately, if
+ subFieldSuffix is defined, that is used to create the subFields.
+ Example: if subFieldType="double", then the coordinates would be
+ indexed in fields myloc_0___double,myloc_1___double.
+ Example: if subFieldSuffix="_d" then the coordinates would be indexed
+ in fields myloc_0_d,myloc_1_d
+ The subFields are an implementation detail of the fieldType, and end
+ users normally should not need to know about them.
+ -->
+ <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
+
+ <!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
+ <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
+
+ <!--
+ A Geohash is a compact representation of a latitude longitude pair in a single field.
+ See http://wiki.apache.org/solr/SpatialSearch
+ -->
+ <fieldtype name="geohash" class="solr.GeoHashField"/>
+
+ <!-- Money/currency field type. See http://wiki.apache.org/solr/MoneyFieldType
+ Parameters:
+ defaultCurrency: Specifies the default currency if none specified. Defaults to "USD"
+ precisionStep: Specifies the precisionStep for the TrieLong field used for the amount
+ providerClass: Lets you plug in other exchange provider backend:
+ solr.FileExchangeRateProvider is the default and takes one parameter:
+          currencyConfig: name of an xml file holding exchange rates
+ solr.OpenExchangeRatesOrgProvider uses rates from openexchangerates.org:
+ ratesFileLocation: URL or path to rates JSON file (default latest.json on the web)
+ refreshInterval: Number of minutes between each rates fetch (default: 1440, min: 60)
+ -->
+<!-- <fieldType name="currency" class="solr.CurrencyField" precisionStep="8" defaultCurrency="USD" currencyConfig="currency.xml" /> - nie dziala -->
+
+
+
+ <!-- some examples for different languages (generally ordered by ISO code) -->
+
+ <!-- Arabic -->
+ <fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100">
+ <analyzer>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <!-- for any non-arabic -->
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ar.txt" enablePositionIncrements="true"/>
+ <!-- normalizes ﻯ to ﻱ, etc -->
+ <filter class="solr.ArabicNormalizationFilterFactory"/>
+ <filter class="solr.ArabicStemFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
+ <!-- Bulgarian -->
+ <fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100">
+ <analyzer>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_bg.txt" enablePositionIncrements="true"/>
+ <filter class="solr.BulgarianStemFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
+ <!-- Catalan -->
+ <fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100">
+ <analyzer>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <!-- removes l', etc -->
+ <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ca.txt"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ca.txt" enablePositionIncrements="true"/>
+ <filter class="solr.SnowballPorterFilterFactory" language="Catalan"/>
+ </analyzer>
+ </fieldType>
+
+ <!-- CJK bigram (see text_ja for a Japanese configuration using morphological analysis) -->
+ <fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100">
+ <analyzer>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <!-- normalize width before bigram, as e.g. half-width dakuten combine -->
+ <filter class="solr.CJKWidthFilterFactory"/>
+ <!-- for any non-CJK -->
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.CJKBigramFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
+ <!-- Czech -->
+ <fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100">
+ <analyzer>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_cz.txt" enablePositionIncrements="true"/>
+ <filter class="solr.CzechStemFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
+ <!-- Danish -->
+ <fieldType name="text_da" class="solr.TextField" positionIncrementGap="100">
+ <analyzer>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_da.txt" format="snowball" enablePositionIncrements="true"/>
+ <filter class="solr.SnowballPorterFilterFactory" language="Danish"/>
+ </analyzer>
+ </fieldType>
+
+ <!-- German -->
+ <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100">
+ <analyzer>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" enablePositionIncrements="true"/>
+ <filter class="solr.GermanNormalizationFilterFactory"/>
+ <filter class="solr.GermanLightStemFilterFactory"/>
+ <!-- less aggressive: <filter class="solr.GermanMinimalStemFilterFactory"/> -->
+ <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="German2"/> -->
+ </analyzer>
+ </fieldType>
+
+ <!-- Greek -->
+ <fieldType name="text_el" class="solr.TextField" positionIncrementGap="100">
+ <analyzer>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <!-- greek specific lowercase for sigma -->
+ <filter class="solr.GreekLowerCaseFilterFactory"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_el.txt" enablePositionIncrements="true"/>
+ <filter class="solr.GreekStemFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
+ <!-- Spanish -->
+ <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100">
+ <analyzer>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" enablePositionIncrements="true"/>
+ <filter class="solr.SpanishLightStemFilterFactory"/>
+ <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> -->
+ </analyzer>
+ </fieldType>
+
+ <!-- Basque -->
+ <fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100">
+ <analyzer>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_eu.txt" enablePositionIncrements="true"/>
+ <filter class="solr.SnowballPorterFilterFactory" language="Basque"/>
+ </analyzer>
+ </fieldType>
+
+ <!-- Persian -->
+ <fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100">
+ <analyzer>
+ <!-- for ZWNJ -->
+ <charFilter class="solr.PersianCharFilterFactory"/>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.ArabicNormalizationFilterFactory"/>
+ <filter class="solr.PersianNormalizationFilterFactory"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fa.txt" enablePositionIncrements="true"/>
+ </analyzer>
+ </fieldType>
+
+ <!-- Finnish -->
+ <fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100">
+ <analyzer>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fi.txt" format="snowball" enablePositionIncrements="true"/>
+ <filter class="solr.SnowballPorterFilterFactory" language="Finnish"/>
+ <!-- less aggressive: <filter class="solr.FinnishLightStemFilterFactory"/> -->
+ </analyzer>
+ </fieldType>
+
+ <!-- French -->
+ <fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100">
+ <analyzer>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <!-- removes l', etc -->
+ <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_fr.txt"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fr.txt" format="snowball" enablePositionIncrements="true"/>
+ <filter class="solr.FrenchLightStemFilterFactory"/>
+ <!-- less aggressive: <filter class="solr.FrenchMinimalStemFilterFactory"/> -->
+ <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="French"/> -->
+ </analyzer>
+ </fieldType>
+
+ <!-- Irish -->
+ <fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100">
+ <analyzer>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <!-- removes d', etc -->
+ <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ga.txt"/>
+ <!-- removes n-, etc. position increments is intentionally false! -->
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/hyphenations_ga.txt" enablePositionIncrements="false"/>
+ <filter class="solr.IrishLowerCaseFilterFactory"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ga.txt" enablePositionIncrements="true"/>
+ <filter class="solr.SnowballPorterFilterFactory" language="Irish"/>
+ </analyzer>
+ </fieldType>
+
+ <!-- Galician -->
+ <fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100">
+ <analyzer>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_gl.txt" enablePositionIncrements="true"/>
+ <filter class="solr.GalicianStemFilterFactory"/>
+ <!-- less aggressive: <filter class="solr.GalicianMinimalStemFilterFactory"/> -->
+ </analyzer>
+ </fieldType>
+
+ <!-- Hindi -->
+ <fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100">
+ <analyzer>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <!-- normalizes unicode representation -->
+ <filter class="solr.IndicNormalizationFilterFactory"/>
+ <!-- normalizes variation in spelling -->
+ <filter class="solr.HindiNormalizationFilterFactory"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hi.txt" enablePositionIncrements="true"/>
+ <filter class="solr.HindiStemFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
+ <!-- Hungarian -->
+ <fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100">
+ <analyzer>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hu.txt" format="snowball" enablePositionIncrements="true"/>
+ <filter class="solr.SnowballPorterFilterFactory" language="Hungarian"/>
+ <!-- less aggressive: <filter class="solr.HungarianLightStemFilterFactory"/> -->
+ </analyzer>
+ </fieldType>
+
+ <!-- Armenian -->
+ <fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100">
+ <analyzer>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hy.txt" enablePositionIncrements="true"/>
+ <filter class="solr.SnowballPorterFilterFactory" language="Armenian"/>
+ </analyzer>
+ </fieldType>
+
+ <!-- Indonesian -->
+ <fieldType name="text_id" class="solr.TextField" positionIncrementGap="100">
+ <analyzer>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_id.txt" enablePositionIncrements="true"/>
+ <!-- for a less aggressive approach (only inflectional suffixes), set stemDerivational to false -->
+ <filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/>
+ </analyzer>
+ </fieldType>
+
+ <!-- Italian -->
+ <fieldType name="text_it" class="solr.TextField" positionIncrementGap="100">
+ <analyzer>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <!-- removes l', etc -->
+ <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_it.txt"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt" format="snowball" enablePositionIncrements="true"/>
+ <filter class="solr.ItalianLightStemFilterFactory"/>
+ <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Italian"/> -->
+ </analyzer>
+ </fieldType>
+
+ <!-- Japanese using morphological analysis (see text_cjk for a configuration using bigramming)
+
+ NOTE: If you want to optimize search for precision, use default operator AND in your query
+      parser config with <solrQueryParser defaultOperator="AND"/> (see the deprecated
+      defaultOperator note earlier in this file). Use OR if you would like to optimize
+      for recall (default).
+ -->
+ <fieldType name="text_ja" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="false">
+ <analyzer>
+ <!-- Kuromoji Japanese morphological analyzer/tokenizer (JapaneseTokenizer)
+
+ Kuromoji has a search mode (default) that does segmentation useful for search. A heuristic
+        is used to segment compounds into their parts, and the compound itself is kept as a synonym.
+
+ Valid values for attribute mode are:
+ normal: regular segmentation
+ search: segmentation useful for search with synonyms compounds (default)
+ extended: same as search mode, but unigrams unknown words (experimental)
+
+ For some applications it might be good to use search mode for indexing and normal mode for
+ queries to reduce recall and prevent parts of compounds from being matched and highlighted.
+ Use <analyzer type="index"> and <analyzer type="query"> for this and mode normal in query.
+
+ Kuromoji also has a convenient user dictionary feature that allows overriding the statistical
+ model with your own entries for segmentation, part-of-speech tags and readings without a need
+ to specify weights. Notice that user dictionaries have not been subject to extensive testing.
+
+ User dictionary attributes are:
+ userDictionary: user dictionary filename
+ userDictionaryEncoding: user dictionary encoding (default is UTF-8)
+
+ See lang/userdict_ja.txt for a sample user dictionary file.
+
+ Punctuation characters are discarded by default. Use discardPunctuation="false" to keep them.
+
+ See http://wiki.apache.org/solr/JapaneseLanguageSupport for more on Japanese language support.
+ -->
+ <tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/>
+ <!--<tokenizer class="solr.JapaneseTokenizerFactory" mode="search" userDictionary="lang/userdict_ja.txt"/>-->
+ <!-- Reduces inflected verbs and adjectives to their base/dictionary forms (辞書形) -->
+ <filter class="solr.JapaneseBaseFormFilterFactory"/>
+ <!-- Removes tokens with certain part-of-speech tags -->
+ <filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt" enablePositionIncrements="true"/>
+ <!-- Normalizes full-width romaji to half-width and half-width kana to full-width (Unicode NFKC subset) -->
+ <filter class="solr.CJKWidthFilterFactory"/>
+ <!-- Removes common tokens typically not useful for search, but have a negative effect on ranking -->
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ja.txt" enablePositionIncrements="true" />
+ <!-- Normalizes common katakana spelling variations by removing any last long sound character (U+30FC) -->
+ <filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/>
+ <!-- Lower-cases romaji characters -->
+ <filter class="solr.LowerCaseFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
+ <!-- Latvian -->
+ <fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100">
+ <analyzer>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_lv.txt" enablePositionIncrements="true"/>
+ <filter class="solr.LatvianStemFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
+ <!-- Dutch -->
+ <fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100">
+ <analyzer>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_nl.txt" format="snowball" enablePositionIncrements="true"/>
+ <filter class="solr.StemmerOverrideFilterFactory" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/>
+ <filter class="solr.SnowballPorterFilterFactory" language="Dutch"/>
+ </analyzer>
+ </fieldType>
+
+ <!-- Norwegian -->
+ <fieldType name="text_no" class="solr.TextField" positionIncrementGap="100">
+ <analyzer>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_no.txt" format="snowball" enablePositionIncrements="true"/>
+ <filter class="solr.SnowballPorterFilterFactory" language="Norwegian"/>
+ <!-- less aggressive: <filter class="solr.NorwegianLightStemFilterFactory"/> -->
+ <!-- singular/plural: <filter class="solr.NorwegianMinimalStemFilterFactory"/> -->
+ </analyzer>
+ </fieldType>
+
+ <!-- Polish -->
+ <fieldType name="text_pl" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
+ <analyzer>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pl.txt" format="snowball" enablePositionIncrements="true"/>
+ <filter class="solr.MorfologikFilterFactory" dictionary="MORFOLOGIK" />
+ </analyzer>
+ </fieldType>
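+
+  <!-- Illustration only: Morfologik is a dictionary-based Polish lemmatizer,
+       so an inflected form such as "książkami" should reduce to its lemma
+       "książka" at both index and query time; the exact output depends on the
+       dictionary shipped with the Morfologik version in use. -->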
+
+
+ <!-- Portuguese -->
+ <fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100">
+ <analyzer>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" enablePositionIncrements="true"/>
+ <filter class="solr.PortugueseLightStemFilterFactory"/>
+ <!-- less aggressive: <filter class="solr.PortugueseMinimalStemFilterFactory"/> -->
+ <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Portuguese"/> -->
+ <!-- most aggressive: <filter class="solr.PortugueseStemFilterFactory"/> -->
+ </analyzer>
+ </fieldType>
+
+ <!-- Romanian -->
+ <fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100">
+ <analyzer>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ro.txt" enablePositionIncrements="true"/>
+ <filter class="solr.SnowballPorterFilterFactory" language="Romanian"/>
+ </analyzer>
+ </fieldType>
+
+ <!-- Russian -->
+ <fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100">
+ <analyzer>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" enablePositionIncrements="true"/>
+ <filter class="solr.SnowballPorterFilterFactory" language="Russian"/>
+ <!-- less aggressive: <filter class="solr.RussianLightStemFilterFactory"/> -->
+ </analyzer>
+ </fieldType>
+
+ <!-- Swedish -->
+ <fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100">
+ <analyzer>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_sv.txt" format="snowball" enablePositionIncrements="true"/>
+ <filter class="solr.SnowballPorterFilterFactory" language="Swedish"/>
+ <!-- less aggressive: <filter class="solr.SwedishLightStemFilterFactory"/> -->
+ </analyzer>
+ </fieldType>
+
+ <!-- Thai -->
+ <fieldType name="text_th" class="solr.TextField" positionIncrementGap="100">
+ <analyzer>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.ThaiWordFilterFactory"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_th.txt" enablePositionIncrements="true"/>
+ </analyzer>
+ </fieldType>
+
+ <!-- Turkish -->
+ <fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100">
+ <analyzer>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.TurkishLowerCaseFilterFactory"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_tr.txt" enablePositionIncrements="true"/>
+ <filter class="solr.SnowballPorterFilterFactory" language="Turkish"/>
+ </analyzer>
+ </fieldType>
+
+ </types>
+
+ <!-- Similarity is the scoring routine for each document vs. a query.
+ A custom Similarity or SimilarityFactory may be specified here, but
+ the default is fine for most applications.
+ For more info: http://wiki.apache.org/solr/SchemaXml#Similarity
+ -->
+ <!--
+ <similarity class="com.example.solr.CustomSimilarityFactory">
+ <str name="paramkey">param value</str>
+ </similarity>
+ -->
+
+</schema>
--- /dev/null
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!--
+ For more details about configurations options that may appear in
+ this file, see http://wiki.apache.org/solr/SolrConfigXml.
+-->
+<config>
+ <!-- In all configuration below, a prefix of "solr." for class names
+ is an alias that causes solr to search appropriate packages,
+ including org.apache.solr.(search|update|request|core|analysis)
+
+ You may also specify a fully qualified Java classname if you
+ have your own custom plugins.
+ -->
+
+ <!-- Controls what version of Lucene various components of Solr
+ adhere to. Generally, you want to use the latest version to
+ get all bug fixes and improvements. It is highly recommended
+ that you fully re-index after changing this setting as it can
+ affect both how text is indexed and queried.
+ -->
+ <luceneMatchVersion>LUCENE_40</luceneMatchVersion>
+
+  <!-- lib directives can be used to instruct Solr to load any Jars
+ identified and use them to resolve any "plugins" specified in
+ your solrconfig.xml or schema.xml (ie: Analyzers, Request
+ Handlers, etc...).
+
+ All directories and paths are resolved relative to the
+ instanceDir.
+
+ If a "./lib" directory exists in your instanceDir, all files
+ found in it are included as if you had used the following
+ syntax...
+
+ <lib dir="./lib" />
+ -->
+
+ <!-- A 'dir' option by itself adds any files found in the directory
+       to the classpath; this is useful for including all jars in a
+ directory.
+ -->
+ <!--
+ <lib dir="../add-everything-found-in-this-dir-to-the-classpath" />
+ -->
+
+ <!-- When a 'regex' is specified in addition to a 'dir', only the
+ files in that directory which completely match the regex
+ (anchored on both ends) will be included.
+ -->
+ <lib dir="../../../dist/" regex="apache-solr-cell-\d.*\.jar" />
+ <lib dir="../../../contrib/extraction/lib" regex=".*\.jar" />
+
+ <lib dir="../../../dist/" regex="apache-solr-clustering-\d.*\.jar" />
+ <lib dir="../../../contrib/clustering/lib/" regex=".*\.jar" />
+
+ <lib dir="../../../dist/" regex="apache-solr-langid-\d.*\.jar" />
+ <lib dir="../../../contrib/langid/lib/" regex=".*\.jar" />
+
+ <lib dir="../../../dist/" regex="apache-solr-velocity-\d.*\.jar" />
+ <lib dir="../../../contrib/velocity/lib" regex=".*\.jar" />
+
+ <!-- If a 'dir' option (with or without a regex) is used and nothing
+ is found that matches, it will be ignored
+ -->
+ <lib dir="/total/crap/dir/ignored" />
+
+ <!-- an exact 'path' can be used instead of a 'dir' to specify a
+ specific file. This will cause a serious error to be logged if
+ it can't be loaded.
+ -->
+ <!--
+ <lib path="../a-jar-that-does-not-exist.jar" />
+ -->
+
+ <!-- Data Directory
+
+ Used to specify an alternate directory to hold all index data
+ other than the default ./data under the Solr home. If
+ replication is in use, this should match the replication
+ configuration.
+ -->
+ <dataDir>${solr.data.dir:}</dataDir>
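+  <!-- For example, the property above can be supplied at startup time
+       (the path below is only an illustration):
+
+         java -Dsolr.data.dir=/var/data/solr -jar start.jar
+  -->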
+
+
+ <!-- The DirectoryFactory to use for indexes.
+
+ solr.StandardDirectoryFactory is filesystem
+ based and tries to pick the best implementation for the current
+ JVM and platform. solr.NRTCachingDirectoryFactory, the default,
+ wraps solr.StandardDirectoryFactory and caches small files in memory
+ for better NRT performance.
+
+ One can force a particular implementation via solr.MMapDirectoryFactory,
+ solr.NIOFSDirectoryFactory, or solr.SimpleFSDirectoryFactory.
+
+ solr.RAMDirectoryFactory is memory based, not
+ persistent, and doesn't work with replication.
+ -->
+ <directoryFactory name="DirectoryFactory"
+ class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>
+
+ <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Index Config - These settings control low-level behavior of indexing
+ Most example settings here show the default value, but are commented
+ out, to more easily see where customizations have been made.
+
+ Note: This replaces <indexDefaults> and <mainIndex> from older versions
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
+ <indexConfig>
+ <!-- maxFieldLength was removed in 4.0. To get similar behavior, include a
+ LimitTokenCountFilterFactory in your fieldType definition. E.g.
+ <filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="10000"/>
+ -->
+ <!-- Maximum time to wait for a write lock (ms) for an IndexWriter. Default: 1000 -->
+ <!-- <writeLockTimeout>1000</writeLockTimeout> -->
+
+ <!-- Expert: Enabling compound file will use less files for the index,
+       using fewer file descriptors, at the expense of a performance decrease.
+ Default in Lucene is "true". Default in Solr is "false" (since 3.6) -->
+ <!-- <useCompoundFile>false</useCompoundFile> -->
+
+ <!-- ramBufferSizeMB sets the amount of RAM that may be used by Lucene
+ indexing for buffering added documents and deletions before they are
+ flushed to the Directory.
+ maxBufferedDocs sets a limit on the number of documents buffered
+ before flushing.
+         If both ramBufferSizeMB and maxBufferedDocs are set, then
+ Lucene will flush based on whichever limit is hit first. -->
+ <!-- <ramBufferSizeMB>32</ramBufferSizeMB> -->
+ <!-- <maxBufferedDocs>1000</maxBufferedDocs> -->
+
+ <!-- Expert: Merge Policy
+ The Merge Policy in Lucene controls how merging of segments is done.
+ The default since Solr/Lucene 3.3 is TieredMergePolicy.
+       The default since Lucene 2.3 was the LogByteSizeMergePolicy.
+ Even older versions of Lucene used LogDocMergePolicy.
+ -->
+ <!--
+ <mergePolicy class="org.apache.lucene.index.TieredMergePolicy">
+ <int name="maxMergeAtOnce">10</int>
+ <int name="segmentsPerTier">10</int>
+ </mergePolicy>
+ -->
+
+ <!-- Merge Factor
+ The merge factor controls how many segments will get merged at a time.
+ For TieredMergePolicy, mergeFactor is a convenience parameter which
+ will set both MaxMergeAtOnce and SegmentsPerTier at once.
+ For LogByteSizeMergePolicy, mergeFactor decides how many new segments
+ will be allowed before they are merged into one.
+ Default is 10 for both merge policies.
+ -->
+ <!--
+ <mergeFactor>10</mergeFactor>
+ -->
+
+ <!-- Expert: Merge Scheduler
+ The Merge Scheduler in Lucene controls how merges are
+ performed. The ConcurrentMergeScheduler (Lucene 2.3 default)
+ can perform merges in the background using separate threads.
+ The SerialMergeScheduler (Lucene 2.2 default) does not.
+ -->
+ <!--
+ <mergeScheduler class="org.apache.lucene.index.ConcurrentMergeScheduler"/>
+ -->
+
+ <!-- LockFactory
+
+ This option specifies which Lucene LockFactory implementation
+ to use.
+
+ single = SingleInstanceLockFactory - suggested for a
+ read-only index or when there is no possibility of
+ another process trying to modify the index.
+ native = NativeFSLockFactory - uses OS native file locking.
+ Do not use when multiple solr webapps in the same
+ JVM are attempting to share a single index.
+ simple = SimpleFSLockFactory - uses a plain file for locking
+
+         Defaults: 'native' is the default for Solr 3.6 and later;
+                  otherwise 'simple' is the default
+
+ More details on the nuances of each LockFactory...
+ http://wiki.apache.org/lucene-java/AvailableLockFactories
+ -->
+ <!-- <lockType>native</lockType> -->
+
+ <!-- Unlock On Startup
+
+ If true, unlock any held write or commit locks on startup.
+ This defeats the locking mechanism that allows multiple
+ processes to safely access a lucene index, and should be used
+ with care. Default is "false".
+
+ This is not needed if lock type is 'none' or 'single'
+ -->
+ <!--
+ <unlockOnStartup>false</unlockOnStartup>
+ -->
+
+ <!-- Expert: Controls how often Lucene loads terms into memory
+ Default is 128 and is likely good for most everyone.
+ -->
+ <!-- <termIndexInterval>128</termIndexInterval> -->
+
+ <!-- If true, IndexReaders will be reopened (often more efficient)
+ instead of closed and then opened. Default: true
+ -->
+ <!--
+ <reopenReaders>true</reopenReaders>
+ -->
+
+ <!-- Commit Deletion Policy
+
+ Custom deletion policies can be specified here. The class must
+ implement org.apache.lucene.index.IndexDeletionPolicy.
+
+ http://lucene.apache.org/java/3_5_0/api/core/org/apache/lucene/index/IndexDeletionPolicy.html
+
+ The default Solr IndexDeletionPolicy implementation supports
+       deleting index commit points based on the number of commits,
+       the age of the commit point, and optimized status.
+
+ The latest commit point should always be preserved regardless
+ of the criteria.
+ -->
+ <!--
+ <deletionPolicy class="solr.SolrDeletionPolicy">
+ -->
+ <!-- The number of commit points to be kept -->
+ <!-- <str name="maxCommitsToKeep">1</str> -->
+ <!-- The number of optimized commit points to be kept -->
+ <!-- <str name="maxOptimizedCommitsToKeep">0</str> -->
+ <!--
+ Delete all commit points once they have reached the given age.
+ Supports DateMathParser syntax e.g.
+ -->
+ <!--
+ <str name="maxCommitAge">30MINUTES</str>
+ <str name="maxCommitAge">1DAY</str>
+ -->
+ <!--
+ </deletionPolicy>
+ -->
+
+ <!-- Lucene Infostream
+
+ To aid in advanced debugging, Lucene provides an "InfoStream"
+ of detailed information when indexing.
+
+       Setting the value to true will instruct the underlying Lucene
+       IndexWriter to write its debugging info to the specified file
+ -->
+ <!-- <infoStream file="INFOSTREAM.txt">false</infoStream> -->
+ </indexConfig>
+
+
+ <!-- JMX
+
+ This example enables JMX if and only if an existing MBeanServer
+       is found; use this if you want to configure JMX through JVM
+ parameters. Remove this to disable exposing Solr configuration
+ and statistics to JMX.
+
+ For more details see http://wiki.apache.org/solr/SolrJmx
+ -->
+ <jmx />
+ <!-- If you want to connect to a particular server, specify the
+ agentId
+ -->
+ <!-- <jmx agentId="myAgent" /> -->
+ <!-- If you want to start a new MBeanServer, specify the serviceUrl -->
+ <!-- <jmx serviceUrl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr"/>
+ -->
+
+ <!-- The default high-performance update handler -->
+ <updateHandler class="solr.DirectUpdateHandler2">
+
+ <!-- AutoCommit
+
+ Perform a hard commit automatically under certain conditions.
+ Instead of enabling autoCommit, consider using "commitWithin"
+ when adding documents.
+
+ http://wiki.apache.org/solr/UpdateXmlMessages
+
+ maxDocs - Maximum number of documents to add since the last
+ commit before automatically triggering a new commit.
+
+ maxTime - Maximum amount of time in ms that is allowed to pass
+                 since a document was added before automatically
+ triggering a new commit.
+ openSearcher - if false, the commit causes recent index changes
+ to be flushed to stable storage, but does not cause a new
+ searcher to be opened to make those changes visible.
+ -->
+ <autoCommit>
+ <maxTime>15000</maxTime>
+ <openSearcher>false</openSearcher>
+ </autoCommit>
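+    <!-- With the hard autoCommit above, pending updates are committed at
+         most 15 seconds after they arrive. A client may instead request a
+         per-update deadline with commitWithin, e.g. (host, port and
+         document contents below are illustrative):
+
+           curl 'http://localhost:8983/solr/update?commitWithin=10000' \
+                -H 'Content-type:text/xml' -d '<add><doc>...</doc></add>'
+    -->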
+
+ <!-- softAutoCommit is like autoCommit except it causes a
+ 'soft' commit which only ensures that changes are visible
+ but does not ensure that data is synced to disk. This is
+ faster and more near-realtime friendly than a hard commit.
+ -->
+ <!--
+ <autoSoftCommit>
+ <maxTime>1000</maxTime>
+ </autoSoftCommit>
+ -->
+
+ <!-- Update Related Event Listeners
+
+ Various IndexWriter related events can trigger Listeners to
+ take actions.
+
+ postCommit - fired after every commit or optimize command
+ postOptimize - fired after every optimize command
+ -->
+ <!-- The RunExecutableListener executes an external command from a
+ hook such as postCommit or postOptimize.
+
+ exe - the name of the executable to run
+ dir - dir to use as the current working directory. (default=".")
+ wait - the calling thread waits until the executable returns.
+ (default="true")
+ args - the arguments to pass to the program. (default is none)
+ env - environment variables to set. (default is none)
+ -->
+ <!-- This example shows how RunExecutableListener could be used
+ with the script based replication...
+ http://wiki.apache.org/solr/CollectionDistribution
+ -->
+ <!--
+ <listener event="postCommit" class="solr.RunExecutableListener">
+ <str name="exe">solr/bin/snapshooter</str>
+ <str name="dir">.</str>
+ <bool name="wait">true</bool>
+ <arr name="args"> <str>arg1</str> <str>arg2</str> </arr>
+ <arr name="env"> <str>MYVAR=val1</str> </arr>
+ </listener>
+ -->
+
+ <!-- Enables a transaction log, currently used for real-time get.
+ "dir" - the target directory for transaction logs, defaults to the
+ solr data directory. -->
+ <updateLog>
+ <str name="dir">${solr.data.dir:}</str>
+ </updateLog>
+
+
+ </updateHandler>
+
+ <!-- IndexReaderFactory
+
+ Use the following format to specify a custom IndexReaderFactory,
+ which allows for alternate IndexReader implementations.
+
+ ** Experimental Feature **
+
+ Please note - Using a custom IndexReaderFactory may prevent
+ certain other features from working. The API to
+ IndexReaderFactory may change without warning or may even be
+ removed from future releases if the problems cannot be
+ resolved.
+
+
+ ** Features that may not work with custom IndexReaderFactory **
+
+ The ReplicationHandler assumes a disk-resident index. Using a
+ custom IndexReader implementation may cause incompatibility
+ with ReplicationHandler and may cause replication to not work
+ correctly. See SOLR-1366 for details.
+
+ -->
+ <!--
+ <indexReaderFactory name="IndexReaderFactory" class="package.class">
+ <str name="someArg">Some Value</str>
+ </indexReaderFactory >
+ -->
+ <!-- By explicitly declaring the Factory, the termIndexDivisor can
+ be specified.
+ -->
+ <!--
+ <indexReaderFactory name="IndexReaderFactory"
+ class="solr.StandardIndexReaderFactory">
+ <int name="setTermIndexDivisor">12</int>
+ </indexReaderFactory >
+ -->
+
+ <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Query section - these settings control query time things like caches
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
+ <query>
+ <!-- Max Boolean Clauses
+
+       Maximum number of clauses in each BooleanQuery; an exception
+ is thrown if exceeded.
+
+ ** WARNING **
+
+ This option actually modifies a global Lucene property that
+ will affect all SolrCores. If multiple solrconfig.xml files
+ disagree on this property, the value at any given moment will
+ be based on the last SolrCore to be initialized.
+
+ -->
+ <maxBooleanClauses>1024</maxBooleanClauses>
+
+
+ <!-- Solr Internal Query Caches
+
+ There are two implementations of cache available for Solr,
+ LRUCache, based on a synchronized LinkedHashMap, and
+ FastLRUCache, based on a ConcurrentHashMap.
+
+ FastLRUCache has faster gets and slower puts in single
+ threaded operation and thus is generally faster than LRUCache
+ when the hit ratio of the cache is high (> 75%), and may be
+ faster under other scenarios on multi-cpu systems.
+ -->
+
+ <!-- Filter Cache
+
+ Cache used by SolrIndexSearcher for filters (DocSets),
+ unordered sets of *all* documents that match a query. When a
+ new searcher is opened, its caches may be prepopulated or
+ "autowarmed" using data from caches in the old searcher.
+ autowarmCount is the number of items to prepopulate. For
+ LRUCache, the autowarmed items will be the most recently
+ accessed items.
+
+ Parameters:
+       class - the SolrCache implementation
+               (LRUCache or FastLRUCache)
+ size - the maximum number of entries in the cache
+ initialSize - the initial capacity (number of entries) of
+ the cache. (see java.util.HashMap)
+ autowarmCount - the number of entries to prepopulate from
+                       an old cache.
+ -->
+ <filterCache class="solr.FastLRUCache"
+ size="512"
+ initialSize="512"
+ autowarmCount="0"/>
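+    <!-- Each distinct fq clause gets its own cached DocSet, so repeated
+         requests such as (field name is illustrative):
+           /select?q=solr&fq=inStock:true
+           /select?q=lucene&fq=inStock:true
+         share a single cached filter for inStock:true. -->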
+
+ <!-- Query Result Cache
+
+ Caches results of searches - ordered lists of document ids
+ (DocList) based on a query, a sort, and the range of documents requested.
+ -->
+ <queryResultCache class="solr.LRUCache"
+ size="512"
+ initialSize="512"
+ autowarmCount="0"/>
+
+ <!-- Document Cache
+
+ Caches Lucene Document objects (the stored fields for each
+ document). Since Lucene internal document ids are transient,
+ this cache will not be autowarmed.
+ -->
+ <documentCache class="solr.LRUCache"
+ size="512"
+ initialSize="512"
+ autowarmCount="0"/>
+
+ <!-- Field Value Cache
+
+ Cache used to hold field values that are quickly accessible
+ by document id. The fieldValueCache is created by default
+ even if not configured here.
+ -->
+ <!--
+ <fieldValueCache class="solr.FastLRUCache"
+ size="512"
+ autowarmCount="128"
+ showItems="32" />
+ -->
+
+ <!-- Custom Cache
+
+ Example of a generic cache. These caches may be accessed by
+      name through SolrIndexSearcher.getCache(), cacheLookup(), and
+ cacheInsert(). The purpose is to enable easy caching of
+ user/application level data. The regenerator argument should
+ be specified as an implementation of solr.CacheRegenerator
+ if autowarming is desired.
+ -->
+ <!--
+ <cache name="myUserCache"
+ class="solr.LRUCache"
+ size="4096"
+ initialSize="1024"
+ autowarmCount="1024"
+ regenerator="com.mycompany.MyRegenerator"
+ />
+ -->
+
+
+ <!-- Lazy Field Loading
+
+ If true, stored fields that are not requested will be loaded
+ lazily. This can result in a significant speed improvement
+ if the usual case is to not load all stored fields,
+ especially if the skipped fields are large compressed text
+ fields.
+ -->
+ <enableLazyFieldLoading>true</enableLazyFieldLoading>
+
+ <!-- Use Filter For Sorted Query
+
+ A possible optimization that attempts to use a filter to
+ satisfy a search. If the requested sort does not include
+ score, then the filterCache will be checked for a filter
+ matching the query. If found, the filter will be used as the
+ source of document ids, and then the sort will be applied to
+ that.
+
+ For most situations, this will not be useful unless you
+ frequently get the same search repeatedly with different sort
+ options, and none of them ever use "score"
+ -->
+ <!--
+ <useFilterForSortedQuery>true</useFilterForSortedQuery>
+ -->
+
+ <!-- Result Window Size
+
+ An optimization for use with the queryResultCache. When a search
+ is requested, a superset of the requested number of document ids
+ are collected. For example, if a search for a particular query
+       requests matching documents 10 through 19, and queryResultWindowSize is 50,
+ then documents 0 through 49 will be collected and cached. Any further
+ requests in that range can be satisfied via the cache.
+ -->
+ <queryResultWindowSize>20</queryResultWindowSize>
+
+ <!-- Maximum number of documents to cache for any entry in the
+ queryResultCache.
+ -->
+ <queryResultMaxDocsCached>200</queryResultMaxDocsCached>
+
+ <!-- Query Related Event Listeners
+
+ Various IndexSearcher related events can trigger Listeners to
+ take actions.
+
+ newSearcher - fired whenever a new searcher is being prepared
+ and there is a current searcher handling requests (aka
+ registered). It can be used to prime certain caches to
+ prevent long request times for certain requests.
+
+ firstSearcher - fired whenever a new searcher is being
+ prepared but there is no current registered searcher to handle
+ requests or to gain autowarming data from.
+
+
+ -->
+ <!-- QuerySenderListener takes an array of NamedList and executes a
+ local query request for each NamedList in sequence.
+ -->
+ <listener event="newSearcher" class="solr.QuerySenderListener">
+ <arr name="queries">
+ <!--
+ <lst><str name="q">solr</str><str name="sort">price asc</str></lst>
+ <lst><str name="q">rocks</str><str name="sort">weight asc</str></lst>
+ -->
+ </arr>
+ </listener>
+ <listener event="firstSearcher" class="solr.QuerySenderListener">
+ <arr name="queries">
+ <lst>
+ <str name="q">static firstSearcher warming in solrconfig.xml</str>
+ </lst>
+ </arr>
+ </listener>
+
+ <!-- Use Cold Searcher
+
+ If a search request comes in and there is no current
+ registered searcher, then immediately register the still
+ warming searcher and use it. If "false" then all requests
+ will block until the first searcher is done warming.
+ -->
+ <useColdSearcher>false</useColdSearcher>
+
+ <!-- Max Warming Searchers
+
+ Maximum number of searchers that may be warming in the
+ background concurrently. An error is returned if this limit
+ is exceeded.
+
+       Recommended values of 1-2 for read-only slaves, higher for
+ masters w/o cache warming.
+ -->
+ <maxWarmingSearchers>2</maxWarmingSearchers>
+
+ </query>
+
+
+ <!-- Request Dispatcher
+
+ This section contains instructions for how the SolrDispatchFilter
+ should behave when processing requests for this SolrCore.
+
+ handleSelect is a legacy option that affects the behavior of requests
+ such as /select?qt=XXX
+
+ handleSelect="true" will cause the SolrDispatchFilter to process
+ the request and dispatch the query to a handler specified by the
+ "qt" param, assuming "/select" isn't already registered.
+
+ handleSelect="false" will cause the SolrDispatchFilter to
+ ignore "/select" requests, resulting in a 404 unless a handler
+ is explicitly registered with the name "/select"
+
+ handleSelect="true" is not recommended for new users, but is the default
+ for backwards compatibility
+ -->
+ <requestDispatcher handleSelect="false" >
+ <!-- Request Parsing
+
+ These settings indicate how Solr Requests may be parsed, and
+ what restrictions may be placed on the ContentStreams from
+ those requests
+
+ enableRemoteStreaming - enables use of the stream.file
+ and stream.url parameters for specifying remote streams.
+
+ multipartUploadLimitInKB - specifies the max size of
+ Multipart File Uploads that Solr will allow in a Request.
+
+ *** WARNING ***
+       The settings below authorize Solr to fetch remote files; you
+ should make sure your system has some authentication before
+ using enableRemoteStreaming="true"
+
+ -->
+ <requestParsers enableRemoteStreaming="true"
+ multipartUploadLimitInKB="2048000" />
+
+ <!-- HTTP Caching
+
+ Set HTTP caching related parameters (for proxy caches and clients).
+
+ The options below instruct Solr not to output any HTTP Caching
+ related headers
+ -->
+ <httpCaching never304="true" />
+ <!-- If you include a <cacheControl> directive, it will be used to
+ generate a Cache-Control header (as well as an Expires header
+ if the value contains "max-age=")
+
+ By default, no Cache-Control header is generated.
+
+ You can use the <cacheControl> option even if you have set
+ never304="true"
+ -->
+ <!--
+ <httpCaching never304="true" >
+ <cacheControl>max-age=30, public</cacheControl>
+ </httpCaching>
+ -->
+ <!-- To enable Solr to respond with automatically generated HTTP
+       Caching headers, and to respond to Cache Validation requests
+ correctly, set the value of never304="false"
+
+ This will cause Solr to generate Last-Modified and ETag
+ headers based on the properties of the Index.
+
+ The following options can also be specified to affect the
+ values of these headers...
+
+ lastModFrom - the default value is "openTime" which means the
+ Last-Modified value (and validation against If-Modified-Since
+ requests) will all be relative to when the current Searcher
+ was opened. You can change it to lastModFrom="dirLastMod" if
+ you want the value to exactly correspond to when the physical
+ index was last modified.
+
+ etagSeed="..." is an option you can change to force the ETag
+ header (and validation against If-None-Match requests) to be
+ different even if the index has not changed (ie: when making
+ significant changes to your config file)
+
+       (lastModFrom and etagSeed are both ignored if you use
+ the never304="true" option)
+ -->
+ <!--
+    <httpCaching lastModFrom="openTime"
+ etagSeed="Solr">
+ <cacheControl>max-age=30, public</cacheControl>
+ </httpCaching>
+ -->
+ </requestDispatcher>
+
+ <!-- Request Handlers
+
+ http://wiki.apache.org/solr/SolrRequestHandler
+
+ Incoming queries will be dispatched to a specific handler by name
+ based on the path specified in the request.
+
+ Legacy behavior: If the request path uses "/select" but no Request
+ Handler has that name, and if handleSelect="true" has been specified in
+ the requestDispatcher, then the Request Handler is dispatched based on
+       the qt parameter. Handlers without a leading '/' are accessed
+       like so: http://host/app/[core/]select?qt=name. If no qt is
+ given, then the requestHandler that declares default="true" will be
+ used or the one named "standard".
+
+ If a Request Handler is declared with startup="lazy", then it will
+ not be initialized until the first request that uses it.
+
+ -->
+ <!-- SearchHandler
+
+ http://wiki.apache.org/solr/SearchHandler
+
+ For processing Search Queries, the primary Request Handler
+ provided with Solr is "SearchHandler" It delegates to a sequent
+ of SearchComponents (see below) and supports distributed
+ queries across multiple shards
+ -->
+ <requestHandler name="/select" class="solr.SearchHandler">
+ <!-- default values for query parameters can be specified, these
+ will be overridden by parameters in the request
+ -->
+ <lst name="defaults">
+ <str name="echoParams">explicit</str>
+ <int name="rows">50</int>
+ <str name="df">text</str>
+ <bool name="tv">true</bool>
+ </lst>
+ <!-- In addition to defaults, "appends" params can be specified
+ to identify values which should be appended to the list of
+ multi-val params from the query (or the existing "defaults").
+ -->
+ <!-- In this example, the param "fq=instock:true" would be appended to
+ any query time fq params the user may specify, as a mechanism for
+ partitioning the index, independent of any user selected filtering
+ that may also be desired (perhaps as a result of faceted searching).
+
+ NOTE: there is *absolutely* nothing a client can do to prevent these
+ "appends" values from being used, so don't use this mechanism
+ unless you are sure you always want it.
+ -->
+ <!--
+ <lst name="appends">
+ <str name="fq">inStock:true</str>
+ </lst>
+ -->
+ <!-- "invariants" are a way of letting the Solr maintainer lock down
+ the options available to Solr clients. Any params values
+ specified here are used regardless of what values may be specified
+ in either the query, the "defaults", or the "appends" params.
+
+ In this example, the facet.field and facet.query params would
+ be fixed, limiting the facets clients can use. Faceting is
+ not turned on by default - but if the client does specify
+ facet=true in the request, these are the only facets they
+ will be able to see counts for; regardless of what other
+ facet.field or facet.query params they may specify.
+
+ NOTE: there is *absolutely* nothing a client can do to prevent these
+ "invariants" values from being used, so don't use this mechanism
+ unless you are sure you always want it.
+ -->
+ <!--
+ <lst name="invariants">
+ <str name="facet.field">cat</str>
+ <str name="facet.field">manu_exact</str>
+ <str name="facet.query">price:[* TO 500]</str>
+ <str name="facet.query">price:[500 TO *]</str>
+ </lst>
+ -->
+ <!-- If the default list of SearchComponents is not desired, that
+ list can either be overridden completely, or components can be
+ prepended or appended to the default list. (see below)
+ -->
+ <!--
+ <arr name="components">
+ <str>nameOfCustomComponent1</str>
+ <str>nameOfCustomComponent2</str>
+ </arr>
+ -->
+ <arr name="last-components">
+ <str>tvComponent</str>
+ </arr>
+ </requestHandler>
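+  <!-- With the defaults above, a minimal request such as (example host/port)
+         http://localhost:8983/solr/select?q=apache
+       searches the "text" field (df), returns up to 50 rows, and includes
+       term vector data via the tvComponent appended above. -->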
+
+ <!-- A request handler that returns indented JSON by default -->
+ <requestHandler name="/query" class="solr.SearchHandler">
+ <lst name="defaults">
+ <str name="echoParams">explicit</str>
+ <str name="wt">json</str>
+ <str name="indent">true</str>
+ <str name="df">text</str>
+ </lst>
+ </requestHandler>
+
+
+ <!-- realtime get handler, guaranteed to return the latest stored fields of
+ any document, without the need to commit or open a new searcher. The
+ current implementation relies on the updateLog feature being enabled. -->
+ <requestHandler name="/get" class="solr.RealTimeGetHandler">
+ <lst name="defaults">
+ <str name="omitHeader">true</str>
+ <str name="wt">json</str>
+ <str name="indent">true</str>
+ </lst>
+ </requestHandler>
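+  <!-- e.g. /get?id=mydoc (or ?ids=doc1,doc2 for several documents) returns
+       the latest stored fields as JSON; the ids shown are placeholders for
+       your uniqueKey values. -->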
+
+
+ <!-- A Robust Example
+
+ This example SearchHandler declaration shows off usage of the
+ SearchHandler with many defaults declared
+
+       Note that the same Request Handler class (SearchHandler) can
+       be registered multiple times with different
+ names (and different init parameters)
+ -->
+ <requestHandler name="/browse" class="solr.SearchHandler">
+ <lst name="defaults">
+ <str name="echoParams">explicit</str>
+
+ <!-- VelocityResponseWriter settings -->
+ <str name="wt">velocity</str>
+ <str name="v.template">browse</str>
+ <str name="v.layout">layout</str>
+ <str name="title">Solritas</str>
+
+ <!-- Query settings -->
+ <str name="defType">edismax</str>
+ <str name="qf">
+ text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
+ title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
+ </str>
+ <str name="df">text</str>
+ <str name="mm">100%</str>
+ <str name="q.alt">*:*</str>
+ <str name="rows">10</str>
+ <str name="fl">*,score</str>
+
+ <str name="mlt.qf">
+ text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
+ title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
+ </str>
+ <str name="mlt.fl">text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename</str>
+ <int name="mlt.count">3</int>
+
+ <!-- Faceting defaults -->
+ <str name="facet">on</str>
+ <str name="facet.field">cat</str>
+ <str name="facet.field">manu_exact</str>
+ <str name="facet.field">content_type</str>
+ <str name="facet.field">author_s</str>
+ <str name="facet.query">ipod</str>
+ <str name="facet.query">GB</str>
+ <str name="facet.mincount">1</str>
+ <str name="facet.pivot">cat,inStock</str>
+ <str name="facet.range.other">after</str>
+ <str name="facet.range">price</str>
+ <int name="f.price.facet.range.start">0</int>
+ <int name="f.price.facet.range.end">600</int>
+ <int name="f.price.facet.range.gap">50</int>
+ <str name="facet.range">popularity</str>
+ <int name="f.popularity.facet.range.start">0</int>
+ <int name="f.popularity.facet.range.end">10</int>
+ <int name="f.popularity.facet.range.gap">3</int>
+ <str name="facet.range">manufacturedate_dt</str>
+ <str name="f.manufacturedate_dt.facet.range.start">NOW/YEAR-10YEARS</str>
+ <str name="f.manufacturedate_dt.facet.range.end">NOW</str>
+ <str name="f.manufacturedate_dt.facet.range.gap">+1YEAR</str>
+ <str name="f.manufacturedate_dt.facet.range.other">before</str>
+ <str name="f.manufacturedate_dt.facet.range.other">after</str>
+
+ <!-- Highlighting defaults -->
+ <str name="hl">on</str>
+ <str name="hl.fl">content features title name</str>
+ <str name="hl.encoder">html</str>
+ <str name="hl.simple.pre"><b></str>
+ <str name="hl.simple.post"></b></str>
+ <str name="f.title.hl.fragsize">0</str>
+ <str name="f.title.hl.alternateField">title</str>
+ <str name="f.name.hl.fragsize">0</str>
+ <str name="f.name.hl.alternateField">name</str>
+ <str name="f.content.hl.snippets">3</str>
+ <str name="f.content.hl.fragsize">200</str>
+ <str name="f.content.hl.alternateField">content</str>
+ <str name="f.content.hl.maxAlternateFieldLength">750</str>
+
+ <!-- Spell checking defaults -->
+ <str name="spellcheck">on</str>
+ <str name="spellcheck.extendedResults">false</str>
+ <str name="spellcheck.count">5</str>
+ <str name="spellcheck.alternativeTermCount">2</str>
+ <str name="spellcheck.maxResultsForSuggest">5</str>
+ <str name="spellcheck.collate">true</str>
+ <str name="spellcheck.collateExtendedResults">true</str>
+ <str name="spellcheck.maxCollationTries">5</str>
+ <str name="spellcheck.maxCollations">3</str>
+ </lst>
+
+ <!-- append spellchecking to our list of components -->
+ <arr name="last-components">
+ <str>spellcheck</str>
+ </arr>
+ </requestHandler>
+
+
+ <!-- Update Request Handler.
+
+ http://wiki.apache.org/solr/UpdateXmlMessages
+
+ The canonical Request Handler for Modifying the Index through
+ commands specified using XML, JSON, CSV, or JAVABIN
+
+       Note: Since Solr 1.1, request handlers require a valid content
+ type header if posted in the body. For example, curl now
+ requires: -H 'Content-type:text/xml; charset=utf-8'
+
+ To override the request content type and force a specific
+ Content-type, use the request parameter:
+ ?update.contentType=text/csv
+
+ This handler will pick a response format to match the input
+ if the 'wt' parameter is not explicit
+ -->
+ <requestHandler name="/update" class="solr.UpdateRequestHandler">
+ <!-- See below for information on defining
+ updateRequestProcessorChains that can be used by name
+ on each Update Request
+ -->
+ <!--
+ <lst name="defaults">
+ <str name="update.chain">dedupe</str>
+ </lst>
+ -->
+ </requestHandler>
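+  <!-- A sample update request, assuming the example host/port and fields
+       that exist in your schema:
+
+         curl 'http://localhost:8983/solr/update?commit=true' \
+              -H 'Content-type:application/json' \
+              -d '[{"id":"doc1","title":"hello"}]'
+  -->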
+
+
+ <!-- Solr Cell Update Request Handler
+
+ http://wiki.apache.org/solr/ExtractingRequestHandler
+
+ -->
+ <requestHandler name="/update/extract"
+ startup="lazy"
+ class="solr.extraction.ExtractingRequestHandler" >
+ <lst name="defaults">
+ <str name="lowernames">true</str>
+ <str name="uprefix">ignored_</str>
+
+ <!-- capture link hrefs but ignore div attributes -->
+ <str name="captureAttr">true</str>
+ <str name="fmap.a">links</str>
+ <str name="fmap.div">ignored_</str>
+ </lst>
+ </requestHandler>
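+  <!-- A sample extraction request (id and file name are illustrative):
+
+         curl 'http://localhost:8983/solr/update/extract?literal.id=doc1&commit=true' \
+              -F 'myfile=@tutorial.pdf'
+  -->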
+
+
+ <!-- Field Analysis Request Handler
+
+ RequestHandler that provides much the same functionality as
+ analysis.jsp. Provides the ability to specify multiple field
+ types and field names in the same request and outputs
+ index-time and query-time analysis for each of them.
+
+ Request parameters are:
+ analysis.fieldname - field name whose analyzers are to be used
+
+ analysis.fieldtype - field type whose analyzers are to be used
+ analysis.fieldvalue - text for index-time analysis
+ q (or analysis.q) - text for query time analysis
+ analysis.showmatch (true|false) - When set to true and when
+ query analysis is performed, the produced tokens of the
+ field value analysis will be marked as "matched" for every
+         token that is produced by the query analysis
+ -->
+ <requestHandler name="/analysis/field"
+ startup="lazy"
+ class="solr.FieldAnalysisRequestHandler" />
+
+
+ <!-- Document Analysis Handler
+
+ http://wiki.apache.org/solr/AnalysisRequestHandler
+
+ An analysis handler that provides a breakdown of the analysis
+ process of provided documents. This handler expects a (single)
+ content stream with the following format:
+
+ <docs>
+ <doc>
+ <field name="id">1</field>
+ <field name="name">The Name</field>
+ <field name="text">The Text Value</field>
+ </doc>
+ <doc>...</doc>
+ <doc>...</doc>
+ ...
+ </docs>
+
+ Note: Each document must contain a field which serves as the
+ unique key. This key is used in the returned response to associate
+ an analysis breakdown to the analyzed document.
+
+ Like the FieldAnalysisRequestHandler, this handler also supports
+ query analysis by sending either an "analysis.query" or "q"
+ request parameter that holds the query text to be analyzed. It
+ also supports the "analysis.showmatch" parameter which when set to
+ true, all field tokens that match the query tokens will be marked
+ as a "match".
+ -->
+ <requestHandler name="/analysis/document"
+ class="solr.DocumentAnalysisRequestHandler"
+ startup="lazy" />
+
+ <!-- Admin Handlers
+
+       This will register all the standard admin
+ RequestHandlers.
+ -->
+ <requestHandler name="/admin/"
+ class="solr.admin.AdminHandlers" />
+ <!-- This single handler is equivalent to the following... -->
+ <!--
+ <requestHandler name="/admin/luke" class="solr.admin.LukeRequestHandler" />
+ <requestHandler name="/admin/system" class="solr.admin.SystemInfoHandler" />
+ <requestHandler name="/admin/plugins" class="solr.admin.PluginInfoHandler" />
+ <requestHandler name="/admin/threads" class="solr.admin.ThreadDumpHandler" />
+ <requestHandler name="/admin/properties" class="solr.admin.PropertiesRequestHandler" />
+ <requestHandler name="/admin/file" class="solr.admin.ShowFileRequestHandler" >
+ -->
+ <!-- If you wish to hide files under ${solr.home}/conf, explicitly
+ register the ShowFileRequestHandler using:
+ -->
+ <!--
+ <requestHandler name="/admin/file"
+ class="solr.admin.ShowFileRequestHandler" >
+ <lst name="invariants">
+ <str name="hidden">synonyms.txt</str>
+ <str name="hidden">anotherfile.txt</str>
+ </lst>
+ </requestHandler>
+ -->
+
+ <!-- ping/healthcheck -->
+ <requestHandler name="/admin/ping" class="solr.PingRequestHandler">
+ <lst name="invariants">
+ <str name="q">solrpingquery</str>
+ </lst>
+ <lst name="defaults">
+ <str name="echoParams">all</str>
+ </lst>
+ <!-- An optional feature of the PingRequestHandler is to configure the
+ handler with a "healthcheckFile" which can be used to enable/disable
+ the PingRequestHandler.
+       Relative paths are resolved against the data dir.
+ -->
+ <!-- <str name="healthcheckFile">server-enabled.txt</str> -->
+ </requestHandler>
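+  <!-- e.g. /admin/ping?wt=json reports status "OK" while the core can
+       execute the invariant query above. -->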
+
+ <!-- Echo the request contents back to the client -->
+ <requestHandler name="/debug/dump" class="solr.DumpRequestHandler" >
+ <lst name="defaults">
+ <str name="echoParams">explicit</str>
+ <str name="echoHandler">true</str>
+ </lst>
+ </requestHandler>
+
+ <!-- Solr Replication
+
+ The SolrReplicationHandler supports replicating indexes from a
+ "master" used for indexing and "slaves" used for queries.
+
+ http://wiki.apache.org/solr/SolrReplication
+
+ In the example below, remove the <lst name="master"> section if
+ this is just a slave and remove the <lst name="slave"> section
+ if this is just a master.
+ -->
+ <!--
+ <requestHandler name="/replication" class="solr.ReplicationHandler" >
+ <lst name="master">
+ <str name="replicateAfter">commit</str>
+ <str name="replicateAfter">startup</str>
+ <str name="confFiles">schema.xml,stopwords.txt</str>
+ </lst>
+ <lst name="slave">
+ <str name="masterUrl">http://localhost:8983/solr</str>
+ <str name="pollInterval">00:00:60</str>
+ </lst>
+ </requestHandler>
+ -->
+
+ <!-- Solr Replication for SolrCloud Recovery
+
+       This is the config needed for SolrCloud's recovery replication.
+ -->
+ <requestHandler name="/replication" class="solr.ReplicationHandler" startup="lazy" />
+
+
+ <!-- Search Components
+
+ Search components are registered to SolrCore and used by
+ instances of SearchHandler (which can access them by name)
+
+ By default, the following components are available:
+
+ <searchComponent name="query" class="solr.QueryComponent" />
+ <searchComponent name="facet" class="solr.FacetComponent" />
+ <searchComponent name="mlt" class="solr.MoreLikeThisComponent" />
+ <searchComponent name="highlight" class="solr.HighlightComponent" />
+ <searchComponent name="stats" class="solr.StatsComponent" />
+ <searchComponent name="debug" class="solr.DebugComponent" />
+
+ Default configuration in a requestHandler would look like:
+
+ <arr name="components">
+ <str>query</str>
+ <str>facet</str>
+ <str>mlt</str>
+ <str>highlight</str>
+ <str>stats</str>
+ <str>debug</str>
+ </arr>
+
+ If you register a searchComponent to one of the standard names,
+ that will be used instead of the default.
+
+ To insert components before or after the 'standard' components, use:
+
+ <arr name="first-components">
+ <str>myFirstComponentName</str>
+ </arr>
+
+ <arr name="last-components">
+ <str>myLastComponentName</str>
+ </arr>
+
+ NOTE: The component registered with the name "debug" will
+ always be executed after the "last-components"
+
+ -->
+
+ <!-- Spell Check
+
+ The spell check component can return a list of alternative spelling
+ suggestions.
+
+ http://wiki.apache.org/solr/SpellCheckComponent
+ -->
+ <searchComponent name="spellcheck" class="solr.SpellCheckComponent">
+
+ <str name="queryAnalyzerFieldType">textSpell</str>
+
+ <!-- Multiple "Spell Checkers" can be declared and used by this
+ component
+ -->
+
+ <!-- a spellchecker built from a field of the main index -->
+ <lst name="spellchecker">
+ <str name="name">default</str>
+ <str name="field">name</str>
+ <str name="classname">solr.DirectSolrSpellChecker</str>
+ <!-- the spellcheck distance measure used, the default is the internal levenshtein -->
+ <str name="distanceMeasure">internal</str>
+ <!-- minimum accuracy needed to be considered a valid spellcheck suggestion -->
+ <float name="accuracy">0.5</float>
+ <!-- the maximum #edits we consider when enumerating terms: can be 1 or 2 -->
+ <int name="maxEdits">2</int>
+ <!-- the minimum shared prefix when enumerating terms -->
+ <int name="minPrefix">1</int>
+ <!-- maximum number of inspections per result. -->
+ <int name="maxInspections">5</int>
+ <!-- minimum length of a query term to be considered for correction -->
+ <int name="minQueryLength">4</int>
+      <!-- maximum threshold of documents a query term can appear in to be considered for correction -->
+ <float name="maxQueryFrequency">0.01</float>
+ <!-- uncomment this to require suggestions to occur in 1% of the documents
+ <float name="thresholdTokenFrequency">.01</float>
+ -->
+ </lst>
+
+ <!-- a spellchecker that can break or combine words. See "/spell" handler below for usage -->
+ <lst name="spellchecker">
+ <str name="name">wordbreak</str>
+ <str name="classname">solr.WordBreakSolrSpellChecker</str>
+ <str name="field">name</str>
+ <str name="combineWords">true</str>
+ <str name="breakWords">true</str>
+ <int name="maxChanges">10</int>
+ </lst>
+
+ <!-- a spellchecker that uses a different distance measure -->
+ <!--
+ <lst name="spellchecker">
+ <str name="name">jarowinkler</str>
+ <str name="field">spell</str>
+ <str name="classname">solr.DirectSolrSpellChecker</str>
+ <str name="distanceMeasure">
+ org.apache.lucene.search.spell.JaroWinklerDistance
+ </str>
+ </lst>
+ -->
+
+  <!-- a spellchecker that uses an alternate comparator
+
+       comparatorClass can be one of:
+ 1. score (default)
+ 2. freq (Frequency first, then score)
+ 3. A fully qualified class name
+ -->
+ <!--
+ <lst name="spellchecker">
+ <str name="name">freq</str>
+ <str name="field">lowerfilt</str>
+ <str name="classname">solr.DirectSolrSpellChecker</str>
+ <str name="comparatorClass">freq</str>
+ -->
+
+ <!-- A spellchecker that reads the list of words from a file -->
+ <!--
+ <lst name="spellchecker">
+ <str name="classname">solr.FileBasedSpellChecker</str>
+ <str name="name">file</str>
+ <str name="sourceLocation">spellings.txt</str>
+ <str name="characterEncoding">UTF-8</str>
+ <str name="spellcheckIndexDir">spellcheckerFile</str>
+ </lst>
+ -->
+ </searchComponent>
+
+ <!-- A request handler for demonstrating the spellcheck component.
+
+       NOTE: This is purely an example. The whole purpose of the
+ SpellCheckComponent is to hook it into the request handler that
+ handles your normal user queries so that a separate request is
+ not needed to get suggestions.
+
+       IN OTHER WORDS, THERE IS A REALLY GOOD CHANCE THE SETUP BELOW IS
+ NOT WHAT YOU WANT FOR YOUR PRODUCTION SYSTEM!
+
+ See http://wiki.apache.org/solr/SpellCheckComponent for details
+ on the request parameters.
+ -->
+ <requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
+ <lst name="defaults">
+ <str name="df">text</str>
+ <!-- Solr will use suggestions from both the 'default' spellchecker
+ and from the 'wordbreak' spellchecker and combine them.
+ collations (re-written queries) can include a combination of
+ corrections from both spellcheckers -->
+ <str name="spellcheck.dictionary">default</str>
+ <str name="spellcheck.dictionary">wordbreak</str>
+ <str name="spellcheck">on</str>
+ <str name="spellcheck.extendedResults">true</str>
+ <str name="spellcheck.count">10</str>
+ <str name="spellcheck.alternativeTermCount">5</str>
+ <str name="spellcheck.maxResultsForSuggest">5</str>
+ <str name="spellcheck.collate">true</str>
+ <str name="spellcheck.collateExtendedResults">true</str>
+ <str name="spellcheck.maxCollationTries">10</str>
+ <str name="spellcheck.maxCollations">5</str>
+ </lst>
+ <arr name="last-components">
+ <str>spellcheck</str>
+ </arr>
+ </requestHandler>
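+  <!-- e.g. a misspelled query such as /spell?q=solrr&wt=json should return
+       suggestions from the 'default' dictionary plus word-break collations
+       from 'wordbreak' (query text is illustrative). -->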
+
+ <!-- Term Vector Component
+
+ http://wiki.apache.org/solr/TermVectorComponent
+ -->
+ <searchComponent name="tvComponent" class="solr.TermVectorComponent"/>
+
+ <!-- A request handler for demonstrating the term vector component
+
+       This is purely an example.
+
+ In reality you will likely want to add the component to your
+ already specified request handlers.
+ -->
+ <requestHandler name="/tvrh" class="solr.SearchHandler" startup="lazy">
+ <lst name="defaults">
+ <str name="df">text</str>
+ <bool name="tv">true</bool>
+ </lst>
+ <arr name="last-components">
+ <str>tvComponent</str>
+ </arr>
+ </requestHandler>
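+  <!-- e.g. /tvrh?q=*:*&tv.tf=true&tv.df=true returns term and document
+       frequencies for fields indexed with termVectors="true". -->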
+
+ <!-- Clustering Component
+
+ http://wiki.apache.org/solr/ClusteringComponent
+
+       You'll need to set the solr.clustering.enabled system property
+ when running solr to run with clustering enabled:
+
+ java -Dsolr.clustering.enabled=true -jar start.jar
+
+ -->
+ <searchComponent name="clustering"
+ enable="${solr.clustering.enabled:false}"
+ class="solr.clustering.ClusteringComponent" >
+ <!-- Declare an engine -->
+ <lst name="engine">
+      <!-- The name; only one engine can be named "default" -->
+ <str name="name">default</str>
+
+ <!-- Class name of Carrot2 clustering algorithm.
+
+ Currently available algorithms are:
+
+ * org.carrot2.clustering.lingo.LingoClusteringAlgorithm
+ * org.carrot2.clustering.stc.STCClusteringAlgorithm
+ * org.carrot2.clustering.kmeans.BisectingKMeansClusteringAlgorithm
+
+ See http://project.carrot2.org/algorithms.html for the
+ algorithm's characteristics.
+ -->
+ <str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>
+
+ <!-- Overriding values for Carrot2 default algorithm attributes.
+
+ For a description of all available attributes, see:
+ http://download.carrot2.org/stable/manual/#chapter.components.
+ Use attribute key as name attribute of str elements
+ below. These can be further overridden for individual
+ requests by specifying attribute key as request parameter
+ name and attribute value as parameter value.
+ -->
+ <str name="LingoClusteringAlgorithm.desiredClusterCountBase">20</str>
+
+ <!-- Location of Carrot2 lexical resources.
+
+ A directory from which to load Carrot2-specific stop words
+ and stop labels. Absolute or relative to Solr config directory.
+ If a specific resource (e.g. stopwords.en) is present in the
+ specified dir, it will completely override the corresponding
+ default one that ships with Carrot2.
+
+ For an overview of Carrot2 lexical resources, see:
+ http://download.carrot2.org/head/manual/#chapter.lexical-resources
+ -->
+ <str name="carrot.lexicalResourcesDir">clustering/carrot2</str>
+
+ <!-- The language to assume for the documents.
+
+ For a list of allowed values, see:
+ http://download.carrot2.org/stable/manual/#section.attribute.lingo.MultilingualClustering.defaultLanguage
+ -->
+ <str name="MultilingualClustering.defaultLanguage">ENGLISH</str>
+ </lst>
+ <lst name="engine">
+ <str name="name">stc</str>
+ <str name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str>
+ </lst>
+ </searchComponent>
+
+ <!-- A request handler for demonstrating the clustering component
+
+       This is purely an example.
+
+ In reality you will likely want to add the component to your
+ already specified request handlers.
+ -->
+ <requestHandler name="/clustering"
+ startup="lazy"
+ enable="${solr.clustering.enabled:false}"
+ class="solr.SearchHandler">
+ <lst name="defaults">
+ <bool name="clustering">true</bool>
+ <str name="clustering.engine">default</str>
+ <bool name="clustering.results">true</bool>
+ <!-- The title field -->
+ <str name="carrot.title">name</str>
+ <str name="carrot.url">id</str>
+ <!-- The field to cluster on -->
+ <str name="carrot.snippet">features</str>
+ <!-- produce summaries -->
+ <bool name="carrot.produceSummary">true</bool>
+ <!-- the maximum number of labels per cluster -->
+ <!--<int name="carrot.numDescriptions">5</int>-->
+ <!-- produce sub clusters -->
+ <bool name="carrot.outputSubClusters">false</bool>
+
+ <str name="defType">edismax</str>
+ <str name="qf">
+ text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
+ </str>
+ <str name="q.alt">*:*</str>
+ <str name="rows">10</str>
+ <str name="fl">*,score</str>
+ </lst>
+ <arr name="last-components">
+ <str>clustering</str>
+ </arr>
+ </requestHandler>
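+  <!-- Once clustering is enabled (see the java invocation above), a request
+       like /clustering?q=*:*&rows=100 returns the normal result list plus
+       clusters built from the "features" snippet field configured above. -->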
+
+ <!-- Terms Component
+
+ http://wiki.apache.org/solr/TermsComponent
+
+ A component to return terms and document frequency of those
+ terms
+ -->
+ <searchComponent name="terms" class="solr.TermsComponent"/>
+
+ <!-- A request handler for demonstrating the terms component -->
+ <requestHandler name="/terms" class="solr.SearchHandler" startup="lazy">
+ <lst name="defaults">
+ <bool name="terms">true</bool>
+ </lst>
+ <arr name="components">
+ <str>terms</str>
+ </arr>
+ </requestHandler>
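+  <!-- e.g. /terms?terms.fl=name&terms.prefix=a lists indexed terms from the
+       "name" field that start with "a" (field name is illustrative). -->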
+
+
+ <!-- Query Elevation Component
+
+ http://wiki.apache.org/solr/QueryElevationComponent
+
+ a search component that enables you to configure the top
+ results for a given query regardless of the normal lucene
+ scoring.
+ -->
+<!-- <searchComponent name="elevator" class="solr.QueryElevationComponent" >-->
+ <!-- pick a fieldType to analyze queries -->
+ <!--<str name="queryFieldType">string</str>
+ <str name="config-file">elevate.xml</str>
+ </searchComponent>-->
+
+ <!-- A request handler for demonstrating the elevator component -->
+<!-- <requestHandler name="/elevate" class="solr.SearchHandler" startup="lazy">
+ <lst name="defaults">
+ <str name="echoParams">explicit</str>
+ <str name="df">text</str>
+ </lst>
+ <arr name="last-components">
+ <str>elevator</str>
+ </arr>
+ </requestHandler>-->
+
+ <!-- Highlighting Component
+
+ http://wiki.apache.org/solr/HighlightingParameters
+ -->
+ <searchComponent class="solr.HighlightComponent" name="highlight">
+ <highlighting>
+ <!-- Configure the standard fragmenter -->
+ <!-- This could most likely be commented out in the "default" case -->
+ <fragmenter name="gap"
+ default="true"
+ class="solr.highlight.GapFragmenter">
+ <lst name="defaults">
+ <int name="hl.fragsize">100</int>
+ </lst>
+ </fragmenter>
+
+ <!-- A regular-expression-based fragmenter
+ (for sentence extraction)
+ -->
+ <fragmenter name="regex"
+ class="solr.highlight.RegexFragmenter">
+ <lst name="defaults">
+ <!-- slightly smaller fragsizes work better because of slop -->
+ <int name="hl.fragsize">70</int>
+ <!-- allow 50% slop on fragment sizes -->
+ <float name="hl.regex.slop">0.5</float>
+ <!-- a basic sentence pattern -->
+ <str name="hl.regex.pattern">[-\w ,/\n\"']{20,200}</str>
+ </lst>
+ </fragmenter>
+
+ <!-- Configure the standard formatter -->
+ <formatter name="html"
+ default="true"
+ class="solr.highlight.HtmlFormatter">
+ <lst name="defaults">
+ <str name="hl.simple.pre"><![CDATA[<em>]]></str>
+ <str name="hl.simple.post"><![CDATA[</em>]]></str>
+ </lst>
+ </formatter>
+
+ <!-- Configure the standard encoder -->
+ <encoder name="html"
+ class="solr.highlight.HtmlEncoder" />
+
+ <!-- Configure the standard fragListBuilder -->
+ <fragListBuilder name="simple"
+ class="solr.highlight.SimpleFragListBuilder"/>
+
+ <!-- Configure the single fragListBuilder -->
+ <fragListBuilder name="single"
+ class="solr.highlight.SingleFragListBuilder"/>
+
+ <!-- Configure the weighted fragListBuilder -->
+ <fragListBuilder name="weighted"
+ default="true"
+ class="solr.highlight.WeightedFragListBuilder"/>
+
+ <!-- default tag FragmentsBuilder -->
+ <fragmentsBuilder name="default"
+ default="true"
+ class="solr.highlight.ScoreOrderFragmentsBuilder">
+ <!--
+ <lst name="defaults">
+ <str name="hl.multiValuedSeparatorChar">/</str>
+ </lst>
+ -->
+ </fragmentsBuilder>
+
+ <!-- multi-colored tag FragmentsBuilder -->
+ <fragmentsBuilder name="colored"
+ class="solr.highlight.ScoreOrderFragmentsBuilder">
+ <lst name="defaults">
+ <str name="hl.tag.pre"><![CDATA[
+ <b style="background:yellow">,<b style="background:lawgreen">,
+ <b style="background:aquamarine">,<b style="background:magenta">,
+ <b style="background:palegreen">,<b style="background:coral">,
+ <b style="background:wheat">,<b style="background:khaki">,
+ <b style="background:lime">,<b style="background:deepskyblue">]]></str>
+ <str name="hl.tag.post"><![CDATA[</b>]]></str>
+ </lst>
+ </fragmentsBuilder>
+
+ <boundaryScanner name="default"
+ default="true"
+ class="solr.highlight.SimpleBoundaryScanner">
+ <lst name="defaults">
+ <str name="hl.bs.maxScan">10</str>
+ <str name="hl.bs.chars">.,!? 	 </str>
+ </lst>
+ </boundaryScanner>
+
+ <boundaryScanner name="breakIterator"
+ class="solr.highlight.BreakIteratorBoundaryScanner">
+ <lst name="defaults">
+ <!-- type should be one of CHARACTER, WORD(default), LINE and SENTENCE -->
+ <str name="hl.bs.type">WORD</str>
+        <!-- language and country are used when constructing the Locale object, -->
+        <!-- which in turn is used when getting an instance of BreakIterator -->
+ <str name="hl.bs.language">en</str>
+ <str name="hl.bs.country">US</str>
+ </lst>
+ </boundaryScanner>
+ </highlighting>
+ </searchComponent>
+
+ <!-- Update Processors
+
+ Chains of Update Processor Factories for dealing with Update
+ Requests can be declared, and then used by name in Update
+ Request Processors
+
+ http://wiki.apache.org/solr/UpdateRequestProcessor
+
+ -->
+ <!-- Deduplication
+
+ An example dedup update processor that creates the "id" field
+ on the fly based on the hash code of some other fields. This
+ example has overwriteDupes set to false since we are using the
+ id field as the signatureField and Solr will maintain
+ uniqueness based on that anyway.
+
+ -->
+ <!--
+ <updateRequestProcessorChain name="dedupe">
+ <processor class="solr.processor.SignatureUpdateProcessorFactory">
+ <bool name="enabled">true</bool>
+ <str name="signatureField">id</str>
+ <bool name="overwriteDupes">false</bool>
+ <str name="fields">name,features,cat</str>
+ <str name="signatureClass">solr.processor.Lookup3Signature</str>
+ </processor>
+ <processor class="solr.LogUpdateProcessorFactory" />
+ <processor class="solr.RunUpdateProcessorFactory" />
+ </updateRequestProcessorChain>
+ -->
+
+ <!-- Language identification
+
+ This example update chain identifies the language of the incoming
+ documents using the langid contrib. The detected language is
+ written to field language_s. No field name mapping is done.
+ The fields used for detection are text, title, subject and description,
+       making this example suitable for detecting languages from full-text
+ rich documents injected via ExtractingRequestHandler.
+ See more about langId at http://wiki.apache.org/solr/LanguageDetection
+ -->
+ <!--
+ <updateRequestProcessorChain name="langid">
+ <processor class="org.apache.solr.update.processor.TikaLanguageIdentifierUpdateProcessorFactory">
+ <str name="langid.fl">text,title,subject,description</str>
+ <str name="langid.langField">language_s</str>
+ <str name="langid.fallback">en</str>
+ </processor>
+ <processor class="solr.LogUpdateProcessorFactory" />
+ <processor class="solr.RunUpdateProcessorFactory" />
+ </updateRequestProcessorChain>
+ -->
+
+ <!-- Script update processor
+
+ This example hooks in an update processor implemented using JavaScript.
+
+ See more about the script update processor at http://wiki.apache.org/solr/ScriptUpdateProcessor
+ -->
+ <!--
+ <updateRequestProcessorChain name="script">
+ <processor class="solr.StatelessScriptUpdateProcessorFactory">
+ <str name="script">update-script.js</str>
+ <lst name="params">
+ <str name="config_param">example config parameter</str>
+ </lst>
+ </processor>
+ <processor class="solr.RunUpdateProcessorFactory" />
+ </updateRequestProcessorChain>
+ -->
+
+ <!-- Response Writers
+
+ http://wiki.apache.org/solr/QueryResponseWriter
+
+ Request responses will be written using the writer specified by
+ the 'wt' request parameter matching the name of a registered
+ writer.
+
+ The "default" writer is the default and will be used if 'wt' is
+ not specified in the request.
+ -->
+ <!-- The following response writers are implicitly configured unless
+ overridden...
+ -->
+ <!--
+ <queryResponseWriter name="xml"
+ default="true"
+ class="solr.XMLResponseWriter" />
+ <queryResponseWriter name="json" class="solr.JSONResponseWriter"/>
+ <queryResponseWriter name="python" class="solr.PythonResponseWriter"/>
+ <queryResponseWriter name="ruby" class="solr.RubyResponseWriter"/>
+ <queryResponseWriter name="php" class="solr.PHPResponseWriter"/>
+ <queryResponseWriter name="phps" class="solr.PHPSerializedResponseWriter"/>
+ <queryResponseWriter name="csv" class="solr.CSVResponseWriter"/>
+ -->
+
+ <queryResponseWriter name="json" class="solr.JSONResponseWriter">
+ <!-- For the purposes of the tutorial, JSON responses are written as
+ plain text so that they are easy to read in *any* browser.
+ If you expect a MIME type of "application/json" just remove this override.
+ -->
+ <str name="content-type">text/plain; charset=UTF-8</str>
+ </queryResponseWriter>
+
+ <!--
+ Custom response writers can be declared as needed...
+ -->
+ <queryResponseWriter name="velocity" class="solr.VelocityResponseWriter" startup="lazy"/>
+
+
+ <!-- XSLT response writer transforms the XML output by any xslt file found
+       in Solr's conf/xslt directory. Changes to xslt files are checked
+       every xsltCacheLifetimeSeconds seconds.
+ -->
+ <queryResponseWriter name="xslt" class="solr.XSLTResponseWriter">
+ <int name="xsltCacheLifetimeSeconds">5</int>
+ </queryResponseWriter>
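+  <!-- e.g. /select?q=*:*&wt=xslt&tr=example.xsl applies conf/xslt/example.xsl
+       to the XML response; the stylesheet name is whatever you provide. -->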
+
+ <!-- Query Parsers
+
+ http://wiki.apache.org/solr/SolrQuerySyntax
+
+ Multiple QParserPlugins can be registered by name, and then
+ used in either the "defType" param for the QueryComponent (used
+ by SearchHandler) or in LocalParams
+ -->
+ <!-- example of registering a query parser -->
+ <!--
+ <queryParser name="myparser" class="com.mycompany.MyQParserPlugin"/>
+ -->
+
+ <!-- Function Parsers
+
+ http://wiki.apache.org/solr/FunctionQuery
+
+ Multiple ValueSourceParsers can be registered by name, and then
+ used as function names when using the "func" QParser.
+ -->
+ <!-- example of registering a custom function parser -->
+ <!--
+ <valueSourceParser name="myfunc"
+ class="com.mycompany.MyValueSourceParser" />
+ -->
+
+
+ <!-- Document Transformers
+ http://wiki.apache.org/solr/DocTransformers
+ -->
+ <!--
+ Could be something like:
+ <transformer name="db" class="com.mycompany.LoadFromDatabaseTransformer" >
+ <int name="connection">jdbc://....</int>
+ </transformer>
+
+ To add a constant value to all docs, use:
+ <transformer name="mytrans2" class="org.apache.solr.response.transform.ValueAugmenterFactory" >
+ <int name="value">5</int>
+ </transformer>
+
+ If you want the user to still be able to change it with _value:something_ use this:
+ <transformer name="mytrans3" class="org.apache.solr.response.transform.ValueAugmenterFactory" >
+ <double name="defaultValue">5</double>
+ </transformer>
+
+ If you are using the QueryElevationComponent, you may wish to mark documents that get boosted. The
+ EditorialMarkerFactory will do exactly that:
+ <transformer name="qecBooster" class="org.apache.solr.response.transform.EditorialMarkerFactory" />
+ -->
+
+
+ <!-- Legacy config for the admin interface -->
+ <admin>
+ <defaultQuery>*:*</defaultQuery>
+ </admin>
+
+</config>