samples/LuceneInAction/lia/extsearch/queryparser/CustomQueryParser.py

   1 # ====================================================================
   2 #   Licensed under the Apache License, Version 2.0 (the "License");
   3 #   you may not use this file except in compliance with the License.
   4 #   You may obtain a copy of the License at
   5 #
   6 #       http://www.apache.org/licenses/LICENSE-2.0
   7 #
   8 #   Unless required by applicable law or agreed to in writing, software
   9 #   distributed under the License is distributed on an "AS IS" BASIS,
  10 #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11 #   See the License for the specific language governing permissions and
  12 #   limitations under the License.
  13 # ====================================================================
  14
  15 from lucene import \
  16     PythonQueryParser, PythonMultiFieldQueryParser, \
  17     PhraseQuery, TermRangeQuery, SpanNearQuery, SpanTermQuery, \
  18     Term, PhraseQuery, Version
  19
  20 from lia.extsearch.queryparser.NumberUtils import NumberUtils
  21
  22 #
  23 # A QueryParser extension
  24 #
  25
  26 class CustomQueryParser(PythonQueryParser):
  27
  28     def __init__(self, field, analyzer):
  29         super(CustomQueryParser, self).__init__(Version.LUCENE_CURRENT, field, analyzer)
  30
  31     def getFuzzyQuery(self, field, termText, minSimilarity):
  32         raise AssertionError, "Fuzzy queries not allowed"
  33
  34     def getWildcardQuery(self, field, termText):
  35         raise AssertionError, "Wildcard queries not allowed"
  36
  37     #
  38     # Special handling for the "id" field, pads each part
  39     # to match how it was indexed.
  40     #
  41     def getRangeQuery(self, field, part1, part2, inclusive):
  42
  43         if field == "id":
  44
  45             num1 = int(part1)
  46             num2 = int(part2)
  47
  48             return TermRangeQuery(field,
  49                                   NumberUtils.pad(num1),
  50                                   NumberUtils.pad(num2),
  51                                   inclusive, True)
  52
  53         if field == "special":
  54             print part1, "->", part2
  55
  56             return TermRangeQuery("field", part1, part2, inclusive, True)
  57
  58         return super(CustomQueryParser,
  59                      self).getRangeQuery(field, part1, part2, inclusive)
  60
  61
  62     def getFieldQuery_quoted(self, field, queryText, quoted):
  63
  64         return super(CustomQueryParser,
  65                      self).getFieldQuery_quoted_super(field, queryText, quoted)
  66
  67     #
  68     # Replace PhraseQuery with SpanNearQuery to force in-order
  69     # phrase matching rather than reverse.
  70     #
  71     def getFieldQuery_slop(self, field, queryText, slop):
  72
  73         orig = super(CustomQueryParser,
  74                      self).getFieldQuery_slop_super(field, queryText, slop)
  75
  76         if not PhraseQuery.instance_(orig):
  77             return orig
  78
  79         pq = PhraseQuery.cast_(orig)
  80         clauses = [SpanTermQuery(term) for term in pq.getTerms()]
  81
  82         return SpanNearQuery(clauses, slop, True);
  83
  84
  85
  86 class MultiFieldCustomQueryParser(PythonMultiFieldQueryParser):
  87
  88     def __init__(self, fields, analyzer):
  89         super(MultiFieldCustomQueryParser, self).__init__(Version.LUCENE_CURRENT, fields, analyzer)
  90
  91     def getFuzzyQuery(self, super, field, termText, minSimilarity):
  92         raise AssertionError, "Fuzzy queries not allowed"
  93
  94     def getWildcardQuery(self, super, field, termText):
  95         raise AssertionError, "Wildcard queries not allowed"
  96
  97     #
  98     # Special handling for the "id" field, pads each part
  99     # to match how it was indexed.
 100     #
 101     def getRangeQuery(self, field, part1, part2, inclusive):
 102
 103         if field == "id":
 104
 105             num1 = int(part1)
 106             num2 = int(part2)
 107
 108             return TermRangeQuery(field,
 109                                   NumberUtils.pad(num1),
 110                                   NumberUtils.pad(num2),
 111                                   inclusive, True)
 112
 113         if field == "special":
 114             print part1, "->", part2
 115
 116             return TermRangeQuery("field", part1, part2, inclusive, True)
 117
 118         return super(CustomQueryParser,
 119                      self).getRangeQuery(field, part1, part2, inclusive)
 120
 121     def getFieldQuery_quoted(self, field, queryText, quoted):
 122
 123         return super(CustomQueryParser,
 124                      self).getFieldQuery_quoted_super(field, queryText, quoted)
 125
 126     #
 127     # Replace PhraseQuery with SpanNearQuery to force in-order
 128     # phrase matching rather than reverse.
 129     #
 130     def getFieldQuery_slop(self, field, queryText, slop):
 131
 132         # let QueryParser's implementation do the analysis
 133         orig = super(CustomQueryParser,
 134                      self).getFieldQuery_slop_super(field, queryText, slop)
 135
 136         if not PhraseQuery.instance_(orig):
 137             return orig
 138
 139         pq = PhraseQuery.cast_(orig)
 140         clauses = [SpanTermQuery(term) for term in pq.getTerms()]
 141
 142         return SpanNearQuery(clauses, slop, True);