X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.4.0/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/complexPhrase/ComplexPhraseQueryParser.java diff --git a/lucene-java-3.4.0/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/complexPhrase/ComplexPhraseQueryParser.java b/lucene-java-3.4.0/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/complexPhrase/ComplexPhraseQueryParser.java deleted file mode 100644 index 6359faf..0000000 --- a/lucene-java-3.4.0/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/complexPhrase/ComplexPhraseQueryParser.java +++ /dev/null @@ -1,402 +0,0 @@ -package org.apache.lucene.queryParser.complexPhrase; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.Term; -import org.apache.lucene.queryParser.ParseException; -import org.apache.lucene.queryParser.QueryParser; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.MultiTermQuery; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.TermRangeQuery; -import org.apache.lucene.search.spans.SpanNearQuery; -import org.apache.lucene.search.spans.SpanNotQuery; -import org.apache.lucene.search.spans.SpanOrQuery; -import org.apache.lucene.search.spans.SpanQuery; -import org.apache.lucene.search.spans.SpanTermQuery; -import org.apache.lucene.util.Version; - -/** - * QueryParser which permits complex phrase query syntax eg "(john jon - * jonathan~) peters*". - *

- * Performs potentially multiple passes over Query text to parse any nested - * logic in PhraseQueries. - First pass takes any PhraseQuery content between - * quotes and stores for subsequent pass. All other query content is parsed as - * normal - Second pass parses any stored PhraseQuery content, checking all - * embedded clauses are referring to the same field and therefore can be - * rewritten as Span queries. All PhraseQuery clauses are expressed as - * ComplexPhraseQuery objects - *

- *

- * This could arguably be done in one pass using a new QueryParser but here I am - * working within the constraints of the existing parser as a base class. This - * currently simply feeds all phrase content through an analyzer to select - * phrase terms - any "special" syntax such as * ~ * etc are not given special - * status - *

- * - */ -public class ComplexPhraseQueryParser extends QueryParser { - private ArrayList complexPhrases = null; - - private boolean isPass2ResolvingPhrases; - - private ComplexPhraseQuery currentPhraseQuery = null; - - public ComplexPhraseQueryParser(Version matchVersion, String f, Analyzer a) { - super(matchVersion, f, a); - } - - @Override - protected Query getFieldQuery(String field, String queryText, int slop) { - ComplexPhraseQuery cpq = new ComplexPhraseQuery(field, queryText, slop); - complexPhrases.add(cpq); // add to list of phrases to be parsed once - // we - // are through with this pass - return cpq; - } - - @Override - public Query parse(String query) throws ParseException { - if (isPass2ResolvingPhrases) { - MultiTermQuery.RewriteMethod oldMethod = getMultiTermRewriteMethod(); - try { - // Temporarily force BooleanQuery rewrite so that Parser will - // generate visible - // collection of terms which we can convert into SpanQueries. - // ConstantScoreRewrite mode produces an - // opaque ConstantScoreQuery object which cannot be interrogated for - // terms in the same way a BooleanQuery can. - // QueryParser is not guaranteed threadsafe anyway so this temporary - // state change should not - // present an issue - setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); - return super.parse(query); - } finally { - setMultiTermRewriteMethod(oldMethod); - } - } - - // First pass - parse the top-level query recording any PhraseQuerys - // which will need to be resolved - complexPhrases = new ArrayList(); - Query q = super.parse(query); - - // Perform second pass, using this QueryParser to parse any nested - // PhraseQueries with different - // set of syntax restrictions (i.e. all fields must be same) - isPass2ResolvingPhrases = true; - try { - for (Iterator iterator = complexPhrases.iterator(); iterator.hasNext();) { - currentPhraseQuery = iterator.next(); - // in each phrase, now parse the contents between quotes as a - // separate parse operation - currentPhraseQuery.parsePhraseElements(this); - } - } finally { - isPass2ResolvingPhrases = false; - } - return q; - } - - // There is No "getTermQuery throws ParseException" method to override so - // unfortunately need - // to throw a runtime exception here if a term for another field is embedded - // in phrase query - @Override - protected Query newTermQuery(Term term) { - if (isPass2ResolvingPhrases) { - try { - checkPhraseClauseIsForSameField(term.field()); - } catch (ParseException pe) { - throw new RuntimeException("Error parsing complex phrase", pe); - } - } - return super.newTermQuery(term); - } - - // Helper method used to report on any clauses that appear in query syntax - private void checkPhraseClauseIsForSameField(String field) - throws ParseException { - if (!field.equals(currentPhraseQuery.field)) { - throw new ParseException("Cannot have clause for field \"" + field - + "\" nested in phrase " + " for field \"" + currentPhraseQuery.field - + "\""); - } - } - - @Override - protected Query getWildcardQuery(String field, String termStr) - throws ParseException { - if (isPass2ResolvingPhrases) { - checkPhraseClauseIsForSameField(field); - } - return super.getWildcardQuery(field, termStr); - } - - @Override - protected Query getRangeQuery(String field, String part1, String part2, - boolean inclusive) throws ParseException { - if (isPass2ResolvingPhrases) { - checkPhraseClauseIsForSameField(field); - } - return super.getRangeQuery(field, part1, part2, inclusive); - } - - @Override - protected Query newRangeQuery(String field, String part1, String part2, - boolean inclusive) { - if (isPass2ResolvingPhrases) { - // Must use old-style RangeQuery in order to produce a BooleanQuery - // that can be turned into SpanOr clause - TermRangeQuery rangeQuery = new TermRangeQuery(field, part1, part2, inclusive, inclusive, - getRangeCollator()); - rangeQuery.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); - return rangeQuery; - } - return super.newRangeQuery(field, part1, part2, inclusive); - } - - @Override - protected Query getFuzzyQuery(String field, String termStr, - float minSimilarity) throws ParseException { - if (isPass2ResolvingPhrases) { - checkPhraseClauseIsForSameField(field); - } - return super.getFuzzyQuery(field, termStr, minSimilarity); - } - - /* - * Used to handle the query content in between quotes and produced Span-based - * interpretations of the clauses. - */ - static class ComplexPhraseQuery extends Query { - - String field; - - String phrasedQueryStringContents; - - int slopFactor; - - private Query contents; - - public ComplexPhraseQuery(String field, String phrasedQueryStringContents, - int slopFactor) { - super(); - this.field = field; - this.phrasedQueryStringContents = phrasedQueryStringContents; - this.slopFactor = slopFactor; - } - - // Called by ComplexPhraseQueryParser for each phrase after the main - // parse - // thread is through - protected void parsePhraseElements(QueryParser qp) throws ParseException { - // TODO ensure that field-sensitivity is preserved ie the query - // string below is parsed as - // field+":("+phrasedQueryStringContents+")" - // but this will need code in rewrite to unwrap the first layer of - // boolean query - contents = qp.parse(phrasedQueryStringContents); - } - - @Override - public Query rewrite(IndexReader reader) throws IOException { - // ArrayList spanClauses = new ArrayList(); - if (contents instanceof TermQuery) { - return contents; - } - // Build a sequence of Span clauses arranged in a SpanNear - child - // clauses can be complex - // Booleans e.g. nots and ors etc - int numNegatives = 0; - if (!(contents instanceof BooleanQuery)) { - throw new IllegalArgumentException("Unknown query type \"" - + contents.getClass().getName() - + "\" found in phrase query string \"" + phrasedQueryStringContents - + "\""); - } - BooleanQuery bq = (BooleanQuery) contents; - BooleanClause[] bclauses = bq.getClauses(); - SpanQuery[] allSpanClauses = new SpanQuery[bclauses.length]; - // For all clauses e.g. one* two~ - for (int i = 0; i < bclauses.length; i++) { - // HashSet bclauseterms=new HashSet(); - Query qc = bclauses[i].getQuery(); - // Rewrite this clause e.g one* becomes (one OR onerous) - qc = qc.rewrite(reader); - if (bclauses[i].getOccur().equals(BooleanClause.Occur.MUST_NOT)) { - numNegatives++; - } - - if (qc instanceof BooleanQuery) { - ArrayList sc = new ArrayList(); - addComplexPhraseClause(sc, (BooleanQuery) qc); - if (sc.size() > 0) { - allSpanClauses[i] = sc.get(0); - } else { - // Insert fake term e.g. phrase query was for "Fred Smithe*" and - // there were no "Smithe*" terms - need to - // prevent match on just "Fred". - allSpanClauses[i] = new SpanTermQuery(new Term(field, - "Dummy clause because no terms found - must match nothing")); - } - } else { - if (qc instanceof TermQuery) { - TermQuery tq = (TermQuery) qc; - allSpanClauses[i] = new SpanTermQuery(tq.getTerm()); - } else { - throw new IllegalArgumentException("Unknown query type \"" - + qc.getClass().getName() - + "\" found in phrase query string \"" - + phrasedQueryStringContents + "\""); - } - - } - } - if (numNegatives == 0) { - // The simple case - no negative elements in phrase - return new SpanNearQuery(allSpanClauses, slopFactor, true); - } - // Complex case - we have mixed positives and negatives in the - // sequence. - // Need to return a SpanNotQuery - ArrayList positiveClauses = new ArrayList(); - for (int j = 0; j < allSpanClauses.length; j++) { - if (!bclauses[j].getOccur().equals(BooleanClause.Occur.MUST_NOT)) { - positiveClauses.add(allSpanClauses[j]); - } - } - - SpanQuery[] includeClauses = positiveClauses - .toArray(new SpanQuery[positiveClauses.size()]); - - SpanQuery include = null; - if (includeClauses.length == 1) { - include = includeClauses[0]; // only one positive clause - } else { - // need to increase slop factor based on gaps introduced by - // negatives - include = new SpanNearQuery(includeClauses, slopFactor + numNegatives, - true); - } - // Use sequence of positive and negative values as the exclude. - SpanNearQuery exclude = new SpanNearQuery(allSpanClauses, slopFactor, - true); - SpanNotQuery snot = new SpanNotQuery(include, exclude); - return snot; - } - - private void addComplexPhraseClause(List spanClauses, BooleanQuery qc) { - ArrayList ors = new ArrayList(); - ArrayList nots = new ArrayList(); - BooleanClause[] bclauses = qc.getClauses(); - - // For all clauses e.g. one* two~ - for (int i = 0; i < bclauses.length; i++) { - Query childQuery = bclauses[i].getQuery(); - - // select the list to which we will add these options - ArrayList chosenList = ors; - if (bclauses[i].getOccur() == BooleanClause.Occur.MUST_NOT) { - chosenList = nots; - } - - if (childQuery instanceof TermQuery) { - TermQuery tq = (TermQuery) childQuery; - SpanTermQuery stq = new SpanTermQuery(tq.getTerm()); - stq.setBoost(tq.getBoost()); - chosenList.add(stq); - } else if (childQuery instanceof BooleanQuery) { - BooleanQuery cbq = (BooleanQuery) childQuery; - addComplexPhraseClause(chosenList, cbq); - } else { - // TODO alternatively could call extract terms here? - throw new IllegalArgumentException("Unknown query type:" - + childQuery.getClass().getName()); - } - } - if (ors.size() == 0) { - return; - } - SpanOrQuery soq = new SpanOrQuery(ors - .toArray(new SpanQuery[ors.size()])); - if (nots.size() == 0) { - spanClauses.add(soq); - } else { - SpanOrQuery snqs = new SpanOrQuery(nots - .toArray(new SpanQuery[nots.size()])); - SpanNotQuery snq = new SpanNotQuery(soq, snqs); - spanClauses.add(snq); - } - } - - @Override - public String toString(String field) { - return "\"" + phrasedQueryStringContents + "\""; - } - - @Override - public int hashCode() { - final int prime = 31; - int result = 1; - result = prime * result + ((field == null) ? 0 : field.hashCode()); - result = prime - * result - + ((phrasedQueryStringContents == null) ? 0 - : phrasedQueryStringContents.hashCode()); - result = prime * result + slopFactor; - return result; - } - - @Override - public boolean equals(Object obj) { - if (this == obj) - return true; - if (obj == null) - return false; - if (getClass() != obj.getClass()) - return false; - ComplexPhraseQuery other = (ComplexPhraseQuery) obj; - if (field == null) { - if (other.field != null) - return false; - } else if (!field.equals(other.field)) - return false; - if (phrasedQueryStringContents == null) { - if (other.phrasedQueryStringContents != null) - return false; - } else if (!phrasedQueryStringContents - .equals(other.phrasedQueryStringContents)) - return false; - if (slopFactor != other.slopFactor) - return false; - return true; - } - } -}