pylucene 3.5.0-3

[pylucene.git] / lucene-java-3.4.0 / lucene / contrib / xml-query-parser / LuceneCoreQuery.dtd
diff --git a/lucene-java-3.4.0/lucene/contrib/xml-query-parser/LuceneCoreQuery.dtd b/lucene-java-3.4.0/lucene/contrib/xml-query-parser/LuceneCoreQuery.dtd

deleted file mode 100644 (file)

index fdeaf6f..0000000
--- a/lucene-java-3.4.0/lucene/contrib/xml-query-parser/LuceneCoreQuery.dtd
+++ /dev/null
@@ -1,475 +0,0 @@
-<!--
-       <h3>Background</h3>
-       This DTD describes the XML syntax used to perform advanced searches using the core Lucene search engine. The motivation behind the XML query syntax is:
-       <ol>
-       <li>To open up Lucene functionality to clients other than Java</li>
-       <li>To offer a form of expressing queries that can easily be
-           <ul>
-               <li>Persisted for logging/auditing purposes</li>
-               <li>Changed by editing text query templates (XSLT) without requiring a recompile/redeploy of applications</li>
-               <li>Serialized across networks (without requiring Java bytecode for Query logic deployed on clients)</li>
-           </ul>
-       </li>
-       <li>To provide a shorthand way of expressing query logic which echoes the logical tree structure of query objects more closely than reading procedural Java query construction code</li>
-       <li>To bridge the growing gap between Lucene query/filtering functionality and the set of functionality accessible through the standard Lucene QueryParser syntax</li>
-       <li>To provide a simply extensible syntax that does not require complex parser skills such as knowledge of JavaCC syntax</li>
-       </ol>
-       
-       
-       <h3>Syntax overview</h3>
-       Search syntax consists of two types of elements:
-       <ul>
-       <li><i>Queries</i></li>
-       <li><i>Filters</i></li>
-       </ul>
-
-       <h4>Queries</h4>
-       The root of any XML search must be a <i>Query</i> type element used to select content.
-       Queries typically score matches on documents using a number of different factors in order to provide relevant results first. 
-       One common example of a query tag is the <a href="#UserQuery">UserQuery</a> element which uses the standard 
-       Lucene QueryParser to parse Google-style search syntax provided by end users.
-       
-       <h4>Filters</h4>
-       Unlike Queries, <i>Filters</i> are not used to select or score content - they are simply used to filter <i>Query</i> output (see <a href="#FilteredQuery">FilteredQuery</a> for an example use of query filtering).
-       Because Filters simply offer a yes/no decision for each document in the index their output can be efficiently cached in memory as a <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/util/BitSet.html">Bitset</a> for
-       subsequent reuse (see <a href="#CachedFilter">CachedFilter</a> tag).
-
-       <h4>Nesting elements</h4>
-       Many of the elements can nest other elements to produce queries/filters of an arbitrary depth and complexity. 
-       The <a href="#BooleanQuery">BooleanQuery</a> element is one such example which provides a means for combining other queries (including other BooleanQueries) using Boolean 
-       logic to determine mandatory or optional elements. 
-
-       
-       <h3>Advanced topics</h3>        
-       <h4>Advanced positional testing - span queries</h4>
-       The <i>SpanQuery</i> class of queries allows for complex positional tests which look not only for certain combinations of words but also for particular 
-       positions of those words in relation to each other and to the documents containing them.
-       
-       
-       CoreParser.java is the Java class that encapsulates this parser behaviour.
-       
-       
-       @title Core Lucene      
--->
-
-<!-- @hidden Define core types of XML elements. BoostingTermQuery is listed only under coreSpanQueries: the "queries" entity already includes every span query, and naming it in both lists would place it twice in the same choice group (a non-deterministic content model) -->
-<!ENTITY % coreSpanQueries "SpanOr|SpanNear|SpanOrTerms|SpanFirst|SpanNot|SpanTerm|BoostingTermQuery" >
-<!ENTITY % coreQueries "BooleanQuery|UserQuery|FilteredQuery|TermQuery|TermsQuery|MatchAllDocsQuery|ConstantScoreQuery|NumericRangeQuery" >
-<!ENTITY % coreFilters "RangeFilter|NumericRangeFilter|CachedFilter" >
-
-<!-- @hidden Allow for extensions -->
-<!ENTITY % extendedSpanQueries1 " " >
-<!ENTITY % extendedQueries1 " " >
-<!ENTITY % extendedFilters1 " " >
-
-<!ENTITY % spanQueries "%coreSpanQueries;%extendedSpanQueries1;" >
-<!ENTITY % queries "%coreQueries;|%spanQueries;%extendedQueries1;" >
-
-
-<!ENTITY % filters "%coreFilters;%extendedFilters1;" >
-
-<!--
-       BooleanQuerys implement Boolean logic which controls how multiple Clauses should be interpreted.
-       Some clauses may represent optional Query criteria while others represent mandatory criteria.   
-       @example 
-               <em>Find articles about banks, preferably talking about mergers but nothing to do with "sumitomo"</em>
-               %                 
-            <BooleanQuery fieldName="contents">
-                    <Clause occurs="should">
-                             <TermQuery>merger</TermQuery>
-                    </Clause>
-                    <Clause occurs="mustnot">
-                             <TermQuery>sumitomo</TermQuery>
-                    </Clause>
-                    <Clause occurs="must">
-                             <TermQuery>bank</TermQuery>
-                    </Clause>
-            </BooleanQuery>
-
-                %
--->    
-<!ELEMENT BooleanQuery (Clause)+>
-<!-- Optional boost for matches on this query. Values > 1 -->
-<!ATTLIST BooleanQuery boost CDATA "1.0">
-<!-- fieldName can optionally be defined here as a default attribute used by all child elements -->    
-<!ATTLIST BooleanQuery fieldName CDATA #IMPLIED>
-<!-- The "Coordination factor" rewards documents that contain more of the optional clauses in this list. This flag can be used to turn off this factor. -->
-<!ATTLIST BooleanQuery disableCoord (true | false) "false">
-<!-- The minimum number of optional clauses that should be present in any one document before it is considered to be a match. -->
-<!ATTLIST BooleanQuery minimumNumberShouldMatch CDATA "0">
-
-<!-- NOTE: "Clause" tag has 2 modes of use - inside <BooleanQuery> in which case only "query" types can be
-       child elements - while in a <BooleanFilter> clause only "filter" types can be contained.
-       @hidden TODO: Change BooleanFilterBuilder and BooleanQueryBuilder to auto-wrap choice of query or filters. This type of
-             code already exists in CachedFilter so could be reused.
--->    
-<!ELEMENT Clause (%queries;|%filters;)>
-<!-- Controls if the clause is optional (should), mandatory (must) or unacceptable (mustnot) -->
-<!ATTLIST Clause occurs (should | must | mustnot) "should">
-
-
-<!-- Caches any nested query or filter in an LRU (Least recently used) Cache. Cached queries, like filters, are turned into
-       Bitsets at a cost of 1 bit per document in the index. The memory cost of a cached query/filter is therefore numberOfDocsinIndex/8 bytes.
-       Queries that are cached as filters obviously retain none of the scoring information associated with results - they retain just
-       a Boolean yes/no record of which documents matched. 
-       @example 
-               <em>Search for documents about banks from the last 10 years - caching the commonly-used "last 10 year" filter as a BitSet in 
-       RAM to eliminate the cost of building this filter from disk for every query</em>
-               %                 
-            <FilteredQuery>
-               <Query>
-                  <UserQuery>bank</UserQuery>
-               </Query>        
-               <Filter>
-                  <CachedFilter>
-                     <RangeFilter fieldName="date" lowerTerm="19970101" upperTerm="20070101"/>
-                  </CachedFilter>
-               </Filter>       
-            </FilteredQuery>
-                %
-       
-       -->
-<!ELEMENT CachedFilter (%queries;|%filters;)>
-
-
-
-<!--
-Passes content directly through to the standard Lucene QueryParser (see "Lucene Query Syntax").
-       @example 
-               <em>Search for documents about John Smith or John Doe using standard LuceneQuerySyntax</em>
-               %                 
-               <UserQuery>"John Smith" OR "John Doe"</UserQuery>
-                %
-               
--->
-<!ELEMENT UserQuery (#PCDATA)>
-<!-- Optional boost for matches on this query. Values > 1 -->
-<!ATTLIST UserQuery boost CDATA "1.0">
-<!-- fieldName can optionally be defined here to change the default field used in the QueryParser -->  
-<!ATTLIST UserQuery fieldName CDATA #IMPLIED>
-
-<!-- A query which is used to match all documents. This has a couple of uses: 
-       <ol>
-       <li> as a Clause in a BooleanQuery whose only other clause
-       is a "mustnot" match (Lucene requires at least one positive clause) and..</li>
-       <li> in a FilteredQuery where a Filter tag is effectively being 
-       used to select content rather than its usual role of filtering the results of a query.</li>
-       </ol>
-       
-       @example 
-               <em>Effectively use a Filter as a query </em>
-               %                 
-               <FilteredQuery>
-                 <Query>
-                    <MatchAllDocsQuery/>
-                 </Query>
-                 <Filter>
-                     <RangeFilter fieldName="date" lowerTerm="19870409" upperTerm="19870412"/>
-                 </Filter>     
-               </FilteredQuery>                 
-              %
-       
--->
-<!ELEMENT MatchAllDocsQuery EMPTY>
-
-<!-- a single term query - no analysis is done of the child text
-       @example 
-               <em>Match on a primary key</em>
-               %                 
-               <TermQuery fieldName="primaryKey">13424</TermQuery>
-              %        
--->    
-<!ELEMENT TermQuery (#PCDATA)>
-<!-- Optional boost for matches on this query. Values > 1 -->
-<!ATTLIST TermQuery boost CDATA "1.0">
-<!-- fieldName must be defined here or is taken from the most immediate parent XML element that defines a "fieldName" attribute -->    
-<!ATTLIST TermQuery fieldName CDATA #IMPLIED>
-
-
-<!--
-  A boosted term query - no analysis is done of the child text. Also a span member.
-
-  (Text below is copied from the javadocs of BoostingTermQuery)
-   
-  The BoostingTermQuery is very similar to the {@link org.apache.lucene.search.spans.SpanTermQuery} except
-  that it factors in the value of the payload located at each of the positions where the
-  {@link org.apache.lucene.index.Term} occurs.
-
-  In order to take advantage of this, you must override {@link org.apache.lucene.search.Similarity#scorePayload(String, byte[],int,int)}
-  which returns 1 by default.
-
-  Payload scores are averaged across term occurrences in the document.
-
-  @see org.apache.lucene.search.Similarity#scorePayload(String, byte[], int, int)
--->
-<!ELEMENT BoostingTermQuery (#PCDATA)>
-<!-- Optional boost for matches on this query. Values > 1 -->
-<!ATTLIST BoostingTermQuery boost CDATA "1.0">
-<!-- fieldName must be defined here or is taken from the most immediate parent XML element that defines a "fieldName" attribute -->
-<!ATTLIST BoostingTermQuery fieldName CDATA #IMPLIED>
-
-
-
-<!-- 
-       The equivalent of a BooleanQuery with multiple optional TermQuery clauses.
-       Child text is analyzed using a field-specific choice of Analyzer to produce a set of terms that are ORed together in Boolean logic.
-       Unlike UserQuery element, this does not parse any special characters to control fuzzy/phrase/boolean logic and as such is incapable
-       of producing a Query parse error given any user input
-       @example 
-               <em>Match on text from a database description (which may contain characters that 
-       are illegal characters in the standard Lucene Query syntax used in the UserQuery tag)</em>
-               %                 
-               <TermsQuery fieldName="description">Smith & Sons (Ltd) : incorporated 1982</TermsQuery>
-              %        
--->    
-<!ELEMENT TermsQuery (#PCDATA)>
-<!-- Optional boost for matches on this query. Values > 1 -->
-<!ATTLIST TermsQuery boost CDATA "1.0">
-<!-- fieldName must be defined here or is taken from the most immediate parent XML element that defines a "fieldName" attribute -->    
-<!ATTLIST TermsQuery fieldName CDATA #IMPLIED>
-<!-- The "Coordination factor" rewards documents that contain more of the terms in this list. This flag can be used to turn off this factor. -->
-<!ATTLIST TermsQuery disableCoord (true | false) "false">
-<!-- The minimum number of terms that should be present in any one document before it is considered to be a match. -->
-<!ATTLIST TermsQuery minimumNumberShouldMatch CDATA "0">
-
-
-<!-- 
-       Runs a Query and filters results to only those query matches that also match the Filter element.        
-       @example 
-               <em>Find all documents about Lucene that have a status of "published"</em>
-               %                 
-               <FilteredQuery>
-                 <Query>
-                    <UserQuery>Lucene</UserQuery>
-                 </Query>
-                 <Filter>
-                     <TermsFilter fieldName="status">published</TermsFilter>
-                 </Filter>     
-               </FilteredQuery>                 
-              %        
--->    
-<!ELEMENT FilteredQuery (Query,Filter)>
-<!-- Optional boost for matches on this query. Values > 1 -->
-<!ATTLIST FilteredQuery boost CDATA "1.0">
-<!-- Used to identify a nested Query element inside another container element. NOT a top-level query tag  -->
-<!ELEMENT Query (%queries;)>
-<!-- The choice of Filter that MUST also be matched  -->
-<!ELEMENT Filter (%filters;)>
-
-<!--
-       Filter used to limit query results to documents matching a range of field values
-       @example 
-               <em>Search for documents about banks from the last 10 years</em>
-               %                 
-            <FilteredQuery>
-               <Query>
-                  <UserQuery>bank</UserQuery>
-               </Query>        
-               <Filter>
-                     <RangeFilter fieldName="date" lowerTerm="19970101" upperTerm="20070101"/>
-               </Filter>       
-            </FilteredQuery>
-                %
-       -->
-<!ELEMENT RangeFilter EMPTY>
-<!-- fieldName must be defined here or is taken from the most immediate parent XML element that defines a "fieldName" attribute -->    
-<!ATTLIST RangeFilter fieldName CDATA #IMPLIED>
-<!-- The lower-most term value for this field (must be <= upperTerm) -->
-<!ATTLIST RangeFilter lowerTerm CDATA #REQUIRED>
-<!-- The upper-most term value for this field (must be >= lowerTerm) -->
-<!ATTLIST RangeFilter upperTerm CDATA #REQUIRED>
-<!-- Controls if the lowerTerm in the range is part of the allowed set of values -->
-<!ATTLIST RangeFilter includeLower (true | false) "true">
-<!-- Controls if the upperTerm in the range is part of the allowed set of values -->
-<!ATTLIST RangeFilter includeUpper (true | false) "true">
-
-<!--
-       A Query that matches numeric values within a specified range.
-       @example 
-               <em>Search for documents about people who are aged 20-25</em>
-               %                 
-            <NumericRangeQuery fieldName="age" lowerTerm="20" upperTerm="25" />
-                %
-       -->
-<!ELEMENT NumericRangeQuery EMPTY>
-<!-- fieldName must be defined here or is taken from the most immediate parent XML element that defines a "fieldName" attribute -->    
-<!ATTLIST NumericRangeQuery fieldName CDATA #IMPLIED>
-<!-- The lower-most term value for this field (must be <= upperTerm and a valid native java numeric type) -->
-<!ATTLIST NumericRangeQuery lowerTerm CDATA #REQUIRED>
-<!-- The upper-most term value for this field (must be >= lowerTerm and a valid native java numeric type) -->
-<!ATTLIST NumericRangeQuery upperTerm CDATA #REQUIRED>
-<!-- The numeric type of this field -->
-<!ATTLIST NumericRangeQuery type (int | long | float | double) "int">
-<!-- Controls if the lowerTerm in the range is part of the allowed set of values -->
-<!ATTLIST NumericRangeQuery includeLower (true | false) "true">
-<!-- Controls if the upperTerm in the range is part of the allowed set of values -->
-<!ATTLIST NumericRangeQuery includeUpper (true | false) "true">
-<!-- Lower step values mean more precision and therefore more terms in the index (so the index gets larger). This value must be an integer -->
-<!ATTLIST NumericRangeQuery precisionStep CDATA "4">
-
-<!--
-       A Filter that only accepts numeric values within a specified range
-       @example 
-               <em>Search for documents about people who are aged 20-25</em>
-               %                 
-            <FilteredQuery>
-               <Query>
-                  <UserQuery>person</UserQuery>
-               </Query>        
-               <Filter>
-                     <NumericRangeFilter fieldName="age" lowerTerm="20" upperTerm="25"/>
-               </Filter>       
-            </FilteredQuery>
-                %
-       -->
-<!ELEMENT NumericRangeFilter EMPTY>
-<!-- fieldName must be defined here or is taken from the most immediate parent XML element that defines a "fieldName" attribute -->    
-<!ATTLIST NumericRangeFilter fieldName CDATA #IMPLIED>
-<!-- The lower-most term value for this field (must be <= upperTerm and a valid native java numeric type) -->
-<!ATTLIST NumericRangeFilter lowerTerm CDATA #REQUIRED>
-<!-- The upper-most term value for this field (must be >= lowerTerm and a valid native java numeric type) -->
-<!ATTLIST NumericRangeFilter upperTerm CDATA #REQUIRED>
-<!-- The numeric type of this field -->
-<!ATTLIST NumericRangeFilter type (int | long | float | double) "int">
-<!-- Controls if the lowerTerm in the range is part of the allowed set of values -->
-<!ATTLIST NumericRangeFilter includeLower (true | false) "true">
-<!-- Controls if the upperTerm in the range is part of the allowed set of values -->
-<!ATTLIST NumericRangeFilter includeUpper (true | false) "true">
-<!-- Lower step values mean more precision and therefore more terms in the index (so the index gets larger). This value must be an integer -->
-<!ATTLIST NumericRangeFilter precisionStep CDATA "4">
-
-<!-- A single term used in a SpanQuery. These clauses are the building blocks for more complex "span" queries which test word proximity
-       @example <em>Find documents using terms close to each other about mining and accidents</em>
-             %
-             <SpanNear slop="8" inOrder="false" fieldName="text">              
-                       <SpanOr>
-                               <SpanTerm>killed</SpanTerm>
-                               <SpanTerm>died</SpanTerm>
-                               <SpanTerm>dead</SpanTerm>
-                       </SpanOr>
-                       <SpanOr>
-                               <SpanTerm>miner</SpanTerm>
-                               <SpanTerm>mining</SpanTerm>
-                               <SpanTerm>miners</SpanTerm>
-                       </SpanOr>
-             </SpanNear>
-             %         
-       -->
-<!ELEMENT SpanTerm (#PCDATA)>
-<!-- fieldName must be defined here or is taken from the most immediate parent XML element that defines a "fieldName" attribute, so it is declared optional -->
-<!ATTLIST SpanTerm fieldName CDATA #IMPLIED>
-
-<!-- A field-specific analyzer is used here to parse the child text provided in this tag. The SpanTerms produced are ORed in terms of Boolean logic 
-       @example <em>Use SpanOrTerms as a more convenient/succinct way of expressing multiple choices of SpanTerms. This example looks for reports 
-       using words describing a fatality near to references to miners</em>
-             %
-             <SpanNear slop="8" inOrder="false" fieldName="text">              
-                       <SpanOrTerms>killed died death dead deaths</SpanOrTerms>
-                       <SpanOrTerms>miner mining miners</SpanOrTerms>
-             </SpanNear>
-             %         
-       -->
-<!ELEMENT SpanOrTerms (#PCDATA)>
-<!-- fieldName must be defined here or is taken from the most immediate parent XML element that defines a "fieldName" attribute, so it is declared optional -->
-<!ATTLIST SpanOrTerms fieldName CDATA #IMPLIED>
-
-<!-- Takes any number of child queries from the Span family 
-       @example <em>Find documents using terms close to each other about mining and accidents</em>
-             %
-             <SpanNear slop="8" inOrder="false" fieldName="text">              
-                       <SpanOr>
-                               <SpanTerm>killed</SpanTerm>
-                               <SpanTerm>died</SpanTerm>
-                               <SpanTerm>dead</SpanTerm>
-                       </SpanOr>
-                       <SpanOr>
-                               <SpanTerm>miner</SpanTerm>
-                               <SpanTerm>mining</SpanTerm>
-                               <SpanTerm>miners</SpanTerm>
-                       </SpanOr>
-             </SpanNear>
-             % 
-       
-       -->
-<!ELEMENT SpanOr (%spanQueries;)* >
-
-<!-- Takes any number of child queries from the Span family and tests for proximity
-       @hidden TODO SpanNear missing "boost" attr (could add to SpanBuilderBase)
-       -->
-<!ELEMENT SpanNear (%spanQueries;)* >
-<!-- defines the maximum distance between Span elements where distance is expressed as word number, not byte offset 
-       @example <em>Find documents using terms within 8 words of each other talking about mining and accidents</em>
-             %
-             <SpanNear slop="8" inOrder="false" fieldName="text">              
-                       <SpanOr>
-                               <SpanTerm>killed</SpanTerm>
-                               <SpanTerm>died</SpanTerm>
-                               <SpanTerm>dead</SpanTerm>
-                       </SpanOr>
-                       <SpanOr>
-                               <SpanTerm>miner</SpanTerm>
-                               <SpanTerm>mining</SpanTerm>
-                               <SpanTerm>miners</SpanTerm>
-                       </SpanOr>
-             </SpanNear>
-             % 
-       -->
-<!ATTLIST SpanNear slop CDATA #REQUIRED>
-<!-- Controls whether matching terms have to appear in the order listed or can be reversed -->
-<!ATTLIST SpanNear inOrder (true | false) "true">
-
-<!-- Looks for a SpanQuery match occurring near the beginning of a document
-       
-       @example 
-               <em>Find letters where the first 50 words talk about a resignation:</em>
-               %                 
-                <SpanFirst end="50">
-                      <SpanOrTerms fieldName="text">resigning resign leave</SpanOrTerms>
-                </SpanFirst>
-                %
-       
-        --> 
-<!ELEMENT SpanFirst (%spanQueries;) >
-<!-- Controls the end of the region considered in a document's field (expressed in word number, not byte offset) --> 
-<!ATTLIST SpanFirst end CDATA #REQUIRED>
-<!-- Optional boost for matches on this query. Values > 1 -->
-<!ATTLIST SpanFirst boost CDATA "1.0">
-
-<!-- Finds documents matching a SpanQuery but not if matching another SpanQuery 
-       @example <em>Find documents talking about social services but not containing the word "public"</em>
-             %
-          <SpanNot fieldName="text">
-             <Include>
-                <SpanNear slop="2" inOrder="true">             
-                     <SpanTerm>social</SpanTerm>
-                     <SpanTerm>services</SpanTerm>
-                </SpanNear>                            
-             </Include>
-             <Exclude>
-                <SpanTerm>public</SpanTerm>
-             </Exclude>
-          </SpanNot>
-             % 
-       
-       -->
-<!ELEMENT SpanNot (Include,Exclude) >
-<!-- The SpanQuery to find -->
-<!ELEMENT Include (%spanQueries;) >
-<!-- The SpanQuery to be avoided -->
-<!ELEMENT Exclude (%spanQueries;) >
-
-
-<!-- a utility tag to wrap any filter as a query 
-       @example <em> Find all documents from the last 10 years </em>
-       %
-     <ConstantScoreQuery>
-           <RangeFilter fieldName="date" lowerTerm="19970101" upperTerm="20070101"/>
-     </ConstantScoreQuery>     
-       %
-       -->
-<!ELEMENT ConstantScoreQuery (%filters;)* >
-<!-- Optional boost for matches on this query. Values > 1 -->
-<!ATTLIST ConstantScoreQuery boost CDATA "1.0">
-
-
-