Contrib Lucene

X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.4.0/lucene/contrib/xml-query-parser/docs/LuceneContribQuery.dtd.org.html diff --git a/lucene-java-3.4.0/lucene/contrib/xml-query-parser/docs/LuceneContribQuery.dtd.org.html b/lucene-java-3.4.0/lucene/contrib/xml-query-parser/docs/LuceneContribQuery.dtd.org.html deleted file mode 100644 index 307747d..0000000 --- a/lucene-java-3.4.0/lucene/contrib/xml-query-parser/docs/LuceneContribQuery.dtd.org.html +++ /dev/null @@ -1,237 +0,0 @@ - - - - -Contrib Lucene - -
LuceneContribQuery.dtd: Elements - Entities - Source | Intro - Index
FRAMES / NO FRAMES
<!--    
-    This DTD builds on the <a href="LuceneCoreQuery.dtd.html">core Lucene XML syntax</a> and adds support for features found in the "contrib" section of the Lucene project.
-    
-    CorePlusExtensionsParser.java is the Java class that encapsulates this parser behaviour.
-
-    
-    The features added are:
-    <ul>
-    <li><a href="#LikeThisQuery">LikeThisQuery</a>:
-       Support for querying using large amounts of example text indicative of the users' general area of interest</li>
-    <li><a href="#FuzzyLikeThisQuery">FuzzyLikeThisQuery</a>:
-       A style of fuzzy query which automatically looks for fuzzy variations on only the "interesting" terms</li>
-    <li><a href="#BooleanFilter">BooleanFilter</a>:
-       Is to Filters what core Lucene's BooleanQuery is to Queries - allows mixing of clauses using Boolean logic</li>
-    <li><a href="#TermsFilter">TermsFilter</a>:
-       Constructs a filter from an arbitrary set of terms (unlike <a href="#RangeFilter">RangeFilter</a> which requires a contiguous range of terms)</li>
-    <li><a href="#DuplicateFilter">DuplicateFilter</a>:
-       Removes duplicated documents from results where "duplicate" means documents share a value for a particular field (e.g. a primary key)</li>
-    <li><a href="#BoostingQuery">BoostingQuery</a>:
-       Influences the score of a query's matches in a subtle way which can't be achieved using BooleanQuery</li>
-    </ul>
-    @title Contrib Lucene
--->
-<!-- @hidden Declares the core DTD as an external parameter entity; it is expanded by the coreParserDTD reference further down -->
-<!ENTITY % coreParserDTD SYSTEM "LuceneCoreQuery.dtd" >
-
-
-<!-- @hidden Extension hooks: blank placeholders that are appended to the extendedQueries1 and extendedFilters1 lists declared below -->
-<!ENTITY % extendedSpanQueries2 " " >
-<!ENTITY % extendedQueries2 " " >
-<!ENTITY % extendedFilters2 " " >
-
-
-<!ENTITY % extendedQueries1 "|LikeThisQuery|BoostingQuery|FuzzyLikeThisQuery%extendedQueries2;%extendedSpanQueries2;" >
-<!ENTITY % extendedFilters1 "|TermsFilter|BooleanFilter|DuplicateFilter%extendedFilters2;" >
-
-
-%coreParserDTD;
-
-<!--
-Performs fuzzy matching on "significant" terms in fields. Improves on "LikeThisQuery" by allowing for fuzzy variations of supplied fields.
-Improves on FuzzyQuery by rewarding all fuzzy variants of a term with the same IDF rather than default fuzzy behaviour which ranks rarer
-    variants (typically misspellings) more highly. This can be a useful default search mode for processing user input where the end user
-    is not expected to know about the standard query operators for fuzzy, boolean or phrase logic found in UserQuery
-    @example 
-            <em>Search for information about the Sumitomo bank, where the end user has mis-spelt the name</em>
-            %             
-            <FuzzyLikeThisQuery>
-                <Field fieldName="contents">
-                     Sumitimo bank
-                </Field>
-            </FuzzyLikeThisQuery>
-             %  
--->
-<!ELEMENT FuzzyLikeThisQuery (Field)*>
-<!-- Optional boost for matches on this query. Values > 1 -->
-<!ATTLIST FuzzyLikeThisQuery boost CDATA "1.0">
-<!-- Limits the total number of terms selected from the provided text plus the selected "fuzzy" variants -->
-<!ATTLIST FuzzyLikeThisQuery maxNumTerms CDATA "50">
-<!-- Ignore "Term Frequency" - a boost factor which rewards multiple occurrences of the same term in a document -->
-<!ATTLIST FuzzyLikeThisQuery ignoreTF (true|false) "false">
-<!-- A field used in a FuzzyLikeThisQuery; its character data is the example text to match -->
-<!ELEMENT Field (#PCDATA)>
-<!-- Controls the level of similarity required for fuzzy variants, where 1 is identical and 0.5 means that the variant contains
-    half of the original's characters in the same order. Lower values produce more results but may take longer to execute due to
-    the additional IO required to read matching document ids. -->
-<!ATTLIST Field minSimilarity CDATA "0.5">
-<!-- Controls the minimum number of characters at the start of fuzzy variant words that must exactly match the original.
-    A value of zero will require no minimum and the search software will effectively scan ALL terms from a to z looking for variations.
-    This can incur high CPU overhead and a prefix length of just "1" will reduce this overhead to 1/26th of the original cost (assuming
-    an even distribution of letters used from the alphabet).
- -->
-<!ATTLIST Field prefixLength CDATA "1">
-<!-- fieldName must be defined here; otherwise it is taken from the most immediate parent XML element that defines a "fieldName" attribute --> 
-<!ATTLIST Field fieldName CDATA #IMPLIED>
-
-
-
-<!--
-    Cherry-picks "significant" terms from the example child text and queries using these words. By only using significant (read: rare) terms the
-    performance cost of the query is substantially reduced and large bodies of text can be used as example content.
-    @example 
-            <em>Use a block of text as an example of the type of content to be found, ignoring the "Reuters" word which
-           appears commonly in the index.</em>
-            %
-            <LikeThisQuery percentTermsToMatch="5" stopWords="Reuters">
-                IRAQI TROOPS REPORTED PUSHING BACK IRANIANS Iraq said today its troops were pushing Iranian forces out of 
-                positions they had initially occupied when they launched a new offensive near the southern port of 
-                Basra early yesterday.     A High Command communique said Iraqi troops had won a significant victory 
-                and were continuing to advance.     Iraq said it had foiled a three-pronged thrust some 10 km 
-                (six miles) from Basra, but admitted the Iranians had occupied ground held by the Mohammed al-Qassem 
-                unit, one of three divisions attacked.     The communique said Iranian Revolutionary Guards were under 
-                assault from warplanes, helicopter gunships, heavy artillery and tanks.     "Our forces are continuing 
-                their advance until they purge the last foothold" occupied by the Iranians, it said.     
-                (Iran said its troops had killed or wounded more than 4,000 Iraqis and were stabilising their new positions.)     
-                The Baghdad communique said Iraqi planes also destroyed oil installations at Iran's southwestern Ahvaz field 
-                during a raid today. It denied an Iranian report that an Iraqi jet was shot down.     
-                Iraq also reported a naval battle at the northern tip of the Gulf. Iraqi naval units and forces defending an 
-                offshore terminal sank six Iranian out of 28 Iranian boats attempting to attack an offshore terminal, 
-                the communique said.      Reuters 3;
-            </LikeThisQuery>             
-            %   
-    -->
-<!ELEMENT LikeThisQuery (#PCDATA)>
-<!-- Optional boost for matches on this query. Values > 1 -->
-<!ATTLIST LikeThisQuery boost CDATA "1.0">
-<!-- Comma-delimited list of field names -->
-<!ATTLIST LikeThisQuery fieldNames CDATA #IMPLIED>
-<!-- A list of stop words - analyzed to produce stop terms -->
-<!ATTLIST LikeThisQuery stopWords CDATA #IMPLIED>
-<!-- Controls the maximum number of words shortlisted for the query. The higher the number, the slower the response, due to more disk reads being required -->
-<!ATTLIST LikeThisQuery maxQueryTerms CDATA "20">
-<!-- Controls how many times a term must appear in the example text before it is shortlisted for use in the query -->
-<!ATTLIST LikeThisQuery minTermFrequency CDATA "1">
-<!-- A quality control that can be used to limit the number of results to those documents matching a certain percentage of the shortlisted query terms.
-    Values must be between 1 and 100. -->
-<!ATTLIST LikeThisQuery percentTermsToMatch CDATA "30">
-
-<!--
-    Requires matches on the "Query" element and optionally boosts by any matches on the "BoostQuery".
-    Unlike a regular BooleanQuery the boost can be less than 1 to produce a subtractive rather than additive result
-    on the match score. 
-    @example <em>Find documents about banks, preferably related to mergers, and preferably not about "World bank"</em>
-    %
-    <BoostingQuery>
-      <Query>
-         <BooleanQuery fieldName="contents">
-           <Clause occurs="should">
-              <TermQuery>merger</TermQuery>
-           </Clause>
-           <Clause occurs="must">
-              <TermQuery>bank</TermQuery>
-           </Clause>
-         </BooleanQuery>    
-      </Query>
-      <BoostQuery boost="0.01">
-         <UserQuery>"world bank"</UserQuery>
-      </BoostQuery>
-    </BoostingQuery>
-    %
-    
---> 
-<!ELEMENT BoostingQuery (Query,BoostQuery)>
-<!-- Optional boost for matches on this query. Values > 1 -->
-<!ATTLIST BoostingQuery boost CDATA "1.0">
-
-<!--
-    Child element of BoostingQuery that contains the query whose matches are used for boosting purposes
---> 
-<!ELEMENT BoostQuery (%queries;)>
-<!-- Optional boost for matches on this query. A boost greater than 0 but less than 1
-    effectively demotes results from Query that match this BoostQuery.
-    -->
-<!ATTLIST BoostQuery boost CDATA "1.0">
-
-
-
-<!-- Removes duplicated documents from results where "duplicate" means documents share a value for a particular field such as a primary key
-    @example <em>Find the latest version of each web page that mentions "Lucene"</em>
-    %
-    <FilteredQuery>
-      <Query>
-         <TermQuery fieldName="text">lucene</TermQuery>
-      </Query>
-      <Filter>
-        <DuplicateFilter fieldName="url" keepMode="last"/>
-      </Filter> 
-    </FilteredQuery>    
-    %   
-    -->
-<!ELEMENT DuplicateFilter EMPTY>
-<!-- fieldName must be defined here; otherwise it is taken from the most immediate parent XML element that defines a "fieldName" attribute --> 
-<!ATTLIST DuplicateFilter fieldName CDATA #IMPLIED>
-<!-- Determines if the first or last document occurrence is the one to return when presented with duplicated field values -->    
-<!ATTLIST DuplicateFilter keepMode (first | last) "first">
-<!-- Controls the choice of process used to produce the filter - "full" mode identifies only non-duplicate documents with the chosen field
-    while "fast" mode may perform faster but will also mark documents <em>without</em> the field as valid. The former approach starts by
-    assuming every document is a duplicate then finds the "master" documents to keep, while the latter approach assumes all documents are
-    unique and unmarks those documents that are a copy.
-    --> 
-<!ATTLIST DuplicateFilter processingMode (full | fast) "full">
-
-
-
-
-<!-- Processes child text using a field-specific choice of Analyzer to produce a set of terms that are then used as a filter.
-    @example <em>Find documents talking about Lucene written on a Monday or a Friday</em>
-    %
-    <FilteredQuery>
-      <Query>
-         <TermQuery fieldName="text">lucene</TermQuery>
-      </Query>
-    <Filter>
-        <TermsFilter fieldName="dayOfWeek">monday friday</TermsFilter> 
-    </Filter>   
-    </FilteredQuery>    
-    %
-    
-    -->
-<!ELEMENT TermsFilter (#PCDATA)>
-<!-- fieldName must be defined here; otherwise it is taken from the most immediate parent XML element that defines a "fieldName" attribute --> 
-<!ATTLIST TermsFilter fieldName CDATA #IMPLIED>
-<!--
-    A Filter equivalent to BooleanQuery that applies Boolean logic to Clauses containing Filters.
-    Unlike BooleanQuery, a BooleanFilter can contain a single "mustNot" clause.
-    @example <em>Find documents from the first quarter of this year or last year that are not in "draft" status</em>
-    %
-     <FilteredQuery>
-       <Query>
-           <MatchAllDocsQuery/>
-       </Query>
-       <Filter>
-        <BooleanFilter>
-          <Clause occurs="should">
-             <RangeFilter fieldName="date" lowerTerm="20070101" upperTerm="20070401"/>
-          </Clause>
-          <Clause occurs="should">
-             <RangeFilter fieldName="date" lowerTerm="20060101" upperTerm="20060401"/>
-          </Clause>
-          <Clause occurs="mustNot">
-             <TermsFilter fieldName="status">draft</TermsFilter> 
-          </Clause>
-        </BooleanFilter>
-       </Filter>
-    </FilteredQuery>
-    %
-    -->
-<!ELEMENT BooleanFilter (Clause)+>
-
-