X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.4.0/lucene/src/test-framework/org/apache/lucene/analysis/CollationTestBase.java

diff --git a/lucene-java-3.4.0/lucene/src/test-framework/org/apache/lucene/analysis/CollationTestBase.java b/lucene-java-3.4.0/lucene/src/test-framework/org/apache/lucene/analysis/CollationTestBase.java
deleted file mode 100644
index 8fb540a..0000000
--- a/lucene-java-3.4.0/lucene/src/test-framework/org/apache/lucene/analysis/CollationTestBase.java
+++ /dev/null
@@ -1,316 +0,0 @@
-package org.apache.lucene.analysis;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
-import org.apache.lucene.analysis.WhitespaceAnalyzer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.store.RAMDirectory;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.ScoreDoc;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.TermRangeFilter;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.TermRangeQuery;
-import org.apache.lucene.search.Searcher;
-import org.apache.lucene.search.Sort;
-import org.apache.lucene.search.SortField;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.util.IndexableBinaryStringTools;
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util._TestUtil;
-
-import java.io.IOException;
-import java.io.StringReader;
-import java.util.HashMap;
-import java.util.Map;
-
-public abstract class CollationTestBase extends LuceneTestCase {
-
-  protected String firstRangeBeginningOriginal = "\u062F";
-  protected String firstRangeEndOriginal = "\u0698";
-
-  protected String secondRangeBeginningOriginal = "\u0633";
-  protected String secondRangeEndOriginal = "\u0638";
-
-  /**
-   * Convenience method to perform the same function as CollationKeyFilter.
-   *
-   * @param keyBits the result from
-   *  collator.getCollationKey(original).toByteArray()
-   * @return The encoded collation key for the original String
-   */
-  protected String encodeCollationKey(byte[] keyBits) {
-    // Ensure that the backing char[] array is large enough to hold the encoded
-    // Binary String
-    int encodedLength = IndexableBinaryStringTools.getEncodedLength(keyBits, 0, keyBits.length);
-    char[] encodedBegArray = new char[encodedLength];
-    IndexableBinaryStringTools.encode(keyBits, 0, keyBits.length, encodedBegArray, 0, encodedLength);
-    return new String(encodedBegArray);
-  }
-
-  public void testFarsiRangeFilterCollating(Analyzer analyzer, String firstBeg,
-                                            String firstEnd, String secondBeg,
-                                            String secondEnd) throws Exception {
-    RAMDirectory ramDir = new RAMDirectory();
-    IndexWriter writer = new IndexWriter(ramDir, new IndexWriterConfig(
-        TEST_VERSION_CURRENT, analyzer));
-    Document doc = new Document();
-    doc.add(new Field("content", "\u0633\u0627\u0628",
-                      Field.Store.YES, Field.Index.ANALYZED));
-    doc.add(new Field("body", "body",
-                      Field.Store.YES, Field.Index.NOT_ANALYZED));
-    writer.addDocument(doc);
-    writer.close();
-    IndexSearcher searcher = new IndexSearcher(ramDir, true);
-    Query query = new TermQuery(new Term("body","body"));
-
-    // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
-    // orders the U+0698 character before the U+0633 character, so the single
-    // index Term below should NOT be returned by a TermRangeFilter with a Farsi
-    // Collator (or an Arabic one for the case when Farsi is not supported).
-    ScoreDoc[] result = searcher.search
-      (query, new TermRangeFilter("content", firstBeg, firstEnd, true, true), 1).scoreDocs;
-    assertEquals("The index Term should not be included.", 0, result.length);
-
-    result = searcher.search
-      (query, new TermRangeFilter("content", secondBeg, secondEnd, true, true), 1).scoreDocs;
-    assertEquals("The index Term should be included.", 1, result.length);
-
-    searcher.close();
-  }
-
-  public void testFarsiRangeQueryCollating(Analyzer analyzer, String firstBeg,
-                                           String firstEnd, String secondBeg,
-                                           String secondEnd) throws Exception {
-    RAMDirectory ramDir = new RAMDirectory();
-    IndexWriter writer = new IndexWriter(ramDir, new IndexWriterConfig(
-        TEST_VERSION_CURRENT, analyzer));
-    Document doc = new Document();
-
-    // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
-    // orders the U+0698 character before the U+0633 character, so the single
-    // index Term below should NOT be returned by a TermRangeQuery with a Farsi
-    // Collator (or an Arabic one for the case when Farsi is not supported).
- doc.add(new Field("content", "\u0633\u0627\u0628", - Field.Store.YES, Field.Index.ANALYZED)); - writer.addDocument(doc); - writer.close(); - IndexSearcher searcher = new IndexSearcher(ramDir, true); - - Query query = new TermRangeQuery("content", firstBeg, firstEnd, true, true); - ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; - assertEquals("The index Term should not be included.", 0, hits.length); - - query = new TermRangeQuery("content", secondBeg, secondEnd, true, true); - hits = searcher.search(query, null, 1000).scoreDocs; - assertEquals("The index Term should be included.", 1, hits.length); - searcher.close(); - } - - public void testFarsiTermRangeQuery(Analyzer analyzer, String firstBeg, - String firstEnd, String secondBeg, String secondEnd) throws Exception { - - RAMDirectory farsiIndex = new RAMDirectory(); - IndexWriter writer = new IndexWriter(farsiIndex, new IndexWriterConfig( - TEST_VERSION_CURRENT, analyzer)); - Document doc = new Document(); - doc.add(new Field("content", "\u0633\u0627\u0628", - Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("body", "body", - Field.Store.YES, Field.Index.NOT_ANALYZED)); - writer.addDocument(doc); - writer.close(); - - IndexReader reader = IndexReader.open(farsiIndex, true); - IndexSearcher search = newSearcher(reader); - - // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi - // orders the U+0698 character before the U+0633 character, so the single - // index Term below should NOT be returned by a TermRangeQuery - // with a Farsi Collator (or an Arabic one for the case when Farsi is - // not supported). - Query csrq - = new TermRangeQuery("content", firstBeg, firstEnd, true, true); - ScoreDoc[] result = search.search(csrq, null, 1000).scoreDocs; - assertEquals("The index Term should not be included.", 0, result.length); - - csrq = new TermRangeQuery - ("content", secondBeg, secondEnd, true, true); - result = search.search(csrq, null, 1000).scoreDocs; - assertEquals("The index Term should be included.", 1, result.length); - search.close(); - } - - // Test using various international locales with accented characters (which - // sort differently depending on locale) - // - // Copied (and slightly modified) from - // org.apache.lucene.search.TestSort.testInternationalSort() - // - // TODO: this test is really fragile. there are already 3 different cases, - // depending upon unicode version. 
-  public void testCollationKeySort(Analyzer usAnalyzer,
-                                   Analyzer franceAnalyzer,
-                                   Analyzer swedenAnalyzer,
-                                   Analyzer denmarkAnalyzer,
-                                   String usResult,
-                                   String frResult,
-                                   String svResult,
-                                   String dkResult) throws Exception {
-    RAMDirectory indexStore = new RAMDirectory();
-    PerFieldAnalyzerWrapper analyzer
-      = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
-    analyzer.addAnalyzer("US", usAnalyzer);
-    analyzer.addAnalyzer("France", franceAnalyzer);
-    analyzer.addAnalyzer("Sweden", swedenAnalyzer);
-    analyzer.addAnalyzer("Denmark", denmarkAnalyzer);
-    IndexWriter writer = new IndexWriter(indexStore, new IndexWriterConfig(
-        TEST_VERSION_CURRENT, analyzer));
-
-    // document data:
-    // the tracer field is used to determine which document was hit
-    String[][] sortData = new String[][] {
-      // tracer contents US                 France             Sweden (sv_SE)     Denmark (da_DK)
-      { "A",    "x",     "p\u00EAche",      "p\u00EAche",      "p\u00EAche",      "p\u00EAche" },
-      { "B",    "y",     "HAT",             "HAT",             "HAT",             "HAT" },
-      { "C",    "x",     "p\u00E9ch\u00E9", "p\u00E9ch\u00E9", "p\u00E9ch\u00E9", "p\u00E9ch\u00E9" },
-      { "D",    "y",     "HUT",             "HUT",             "HUT",             "HUT" },
-      { "E",    "x",     "peach",           "peach",           "peach",           "peach" },
-      { "F",    "y",     "H\u00C5T",        "H\u00C5T",        "H\u00C5T",        "H\u00C5T" },
-      { "G",    "x",     "sin",             "sin",             "sin",             "sin" },
-      { "H",    "y",     "H\u00D8T",        "H\u00D8T",        "H\u00D8T",        "H\u00D8T" },
-      { "I",    "x",     "s\u00EDn",        "s\u00EDn",        "s\u00EDn",        "s\u00EDn" },
-      { "J",    "y",     "HOT",             "HOT",             "HOT",             "HOT" },
-    };
-
-    for (int i = 0 ; i < sortData.length ; ++i) {
-      Document doc = new Document();
-      doc.add(new Field("tracer", sortData[i][0],
                        Field.Store.YES, Field.Index.NO));
-      doc.add(new Field("contents", sortData[i][1],
-                        Field.Store.NO, Field.Index.ANALYZED));
-      if (sortData[i][2] != null)
-        doc.add(new Field("US", sortData[i][2],
-                          Field.Store.NO, Field.Index.ANALYZED));
-      if (sortData[i][3] != null)
-        doc.add(new Field("France", sortData[i][3],
-                          Field.Store.NO, Field.Index.ANALYZED));
-      if (sortData[i][4] != null)
-        doc.add(new Field("Sweden", sortData[i][4],
-                          Field.Store.NO, Field.Index.ANALYZED));
-      if (sortData[i][5] != null)
-        doc.add(new Field("Denmark", sortData[i][5],
-                          Field.Store.NO, Field.Index.ANALYZED));
-      writer.addDocument(doc);
-    }
-    writer.optimize();
-    writer.close();
-    Searcher searcher = new IndexSearcher(indexStore, true);
-
-    Sort sort = new Sort();
-    Query queryX = new TermQuery(new Term ("contents", "x"));
-    Query queryY = new TermQuery(new Term ("contents", "y"));
-
-    sort.setSort(new SortField("US", SortField.STRING));
-    assertMatches(searcher, queryY, sort, usResult);
-
-    sort.setSort(new SortField("France", SortField.STRING));
-    assertMatches(searcher, queryX, sort, frResult);
-
-    sort.setSort(new SortField("Sweden", SortField.STRING));
-    assertMatches(searcher, queryY, sort, svResult);
-
-    sort.setSort(new SortField("Denmark", SortField.STRING));
-    assertMatches(searcher, queryY, sort, dkResult);
-  }
-
-  // Make sure the documents returned by the search match the expected list
-  // Copied from TestSort.java
-  private void assertMatches(Searcher searcher, Query query, Sort sort,
-                             String expectedResult) throws IOException {
-    ScoreDoc[] result = searcher.search(query, null, 1000, sort).scoreDocs;
-    StringBuilder buff = new StringBuilder(10);
-    int n = result.length;
-    for (int i = 0 ; i < n ; ++i) {
-      Document doc = searcher.doc(result[i].doc);
-      String[] v = doc.getValues("tracer");
-      for (int j = 0 ; j < v.length ; ++j) {
-        buff.append(v[j]);
-      }
-    }
-    assertEquals(expectedResult, buff.toString());
-  }
-
-  public void assertThreadSafe(final Analyzer analyzer) throws Exception {
-    int numTestPoints = 100;
-    int numThreads = _TestUtil.nextInt(random, 3, 5);
-    final HashMap<String,String> map = new HashMap<String,String>();
-
-    // create a map up front.
-    // then with multiple threads, generate sort keys for all the keys in the map
-    // and ensure they are the same as the ones we produced in serial fashion.
-
-    for (int i = 0; i < numTestPoints; i++) {
-      String term = _TestUtil.randomSimpleString(random);
-      TokenStream ts = analyzer.reusableTokenStream("fake", new StringReader(term));
-      CharTermAttribute encodedBytes = ts.addAttribute(CharTermAttribute.class);
-      ts.reset();
-      assertTrue(ts.incrementToken());
-      // ensure we make a copy of the actual bytes too
-      map.put(term, encodedBytes.toString());
-    }
-
-    Thread threads[] = new Thread[numThreads];
-    for (int i = 0; i < numThreads; i++) {
-      threads[i] = new Thread() {
-        @Override
-        public void run() {
-          try {
-            for (Map.Entry<String,String> mapping : map.entrySet()) {
-              String term = mapping.getKey();
-              String expected = mapping.getValue();
-              TokenStream ts = analyzer.reusableTokenStream("fake", new StringReader(term));
-              CharTermAttribute encodedBytes = ts.addAttribute(CharTermAttribute.class);
-              ts.reset();
-              assertTrue(ts.incrementToken());
-              assertEquals(expected, encodedBytes.toString());
-            }
-          } catch (IOException e) {
-            throw new RuntimeException(e);
-          }
-        }
-      };
-    }
-    for (int i = 0; i < numThreads; i++) {
-      threads[i].start();
-    }
-    for (int i = 0; i < numThreads; i++) {
-      threads[i].join();
-    }
-  }
-}
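
For context, CollationTestBase is driven by concrete subclasses that supply a collating analyzer and pre-encoded range endpoints. Below is a minimal sketch of such a subclass, modeled on Lucene's own collation tests: the class name TestFarsiCollation is hypothetical, CollationKeyAnalyzer comes from the contrib collation module (which this file does not itself reference), and the Arabic locale stands in for Farsi, as the comments in the deleted code anticipate.

    import java.text.Collator;
    import java.util.Locale;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.collation.CollationKeyAnalyzer;

    public class TestFarsiCollation extends CollationTestBase {
      // Arabic collation rules stand in for Farsi on JVMs without a fa table.
      private final Collator collator = Collator.getInstance(new Locale("ar"));
      private final Analyzer analyzer = new CollationKeyAnalyzer(collator);

      // Range endpoints must be encoded exactly like the indexed terms:
      // collation key bytes -> index-safe chars via encodeCollationKey().
      private String encode(String original) {
        return encodeCollationKey(collator.getCollationKey(original).toByteArray());
      }

      public void testRangeFilter() throws Exception {
        testFarsiRangeFilterCollating(analyzer,
            encode(firstRangeBeginningOriginal), encode(firstRangeEndOriginal),
            encode(secondRangeBeginningOriginal), encode(secondRangeEndOriginal));
      }
    }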
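
The same fixture can drive the locale-sensitive sort test. testCollationKeySort() takes the expected tracer orderings as parameters precisely because the correct values depend on the JVM's collation tables and Unicode version (the TODO in the deleted code calls the test fragile for this reason), so the hypothetical driver below keeps them symbolic rather than hard-coding runtime-dependent strings.

    // Hypothetical driver inside the same subclass; usOrder etc. are the
    // tracer sequences ("A".."J") the caller expects for its own JVM.
    void runLocaleSortTest(String usOrder, String frOrder,
                           String svOrder, String dkOrder) throws Exception {
      testCollationKeySort(
          new CollationKeyAnalyzer(Collator.getInstance(Locale.US)),
          new CollationKeyAnalyzer(Collator.getInstance(Locale.FRANCE)),
          new CollationKeyAnalyzer(Collator.getInstance(new Locale("sv", "SE"))),
          new CollationKeyAnalyzer(Collator.getInstance(new Locale("da", "DK"))),
          usOrder, frOrder, svOrder, dkOrder);
    }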
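
Finally, assertThreadSafe() records on the calling thread the key each random term produces, then replays every term through the same analyzer instance from several threads and requires identical output; this matters because java.text.Collator instances are not thread-safe on their own. Continuing the hypothetical fixture:

    public void testThreadSafe() throws Exception {
      // One analyzer instance is shared by all threads inside assertThreadSafe().
      Collator collator = Collator.getInstance(new Locale("ar"));
      collator.setStrength(Collator.PRIMARY); // compare base letters only
      assertThreadSafe(new CollationKeyAnalyzer(collator));
    }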