X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestIndexWriterUnicode.java diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestIndexWriterUnicode.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestIndexWriterUnicode.java deleted file mode 100644 index f6df136..0000000 --- a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestIndexWriterUnicode.java +++ /dev/null @@ -1,288 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; -import java.util.HashSet; -import java.util.Iterator; -import java.util.Random; -import java.util.Set; - -import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.store.Directory; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util.UnicodeUtil; - -public class TestIndexWriterUnicode extends LuceneTestCase { - - final String[] utf8Data = new String[] { - // unpaired low surrogate - "ab\udc17cd", "ab\ufffdcd", - "\udc17abcd", "\ufffdabcd", - "\udc17", "\ufffd", - "ab\udc17\udc17cd", "ab\ufffd\ufffdcd", - "\udc17\udc17abcd", "\ufffd\ufffdabcd", - "\udc17\udc17", "\ufffd\ufffd", - - // unpaired high surrogate - "ab\ud917cd", "ab\ufffdcd", - "\ud917abcd", "\ufffdabcd", - "\ud917", "\ufffd", - "ab\ud917\ud917cd", "ab\ufffd\ufffdcd", - "\ud917\ud917abcd", "\ufffd\ufffdabcd", - "\ud917\ud917", "\ufffd\ufffd", - - // backwards surrogates - "ab\udc17\ud917cd", "ab\ufffd\ufffdcd", - "\udc17\ud917abcd", "\ufffd\ufffdabcd", - "\udc17\ud917", "\ufffd\ufffd", - "ab\udc17\ud917\udc17\ud917cd", "ab\ufffd\ud917\udc17\ufffdcd", - "\udc17\ud917\udc17\ud917abcd", "\ufffd\ud917\udc17\ufffdabcd", - "\udc17\ud917\udc17\ud917", "\ufffd\ud917\udc17\ufffd" - }; - - private int nextInt(int lim) { - return random.nextInt(lim); - } - - private int nextInt(int start, int end) { - return start + nextInt(end-start); - } - - private boolean fillUnicode(char[] buffer, char[] expected, int offset, int count) { - final int len = offset + count; - boolean hasIllegal = false; - - if (offset > 0 && buffer[offset] >= 0xdc00 && buffer[offset] < 0xe000) - // Don't start in the middle of a valid surrogate pair - offset--; - - for(int i=offset;i> 10) + UnicodeUtil.UNI_SUR_HIGH_START); - chars[len++] = (char) (((ch-0x0010000) & 0x3FFL) + UnicodeUtil.UNI_SUR_LOW_START); - } - - UnicodeUtil.UTF16toUTF8(chars, 0, len, utf8); - - String s1 = new String(chars, 0, len); - String s2 = new String(utf8.result, 0, utf8.length, "UTF-8"); - assertEquals("codepoint " + ch, s1, s2); - - UnicodeUtil.UTF8toUTF16(utf8.result, 0, utf8.length, utf16); - assertEquals("codepoint " + ch, s1, new String(utf16.result, 0, utf16.length)); - - byte[] b = s1.getBytes("UTF-8"); - assertEquals(utf8.length, b.length); - for(int j=0;j