From: Marcin Koziej
Date: Tue, 11 Oct 2011 11:39:55 +0000 (+0200)
Subject: PyLucene 3.4.0-1 import
X-Git-Url: https://git.mdrn.pl/pylucene.git/commitdiff_plain/d6fa7893dc952f8fe6e01d68c9ded2e3f5b3f9de

PyLucene 3.4.0-1 import
---
d6fa7893dc952f8fe6e01d68c9ded2e3f5b3f9de
diff --git a/CHANGES b/CHANGES
new file mode 100644
index 0000000..e69ad04
--- /dev/null
+++ b/CHANGES
@@ -0,0 +1,116 @@
+
+Version 3.3 -> 3.4.0
+--------------------
+ - added new join contrib module to build
+ - PyLucene built with JCC 2.11
+
+Version 3.2 -> 3.3
+------------------
+ - using Lucene 3.3 sources
+ - adapted to FieldComparator becoming generic
+ - added new grouping contrib module to build
+ - PyLucene built with JCC 2.10
+
+Version 3.1.0 -> 3.2
+--------------------
+ - using Lucene 3.2 sources
+ - PyLucene built with JCC 2.9
+ - rearranged Lucene source checkout tree to reflect new constraints
+
+Version 3.0.0 -> 3.1.0
+----------------------
+ - using Lucene 3.1 sources
+ - improved support for building on Windows with mingw32
+ - added wininst target to Makefile
+ - added port of ICUNormalizer2Filter using C++ ICU's Normalizer2 via PyICU 1.1
+ - added port of ICUFoldingFilter using C++ ICU's Normalizer2 via PyICU 1.1
+ - added port of ICUTransformFilter using C++ ICU's Transliterator via PyICU 1.1
+ - fixed "Lucene in Action" samples left over on old API
+ - improved support for adding optional contrib modules
+ - added --package java.util.regex to wrap constructors on PatternAnalyzer
+ - fixed mansearch.py sample to reflect API changes
+ - PyLucene built with JCC 2.8
+
+Version 2.9.0 -> 3.0.0
+----------------------
+ - unit tests ported to new API
+ - removed InstantiatedIndex contrib from default build
+ - with JCC 2.5's Java generics support, a lot less downcasting needed
+ - Java Lucene sources now included in PyLucene source distribution
+ - "Lucene in Action" samples and tests converted to new Lucene 3.0 API
+ - PyLucene built with JCC 2.5
+
+Version 2.4.1 -> 2.9.0
+----------------------
+ - renamed the Highlighter's SpanScorer class to HighlighterSpanScorer
+ - fixed bug in Makefile's test target which tested installed build
+ - added Mac OS X 10.6 sections to Makefile
+ - added FieldCache.Parser Python extension classes (used in test/test_Sort.py)
+ - added FieldComparator and FieldComparatorSource Python extension classes
+ - added 'memory' contrib module to default build
+ - PyLucene built with JCC 2.4
+
+Version 2.4.0 -> 2.4.1
+----------------------
+ - PyLucene with JCC now a subproject of the Apache Lucene project
+ - documentation moved to http://lucene.apache.org/pylucene
+ - added java.util.Arrays to the build to bridge the Java array/collection gap
+ - added collections.py module with JavaSet class, a Python java.util.Set
+ - fixed bug in PythonQueryParser overriding wrong method (Aaron Lav)
+ - PyLucene built with JCC 2.2
+ - fixed bug with collections.py shadowing Python 2.6's during build
+ - passing strings for byte[] or char[] is no longer supported, use JArray
+ - added copy of PyLucene web site to distribution for offline viewing
+
+Version 2.3.2 -> 2.4.0
+----------------------
+ - fixed Debian bug http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=499599
+ - arrays are now wrapped with JArray() instances instead of expanded into lists
+ - return by value in arrays now supported
+ - PythonTermDocs removed since arrays can now receive values
+ - PythonReader removed since arrays now wrapped
+ - added InstantiatedIndex contrib to build
+ - PyLucene built with JCC 2.1
+
+Version 2.3.1 -> 2.3.2
+----------------------
+ - fixed code generation for clone() broken by finalization proxy work
+ - added 'union' and 'NULL' to the list of reserved words
+ - fixed castCheck() to work with finalization proxies
+ - added scorePayload() delegator to PythonSimilarityDelegator
+ - added support for --install-dir and --use-distutils options
+ - added support for INSTALL_OPT to Makefile
+ - fixed basic samples to initialize VM
+ - added bdist target to Makefile
+
+Version 2.3 -> 2.3.1
+--------------------
+ - fixed bug in JCC using the wrong field modifiers for setter (Bill Janssen)
+ - added missing calls for generating wrappers for ancestors of Exception
+ - added missing call for generating wrappers for String
+ - added PythonTokenizer for implementing complete tokenizers in Python
+
+Version 2.2 -> 2.3
+------------------
+ - PyLucene with JCC introduced
+ - added support for Python 2.3.5
+ - added support for using clone() with extensions
+ - renamed decRef() (and incRef()) native extensions method to pythonDecRef()
+ - improved error reporting a bit
+ - JCC now generates Python properties for get/set/is methods
+ - fixed bug in generated code invoking parent method when inherited from above
+ - added support for building on 64-bit Linux (Ubuntu 7.10)
+ - added support for implicitly iterable Enumeration
+ - added support for --root and --prefix for jcc invocations (Esteve Fernandez)
+ - jcc switched to setuptools by default (and fallback on distutils)
+ - fixed bug http://bugzilla.osafoundation.org/show_bug.cgi?id=11643
+ - added support for automatic boxing of primitives when Object is expected
+ - fixed bug in missing extensions' Iterator and Enumeration methods
+ - added JavaSet.py sample using PythonSet and PythonIterator extensions
+ - added missing LICENSE files
+ - fixed memory leak when calling inherited methods via callSuper()
+ - made finalize() method public on extensions for manually breaking ref cycle
+ - added support for building on Solaris with Sun Studio C++ (Solaris 11)
+ - fixed leak of local refs of jstring when converting to an array of String
+ - automated finalization of extensions via proxy for breaking ref cycle
+ - added Py_CLEAR and Py_VISIT macros for Python 2.3.5 compilation
diff --git a/CREDITS b/CREDITS
new file mode 100644
index 0000000..ef53ab8
--- /dev/null
+++ b/CREDITS
@@ -0,0 +1,29 @@
+
+PyLucene is a JCC-compiled Python extension of Java Lucene and wouldn't be
+possible without the tireless efforts of the people and open source projects
+below.
+
+ - the Apache Lucene developers,
+   http://lucene.apache.org/java/docs/whoweare.html
+
+ - the Open Source Applications Foundation, for hosting the project from
+   2004 to 2008: http://www.osafoundation.org
+
+ - Andi Vajda, PyLucene and JCC project founder and maintainer, for
+   believing that PyLucene should be feasible
+
+ - the following people contributed patches, samples, bug reports
+   and resources:
+   . Kapil Thangavelu (hazmat): FSDirectory support, first unit test
+   . Frank Wierzbicki: IndexFiles.py and SearchFiles.py samples
+   . Andreas Jung: several bug reports, nasty bugs indeed
+   . Jeff Bowden: several bug reports and API additions via patches
+   . Wai Yip Tung: test_PyLuceneThread.py unit test, Windows threads testing
+   . Yura Smolsky: test_Highlighter.py unit test, numerous bug reports
+   . Steve Jenson: MultiFieldQueryParser addition to test_PyLucene.py
+   . Erik Hatcher: man page index and search samples
+   . Bill Janssen: many bug reports and 'shared mode' suggestion
+   . 
Aaron Lav: several memory leaks, fixed with patches and tests + . Grant Ingersoll: for inviting and sponsoring PyLucene's move to Apache + +Thank you all ! diff --git a/INSTALL b/INSTALL new file mode 100644 index 0000000..139f0a2 --- /dev/null +++ b/INSTALL @@ -0,0 +1,3 @@ + +Please see doc/documentation/install.html or +http://lucene.apache.org/pylucene/documentation/install.html diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..d645695 --- /dev/null +++ b/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..7502d17 --- /dev/null +++ b/Makefile @@ -0,0 +1,374 @@ + +# Makefile for building PyLucene +# +# Supported operating systems: Mac OS X, Linux and Windows. +# See INSTALL file for requirements. +# See jcc/INSTALL for information about --shared. +# +# Steps to build +# 1. Edit the sections below as documented +# 2. Edit the JARS variable to add optional contrib modules not defaulted +# 3. make +# 4. make install +# +# The install target installs the lucene python extension in python's +# site-packages directory. +# + +VERSION=3.4.0-1 +LUCENE_SVN_VER=HEAD +LUCENE_VER=3.4.0 +LUCENE_SVN=http://svn.apache.org/repos/asf/lucene/dev/tags/lucene_solr_3_4_0 +PYLUCENE:=$(shell pwd) +LUCENE_SRC=lucene-java-$(LUCENE_VER) +LUCENE=$(LUCENE_SRC)/lucene + +# +# You need to uncomment and edit the variables below in the section +# corresponding to your operating system. +# +# Windows drive-absolute paths need to be expressed cygwin style. +# +# PREFIX: where programs are normally installed on your system (Unix). +# PREFIX_PYTHON: where your version of python is installed. +# JCC: how jcc is invoked, depending on the python version: +# - python 2.7: +# $(PYTHON) -m jcc +# - python 2.6: +# $(PYTHON) -m jcc.__main__ +# - python 2.5: +# $(PYTHON) -m jcc +# - python 2.4: +# $(PYTHON) $(PREFIX_PYTHON)/lib/python2.4/site-packages/jcc/__main__.py +# NUM_FILES is the number of wrapper files to generate. By default, jcc +# generates all C++ classes into one single file. This may exceed a compiler +# limit. 
+# + +# Mac OS X 10.6 (64-bit Python 2.6, Java 1.6) +#PREFIX_PYTHON=/usr +#ANT=ant +#PYTHON=$(PREFIX_PYTHON)/bin/python +#JCC=$(PYTHON) -m jcc.__main__ --shared --arch x86_64 +#NUM_FILES=3 + +# Mac OS X 10.6 (MacPorts 1.8.0 64-bit Python 2.7, Java 1.6) +#PREFIX_PYTHON=/opt/local +#ANT=ant +#PYTHON=$(PREFIX_PYTHON)/bin/python +#JCC=$(PYTHON) -m jcc --shared --arch x86_64 +#NUM_FILES=3 + +# Mac OS X 10.6 (MacPorts 1.8.0 64-bit Python 2.6, Java 1.6) +#PREFIX_PYTHON=/opt/local +#ANT=ant +#PYTHON=$(PREFIX_PYTHON)/bin/python +#JCC=$(PYTHON) -m jcc.__main__ --shared --arch x86_64 +#NUM_FILES=3 + +# Mac OS X 10.6 (64-bit and 32-bit Python 2.6 together, Java 1.6) +#PREFIX_PYTHON=/usr +#ANT=ant +#PYTHON=$(PREFIX_PYTHON)/bin/python +#JCC=$(PYTHON) -m jcc.__main__ --shared --arch x86_64 --arch i386 +#NUM_FILES=3 + +# Mac OS X 10.5 (32-bit Python 2.5, Java 1.5) +#PREFIX_PYTHON=/usr +#ANT=ant +#PYTHON=$(PREFIX_PYTHON)/bin/python +#JCC=$(PYTHON) -m jcc --shared +#NUM_FILES=3 + +# Mac OS X (Python 2.3.5, Java 1.5, setuptools 0.6c7, Intel Mac OS X 10.4) +#PREFIX_PYTHON=/usr +#ANT=ant +#PYTHON=$(PREFIX_PYTHON)/bin/python +#JCC=$(PYTHON) /System/Library/Frameworks/Python.framework/Versions/2.3/lib/python2.3/site-packages/JCC-2.3-py2.3-macosx-10.4-i386.egg/jcc/__init__.py +#NUM_FILES=3 + +# Mac OS X (Python 2.3.5, Java 1.5, setuptools 0.6c7, PPC Mac OS X 10.4) +#PREFIX_PYTHON=/usr +#ANT=ant +#PYTHON=$(PREFIX_PYTHON)/bin/python +#JCC=$(PYTHON) /System/Library/Frameworks/Python.framework/Versions/2.3/lib/python2.3/site-packages/JCC-2.3-py2.3-macosx-10.4-ppc.egg/jcc/__init__.py +#NUM_FILES=3 + +# Linux (Ubuntu 6.06, Python 2.4, Java 1.5, no setuptools) +#PREFIX_PYTHON=/usr +#ANT=ant +#PYTHON=$(PREFIX_PYTHON)/bin/python +#JCC=$(PYTHON) $(PREFIX_PYTHON)/lib/python2.4/site-packages/jcc/__init__.py +#NUM_FILES=3 + +# Linux (Ubuntu 8.10 64-bit, Python 2.5.2, OpenJDK 1.6, setuptools 0.6c9) +#PREFIX_PYTHON=/usr +#ANT=ant +#PYTHON=$(PREFIX_PYTHON)/bin/python +#JCC=$(PYTHON) -m jcc --shared +#NUM_FILES=3 + +# FreeBSD +#PREFIX_PYTHON=/usr +#ANT=ant +#PYTHON=$(PREFIX_PYTHON)/bin/python +#JCC=$(PYTHON) -m jcc +#NUM_FILES=3 + +# Solaris (Solaris 11, Python 2.4 32-bit, Sun Studio 12, Java 1.6) +#PREFIX_PYTHON=/usr +#ANT=/usr/local/apache-ant-1.7.0/bin/ant +#PYTHON=$(PREFIX_PYTHON)/bin/python +#JCC=$(PYTHON) $(PREFIX_PYTHON)/lib/python2.4/site-packages/jcc/__init__.py +#NUM_FILES=3 + +# Windows (Win32, Python 2.5.1, Java 1.6, ant 1.7.0) +#PREFIX_PYTHON=/cygdrive/o/Python-2.5.2/PCbuild +#ANT=JAVA_HOME=o:\\Java\\jdk1.6.0_02 /cygdrive/o/java/apache-ant-1.7.0/bin/ant +#PYTHON=$(PREFIX_PYTHON)/python.exe +#JCC=$(PYTHON) -m jcc --shared +#NUM_FILES=3 + +# Windows (Win32, msys/MinGW, Python 2.6.4, Java 1.6, ant 1.7.1 (WinAnt)) +#PREFIX_PYTHON=/c/Python26 +#ANT=JAVA_HOME="c:\\Program Files\\Java\\jdk1.6.0_18" "/c/Program Files/WinAnt/bin/ant" +#PYTHON=$(PREFIX_PYTHON)/python.exe +#JCC=$(PYTHON) -m jcc.__main__ --shared --compiler mingw32 +#NUM_FILES=3 + +# Windows (Win32, Python 2.7, Java 1.6, ant 1.8.1, Java not on PATH) +#PREFIX_PYTHON=/cygdrive/c/Python27 +#ANT=JAVA_HOME=c:\\jdk1.6.0_22 /cygdrive/c/java/apache-ant-1.8.1/bin/ant +#PYTHON=$(PREFIX_PYTHON)/python.exe +#JCC=$(PYTHON) -m jcc --shared --find-jvm-dll +#NUM_FILES=3 + +JARS=$(LUCENE_JAR) + +# comment/uncomment the desired/undesired optional contrib modules below +JARS+=$(ANALYZERS_JAR) # many language analyzers +JARS+=$(MEMORY_JAR) # single-document memory index +JARS+=$(HIGHLIGHTER_JAR) # needs memory contrib +JARS+=$(EXTENSIONS_JAR) # needs highlighter contrib 
+JARS+=$(QUERIES_JAR) # regex and other contrib queries +#JARS+=$(SMARTCN_JAR) # smart chinese analyzer +#JARS+=$(SPATIAL_JAR) # spatial lucene +JARS+=$(GROUPING_JAR) # grouping module +JARS+=$(JOIN_JAR) # join module + + +# +# No edits required below +# + +SVNOP?=export + +ifeq ($(DEBUG),1) + DEBUG_OPT=--debug +endif + +DEFINES=-DPYLUCENE_VER="\"$(VERSION)\"" -DLUCENE_VER="\"$(LUCENE_VER)\"" + +LUCENE_JAR=$(LUCENE)/build/lucene-core-$(LUCENE_VER).jar +ANALYZERS_JAR=$(LUCENE)/build/contrib/analyzers/common/lucene-analyzers-$(LUCENE_VER).jar +HIGHLIGHTER_JAR=$(LUCENE)/build/contrib/highlighter/lucene-highlighter-$(LUCENE_VER).jar +MEMORY_JAR=$(LUCENE)/build/contrib/memory/lucene-memory-$(LUCENE_VER).jar +QUERIES_JAR=$(LUCENE)/build/contrib/queries/lucene-queries-$(LUCENE_VER).jar +EXTENSIONS_JAR=build/jar/extensions.jar +SMARTCN_JAR=$(LUCENE)/build/contrib/analyzers/smartcn/lucene-smartcn-$(LUCENE_VER).jar +SPATIAL_JAR=$(LUCENE)/build/contrib/spatial/lucene-spatial-$(LUCENE_VER).jar +GROUPING_JAR=$(LUCENE)/build/contrib/grouping/lucene-grouping-$(LUCENE_VER).jar +JOIN_JAR=$(LUCENE)/build/contrib/join/lucene-join-$(LUCENE_VER).jar + +ICUPKG:=$(shell which icupkg) + +.PHONY: generate compile install default all clean realclean \ + sources test jars distrib + +default: all + +$(LUCENE_SRC): + mkdir -p $(LUCENE_SRC) + svn $(SVNOP) -r $(LUCENE_SVN_VER) $(LUCENE_SVN)/lucene $(LUCENE_SRC)/lucene + +sources: $(LUCENE_SRC) + +to-orig: sources + mkdir -p $(LUCENE)-orig + tar -C $(LUCENE) -cf - . | tar -C $(LUCENE)-orig -xvf - + +from-orig: $(LUCENE)-orig + mkdir -p $(LUCENE) + tar -C $(LUCENE)-orig -cf - . | tar -C $(LUCENE) -xvf - + +lucene: + rm -f $(LUCENE_JAR) + $(MAKE) $(LUCENE_JAR) + +$(LUCENE_JAR): $(LUCENE) + cd $(LUCENE); $(ANT) -Dversion=$(LUCENE_VER) + +$(ANALYZERS_JAR): $(LUCENE_JAR) + cd $(LUCENE)/contrib/analyzers/common; $(ANT) -Dversion=$(LUCENE_VER) + +$(MEMORY_JAR): $(LUCENE_JAR) + cd $(LUCENE)/contrib/memory; $(ANT) -Dversion=$(LUCENE_VER) + +$(HIGHLIGHTER_JAR): $(LUCENE_JAR) + cd $(LUCENE)/contrib/highlighter; $(ANT) -Dversion=$(LUCENE_VER) + +$(QUERIES_JAR): $(LUCENE_JAR) + cd $(LUCENE)/contrib/queries; $(ANT) -Dversion=$(LUCENE_VER) + +$(EXTENSIONS_JAR): $(LUCENE_JAR) + $(ANT) -f extensions.xml -Dlucene.dir=$(LUCENE) + +$(SMARTCN_JAR): $(LUCENE_JAR) + cd $(LUCENE)/contrib/analyzers/smartcn; $(ANT) -Dversion=$(LUCENE_VER) + +$(SPATIAL_JAR): $(LUCENE_JAR) + cd $(LUCENE)/contrib/spatial; $(ANT) -Dversion=$(LUCENE_VER) + +$(GROUPING_JAR): $(LUCENE_JAR) + cd $(LUCENE)/contrib/grouping; $(ANT) -Dversion=$(LUCENE_VER) + +$(JOIN_JAR): $(LUCENE_JAR) + cd $(LUCENE)/contrib/join; $(ANT) -Dversion=$(LUCENE_VER) + +JCCFLAGS?= + +jars: $(JARS) + + +ifneq ($(ICUPKG),) + +RESOURCES=--resources $(LUCENE)/contrib/icu/src/resources +ENDIANNESS:=$(shell $(PYTHON) -c "import struct; print struct.pack('h', 1) == '\000\001' and 'b' or 'l'") + +resources: $(LUCENE)/contrib/icu/src/resources/org/apache/lucene/analysis/icu/utr30.dat + +$(LUCENE)/contrib/icu/src/resources/org/apache/lucene/analysis/icu/utr30.dat: $(LUCENE)/contrib/icu/src/resources/org/apache/lucene/analysis/icu/utr30.nrm + rm -f $@ + cd $(dir $<); $(ICUPKG) --type $(ENDIANNESS) --add $(notdir $<) new $(notdir $@) + +else + +RESOURCES= + +resources: + @echo ICU not installed + +endif + +GENERATE=$(JCC) $(foreach jar,$(JARS),--jar $(jar)) \ + $(JCCFLAGS) \ + --package java.lang java.lang.System \ + java.lang.Runtime \ + --package java.util java.util.Arrays \ + java.util.HashMap \ + java.util.HashSet \ + java.text.SimpleDateFormat \ + 
java.text.DecimalFormat \ + java.text.Collator \ + --package java.util.regex \ + --package java.io java.io.StringReader \ + java.io.InputStreamReader \ + java.io.FileInputStream \ + --exclude org.apache.lucene.queryParser.Token \ + --exclude org.apache.lucene.queryParser.TokenMgrError \ + --exclude org.apache.lucene.queryParser.QueryParserTokenManager \ + --exclude org.apache.lucene.queryParser.ParseException \ + --exclude org.apache.lucene.search.regex.JakartaRegexpCapabilities \ + --exclude org.apache.regexp.RegexpTunnel \ + --exclude org.apache.lucene.analysis.cn.smart.AnalyzerProfile \ + --python lucene \ + --mapping org.apache.lucene.document.Document 'get:(Ljava/lang/String;)Ljava/lang/String;' \ + --mapping java.util.Properties 'getProperty:(Ljava/lang/String;)Ljava/lang/String;' \ + --sequence java.util.AbstractList 'size:()I' 'get:(I)Ljava/lang/Object;' \ + --rename org.apache.lucene.search.highlight.SpanScorer=HighlighterSpanScorer \ + --version $(LUCENE_VER) \ + --module python/collections.py \ + --module python/ICUNormalizer2Filter.py \ + --module python/ICUFoldingFilter.py \ + --module python/ICUTransformFilter.py \ + $(RESOURCES) \ + --files $(NUM_FILES) + +generate: jars + $(GENERATE) + +compile: jars + $(GENERATE) --build $(DEBUG_OPT) + +install: jars + $(GENERATE) --install $(DEBUG_OPT) $(INSTALL_OPT) + +bdist: jars + $(GENERATE) --bdist + +wininst: jars + $(GENERATE) --wininst + +all: sources jars resources compile + @echo build of $(PYLUCENE_LIB) complete + +clean: + if test -f $(LUCENE)/build.xml; then cd $(LUCENE); $(ANT) clean; fi + rm -rf $(LUCENE)/build build + +realclean: + if test ! -d $(LUCENE)/.svn; then rm -rf $(LUCENE_SRC); else rm -rf $(LUCENE)/build; fi + rm -rf build samples/LuceneInAction/index + +OS=$(shell uname) +BUILD_TEST:=$(PYLUCENE)/build/test + +ifeq ($(findstring CYGWIN,$(OS)),CYGWIN) + BUILD_TEST:=`cygpath -aw $(BUILD_TEST)` +else + ifeq ($(findstring MINGW,$(OS)),MINGW) + BUILD_TEST:=`$(PYTHON) -c "import os, sys; print os.path.normpath(sys.argv[1]).replace(chr(92), chr(92)*2)" $(BUILD_TEST)` + endif +endif + +install-test: + mkdir -p $(BUILD_TEST) + PYTHONPATH=$(BUILD_TEST) $(GENERATE) --install $(DEBUG_OPT) --install-dir $(BUILD_TEST) + +samples/LuceneInAction/index: + cd samples/LuceneInAction; PYTHONPATH=$(BUILD_TEST) $(PYTHON) index.py + +test: install-test samples/LuceneInAction/index + find test -name 'test_*.py' | PYTHONPATH=$(BUILD_TEST) xargs -t -n 1 $(PYTHON) + ls samples/LuceneInAction/*Test.py | PYTHONPATH=$(BUILD_TEST) xargs -t -n 1 $(PYTHON) + PYTHONPATH=$(BUILD_TEST) $(PYTHON) samples/LuceneInAction/AnalyzerDemo.py + PYTHONPATH=$(BUILD_TEST) $(PYTHON) samples/LuceneInAction/AnalyzerUtils.py + PYTHONPATH=$(BUILD_TEST) $(PYTHON) samples/LuceneInAction/BooksLikeThis.py + PYTHONPATH=$(BUILD_TEST) $(PYTHON) samples/LuceneInAction/Explainer.py samples/LuceneInAction/index programming + PYTHONPATH=$(BUILD_TEST) $(PYTHON) samples/LuceneInAction/HighlightIt.py + PYTHONPATH=$(BUILD_TEST) $(PYTHON) samples/LuceneInAction/SortingExample.py + + +ARCHIVE=pylucene-$(VERSION)-src.tar.gz +SITE=../site/build/site/en + +distrib: + mkdir -p distrib + svn export . distrib/pylucene-$(VERSION) + tar -cf - --exclude build $(LUCENE_SRC) | tar -C distrib/pylucene-$(VERSION) -xvf - + mkdir distrib/pylucene-$(VERSION)/doc + tar -C $(SITE) -cf - . 
| tar -C distrib/pylucene-$(VERSION)/doc -xvf - + cd distrib; tar -cvzf $(ARCHIVE) pylucene-$(VERSION) + cd distrib; gpg2 --armor --output $(ARCHIVE).asc --detach-sig $(ARCHIVE) + cd distrib; openssl md5 < $(ARCHIVE) > $(ARCHIVE).md5 + +stage: + cd distrib; scp -p $(ARCHIVE) $(ARCHIVE).asc $(ARCHIVE).md5 \ + people.apache.org:public_html/staging_area + +release: + cd distrib; scp -p $(ARCHIVE) $(ARCHIVE).asc $(ARCHIVE).md5 \ + people.apache.org:/www/www.apache.org/dist/lucene/pylucene + +print-%: + @echo $* = $($*) diff --git a/NOTICE b/NOTICE new file mode 100644 index 0000000..5649439 --- /dev/null +++ b/NOTICE @@ -0,0 +1,7 @@ + +Apache PyLucene + Copyright 2009 The Apache Software Foundation + Copyright (c) 2004-2008 Open Source Applications Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). diff --git a/README b/README new file mode 100644 index 0000000..54c7c71 --- /dev/null +++ b/README @@ -0,0 +1,2 @@ + +Please see doc/documentation/readme.html diff --git a/doc/broken-links.xml b/doc/broken-links.xml new file mode 100644 index 0000000..06cd3d4 --- /dev/null +++ b/doc/broken-links.xml @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/doc/documentation/install.html b/doc/documentation/install.html new file mode 100644 index 0000000..707dbb7 --- /dev/null +++ b/doc/documentation/install.html @@ -0,0 +1,319 @@ + + + + + + + +Installing PyLucene + + + + + + + + + +

+Installing PyLucene
+-------------------
+
+1 Building PyLucene
+
+PyLucene is completely code-generated by JCC, whose sources are
+included with the PyLucene sources.
+
+1.1 Requirements
+
+To build PyLucene, a Java Development Kit (JDK) and Ant
+(http://ant.apache.org) are required; use of the resulting PyLucene
+binaries requires only a Java Runtime Environment (JRE).
+
+The setuptools package (http://pypi.python.org/pypi/setuptools) is
+required to build and run PyLucene on Python 2.3.5. With later
+versions of Python, setuptools is only required for shared mode. See
+JCC's installation instructions (../jcc/documentation/install.html)
+for more information.
+
+1.2 For the Impatient Ones
+
+  1. pushd jcc
+  2. <edit setup.py to match your environment>
+  3. python setup.py build
+  4. sudo python setup.py install
+  5. popd
+  6. <edit Makefile to match your environment>
+  7. make
+  8. sudo make install
+  9. make test (look for failures)

+1.3 For the Rest of Us
+
+Before building PyLucene, JCC must be built first. See JCC's
+installation instructions (../jcc/documentation/install.html) for
+building and installing it.
+
+Once JCC is built and installed, PyLucene is built via make, which
+invokes JCC. See PyLucene's Makefile for configuration instructions.
+
+There are limits to both how many files can fit on the command line
+and how large a C++ file the C++ compiler can handle. By default, JCC
+generates one large C++ file containing the source code for all
+wrapper classes.
+
+Using the --files command line argument, this behaviour can be tuned
+to work around various limits, for example:
+
+  - to break up the large wrapper class file into about 2 files:
+      --files 2
+
+  - to break up the large wrapper class file into about 10 files:
+      --files 10
+
+  - to generate one C++ file per Java class wrapped:
+      --files separate
+

+2 Notes for Solaris
+
+PyLucene's Makefile is a GNU Makefile. Be sure to use gmake instead
+of plain make.
+
+Just as when building JCC, Python's distutils must be nudged a bit to
+invoke the correct compiler. Sun Studio's C compiler is called cc
+while its C++ compiler is called CC.
+
+To build PyLucene, use the following shell command to ensure that the
+C++ compiler is used:
+
+    $ CC=CC gmake

diff --git a/doc/documentation/install.pdf b/doc/documentation/install.pdf
new file mode 100644
index 0000000..b03c8f0
Binary files /dev/null and b/doc/documentation/install.pdf differ
diff --git a/doc/documentation/readme.html b/doc/documentation/readme.html
new file mode 100644
index 0000000..85e1d3a
--- /dev/null
+++ b/doc/documentation/readme.html
@@ -0,0 +1,509 @@

+PyLucene Features
+-----------------
+
+Warning: Before calling any PyLucene API that requires the Java VM,
+start it by calling initVM(classpath, ...). More about this function
+in JCC's documentation (../jcc/documentation/readme.html).
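+For example, a minimal sketch (it assumes the built lucene module is
+importable and that initVM() called with no arguments uses the
+classpath the module was built with):
+
+          import lucene
+
+          env = lucene.initVM()   # start the Java VM; returns the JCCEnv object
+          # PyLucene APIs may be called from here on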

+Installing PyLucene
+-------------------
+
+PyLucene is a Python extension built with JCC (../jcc/index.html).
+
+To build PyLucene, JCC needs to be built first. Sources for JCC are
+included with the PyLucene sources. Instructions for building and
+installing JCC are in ../jcc/documentation/install.html.
+
+Instructions for building PyLucene are in
+../documentation/install.html.
+
+API documentation
+-----------------
+
+PyLucene closely tracks Java Lucene releases. It intends to support
+the entire Lucene API.
+
+PyLucene also includes a number of Lucene contrib packages: the
+Snowball analyzer and stemmers, the highlighter package, analyzers
+for languages other than English, regular expression queries,
+specialized queries such as 'more like this', and more.
+
+This document only covers the pythonic extensions to Lucene offered
+by PyLucene as well as some differences between the Java and Python
+APIs. For the documentation on Java Lucene APIs, see
+http://lucene.apache.org/java/docs/api/index.html.
+
+To help with debugging and to support some Lucene APIs, PyLucene also
+exposes some Java runtime APIs.

+Samples
+-------
+
+The best way to learn PyLucene is to look at the many samples
+included with the PyLucene source release or on the web at:
+  http://svn.apache.org/viewcvs.cgi/lucene/pylucene/trunk/samples
+  http://svn.apache.org/viewcvs.cgi/lucene/pylucene/trunk/samples/LuceneInAction
+
+A large number of samples are shipped with PyLucene. Most notably,
+all the samples published in the Lucene in Action book that did not
+depend on a third-party Java library for which there was no obvious
+Python equivalent were ported to Python and PyLucene.
+
+Lucene in Action is a great companion to learning Lucene. Having all
+the samples available in Python should make it even easier for Python
+developers.
+
+Lucene in Action was written by Erik Hatcher and Otis Gospodnetic,
+both part of the Java Lucene development team, and is available from
+Manning Publications (http://www.manning.com/hatcher2).

+Threading support with attachCurrentThread
+------------------------------------------
+
+Before PyLucene APIs can be used from a thread that was not created
+by the Java Runtime (that is, any thread other than the main thread),
+the attachCurrentThread() method must be called on the JCCEnv object
+returned by the initVM() or getVMEnv() functions.
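+For example, a sketch of a Python-created worker thread (the worker
+body is illustrative):
+
+          import threading
+          import lucene
+
+          lucene.initVM()
+
+          def worker():
+              lucene.getVMEnv().attachCurrentThread()  # must precede any Lucene call
+              # ... use PyLucene APIs here ...
+
+          t = threading.Thread(target=worker)
+          t.start()
+          t.join()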

+Exception handling with lucene.JavaError
+----------------------------------------
+
+Java exceptions are caught at the language barrier and reported to
+Python by raising a JavaError instance whose args tuple contains the
+actual Java Exception instance.
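+For example, a sketch based on the description above (the failing
+call is hypothetical):
+
+          from lucene import JavaError
+
+          try:
+              someLuceneCall()             # hypothetical call raising a Java exception
+          except JavaError, e:
+              javaException = e.args[0]    # the wrapped java.lang.Exception instance
+              print javaException.getMessage()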

+Handling Java arrays
+--------------------
+
+Java arrays are returned to Python in a JArray wrapper instance that
+implements the Python sequence protocol. It is possible to change
+array elements but not to change the array size.
+
+A few Lucene APIs take array arguments and expect values to be
+returned in them. To call such an API and be able to retrieve the
+array values after the call, a Java array needs to be instantiated
+first. For example, accessing termDocs:
+
+          termDocs = reader.termDocs(Term("isbn", isbn))
+          docs = JArray('int')(1)   # allocate an int[1] array
+          freq = JArray('int')(1)   # allocate an int[1] array
+          if termDocs.read(docs, freq) == 1:
+              bits.set(docs[0])     # access the array's first element
+        
+

+In addition to 'int', the JArray function accepts 'object', 'string',
+'bool', 'byte', 'char', 'double', 'float', 'long' and 'short' to
+create an array of the corresponding type. The JArray('object')
+constructor takes a second argument denoting the class of the object
+elements. This argument is optional and defaults to Object.
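+For example, a sketch (Term is used here as an illustrative element
+class):
+
+          from lucene import JArray, Term
+
+          terms = JArray('object')(2, Term)   # allocate a Term[2] array
+          ints = JArray('int')(16)            # allocate an int[16] array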

+To convert a char array to a Python string, use a ''.join(array)
+construct.
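+For example, a one-line sketch (it assumes a Python string is
+accepted as the initializing sequence of chars, as described below):
+
+          text = ''.join(JArray('char')('abc'))   # round-trips back to 'abc'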

+Instead of an integer denoting the size of the desired Java array, a
+sequence of objects of the expected element type may be passed in to
+the array constructor. For example:
+
+          # creating a Java array of double from the [1.5, 2.5] list
+          JArray('double')([1.5, 2.5])
+        
+
+All methods that expect an array also accept a sequence of Python
+objects of the expected element type. If no values are expected from
+the array arguments after the call, it is therefore not necessary to
+instantiate a Java array to make such calls.
+
+See the JCC documentation for more information about handling arrays.

+Differences between the Java Lucene and PyLucene APIs
+-----------------------------------------------------
+
+  - The PyLucene API exposes all Java Lucene classes in a flat
+    namespace in the PyLucene module. For example, the Java import
+    statement 'import org.apache.lucene.index.IndexReader;'
+    corresponds to the Python import statement 'from lucene import
+    IndexReader'.
+
+  - Downcasting is a common operation in Java but not a concept in
+    Python. Because the wrapper objects implement exactly the APIs of
+    the declared type of the wrapped object, all classes implement
+    two class methods called instance_ and cast_ that verify and cast
+    an instance respectively; see the sketch after this list.
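+For example, a sketch of downcasting a value whose declared type is
+Object (the variable obj is illustrative):
+
+          from lucene import Document
+
+          if Document.instance_(obj):      # is obj really a Document?
+              doc = Document.cast_(obj)    # downcast so Document methods are available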

+Pythonic extensions to the Java Lucene APIs
+-------------------------------------------
+
+Java is a very verbose language. Python, on the other hand, offers
+many syntactically attractive constructs for iteration, property
+access, etc. As the Java Lucene samples from the Lucene in Action
+book were ported to Python, PyLucene received a number of pythonic
+extensions listed here:
+
+  - Iterating search hits is a very common operation. Hits instances
+    are iterable in Python. Two values are returned for each
+    iteration, the zero-based number of the document in the Hits
+    instance and the document instance itself.
+    The Java loop:
+
+              for (int i = 0; i < hits.length(); i++) {
+                  Document doc = hits.doc(i);
+                  System.out.println(hits.score(i) + " : " + doc.get("title"));
+              }
+
+    can be written in Python:
+
+             for hit in hits:
+                 hit = Hit.cast_(hit)
+                 print hit.getScore(), ':', hit.getDocument()['title']
+
+    If hit.iterator()'s next() method were declared to return Hit
+    instead of Object, the above cast_() call would not be necessary.
+    The same Java loop can also be written:
+
+              for i in xrange(len(hits)):
+                  print hits.score(i), ':', hits[i]['title']
+
+  - Hits instances partially implement the Python 'sequence'
+    protocol.
+    The Java expressions:
+
+              hits.length()
+              doc = hits.get(i)
+
+    are better written in Python:
+
+              len(hits)
+              doc = hits[i]
+
+  - Document instances have fields whose values can be accessed
+    through the mapping protocol.
+    The Java expression:
+
+              doc.get("title")
+
+    is better written in Python:
+
+              doc['title']
+
+  - Document instances can be iterated over for their fields.
+    The Java loop:
+
+              Enumeration fields = doc.getFields();
+              while (fields.hasMoreElements()) {
+                  Field field = (Field) fields.nextElement();
+                  ...
+              }
+
+    is better written in Python:
+
+              for field in doc.getFields():
+                  field = Field.cast_(field)
+                  ...
+
+    Once JCC heeds Java 1.5 type parameters and once Java Lucene
+    makes use of them, such casting should become unnecessary.

+Extending Java Lucene classes from Python
+-----------------------------------------
+
+Many areas of the Lucene API expect the programmer to provide their
+own implementation or specialization of a feature where the default
+is inappropriate. For example, text analyzers and tokenizers are an
+area where many parameters and environmental or cultural factors are
+calling for customization.
+
+PyLucene enables this by providing Java extension points listed below
+that serve as proxies for Java to call back into the Python
+implementations of these customizations.
+
+These extension points are simple Java classes that JCC generates the
+native C++ implementations for. It is easy to add more such extension
+classes into the 'java' directory of the PyLucene source tree.
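+For example, a sketch of implementing an analyzer in Python via the
+PythonAnalyzer extension point (the class name and the tokenizer
+returned are illustrative choices):
+
+          from lucene import PythonAnalyzer, LowerCaseTokenizer
+
+          class MyAnalyzer(PythonAnalyzer):
+              def tokenStream(self, fieldName, reader):
+                  # Java calls back into this Python implementation
+                  return LowerCaseTokenizer(reader)
+
+          # an instance of MyAnalyzer can now be passed wherever a
+          # Lucene Analyzer is expected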

+To learn more about this topic, please refer to the JCC
+documentation.
+
+Please refer to the classes in the 'java' tree for currently
+available extension points. Examples of uses of these extension
+points are to be found in PyLucene's unit tests and Lucene in Action
+samples.
diff --git a/doc/documentation/readme.pdf b/doc/documentation/readme.pdf
new file mode 100644
index 0000000..3d161a0
Binary files /dev/null and b/doc/documentation/readme.pdf differ
+Gb!#\h/D%+&:aF]+nR&ed'=iEMGf3mZrthZ=Ku\c>9"'Q9NshpdFF!cBA!6uP=/P$>E4mk%n2T4P^'jEhs^!#&(d#'lcHUBFVaKDE>#k$FaW)%^RhC=^Cd!"5]A+&cBlYFP7L.e4PqH$GIpLq:V+D_M54C^qGY6Ea2o.CiKo%BF=,KHH4?Cp)L+e8%r$98)*5.UI6F44NEn17:kseuG2E\C!U"dngmlYaIce<5h0S[,dXSsV^tHKa(T=BcN=f7m;]0haiaUI>*'G\al0%+HM4q"F7Sb[ol/e_EdMk*-=s2QDQTM>QXDRah<8shi<@154Do79)&$3TYpWqs74"YAQ1u*DJT<)@YHss&=esH?Q-.9-:c:e;\\G;&S\KEO@6*PP+7*6@Z$m*.*9tbtVmrPEMIr=qb1:_Tp)8I.eqqI0fP"(i<[4?]7,mJ>N(Q^gk[^R9q)'`>TGS:rR7fGmVg&db+c$BE/j\O=g)nH=Faj-:DbK_`sQ?E:HM_NDu[Er5;T![WPl@/U[0\l>_1OtB.[kQQQ_SPP;feru&si&R@ob7O4RRt9RkB&lE7pG\`s!^)f#imZ+pqaMRT=6_hmUPkf0p)Hb@X9#Z$mTCZ.rhsGH9^U:/]o?0^XH0X5YoD3Z%1Jcl?(bWS@19kj67(fkqL>N7bC]eECVY'q]A\aRjmp%2a0+l==]mY7\&FK%=?;4k^[-s,NhBKs^$ZO&QWl!:@6_PSXu3,X='jc$*tfhFU;%4T_GL=dd*+^pC:1SD`ND&uhW'a22Vqp"SLLE^&ATjWfUk0_opLq?%'6,5:mnlHreqVE?+f,m"gIbmGHmF0f%b83%;`)(ncRSV)!5Y9KMH>4bm$#V/"RR/*D`/RS7qU5IsX_T&MsIcl)!gnt*i`F;cB1CW*\/>Lq)>e.#crEZITa=9r,8iiZ8@`)n>TdR%mWsi.M.pccpM1?IQMK\EHfs*>_8u?$/0t#7g)FYncjJ:s;ticRM9u/XYEqEA5jsrHNhbgL/mdd@mTZN-#HCkn7'pdL>!9PF3I_j6gXLE!>RqKsp4SJCjW)Knjicihft2NH!'7*kHeXi\F-]>cY0IQ^bThkPB^i1Hef>MFWT@N>JJLgq[JA"pfmQ95L05=1)h"=XA/+b\NJ]*ZD&>primJ4.c`mTJU"uQ95RK"Ejmn4#Ic`qR[`_k%O#,ffI5Letaca_+$%I&@!_*Ds&T"c(:oCata4!mlb@-\ofc4j0(@(9Vj#.oGN=m9k5uW,=&7eEipZc=?a9KM4.32N_X[qAa#7AFrJn?W(eC]\,R*c!L'+jrtuH20OHej#KGN2SX\.V:DRr6F;Zp1#8=IO1"Qm3Q(8ZE`VYpXnG7&Z0CN_KgF&>bb%;+R#nfn[IaR@3crq5H?gARZ_#FL\XH<_~> +endstream +endobj +46 0 obj +<< /Type /Page +/Parent 1 0 R +/MediaBox [ 0 0 612 792 ] +/Resources 3 0 R +/Contents 45 0 R +>> +endobj +47 0 obj +<< /Length 1017 /Filter [ /ASCII85Decode /FlateDecode ] + >> +stream +Gat=*9iKe#&A@sBkQVGa=LR=P,@l'K&'A5>clZO=VY:)Nm8M^,-"s\P9kT2QsWr&+mF9dS(#PtLWOaNlC,%#ZoY,K8aXJBF+/jeZd%:Cnj%%L0e1=>!c/J;cs.!/_N+D.EnXL_(\Tf]b8(>Agkahsafnm/V+Ed`J0WOUeD5(=FbMrkKq+O2"&'-GfSZ_g"k=U1-a*DL`2,T?V2V1-Ujp]5N>G[:'3m2`j6r,aX6F*QT69d.@q)kVL!Wh9)/>9fJFkJE\u/;5>Yl'7-Z;<-c@o-bkV5-T5:qOr%76>rIN-%LK97$(*M1ko4dAUR2u8H@'2JQoeAnf>VmAiK,SO1/W3E?D'fuSUuY0$VjlVKlFbUimT*s2)L]4b?:[F'A:Xd=&+Ton6%ibFTL(h;BnDr(A%tb4LZJl&oPG56^d8a[kcT>V\:)CYi68R2!SB+;s/EeeXkmcl[bjTi&>V,k/)W@-[]J17s,O2^L@"&V[F7+G@k"*p,JiKm3/$-jHQV'J:rTt4^8Vpt&e8u(!cg2A$(]>+;m=NrOhJDea04[R*3kPL)S"Q5-aTm%jmn8aEE<<_hkBe$EF9WP,(@992Mf9PcE_\Bo#gmu +endstream +endobj +48 0 obj +<< /Type /Page +/Parent 1 0 R +/MediaBox [ 0 0 612 792 ] +/Resources 3 0 R +/Contents 47 0 R +/Annots 49 0 R +>> +endobj +49 0 obj +[ +50 0 R +51 0 R +] +endobj +50 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 353.292 557.6 424.62 545.6 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (../jcc/documentation/readme.html) +/S /URI >> +/H /I +>> +endobj +51 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 124.332 510.0 162.996 498.0 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://svn.apache.org/viewcvs.cgi/lucene/pylucene/trunk/samples/LuceneInAction) +/S /URI >> +/H /I +>> +endobj +53 0 obj +<< + /Title (\376\377\0\61\0\40\0\111\0\156\0\163\0\164\0\141\0\154\0\154\0\151\0\156\0\147\0\40\0\120\0\171\0\114\0\165\0\143\0\145\0\156\0\145) + /Parent 52 0 R + /Next 54 0 R + /A 9 0 R +>> endobj +54 0 obj +<< + /Title (\376\377\0\62\0\40\0\101\0\120\0\111\0\40\0\144\0\157\0\143\0\165\0\155\0\145\0\156\0\164\0\141\0\164\0\151\0\157\0\156) + /Parent 52 0 R + /First 55 0 R + /Last 61 0 R + /Prev 53 0 R + /Count -7 + /A 11 0 R +>> endobj +55 0 obj +<< + /Title (\376\377\0\62\0\56\0\61\0\40\0\123\0\141\0\155\0\160\0\154\0\145\0\163) + /Parent 54 0 R + /Next 56 0 R + /A 13 0 R +>> endobj +56 0 obj +<< + /Title 
(\376\377\0\62\0\56\0\62\0\40\0\124\0\150\0\162\0\145\0\141\0\144\0\151\0\156\0\147\0\40\0\163\0\165\0\160\0\160\0\157\0\162\0\164\0\40\0\167\0\151\0\164\0\150\0\40\0\141\0\164\0\164\0\141\0\143\0\150\0\103\0\165\0\162\0\162\0\145\0\156\0\164\0\124\0\150\0\162\0\145\0\141\0\144) + /Parent 54 0 R + /Prev 55 0 R + /Next 57 0 R + /A 15 0 R +>> endobj +57 0 obj +<< + /Title (\376\377\0\62\0\56\0\63\0\40\0\105\0\170\0\143\0\145\0\160\0\164\0\151\0\157\0\156\0\40\0\150\0\141\0\156\0\144\0\154\0\151\0\156\0\147\0\40\0\167\0\151\0\164\0\150\0\40\0\154\0\165\0\143\0\145\0\156\0\145\0\56\0\112\0\141\0\166\0\141\0\105\0\162\0\162\0\157\0\162) + /Parent 54 0 R + /Prev 56 0 R + /Next 58 0 R + /A 17 0 R +>> endobj +58 0 obj +<< + /Title (\376\377\0\62\0\56\0\64\0\40\0\110\0\141\0\156\0\144\0\154\0\151\0\156\0\147\0\40\0\112\0\141\0\166\0\141\0\40\0\141\0\162\0\162\0\141\0\171\0\163) + /Parent 54 0 R + /Prev 57 0 R + /Next 59 0 R + /A 19 0 R +>> endobj +59 0 obj +<< + /Title (\376\377\0\62\0\56\0\65\0\40\0\104\0\151\0\146\0\146\0\145\0\162\0\145\0\156\0\143\0\145\0\163\0\40\0\142\0\145\0\164\0\167\0\145\0\145\0\156\0\40\0\164\0\150\0\145\0\40\0\112\0\141\0\166\0\141\0\40\0\114\0\165\0\143\0\145\0\156\0\145\0\40\0\141\0\156\0\144\0\40\0\120\0\171\0\114\0\165\0\143\0\145\0\156\0\145\0\40\0\101\0\120\0\111\0\163) + /Parent 54 0 R + /Prev 58 0 R + /Next 60 0 R + /A 21 0 R +>> endobj +60 0 obj +<< + /Title (\376\377\0\62\0\56\0\66\0\40\0\120\0\171\0\164\0\150\0\157\0\156\0\151\0\143\0\40\0\145\0\170\0\164\0\145\0\156\0\163\0\151\0\157\0\156\0\163\0\40\0\164\0\157\0\40\0\164\0\150\0\145\0\40\0\112\0\141\0\166\0\141\0\40\0\114\0\165\0\143\0\145\0\156\0\145\0\40\0\101\0\120\0\111\0\163) + /Parent 54 0 R + /Prev 59 0 R + /Next 61 0 R + /A 23 0 R +>> endobj +61 0 obj +<< + /Title (\376\377\0\62\0\56\0\67\0\40\0\105\0\170\0\164\0\145\0\156\0\144\0\151\0\156\0\147\0\40\0\112\0\141\0\166\0\141\0\40\0\114\0\165\0\143\0\145\0\156\0\145\0\40\0\143\0\154\0\141\0\163\0\163\0\145\0\163\0\40\0\146\0\162\0\157\0\155\0\40\0\120\0\171\0\164\0\150\0\157\0\156) + /Parent 54 0 R + /Prev 60 0 R + /A 25 0 R +>> endobj +62 0 obj +<< /Type /Font +/Subtype /Type1 +/Name /F1 +/BaseFont /Helvetica +/Encoding /WinAnsiEncoding >> +endobj +63 0 obj +<< /Type /Font +/Subtype /Type1 +/Name /F5 +/BaseFont /Times-Roman +/Encoding /WinAnsiEncoding >> +endobj +64 0 obj +<< /Type /Font +/Subtype /Type1 +/Name /F3 +/BaseFont /Helvetica-Bold +/Encoding /WinAnsiEncoding >> +endobj +65 0 obj +<< /Type /Font +/Subtype /Type1 +/Name /F2 +/BaseFont /Helvetica-Oblique +/Encoding /WinAnsiEncoding >> +endobj +66 0 obj +<< /Type /Font +/Subtype /Type1 +/Name /F9 +/BaseFont /Courier +/Encoding /WinAnsiEncoding >> +endobj +67 0 obj +<< /Type /Font +/Subtype /Type1 +/Name /F6 +/BaseFont /Times-Italic +/Encoding /WinAnsiEncoding >> +endobj +68 0 obj +<< /Type /Font +/Subtype /Type1 +/Name /F7 +/BaseFont /Times-Bold +/Encoding /WinAnsiEncoding >> +endobj +1 0 obj +<< /Type /Pages +/Count 6 +/Kids [6 0 R 27 0 R 38 0 R 42 0 R 46 0 R 48 0 R ] >> +endobj +2 0 obj +<< /Type /Catalog +/Pages 1 0 R + /Outlines 52 0 R + /PageMode /UseOutlines + >> +endobj +3 0 obj +<< +/Font << /F1 62 0 R /F5 63 0 R /F3 64 0 R /F2 65 0 R /F9 66 0 R /F6 67 0 R /F7 68 0 R >> +/ProcSet [ /PDF /ImageC /Text ] >> +endobj +9 0 obj +<< +/S /GoTo +/D [27 0 R /XYZ 85.0 602.59 null] +>> +endobj +11 0 obj +<< +/S /GoTo +/D [27 0 R /XYZ 85.0 494.656 null] +>> +endobj +13 0 obj +<< +/S /GoTo +/D [27 0 R /XYZ 85.0 299.522 null] +>> +endobj +15 0 obj +<< +/S /GoTo +/D [38 0 R /XYZ 85.0 624.6 null] +>> 
+endobj +17 0 obj +<< +/S /GoTo +/D [38 0 R /XYZ 85.0 546.947 null] +>> +endobj +19 0 obj +<< +/S /GoTo +/D [38 0 R /XYZ 85.0 482.494 null] +>> +endobj +21 0 obj +<< +/S /GoTo +/D [42 0 R /XYZ 85.0 580.34 null] +>> +endobj +23 0 obj +<< +/S /GoTo +/D [42 0 R /XYZ 85.0 438.687 null] +>> +endobj +25 0 obj +<< +/S /GoTo +/D [46 0 R /XYZ 85.0 198.8 null] +>> +endobj +52 0 obj +<< + /First 53 0 R + /Last 54 0 R +>> endobj +xref +0 69 +0000000000 65535 f +0000019820 00000 n +0000019913 00000 n +0000020005 00000 n +0000000015 00000 n +0000000071 00000 n +0000000955 00000 n +0000001075 00000 n +0000001156 00000 n +0000020150 00000 n +0000001291 00000 n +0000020214 00000 n +0000001427 00000 n +0000020280 00000 n +0000001564 00000 n +0000020346 00000 n +0000001701 00000 n +0000020410 00000 n +0000001837 00000 n +0000020476 00000 n +0000001974 00000 n +0000020542 00000 n +0000002111 00000 n +0000020607 00000 n +0000002248 00000 n +0000020673 00000 n +0000002385 00000 n +0000005009 00000 n +0000005132 00000 n +0000005208 00000 n +0000005392 00000 n +0000005563 00000 n +0000005750 00000 n +0000005933 00000 n +0000006135 00000 n +0000006349 00000 n +0000006579 00000 n +0000006764 00000 n +0000009564 00000 n +0000009687 00000 n +0000009714 00000 n +0000009895 00000 n +0000012434 00000 n +0000012557 00000 n +0000012584 00000 n +0000012768 00000 n +0000014593 00000 n +0000014701 00000 n +0000015811 00000 n +0000015934 00000 n +0000015968 00000 n +0000016149 00000 n +0000020737 00000 n +0000016377 00000 n +0000016581 00000 n +0000016814 00000 n +0000016958 00000 n +0000017323 00000 n +0000017675 00000 n +0000017909 00000 n +0000018336 00000 n +0000018704 00000 n +0000019047 00000 n +0000019155 00000 n +0000019265 00000 n +0000019378 00000 n +0000019494 00000 n +0000019600 00000 n +0000019711 00000 n +trailer +<< +/Size 69 +/Root 2 0 R +/Info 4 0 R +>> +startxref +20788 +%%EOF diff --git a/doc/images/built-with-forrest-button.png b/doc/images/built-with-forrest-button.png new file mode 100644 index 0000000..4a787ab Binary files /dev/null and b/doc/images/built-with-forrest-button.png differ diff --git a/doc/images/instruction_arrow.png b/doc/images/instruction_arrow.png new file mode 100644 index 0000000..0fbc724 Binary files /dev/null and b/doc/images/instruction_arrow.png differ diff --git a/doc/images/lucene_green_150.gif b/doc/images/lucene_green_150.gif new file mode 100644 index 0000000..4948017 Binary files /dev/null and b/doc/images/lucene_green_150.gif differ diff --git a/doc/images/project.png b/doc/images/project.png new file mode 100644 index 0000000..178afbb Binary files /dev/null and b/doc/images/project.png differ diff --git a/doc/index.html b/doc/index.html new file mode 100644 index 0000000..f99f0cf --- /dev/null +++ b/doc/index.html @@ -0,0 +1,400 @@ + + + + + + + +Welcome to PyLucene + + + + + + + + + +

Welcome to PyLucene

What is PyLucene ?

PyLucene is a Python extension for accessing Java Lucene. Its goal is to
allow you to use Lucene's text indexing and searching capabilities from
Python. It is API compatible with the latest version of Java Lucene,
version 3.2 as of June 9th, 2011.

PyLucene is not a Lucene port but a Python wrapper around Java Lucene.
PyLucene embeds a Java VM with Lucene into a Python process. The PyLucene
Python extension, a Python module called lucene, is machine-generated by
JCC.

PyLucene is built with JCC, a C++ code generator that makes it possible to
call into Java classes from Python via Java's Native Invocation Interface
(JNI). Sources for JCC are included with the PyLucene sources.

See here for more information and documentation about PyLucene.
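
Because the lucene module exposes the Java Lucene classes directly once
the embedded VM is started, indexing and searching take only a few lines
of Python. The sketch below is illustrative only -- it assumes the Lucene
3.x API described above, and the field name and sample text are made up:

    import lucene
    lucene.initVM()   # start the embedded Java VM before any Lucene use

    from lucene import (RAMDirectory, StandardAnalyzer, IndexWriter,
                        IndexSearcher, Document, Field, QueryParser, Version)

    # index one document in an in-memory directory
    directory = RAMDirectory()
    analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
    writer = IndexWriter(directory, analyzer, True,
                         IndexWriter.MaxFieldLength.UNLIMITED)
    doc = Document()
    doc.add(Field("content", "PyLucene embeds a Java VM with Lucene into Python",
                  Field.Store.YES, Field.Index.ANALYZED))
    writer.addDocument(doc)
    writer.close()

    # query it back
    searcher = IndexSearcher(directory)
    query = QueryParser(Version.LUCENE_CURRENT, "content", analyzer).parse("lucene")
    for scoreDoc in searcher.search(query, 10).scoreDocs:
        print searcher.doc(scoreDoc.doc).get("content")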

Requirements

PyLucene is supported on Mac OS X, Linux, Solaris and Windows.

PyLucene requires Python version 2.x (x >= 3.5) and Java version 1.x
(x >= 4). Building PyLucene requires GNU Make, a recent version of Ant
capable of building Java Lucene, and a C++ compiler. Use of setuptools is
recommended.

See the JCC installation instructions for more information about building
JCC from sources.

See the PyLucene installation instructions for more information about
building PyLucene from sources.
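
Once these prerequisites are in place, building PyLucene itself follows
the usual Makefile pattern. A typical session might look like the
following, assuming the variables for your platform have already been
uncommented at the top of the Makefile as described in the installation
instructions:

    $ make               # runs jcc to generate and compile the lucene module
    $ make test          # optional: runs the unit tests against the build
    $ sudo make install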

News

23 Jul 2011 - PyLucene 3.3-3 available

This release tracks Lucene Core's recent 3.3 release.

See PyLucene 3.3 CHANGES and JCC 2.10 CHANGES for details.

Source distributions are available here.

09 Jun 2011 - PyLucene 3.2.0-1 available

This release tracks Lucene Core's recent 3.2 release.

See PyLucene 3.2.0 CHANGES and JCC 2.9 CHANGES for details.

Source distributions are available here.

04 Apr 2011 - PyLucene 3.1.0-1 available

This release tracks Lucene Core's recent 3.1 release.

See PyLucene 3.1.0 CHANGES and JCC 2.8 CHANGES for details.

Source distributions are available here.

16 Dec 2010 - PyLucene 3.0.3-1 and 2.9.4-1 available

These releases track Lucene Java's recent 2.9.4 and 3.0.3 releases.

See PyLucene 3.0.3 CHANGES and JCC 2.7 CHANGES for details.
See PyLucene 2.9.4 CHANGES for details.

Source distributions are available here.

02 Jul 2010 - PyLucene 3.0.2-1 and 2.9.3-1 available

These releases track Lucene Java's recent 2.9.3 and 3.0.2 releases.

See PyLucene 3.0.2 CHANGES and JCC 2.6 CHANGES for details.
See PyLucene 2.9.3 CHANGES for details.

Source distributions are available here.

03 Mar 2010 - PyLucene 3.0.1-1 and 2.9.2-1 available

These releases track Lucene Java's recent 2.9.2 and 3.0.1 releases.

See PyLucene 3.0.1 CHANGES and JCC 2.5.1 CHANGES for details.
See PyLucene 2.9.2 CHANGES for details.

Source distributions are available here.

08 Dec 2009 - PyLucene 3.0.0-1 available

This release tracks Lucene Java's recent 3.0.0 release.

See PyLucene CHANGES and JCC CHANGES for details.

A source distribution is available here.

10 Nov 2009 - PyLucene 2.9.1-1 available

This release tracks Lucene Java's recent 2.9.1 release.

See PyLucene CHANGES and JCC CHANGES for details.

A source distribution is available here.

13 Oct 2009 - PyLucene 2.9.0-1 available

This release tracks Lucene Java's recent 2.9.0 release.

See PyLucene CHANGES and JCC CHANGES for details.

A source distribution is available here.

03 Apr 2009 - PyLucene 2.4.1-1 available

This release tracks Lucene Java's recent 2.4.1 release.

See PyLucene CHANGES and JCC CHANGES for details.

A source distribution is available here.

08 Jan 2009 - PyLucene joins Apache Lucene as a Subproject

The Lucene PMC is pleased to announce the arrival of PyLucene as a Lucene
subproject. PyLucene had been hosted by the Open Source Applications
Foundation since its inception in early 2004.
diff --git a/doc/index.pdf b/doc/index.pdf
new file mode 100644
index 0000000..bf03a02
--- /dev/null
+++ b/doc/index.pdf
@@ -0,0 +1,1252 @@
[ASCII85-encoded PDF rendering of doc/index.html omitted (produced by FOP
0.20.5); its outline mirrors the page: 1 What is PyLucene ?,
2 Requirements, 3 News, with one entry per news item]
diff --git a/doc/jcc/documentation/install.html b/doc/jcc/documentation/install.html
new file mode 100644
index 0000000..82c9c8c
--- /dev/null
+++ b/doc/jcc/documentation/install.html
@@ -0,0 +1,480 @@

JCC Installation Instructions

Getting JCC's Source Code

JCC's source code is included with PyLucene's. If you've downloaded the
PyLucene source code already, JCC's is to be found in the jcc
subdirectory.

To get the JCC source code only from SVN, use:

    $ svn co http://svn.apache.org/repos/asf/lucene/pylucene/trunk/jcc jcc

Building JCC

JCC is a Python extension written in Python and C++. It requires a Java
Runtime Environment to operate, as it uses Java's reflection APIs to do
its work. It is built and installed via distutils or setuptools.

  1. Edit setup.py and verify that the values in INCLUDES, CFLAGS,
     DEBUG_CFLAGS, LFLAGS and JAVAC are correct for your system (see the
     sketch after this list). These values are also compiled into JCC's
     config.py file and are used by JCC when invoking distutils or
     setuptools to compile the extensions it generates code for.

  2. At the command line, enter:

         $ python setup.py build
         $ sudo python setup.py install
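
For orientation, the values reviewed in step 1 are per-platform
dictionaries in setup.py, roughly along the lines of the sketch below.
The JDK paths shown are examples only and must be adjusted to your own
installation; consult the actual setup.py for the authoritative defaults:

    # illustrative sketch of the per-platform values in jcc's setup.py
    INCLUDES = {
        'linux': ['/usr/lib/jvm/java-6-sun/include',          # example JDK path
                  '/usr/lib/jvm/java-6-sun/include/linux'],
    }
    CFLAGS = {
        'linux': ['-fno-strict-aliasing', '-Wno-write-strings'],
    }
    LFLAGS = {
        'linux': ['-L/usr/lib/jvm/java-6-sun/jre/lib/i386', '-ljava'],
    }
    JAVAC = {
        'linux': ['javac'],
    }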

Requirements

JCC requires a Java Development Kit to be present. It uses the Java Native
Invocation Interface and expects <jni.h> and the Java libraries to be
present at build time and runtime.

JCC requires a C++ compiler. Any recent C++ compiler for your platform is
expected to work.

Shared Mode: Support for the --shared Flag

JCC includes a small runtime that keeps track of the Java VM and of Java
objects escaping it. Because there can be only one Java VM embedded in a
given process at a time, the JCC runtime must be compiled as a shared
library when more than one JCC-built Python extension is going to be
imported into a given Python process.

Shared mode depends on setuptools' capability of building plain shared
libraries (as opposed to shared libraries for Python extensions).

Currently, shared mode is supported out of the box with setuptools 0.6c7
and above on Mac OS X and Windows. On Linux, a patch to setuptools needs
to be applied first. This patch is included in the JCC source distribution
in the jcc/patches directory, patch.43. This patch was submitted to the
setuptools project via issue 43.

The "shared mode disabled" error reported during the build of JCC on Linux
contains the exact instructions on how to patch the setuptools
installation with patch.43 on your system.

Shared mode is also required when embedding Python in a Java VM, as JCC's
runtime shared library is used by the JVM to load JCC and bootstrap the
Python VM via the JNI.

When shared mode is not enabled, not supported, or distutils is used
instead of setuptools, static mode is used. The JCC runtime code is then
statically linked with each JCC-built Python extension, and only one such
extension can be used in a given Python process at a time.

As setuptools grows its shared library building capability, more operating
systems are expected to be supported with shared mode in the future.

Shared mode can be forced off by building JCC with the NO_SHARED
environment variable set.

There are two defaults to consider here:

  - Is JCC built with shared mode support or not ?
      - By default, on Mac OS X and Windows, this is the case.
      - By default, on Linux, this is the case if setuptools is patched.
      - On other operating systems, shared mode support is off by default
        - not supported - because shared mode depends on setuptools'
        capability of building a regular shared library, which is still an
        experimental feature.
  - Is a JCC-built Python extension built with shared mode ?
    By default, no; shared mode is enabled only with the --shared command
    line argument (see the example below).
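
For example, a JCC invocation that generates, compiles and installs a
wrapper for a hypothetical mylib.jar in shared mode could look like this
(the jar and module names are placeholders; with some Python 2.x versions
the module is invoked as python -m jcc.__main__):

    $ python -m jcc --jar mylib.jar --python mylib --shared --build --install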

Notes for Mac OS X

On Mac OS X, Java is installed by Apple's setup as a framework. The values
in setup.py for INCLUDES and LFLAGS for darwin should be correct and ready
to use.

However, if you intend to use the 'system' Python from a Java VM on Mac
OS X -- Python embedded in Java -- you will need to add the flags
"-framework", "Python" to the LFLAGS value.
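
Concretely, this amounts to extending the darwin entry in setup.py roughly
as follows (a sketch; the pre-existing darwin flags in your copy of
setup.py may differ):

    LFLAGS = {
        'darwin': ['-framework', 'JavaVM',    # existing framework linkage
                   '-framework', 'Python'],   # added for Python embedded in Java
    }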

Notes for Linux

JCC has been built and tested on a variety of Linux distributions, 32- and
64-bit. Getting the Java configuration correct is important and is done
differently for every distribution. For example:

  - on Ubuntu, to install Java 5, these commands may be used:

        $ sudo apt-get install sun-java5-jdk
        $ sudo update-java-alternatives -s java-1.5.0-sun

    The sample flags for Linux in JCC's setup.py should be close to
    correct.

  - on Gentoo, the java-config utility should be used to locate, and
    possibly change, the default Java installation. The sample flags for
    Linux in JCC's setup.py should be changed to reflect the root of the
    Java installation, which may be obtained via:

        $ java-config -O

See the earlier section about Shared Mode for Linux support.

Notes for Solaris

At this time, JCC has been built and tested only on Solaris 11 with Sun
Studio C++ 12, Java 1.6 and Python 2.4.

Because JCC is written in C++, Python's distutils must be nudged a bit to
invoke the correct compiler. Sun Studio's C compiler is called cc while
its C++ compiler is called CC. To build JCC, use the following shell
command to ensure that the C++ compiler is used:

    $ CC=CC python setup.py build

Shared mode is not currently implemented for Solaris; setuptools needs to
be taught how to build plain shared libraries on Solaris first.

Notes for Windows

At this time, JCC has been built and tested on Win2k and WinXP with a
variety of Python and Java versions.

  - Adding the Python directory to PATH is recommended.
  - Adding the Java directories containing the necessary DLLs to PATH is
    a must.
  - Adding the directory containing javac.exe to PATH is required for
    shared mode (enabled by default if setuptools >= 0.6c7 is found to be
    installed).
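
For example, with Python under C:\Python26 and a JDK under C:\jdk1.6.0
(both paths hypothetical), the additions could look like:

    C:\> set Path=%Path%;C:\Python26;C:\jdk1.6.0\jre\bin\client;C:\jdk1.6.0\bin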

Notes for Python 2.3

To use JCC with Python 2.3, setuptools is required:

  1. Download setuptools.

  2. Edit the downloaded setuptools egg file to use python2.3 instead of
     python2.4.

  3. At the command line, run:

         $ sudo sh setuptools-0.6c7-py2.4.egg
diff --git a/doc/jcc/documentation/install.pdf b/doc/jcc/documentation/install.pdf
new file mode 100644
index 0000000..c2545bf
--- /dev/null
+++ b/doc/jcc/documentation/install.pdf
@@ -0,0 +1,509 @@
[ASCII85-encoded PDF rendering of the JCC installation instructions omitted
(produced by FOP 0.20.5); its outline mirrors the page: 1 Getting JCC's
Source Code, 2 Building JCC, 3 Requirements, ... (section truncated here)]
(\376\377\0\64\0\40\0\123\0\150\0\141\0\162\0\145\0\144\0\40\0\115\0\157\0\144\0\145\0\72\0\40\0\123\0\165\0\160\0\160\0\157\0\162\0\164\0\40\0\146\0\157\0\162\0\40\0\164\0\150\0\145\0\40\0\55\0\55\0\163\0\150\0\141\0\162\0\145\0\144\0\40\0\106\0\154\0\141\0\147) + /Parent 42 0 R + /Prev 45 0 R + /Next 47 0 R + /A 15 0 R +>> endobj +47 0 obj +<< + /Title (\376\377\0\65\0\40\0\116\0\157\0\164\0\145\0\163\0\40\0\146\0\157\0\162\0\40\0\115\0\141\0\143\0\40\0\117\0\123\0\40\0\130) + /Parent 42 0 R + /Prev 46 0 R + /Next 48 0 R + /A 17 0 R +>> endobj +48 0 obj +<< + /Title (\376\377\0\66\0\40\0\116\0\157\0\164\0\145\0\163\0\40\0\146\0\157\0\162\0\40\0\114\0\151\0\156\0\165\0\170) + /Parent 42 0 R + /Prev 47 0 R + /Next 49 0 R + /A 19 0 R +>> endobj +49 0 obj +<< + /Title (\376\377\0\67\0\40\0\116\0\157\0\164\0\145\0\163\0\40\0\146\0\157\0\162\0\40\0\123\0\157\0\154\0\141\0\162\0\151\0\163) + /Parent 42 0 R + /Prev 48 0 R + /Next 50 0 R + /A 21 0 R +>> endobj +50 0 obj +<< + /Title (\376\377\0\70\0\40\0\116\0\157\0\164\0\145\0\163\0\40\0\146\0\157\0\162\0\40\0\127\0\151\0\156\0\144\0\157\0\167\0\163) + /Parent 42 0 R + /Prev 49 0 R + /Next 51 0 R + /A 23 0 R +>> endobj +51 0 obj +<< + /Title (\376\377\0\71\0\40\0\116\0\157\0\164\0\145\0\163\0\40\0\146\0\157\0\162\0\40\0\120\0\171\0\164\0\150\0\157\0\156\0\40\0\62\0\56\0\63) + /Parent 42 0 R + /Prev 50 0 R + /A 25 0 R +>> endobj +52 0 obj +<< /Type /Font +/Subtype /Type1 +/Name /F1 +/BaseFont /Helvetica +/Encoding /WinAnsiEncoding >> +endobj +53 0 obj +<< /Type /Font +/Subtype /Type1 +/Name /F5 +/BaseFont /Times-Roman +/Encoding /WinAnsiEncoding >> +endobj +54 0 obj +<< /Type /Font +/Subtype /Type1 +/Name /F3 +/BaseFont /Helvetica-Bold +/Encoding /WinAnsiEncoding >> +endobj +55 0 obj +<< /Type /Font +/Subtype /Type1 +/Name /F2 +/BaseFont /Helvetica-Oblique +/Encoding /WinAnsiEncoding >> +endobj +56 0 obj +<< /Type /Font +/Subtype /Type1 +/Name /F9 +/BaseFont /Courier +/Encoding /WinAnsiEncoding >> +endobj +57 0 obj +<< /Type /Font +/Subtype /Type1 +/Name /F7 +/BaseFont /Times-Bold +/Encoding /WinAnsiEncoding >> +endobj +1 0 obj +<< /Type /Pages +/Count 5 +/Kids [6 0 R 27 0 R 31 0 R 35 0 R 39 0 R ] >> +endobj +2 0 obj +<< /Type /Catalog +/Pages 1 0 R + /Outlines 42 0 R + /PageMode /UseOutlines + >> +endobj +3 0 obj +<< +/Font << /F1 52 0 R /F5 53 0 R /F3 54 0 R /F2 55 0 R /F9 56 0 R /F7 57 0 R >> +/ProcSet [ /PDF /ImageC /Text ] >> +endobj +9 0 obj +<< +/S /GoTo +/D [27 0 R /XYZ 85.0 659.0 null] +>> +endobj +11 0 obj +<< +/S /GoTo +/D [27 0 R /XYZ 85.0 545.866 null] +>> +endobj +13 0 obj +<< +/S /GoTo +/D [27 0 R /XYZ 85.0 361.692 null] +>> +endobj +15 0 obj +<< +/S /GoTo +/D [27 0 R /XYZ 85.0 261.758 null] +>> +endobj +17 0 obj +<< +/S /GoTo +/D [31 0 R /XYZ 85.0 234.6 null] +>> +endobj +19 0 obj +<< +/S /GoTo +/D [35 0 R /XYZ 85.0 659.0 null] +>> +endobj +21 0 obj +<< +/S /GoTo +/D [35 0 R /XYZ 85.0 418.046 null] +>> +endobj +23 0 obj +<< +/S /GoTo +/D [35 0 R /XYZ 85.0 240.932 null] +>> +endobj +25 0 obj +<< +/S /GoTo +/D [39 0 R /XYZ 85.0 659.0 null] +>> +endobj +42 0 obj +<< + /First 43 0 R + /Last 51 0 R +>> endobj +xref +0 58 +0000000000 65535 f +0000014772 00000 n +0000014858 00000 n +0000014950 00000 n +0000000015 00000 n +0000000071 00000 n +0000000895 00000 n +0000001015 00000 n +0000001096 00000 n +0000015084 00000 n +0000001230 00000 n +0000015147 00000 n +0000001367 00000 n +0000015213 00000 n +0000001503 00000 n +0000015279 00000 n +0000001640 00000 n +0000015345 00000 n +0000001777 00000 n +0000015409 00000 n +0000001914 00000 n 
+0000015473 00000 n +0000002051 00000 n +0000015539 00000 n +0000002188 00000 n +0000015605 00000 n +0000002325 00000 n +0000004801 00000 n +0000004924 00000 n +0000004951 00000 n +0000005142 00000 n +0000007816 00000 n +0000007939 00000 n +0000007966 00000 n +0000008157 00000 n +0000010661 00000 n +0000010784 00000 n +0000010811 00000 n +0000010950 00000 n +0000011807 00000 n +0000011930 00000 n +0000011957 00000 n +0000015669 00000 n +0000012147 00000 n +0000012384 00000 n +0000012561 00000 n +0000012739 00000 n +0000013088 00000 n +0000013298 00000 n +0000013492 00000 n +0000013698 00000 n +0000013904 00000 n +0000014110 00000 n +0000014218 00000 n +0000014328 00000 n +0000014441 00000 n +0000014557 00000 n +0000014663 00000 n +trailer +<< +/Size 58 +/Root 2 0 R +/Info 4 0 R +>> +startxref +15720 +%%EOF diff --git a/doc/jcc/documentation/javadoc/allclasses-frame.html b/doc/jcc/documentation/javadoc/allclasses-frame.html new file mode 100644 index 0000000..40b88eb --- /dev/null +++ b/doc/jcc/documentation/javadoc/allclasses-frame.html @@ -0,0 +1,33 @@ + + + + + + +All Classes + + + + + + + + + + + +All Classes +
+ + + + + +
PythonException +
+PythonVM +
+
+ + + diff --git a/doc/jcc/documentation/javadoc/allclasses-noframe.html b/doc/jcc/documentation/javadoc/allclasses-noframe.html new file mode 100644 index 0000000..7b4168c --- /dev/null +++ b/doc/jcc/documentation/javadoc/allclasses-noframe.html @@ -0,0 +1,33 @@ + + + + + + +All Classes + + + + + + + + + + + +All Classes +
+ + + + + +
PythonException +
+PythonVM +
+
+ + + diff --git a/doc/jcc/documentation/javadoc/constant-values.html b/doc/jcc/documentation/javadoc/constant-values.html new file mode 100644 index 0000000..cc9de0a --- /dev/null +++ b/doc/jcc/documentation/javadoc/constant-values.html @@ -0,0 +1,142 @@ + + + + + + +Constant Field Values + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Constant Field Values

+
+
+Contents
    +
+ +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/jcc/documentation/javadoc/deprecated-list.html b/doc/jcc/documentation/javadoc/deprecated-list.html new file mode 100644 index 0000000..da52259 --- /dev/null +++ b/doc/jcc/documentation/javadoc/deprecated-list.html @@ -0,0 +1,142 @@ + + + + + + +Deprecated List + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Deprecated API

+
+
+Contents
    +
+ +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/jcc/documentation/javadoc/help-doc.html b/doc/jcc/documentation/javadoc/help-doc.html new file mode 100644 index 0000000..bd73a6b --- /dev/null +++ b/doc/jcc/documentation/javadoc/help-doc.html @@ -0,0 +1,209 @@ + + + + + + +API Help + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+How This API Document Is Organized

+
+This API (Application Programming Interface) document has pages corresponding to the items in the navigation bar, described as follows.

+Package

+
+ +

+Each package has a page that contains a list of its classes and interfaces, with a summary for each. This page can contain four categories:

    +
  • Interfaces (italic)
  • Classes
  • Enums
  • Exceptions
  • Errors
  • Annotation Types
+
+

+Class/Interface

+
+ +

+Each class, interface, nested class and nested interface has its own separate page. Each of these pages has three sections consisting of a class/interface description, summary tables, and detailed member descriptions:

    +
  • Class inheritance diagram
  • Direct Subclasses
  • All Known Subinterfaces
  • All Known Implementing Classes
  • Class/interface declaration
  • Class/interface description +

    +

  • Nested Class Summary
  • Field Summary
  • Constructor Summary
  • Method Summary +

    +

  • Field Detail
  • Constructor Detail
  • Method Detail
+Each summary entry contains the first sentence from the detailed description for that item. The summary entries are alphabetical, while the detailed descriptions are in the order they appear in the source code. This preserves the logical groupings established by the programmer.
+ +

+Annotation Type

+
+ +

+Each annotation type has its own separate page with the following sections:

    +
  • Annotation Type declaration
  • Annotation Type description
  • Required Element Summary
  • Optional Element Summary
  • Element Detail
+
+ +

+Enum

+
+ +

+Each enum has its own separate page with the following sections:

    +
  • Enum declaration
  • Enum description
  • Enum Constant Summary
  • Enum Constant Detail
+
+

+Tree (Class Hierarchy)

+
+There is a Class Hierarchy page for all packages, plus a hierarchy for each package. Each hierarchy page contains a list of classes and a list of interfaces. The classes are organized by inheritance structure starting with java.lang.Object. The interfaces do not inherit from java.lang.Object.
    +
  • When viewing the Overview page, clicking on "Tree" displays the hierarchy for all packages.
  • When viewing a particular package, class or interface page, clicking "Tree" displays the hierarchy for only that package.
+
+

+Deprecated API

+
+The Deprecated API page lists all of the API that have been deprecated. A deprecated API is not recommended for use, generally due to improvements, and a replacement API is usually given. Deprecated APIs may be removed in future implementations.
+

+Index

+
+The Index contains an alphabetic list of all classes, interfaces, constructors, methods, and fields.
+

+Prev/Next

+These links take you to the next or previous class, interface, package, or related page.

+Frames/No Frames

+These links show and hide the HTML frames. All pages are available with or without frames. +

+

+Serialized Form

+Each serializable or externalizable class has a description of its serialization fields and methods. This information is of interest to re-implementors, not to developers using the API. While there is no link in the navigation bar, you can get to this information by going to any serialized class and clicking "Serialized Form" in the "See also" section of the class description. +

+

+Constant Field Values

+The Constant Field Values page lists the static final fields and their values. +

+ + +This help file applies to API documentation generated using the standard doclet. + +
+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/jcc/documentation/javadoc/index-all.html b/doc/jcc/documentation/javadoc/index-all.html new file mode 100644 index 0000000..3782455 --- /dev/null +++ b/doc/jcc/documentation/javadoc/index-all.html @@ -0,0 +1,259 @@ + + + + + + +Index + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +A C E G I M O P R S T V W
+

+A

+
+
acquireThreadState() - +Method in class org.apache.jcc.PythonVM +
Bump the Python thread state counter. +
+
+

+C

+
+
clear() - +Method in exception org.apache.jcc.PythonException +
  +
+
+

+E

+
+
errorName - +Variable in exception org.apache.jcc.PythonException +
  +
+
+

+G

+
+
get() - +Static method in class org.apache.jcc.PythonVM +
Obtain the PythonVM instance, or null if the Python VM + has not yet been started. +
getErrorInfo() - +Method in exception org.apache.jcc.PythonException +
  +
getErrorName() - +Method in exception org.apache.jcc.PythonException +
  +
getMessage(boolean) - +Method in exception org.apache.jcc.PythonException +
  +
getMessage() - +Method in exception org.apache.jcc.PythonException +
  +
getTraceback() - +Method in exception org.apache.jcc.PythonException +
  +
+
+

+I

+
+
init(String, String[]) - +Method in class org.apache.jcc.PythonVM +
  +
instantiate(String, String) - +Method in class org.apache.jcc.PythonVM +
Instantiate the specified Python class, and return the instance. +
+
+

+M

+
+
message - +Variable in exception org.apache.jcc.PythonException +
  +
+
+

+O

+
+
org.apache.jcc - package org.apache.jcc
 
+
+

+P

+
+
PythonException - Exception in org.apache.jcc
 
PythonException(String) - +Constructor for exception org.apache.jcc.PythonException +
  +
PythonVM - Class in org.apache.jcc
 
PythonVM() - +Constructor for class org.apache.jcc.PythonVM +
  +
+
+

+R

+
+
releaseThreadState() - +Method in class org.apache.jcc.PythonVM +
Release the Python thread state counter. +
+
+

+S

+
+
start(String, String[]) - +Static method in class org.apache.jcc.PythonVM +
Start the embedded Python interpreter. +
start(String) - +Static method in class org.apache.jcc.PythonVM +
Start the embedded Python interpreter. +
+
+

+T

+
+
traceback - +Variable in exception org.apache.jcc.PythonException +
  +
+
+

+V

+
+
vm - +Static variable in class org.apache.jcc.PythonVM +
  +
+
+

+W

+
+
withTrace - +Variable in exception org.apache.jcc.PythonException +
  +
+
+A C E G I M O P R S T V W + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/jcc/documentation/javadoc/index.html b/doc/jcc/documentation/javadoc/index.html new file mode 100644 index 0000000..b4bcd24 --- /dev/null +++ b/doc/jcc/documentation/javadoc/index.html @@ -0,0 +1,36 @@ +Generated Documentation (Untitled) +Frame Alert +This document is designed to be viewed using the frames feature. If you see this message, you are using a non-frame-capable web client. +Link to the Non-frame version (org/apache/jcc/package-summary.html). diff --git a/doc/jcc/documentation/javadoc/org/apache/jcc/PythonException.html b/doc/jcc/documentation/javadoc/org/apache/jcc/PythonException.html new file mode 100644 index 0000000..00c2057 --- /dev/null +++ b/doc/jcc/documentation/javadoc/org/apache/jcc/PythonException.html @@ -0,0 +1,453 @@ +PythonException
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +org.apache.jcc +
+Class PythonException

+
+java.lang.Object
+  extended by java.lang.Throwable
+      extended by java.lang.Exception
+          extended by java.lang.RuntimeException
+              extended by org.apache.jcc.PythonException
+
+
+
All Implemented Interfaces:
java.io.Serializable
+
+
+
+
public class PythonException
extends java.lang.RuntimeException
+ + +

+

+
See Also:
Serialized Form
+
+ +

+ + + + + + + + + + + + + + + + + + + + + + + +
+Field Summary
+protected  java.lang.StringerrorName + +
+           
+protected  java.lang.Stringmessage + +
+           
+protected  java.lang.Stringtraceback + +
+           
+ booleanwithTrace + +
+           
+  + + + + + + + + + + +
+Constructor Summary
PythonException(java.lang.String message) + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+ voidclear() + +
+           
+protected  voidgetErrorInfo() + +
+           
+ java.lang.StringgetErrorName() + +
+           
+ java.lang.StringgetMessage() + +
+           
+ java.lang.StringgetMessage(boolean trace) + +
+           
+ java.lang.StringgetTraceback() + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Throwable
fillInStackTrace, getCause, getLocalizedMessage, getStackTrace, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
+  +

+ + + + + + + + +
+Field Detail
+ +

+withTrace

+
+public boolean withTrace
+
+
+
+
+
+ +

+message

+
+protected java.lang.String message
+
+
+
+
+
+ +

+errorName

+
+protected java.lang.String errorName
+
+
+
+
+
+ +

+traceback

+
+protected java.lang.String traceback
+
+
+
+
+ + + + + + + + +
+Constructor Detail
+ +

+PythonException

+
+public PythonException(java.lang.String message)
+
+
+ + + + + + + + +
+Method Detail
+ +

+getMessage

+
+public java.lang.String getMessage(boolean trace)
+
+
+
+
+
+
+ +

+getMessage

+
+public java.lang.String getMessage()
+
+
+
Overrides:
getMessage in class java.lang.Throwable
+
+
+
+
+
+
+ +

+getErrorName

+
+public java.lang.String getErrorName()
+
+
+
+
+
+
+ +

+getTraceback

+
+public java.lang.String getTraceback()
+
+
+
+
+
+
+ +

+getErrorInfo

+
+protected void getErrorInfo()
+
+
+
+
+
+
+ +

+clear

+
+public void clear()
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/jcc/documentation/javadoc/org/apache/jcc/PythonVM.html b/doc/jcc/documentation/javadoc/org/apache/jcc/PythonVM.html new file mode 100644 index 0000000..18306d5 --- /dev/null +++ b/doc/jcc/documentation/javadoc/org/apache/jcc/PythonVM.html @@ -0,0 +1,446 @@ + + + + + + +PythonVM + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +org.apache.jcc +
+Class PythonVM

+
+java.lang.Object
+  extended by org.apache.jcc.PythonVM
+
+
+
+
public class PythonVM
extends java.lang.Object
+ + +

+


+ +

+ + + + + + + + + + + +
+Field Summary
+protected static PythonVMvm + +
+           
+  + + + + + + + + + + + +
+Constructor Summary
+protected PythonVM() + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+ intacquireThreadState() + +
+          Bump the Python thread state counter.
+static PythonVMget() + +
+          Obtain the PythonVM instance, or null if the Python VM + has not yet been started.
+protected  voidinit(java.lang.String programName, + java.lang.String[] args) + +
+           
+ java.lang.Objectinstantiate(java.lang.String moduleName, + java.lang.String className) + +
+          Instantiate the specified Python class, and return the instance.
+ intreleaseThreadState() + +
+          Release the Python thread state counter.
+static PythonVMstart(java.lang.String programName) + +
+          Start the embedded Python interpreter.
+static PythonVMstart(java.lang.String programName, + java.lang.String[] args) + +
+          Start the embedded Python interpreter.
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Field Detail
+ +

+vm

+
+protected static PythonVM vm
+
+
+
+
+ + + + + + + + +
+Constructor Detail
+ +

+PythonVM

+
+protected PythonVM()
+
+
+ + + + + + + + +
+Method Detail
+ +

+start

+
+public static PythonVM start(java.lang.String programName,
+                             java.lang.String[] args)
+
+
Start the embedded Python interpreter. The specified + program name and args are set into the Python variable sys.argv. + This returns an instance of the Python VM; it may be called + multiple times, and will return the same VM instance each time. +

+

+
Parameters:
programName - the name of the Python program, typically + /usr/bin/python. This is informational; the program is not + actually executed.
args - additional arguments to be put into sys.argv. +
Returns:
a singleton instance of PythonVM
+
+
+
+ +

+start

+
+public static PythonVM start(java.lang.String programName)
+
+
Start the embedded Python interpreter. The specified + program name is set into the Python variable sys.argv[0]. + This returns an instance of the Python VM; it may be called + multiple times, and will return the same VM instance each time. +

+

+
Parameters:
programName - the name of the Python program, typically + /usr/bin/python. This is informational; the program is not + actually executed. +
Returns:
a singleton instance of PythonVM
+
+
+
+ +

+get

+
+public static PythonVM get()
+
+
Obtain the PythonVM instance, or null if the Python VM + has not yet been started. +

+

+ +
Returns:
a singleton instance of PythonVM, or null
+
+
+
+ +

+init

+
+protected void init(java.lang.String programName,
+                    java.lang.String[] args)
+
+
+
+
+
+
+ +

+instantiate

+
+public java.lang.Object instantiate(java.lang.String moduleName,
+                                    java.lang.String className)
+                             throws PythonException
+
+
Instantiate the specified Python class, and return the instance. +

+

+
Parameters:
moduleName - the Python module the class is defined in
className - the Python class to instantiate. +
Returns:
a handle on the Python instance. +
Throws: +
PythonException
+
+
+
+ +

+acquireThreadState

+
+public int acquireThreadState()
+
+
Bump the Python thread state counter. Every thread should + do this before calling into Python, to prevent the Python + thread state from being inadvertently collected (and causing loss + of thread-local variables). +

+

+ +
Returns:
the Python thread state counter. A return value less + than zero signals an error.
+
+
+
+ +

+releaseThreadState

+
+public int releaseThreadState()
+
+
Release the Python thread state counter. Every thread that has + called acquireThreadState() should call this before + terminating. +

+

+ +
Returns:
the Python thread state counter. A return value less + than zero signals an error.
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/jcc/documentation/javadoc/org/apache/jcc/package-frame.html b/doc/jcc/documentation/javadoc/org/apache/jcc/package-frame.html new file mode 100644 index 0000000..61e3f13 --- /dev/null +++ b/doc/jcc/documentation/javadoc/org/apache/jcc/package-frame.html @@ -0,0 +1,43 @@ + + + + + + +org.apache.jcc + + + + + + + + + + + +org.apache.jcc + + + + +
+Classes  + +
+PythonVM
+ + + + + + +
+Exceptions  + +
+PythonException
+ + + + diff --git a/doc/jcc/documentation/javadoc/org/apache/jcc/package-summary.html b/doc/jcc/documentation/javadoc/org/apache/jcc/package-summary.html new file mode 100644 index 0000000..77878f9 --- /dev/null +++ b/doc/jcc/documentation/javadoc/org/apache/jcc/package-summary.html @@ -0,0 +1,167 @@ + + + + + + +org.apache.jcc + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+

+Package org.apache.jcc +

+ + + + + + + + + +
+Class Summary
PythonVM 
+  + +

+ + + + + + + + + +
+Exception Summary
PythonException 
+  + +

+

+
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/jcc/documentation/javadoc/org/apache/jcc/package-tree.html b/doc/jcc/documentation/javadoc/org/apache/jcc/package-tree.html new file mode 100644 index 0000000..aed1643 --- /dev/null +++ b/doc/jcc/documentation/javadoc/org/apache/jcc/package-tree.html @@ -0,0 +1,153 @@ + + + + + + +org.apache.jcc Class Hierarchy + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Hierarchy For Package org.apache.jcc +

+
+

+Class Hierarchy +

+
    +
  • java.lang.Object
      +
    • org.apache.jcc.PythonVM
    • java.lang.Throwable (implements java.io.Serializable) +
        +
      • java.lang.Exception +
      +
    +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/jcc/documentation/javadoc/overview-tree.html b/doc/jcc/documentation/javadoc/overview-tree.html new file mode 100644 index 0000000..0a88975 --- /dev/null +++ b/doc/jcc/documentation/javadoc/overview-tree.html @@ -0,0 +1,155 @@ + + + + + + +Class Hierarchy + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Hierarchy For All Packages

+
+
+
Package Hierarchies:
org.apache.jcc
+
+

+Class Hierarchy +

+
    +
  • java.lang.Object
      +
    • org.apache.jcc.PythonVM
    • java.lang.Throwable (implements java.io.Serializable) +
        +
      • java.lang.Exception +
      +
    +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/jcc/documentation/javadoc/package-list b/doc/jcc/documentation/javadoc/package-list new file mode 100644 index 0000000..84b468c --- /dev/null +++ b/doc/jcc/documentation/javadoc/package-list @@ -0,0 +1 @@ +org.apache.jcc diff --git a/doc/jcc/documentation/javadoc/resources/inherit.gif b/doc/jcc/documentation/javadoc/resources/inherit.gif new file mode 100644 index 0000000..c814867 Binary files /dev/null and b/doc/jcc/documentation/javadoc/resources/inherit.gif differ diff --git a/doc/jcc/documentation/javadoc/serialized-form.html b/doc/jcc/documentation/javadoc/serialized-form.html new file mode 100644 index 0000000..0c1a57e --- /dev/null +++ b/doc/jcc/documentation/javadoc/serialized-form.html @@ -0,0 +1,202 @@ + + + + + + +Serialized Form + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Serialized Form

+
+
+ + + + + +
+Package org.apache.jcc
+ +

+ + + + + +
+Class org.apache.jcc.PythonException extends java.lang.RuntimeException implements Serializable
+ +

+ + + + + +
+Serialized Fields
+ +

+withTrace

+
+boolean withTrace
+
+
+
+
+
+

+message

+
+java.lang.String message
+
+
+
+
+
+

+errorName

+
+java.lang.String errorName
+
+
+
+
+
+

+traceback

+
+java.lang.String traceback
+
+
+
+
+ +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/jcc/documentation/javadoc/stylesheet.css b/doc/jcc/documentation/javadoc/stylesheet.css new file mode 100644 index 0000000..6ea9e51 --- /dev/null +++ b/doc/jcc/documentation/javadoc/stylesheet.css @@ -0,0 +1,29 @@ +/* Javadoc style sheet */ + +/* Define colors, fonts and other style attributes here to override the defaults */ + +/* Page background color */ +body { background-color: #FFFFFF; color:#000000 } + +/* Headings */ +h1 { font-size: 145% } + +/* Table colors */ +.TableHeadingColor { background: #CCCCFF; color:#000000 } /* Dark mauve */ +.TableSubHeadingColor { background: #EEEEFF; color:#000000 } /* Light mauve */ +.TableRowColor { background: #FFFFFF; color:#000000 } /* White */ + +/* Font used in left-hand frame lists */ +.FrameTitleFont { font-size: 100%; font-family: Helvetica, Arial, sans-serif; color:#000000 } +.FrameHeadingFont { font-size: 90%; font-family: Helvetica, Arial, sans-serif; color:#000000 } +.FrameItemFont { font-size: 90%; font-family: Helvetica, Arial, sans-serif; color:#000000 } + +/* Navigation bar fonts and colors */ +.NavBarCell1 { background-color:#EEEEFF; color:#000000} /* Light mauve */ +.NavBarCell1Rev { background-color:#00008B; color:#FFFFFF} /* Dark Blue */ +.NavBarFont1 { font-family: Arial, Helvetica, sans-serif; color:#000000;color:#000000;} +.NavBarFont1Rev { font-family: Arial, Helvetica, sans-serif; color:#FFFFFF;color:#FFFFFF;} + +.NavBarCell2 { font-family: Arial, Helvetica, sans-serif; background-color:#FFFFFF; color:#000000} +.NavBarCell3 { font-family: Arial, Helvetica, sans-serif; background-color:#FFFFFF; color:#000000} + diff --git a/doc/jcc/documentation/readme.html b/doc/jcc/documentation/readme.html new file mode 100644 index 0000000..4c94ee9 --- /dev/null +++ b/doc/jcc/documentation/readme.html @@ -0,0 +1,1173 @@ + + + + + + + +JCC Features + + + + + + + + + +
+ +

JCC Features

+ + +
+
Warning
+
Before calling any PyLucene API that requires the Java VM, start it by + calling initVM(classpath, ...). More about this function + here. 
+
+ + +

Installing JCC

+
+

+ JCC is a Python extension written in Python and C++. It requires a + Java Runtime Environment (JRE) to operate as it uses Java's + reflection APIs to do its work. It is built and installed + via distutils or setuptools. +

+

+ See installation for more + information and operating system specific notes. +

+
+ + +

Invoking JCC

+
+

+ JCC is installed as a package and how to invoke it depends on the + Python version used: +

+
    + +
  • python 2.7: python -m jcc +
  • + +
  • python 2.6: python -m jcc.__main__ +
  • + +
  • python 2.5: python -m jcc +
  • + +
  • python 2.4: +
      + +
    • no setuptools: python site-packages/jcc/__init__.py +
    • + +
• with setuptools: python site-packages/<jcc egg directory>/jcc/__init__.py +
    • + +
    + +
  • + +
• python 2.3: python site-packages/<jcc egg directory>/jcc/__init__.py +
  • + +
+
+ + +

Generating C++ and Python wrappers with JCC

+
+

+ JCC started as a C++ code generator for hiding the gory details of + accessing methods and fields on Java classes via JNI, + the Java Native Interface. + These C++ wrappers make it possible to access a Java object as if it + were a regular C++ object, very much like GCJ's + CNI + interface. 

+

+ It then became apparent that JCC could also generate the C++ + wrappers for making these classes available to Python. Every class + that gets thus wrapped becomes a + CPython + type. +

+

+ JCC generates wrappers for all public classes that are requested by + name on the command line or via the --jar command line + argument. It generates wrapper methods for all public methods and + fields on these classes whose return type and parameter types are + found in one of the following ways: +

+
    + +
  • + the type is one of the requested classes +
  • + +
  • + the type is one of the requested classes' superclass or implemented + interfaces +
  • + +
  • + the type is available from one of the packages listed via the + --package command line argument +
  • + +
+

+ Overloaded methods are supported and are selected at runtime on the + basis of the type and number of arguments passed in. +

+

+ JCC does not generate wrappers for methods or fields which don't + satisfy these requirements. Thus, JCC can avoid generating code for + runaway transitive closures of type dependencies. +

+

+ JCC generates property accessors for a property + called field when it finds Java methods + named setField(value), + getField() or + isField(). +
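+ For example, a wrapped class declaring getName() and
+ setName() is also usable through a name property
+ (a sketch; Foo is a hypothetical wrapped Java class, not part of JCC itself):
+
+        >>> foo = Foo()
+        >>> foo.name = 'bar'       # invokes setName('bar')
+        >>> foo.name               # invokes getName()
+        'bar'
+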

+

+ The C++ wrappers are declared in a C++ namespace structure that + mirrors the Java classes' Java packages. The Python types are + declared in a flat namespace at the top level of the resulting + Python extension module. +

+

+ JCC's command-line arguments are best illustrated via the PyLucene + example: +

+
+    $ python -m jcc           # run JCC to wrap
+        --jar lucene.jar      # all public classes in the lucene jar file
+        --jar analyzers.jar   # and the lucene analyzers contrib package
+        --jar snowball.jar    # and the snowball contrib package
+        --jar highlighter.jar # and the highlighter contrib package
+        --jar regex.jar       # and the regex search contrib package
+        --jar queries.jar     # and the queries contrib package
+        --jar extensions.jar  # and the Python extensions package
+        --package java.lang   # including all dependencies found in the 
+                              # java.lang package
+        --package java.util   # and the java.util package
+        --package java.io     # and the java.io package
+          java.lang.System    # and to explicitly wrap java.lang.System
+          java.lang.Runtime   # as well as java.lang.Runtime
+          java.lang.Boolean   # and java.lang.Boolean
+          java.lang.Byte      # and java.lang.Byte
+          java.lang.Character # and java.lang.Character
+          java.lang.Integer   # and java.lang.Integer
+          java.lang.Short     # and java.lang.Short
+          java.lang.Long      # and java.lang.Long
+          java.lang.Double    # and java.lang.Double
+          java.lang.Float     # and java.lang.Float
+          java.text.SimpleDateFormat
+                              # and java.text.SimpleDateFormat
+          java.io.StringReader
+                              # and java.io.StringReader
+          java.io.InputStreamReader
+                              # and java.io.InputStreamReader
+          java.io.FileInputStream
+                              # and java.io.FileInputStream
+          java.util.Arrays    # and java.util.Arrays
+        --exclude org.apache.lucene.queryParser.Token
+                              # while explicitly not wrapping
+                              # org.apache.lucene.queryParser.Token
+        --exclude org.apache.lucene.queryParser.TokenMgrError
+                              # nor org.apache.lucene.queryParser.TokenMgrError
+        --exclude org.apache.lucene.queryParser.ParseException
+                              # nor org.apache.lucene.queryParser.ParseException
+        --python lucene       # generating Python wrappers into a module
+                              # called lucene
+        --version 2.4.0       # giving the Python extension egg version 2.4.0
+        --mapping org.apache.lucene.document.Document 
+                  'get:(Ljava/lang/String;)Ljava/lang/String;' 
+                              # asking for a Python mapping protocol wrapper
+                              # for get access on the Document class by
+                              # calling its get method
+        --mapping java.util.Properties 
+                  'getProperty:(Ljava/lang/String;)Ljava/lang/String;'
+                              # asking for a Python mapping protocol wrapper
+                              # for get access on the Properties class by
+                              # calling its getProperty method
+        --sequence org.apache.lucene.search.Hits
+                   'length:()I' 
+                   'doc:(I)Lorg/apache/lucene/document/Document;'
+                              # asking for a Python sequence protocol wrapper
+                              # for length and get access on the Hits class by
+                              # calling its length and doc methods
+        --files 2             # generating all C++ classes into about 2 .cpp
+                              # files
+        --build               # and finally compiling the generated C++ code
+                              # into a Python egg via setuptools - when
+                              # installed - or a regular Python extension via
+                              # distutils or setuptools otherwise 
+        --module collections.py
+                              # copying the collections.py module into the egg
+        --install             # installing it into Python's site-packages
+                              # directory.
+      
+

+ There are limits to both how many files can fit on the command line + and how large a C++ file the C++ compiler can handle. By default, + JCC generates one large C++ file containing the source code for all + wrapper classes. +

+

+ Using the --files command line argument, this behaviour + can be tuned to work around various limits, + for example: 
+ for example: +

+
    + +
  • + to break up the large wrapper class file into about 2 files:
    + +--files 2 + +
  • + +
  • + to break up the large wrapper class file into about 10 files:
    + + --files 10 + +
  • + +
  • + to generate one C++ file per Java class wrapped:
    + +--files separate + +
  • + +
+

+ The --prefix and --root arguments are + passed through to distutils' setup(). +

+
+ + +

Classpath considerations

+
+

+ When generating wrappers for Python, the JAR files passed to JCC + via --jar are copied into the resulting Python extension + egg as resources and added to the extension + module's CLASSPATH variable. Classes or JAR files that + are required by the classes contained in the argument JAR files need + to be made findable via JCC's --classpath command line + argument. At runtime, these need to be appended to the + extension's CLASSPATH variable before starting the VM + with initVM(CLASSPATH). +
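+ For example, to make an extra jar file findable at runtime before
+ starting the VM (a sketch; the jar path is hypothetical):
+
+        >>> import os, lucene
+        >>> lucene.CLASSPATH += os.pathsep + '/path/to/required.jar'
+        >>> lucene.initVM(lucene.CLASSPATH)
+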

+

+ To have such required jar files also automatically copied into the + resulting Python extension egg and added to the classpath at build + and runtime, use the --include option. This option + works like the --jar option except that no wrappers are + generated for the classes contained in them unless they're + explicitly named on the command line. 
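+ For example (a sketch with hypothetical jar and module names):
+
+    $ python -m jcc --jar mylib.jar --include required.jar --python mylib --build
+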

+

+ When more than one JCC-built extension module is going to be used in + the same Python VM and these extension modules share Java classes, + only one extension module should be generated with wrappers for these + shared classes. The other extension modules must be built by importing + the one with the shared classes by using the --import + command line parameter. This ensures that only one copy of the + wrappers for the shared classes are generated and that they are + compatible among all extension modules sharing them. +
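+ For example, a sketch with hypothetical jar and module names, where
+ the app extension reuses the wrappers generated into the shared
+ extension:
+
+    $ python -m jcc --jar shared.jar --python shared --build --install
+    $ python -m jcc --jar app.jar --import shared --python app --build --install
+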

+
+ + +

Using distutils vs setuptools

+
+

+ By default, when building a Python extension, + if setuptools is found to be installed, it is used + over distutils. If you want to force the use + of distutils over setuptools, use + the --use-distutils command line argument. +

+
+ + +

Distributing an egg

+
+

+ The --bdist option can be used to ask JCC to + invoke distutils with bdist + or setuptools + with bdist_egg. If setuptools is used, + the resulting egg has to be installed with the + easy_install + installer which is normally part of a Python installation that + includes setuptools. +
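+ For example (a sketch with hypothetical jar and module names):
+
+    $ python -m jcc --jar mylib.jar --python mylib --bdist
+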

+
+ + +

JCC's runtime API functions

+
+

+ JCC includes a small runtime component that is compiled into any + Python extension it produces. +

+

+ This runtime component makes it possible to manage the Java VM from + Python. Because a Java VM can be configured with a myriad of + options, it is not automatically started when the resulting Python + extension module is loaded into the Python interpreter. +

+

+ Instead, the initVM() function must be called from the + main thread before using any of the wrapped classes. It takes the + following keyword arguments: +

+
    + +
  • + +classpath +
    + A string containing one or more directories or jar files for the + Java VM to search for classes. Every Python extension produced by + JCC exports a CLASSPATH variable that is hardcoded to + the jar files that it was produced from. A copy of each jar file + is installed as a resource file with the extension when JCC is + invoked with the --install command line argument. + This parameter is optional and defaults to the + CLASSPATH string exported by the module + initVM is imported from. +
    +            >>> import lucene
    +            >>> lucene.initVM(classpath=lucene.CLASSPATH)
    +          
    + +
  • + +
  • + +initialheap +
    + The initial amount of Java heap to start the Java VM with. This + argument is a string that follows the same syntax as the + similar -Xms java command line argument. +
    +            >>> import lucene
    +            >>> lucene.initVM(initialheap='32m')
    +            >>> lucene.Runtime.getRuntime().totalMemory()
    +            33357824L
    +          
    + +
  • + +
  • + +maxheap +
    + The maximum amount of Java heap that could become available to the + Java VM. This argument is a string that follows the same syntax as + the similar -Xmx java command line argument. +
  • + +
  • + +maxstack +
+ The maximum amount of stack space available to the Java + VM. This argument is a string that follows the same syntax as the + similar -Xss java command line argument. A combined example follows + this list. 
  • + +
  • + +vmargs +
+ A string of comma-separated additional options to pass to the VM + startup routine. These are passed through as-is. For example: 
    +            >>> import lucene
    +            >>> lucene.initVM(vmargs='-Xcheck:jni,-verbose:jni,-verbose:gc')
    +          
    + +
  • + +
+
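+ Several of these keyword arguments may be combined in a single call,
+ as mentioned above (a sketch; the values shown are arbitrary):
+
+            >>> import lucene
+            >>> lucene.initVM(classpath=lucene.CLASSPATH, initialheap='32m',
+            ...               maxheap='512m', maxstack='2m')
+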

+ The initVM() and getVMEnv() functions + return a JCCEnv object that has a few utility methods on it: +

+
    + +
  • + +attachCurrentThread(name, asDaemon) +
+ Before a thread created in Python or elsewhere but not in the Java + VM can be used with the Java VM, this method needs to be + invoked. The two arguments it takes are optional and + self-explanatory. A usage sketch follows this list. +
  • + +
• + +detachCurrentThread() + The opposite of attachCurrentThread(). This method + should be used with extreme caution as Python's and the Java VM's + garbage collectors may use a thread detached too early, causing a + system crash. The utility of this method seems dubious at the + moment. +
  • + +
+
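+ A usage sketch for attachCurrentThread(), as mentioned
+ above (assumes initVM() was already called from the main thread):
+
+        >>> import threading
+        >>> from lucene import getVMEnv
+        >>> def worker():
+        ...     env = getVMEnv()
+        ...     env.attachCurrentThread('worker', False)
+        ...     # wrapped Java classes may now be used from this thread
+        ...
+        >>> threading.Thread(target=worker).start()
+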

+ There are several differences between JNI's findClass() + and Java's Class.forName(): +

+
    + +
  • + className is a '/' separated string of names +
  • + +
• + the class loaders are different; findClass() may find + classes that Class.forName() won't. +
  • + +
+

+ For example: +

+
+        >>> from lucene import *
+        >>> initVM(CLASSPATH)
+        >>> findClass('org/apache/lucene/document/Document')
+        <Class: class org.apache.lucene.document.Document>
+        >>> Class.forName('org.apache.lucene.document.Document')
+        Traceback (most recent call last):
+          File "<stdin>", line 1, in <module>
+        lucene.JavaError: java.lang.ClassNotFoundException:
+                          org/apache/lucene/document/Document
+        >>> Class.forName('java.lang.Object')
+        <Class: class java.lang.Object>
+      
+
+ + +

Type casting and instance checks

+
+

+ Many Java APIs are declared to return types that are less specific + than the types actually returned. In Java 1.5, this is worked around + with type parameters. JCC generates code to heed type parameters + unless the --no-generics option is used. See the next section for + details on Java generics support. 

+

+ In C++, casting the object into its actual type is supported via the + regular C casting operator. +

+

+ In Python each wrapped class has a class method + called cast_ that implements the same functionality. +

+

+ Similarly, each wrapped class has a class method + called instance_ that tests whether the wrapped java + instance is of the given type. For example: +

+
+	if BooleanQuery.instance_(query):
+            booleanQuery = BooleanQuery.cast_(query)
+
+        print booleanQuery.getClauses()
+      
+
+ + +

Handling generic classes

+
+

+ Java 1.5 added support for parameterized types. JCC generates code + to heed type parameters unless the --no-generics + command line parameter is used. Java type parameterization is a + runtime feature. The same class is used for all its + parameterizations. Similarly, JCC wrapper objects all use the same + class but store type parameterizations on instances and make them + accessible as a tuple via the parameters_ property. +

+

+ For example, an ArrayList<Document> instance, + has (<type 'Document'>,) + for parameters_ and its get() method uses + that type parameter to wrap its return values. +

+

+ To allocate an instance of a generic Java class with specific type + parameters use the of_() method. This method accepts + one or more Python wrapper classes to use as type parameters. For + example, java.util.ArrayList<E> is declared to + accept one type parameter. Its wrapper's of_() method + hence accepts one parameter, a Python class, to use as type + parameter for the return type of its get() method, among + others: +

+
+	>>> a = ArrayList().of_(Document)
+	>>> a
+        <ArrayList: []>
+	>>> a.parameters_
+	(<type 'Document'>,)
+        >>> a.add(Document())
+        True
+        >>> a.get(0)
+        <Document: Document<>>
+      
+

+ The use of type parameters is, of course, optional. A generic Java + class can still be used as before, without type parameters. + Downcasting from Object is then necessary: +

+
+	>>> a = ArrayList()
+	>>> a
+        <ArrayList: []>
+	>>> a.parameters_
+	(None,)
+        >>> a.add(Document())
+        True
+        >>> a.get(0)
+        <Object: Document<>>
+        >>> Document.cast_(a.get(0))
+        <Document: Document<>>
+      
+
+ + +

Handling arrays

+
+

+ Java arrays are wrapped with a C++ JArray + template. The [] operator is available for read + access. This template, JArray<T>, accommodates all + Java primitive types, jstring, jobject and + wrapper class arrays. 

+

+ Java arrays are returned to Python in a JArray wrapper + instance that implements the Python sequence protocol. It is + possible to change an array's elements but not to change an array's + size. +

+

+ To convert a char array to a Python string use + a ''.join(array) construct. +
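+ For example (a sketch; under Python 2, the join yields a unicode
+ string):
+
+	>>> array = JArray('char')([u'a', u'b', u'c'])
+	>>> ''.join(array)
+	u'abc'
+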

+

+ Any Java method expecting an array can be called with the corresponding + sequence object from Python. 

+

+ To instantiate a Java array from Python, use one of the following + forms: +

+
+	>>> array = JArray('int')(size)
+	# the resulting Java int array is initialized with zeroes
+
+	>>> array = JArray('int')(sequence)
+	# the sequence must only contain ints
+	# the resulting Java int array contains the ints in the sequence
+      
+

+ Instead of 'int', you may also use one + of 'object', 'string', 'bool', + 'byte', 'char', 'double', + 'float', 'long' and 'short' + to create an array of the corresponding type. +

+

+ Because there is only one wrapper class for object arrays, + the JArray('object') type's constructor takes a second + argument denoting the class of the object elements. This argument is + optional and defaults to Object. +

+

+ As with the Object types, the JArray types + also include a cast_ method. This method becomes useful + when the array returned to Python is wrapped as a + plain Object. This is the case, for example, with + nested arrays since there is no distinct Python type for every + different Java object array class - all Java object arrays are + wrapped by JArray('object'). For example: 

+
+	# cast obj to an array of ints
+        >>> JArray('int').cast_(obj)
+	# cast obj to an array of Document
+        >>> JArray('object').cast_(obj, Document)
+      
+

+ In both cases, the Java type of obj must be compatible with the + array type it is being cast to. 

+
+	# using nested array:
+
+        >>> d = JArray('object')(1, Document)
+        >>> d[0] = Document()
+        >>> d
+        JArray<object>[<Document: Document<>>]
+        >>> d[0]
+        <Document: Document<>>
+        >>> a = JArray('object')(2)
+        >>> a[0] = d
+        >>> a[1] = JArray('int')([0, 1, 2])
+        >>> a
+        JArray<object>[<Object: [Lorg.apache.lucene.document.Document;@694f12>, <Object: [I@234265>]
+        >>> a[0]
+        <Object: [Lorg.apache.lucene.document.Document;@694f12>
+        >>> a[1]
+        <Object: [I@234265>
+        >>> JArray('object').cast_(a[0])[0]
+        <Object: Document<>>
+        >>> JArray('object').cast_(a[0], Document)[0]
+        <Document: Document<>>
+        >>> JArray('int').cast_(a[1])
+        JArray<int>[0, 1, 2]
+        >>> JArray('int').cast_(a[1])[0]
+        0
+      
+

+ To verify that a Java object is of a given array type, use + the instance_() method available on the array + type. This is not the same as verifying that it is assignable with + elements of a given type. For example, using the arrays created + above: +

+
+	# is d array of Object ? are d's elements of type Object ?
+        >>> JArray('object').instance_(d)
+        True
+
+	# can it receive Object instances ?
+        >>> JArray('object').assignable_(d)
+        False
+
+	# is it array of Document ? are d's elements of type Document ?
+        >>> JArray('object').instance_(d, Document)
+        True
+
+	# is it array of Class ? are d's elements of type Class ?
+        >>> JArray('object').instance_(d, Class)
+        False
+
+	# can it receive Document instances ?
+        >>> JArray('object').assignable_(d, Document)
+        True
+      
+
+ + +

Exception reporting

+
+

+ Exceptions that occur in the Java VM and that escape to C++ are + reported as a javaError C++ exception. When using + Python wrappers, the C++ exceptions are handled and reported with + Python exceptions. When using C++ only, failure to handle the + exception in your C++ code will cause the process to crash. +

+

+ Exceptions that occur in the Java VM and that escape to the Python + VM are reported with a JavaError Python exception + object. The getJavaException() method can be called + on JavaError objects to obtain the original Java + exception object wrapped as any other Java object. This Java object + can be used to obtain a Java stack trace for the error, for example. 
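+ For example, a sketch reusing the Class.forName() failure
+ shown earlier in the findClass() comparison:
+
+        >>> from lucene import Class, JavaError
+        >>> try:
+        ...     Class.forName('no.such.Class')
+        ... except JavaError, e:
+        ...     print e.getJavaException().getClass().getName()
+        ...
+        java.lang.ClassNotFoundException
+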

+

+ Exceptions that occur in the Python VM and that escape to the Java + VM, as for example can happen in Python extensions (see topic below) + are reported to the Java VM as a RuntimeException or as + a PythonException when using shared + mode. See installation + instructions for more information about shared mode. +

+
+ + +

Writing Java class extensions in Python

+
+

+ JCC makes it relatively easy to extend a Java class from + Python. This is done via an intermediary class written in Java that + implements a special method called pythonExtension() + and that declares a number of native methods that are to be + implemented by the actual Python extension. +

+

+ When JCC sees these special extension Java classes, it generates the + C++ code implementing the native methods they declare. These native + methods call the corresponding Python method implementations, passing + in parameters and returning the result to the Java VM caller. 

+

+ For example, to implement a Lucene analyzer in Python, one would + implement first such an extension class in Java: +

+
+    package org.apache.pylucene.analysis;
+
+    import org.apache.lucene.analysis.Analyzer;
+    import org.apache.lucene.analysis.TokenStream;
+    import java.io.Reader;
+
+    public class PythonAnalyzer extends Analyzer {
+        private long pythonObject;
+
+        public PythonAnalyzer()
+        {
+        }
+
+        public void pythonExtension(long pythonObject)
+        {
+            this.pythonObject = pythonObject;
+        }
+        public long pythonExtension()
+        {
+            return this.pythonObject;
+        }
+
+        public void finalize()
+            throws Throwable
+        {
+            pythonDecRef();
+        }
+
+        public native void pythonDecRef();
+        public native TokenStream tokenStream(String fieldName, Reader reader);
+    }
+      
+

+ The pythonExtension() methods are what make this class + recognized as an extension class by JCC. They should be included + verbatim as above along with the declaration of + the pythonObject instance variable. 

+

+ The implementation of the native pythonDecRef() method + is generated by JCC and is necessary because it seems + that finalize() cannot itself be native. Since an + extension class wraps the Python instance object it's going to be + calling methods on, its ref count needs to be decremented when this + Java wrapper class disappears. A declaration + for pythonDecRef() and a finalize() + implementation should always be included verbatim as above. +

+

+ Really, the only non-boilerplate user input is the constructor of the + class and the other native methods, tokenStream() in + the example above. 

+

+ The corresponding Python class(es) are implemented as follows: +

+
+        class _analyzer(PythonAnalyzer):
+            def tokenStream(_self, fieldName, reader):
+                class _tokenStream(PythonTokenStream):
+                    def __init__(self_):
+                        super(_tokenStream, self_).__init__()
+                        self_.TOKENS = ["1", "2", "3", "4", "5"]
+                        self_.INCREMENTS = [1, 2, 1, 0, 1]
+                        self_.i = 0
+                        self_.posIncrAtt = self_.addAttribute(PositionIncrementAttribute.class_)
+                        self_.termAtt = self_.addAttribute(TermAttribute.class_)
+                        self_.offsetAtt = self_.addAttribute(OffsetAttribute.class_)
+                    def incrementToken(self_):
+                        if self_.i == len(self_.TOKENS):
+                            return False
+                        self_.termAtt.setTermBuffer(self_.TOKENS[self_.i])
+                        self_.offsetAtt.setOffset(self_.i, self_.i)
+                        self_.posIncrAtt.setPositionIncrement(self_.INCREMENTS[self_.i])
+                        self_.i += 1
+                        return True
+                    def end(self_):
+                        pass
+                    def reset(self_):
+                        pass
+                    def close(self_):
+                        pass
+                return _tokenStream()
+      
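+ Once the VM is started, such an extension instance can be passed to
+ any Java API expecting an Analyzer (a usage sketch; assumes initVM()
+ was called and that java.io.StringReader was wrapped, as in the JCC
+ invocation shown earlier):
+
+        >>> analyzer = _analyzer()
+        >>> stream = analyzer.tokenStream('field', StringReader('some text'))
+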
+

+ When an __init__() is declared, super() + must be called or else the Java wrapper class will not know about + the Python instance it needs to invoke. +

+

+ When a Java extension class declares native methods for which there
+ are public or protected equivalents available on the parent class,
+ JCC generates code that makes it possible to
+ call super() on these methods from Python as well.
+

+
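+
+ For example, assuming a hypothetical extension class
+ PythonFoo declaring a native transform()
+ method for which the Java parent class provides a protected
+ implementation, the Python override may delegate to it (a minimal
+ sketch, names hypothetical):
+
+        class _foo(PythonFoo):
+            def transform(self, value):
+                # pre-process the argument, then invoke the Java parent's
+                # transform() through the code JCC generated for it
+                return super(_foo, self).transform(value.strip())
+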

+ There are a number of extension examples available in PyLucene's test + suite + and samples. +

+
+ + +

Embedding a Python VM in a Java VM

+
+

+ Using the same techniques used when writing a Python extension of a
+ Java class, JCC may also be used to embed a Python VM in a Java VM.
+ Following are the steps to take and the constraints to observe in
+ order to achieve this; a minimal sketch follows the list:
+

+
    + +
  • + JCC must be built in shared mode. See + installation + instructions for more information about shared mode. + Note that for this use on Mac OS X, JCC must also be built + with the link flags "-framework", "Python" in + the LFLAGS value. +
  • + +
  • + As described in the previous section, define one or more Java + classes to be "extended" from Python to provide the + implementations of the native methods declared on them. Instances + of these classes implement the bridges into the Python VM from + Java. +
  • + +
  • + The org.apache.jcc.PythonVM Java class is going to be
+ used from the Java VM's main thread to initialize the embedded
+ Python VM. This class is installed inside the JCC egg under the
+ jcc/classes directory and the full path to this
+ directory must be on the Java CLASSPATH.
+
  • + +
  • + The JCC egg directory contains the JCC shared runtime library, which
+ is not the JCC Python extension shared library but a library
+ called libjcc.dylib on Mac OS X,
+ libjcc.so on Linux, or jcc.dll on Windows.
+ This directory must be added to the Java VM's shared library path
+ via the -Djava.library.path command line parameter.
+
  • + +
  • + In the Java VM's main thread, initialize the Python VM by
+ calling its static start() method, passing it a
+ Python program name string and optional start-up arguments
+ in a string array that will be made accessible in Python via
+ sys.argv. Note that the program name string is
+ purely informational and is not used by the
+ start() code other than to initialize that
+ sys.argv Python variable. This method returns the
+ singleton PythonVM instance to be used in this Java VM.
+ start() may be called multiple times; it will always
+ return the same singleton instance. This instance may also be
+ retrieved at any later time via the static get()
+ method defined on the org.apache.jcc.PythonVM class.
+
  • + +
  • + Any Java VM thread that is going to be calling into the Python VM
+ should start by acquiring a reference to the Python thread state
+ object, by calling the acquireThreadState() method on
+ the Python VM instance. It should then release the Python thread
+ state before terminating, by calling
+ releaseThreadState(). Calling these methods is optional
+ but strongly recommended as it ensures that Python is not creating
+ and throwing away a thread state every time the Python VM is entered
+ and exited from a given Java VM thread.
+
  • + +
  • + Any Java VM thread may instantiate a Python object for which an + extension class was defined in Java as described in the previous + section by calling the instantiate() method on the + PythonVM instance. This method takes two string parameters, the + name of the Python module and the name of the Python class to + import and instantiate from it. The __init__() + constructor on this class must be callable without any parameters + and, if defined, must call super() in order to + initialize the Java side. The instantiate() method is + declared to return java.lang.Object but the return + value is actually an instance of the Java extension class used and + must be downcast to it. +
  • + +
+
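+
+ The following minimal sketch puts these steps together, assuming the
+ PythonAnalyzer extension class from the previous section
+ and a hypothetical Python module myanalyzer defining the
+ _analyzer class shown earlier:
+
+        import org.apache.jcc.PythonVM;
+        import org.apache.pylucene.analysis.PythonAnalyzer;
+
+        public class Embedding {
+            public static void main(String[] args) {
+                // initialize the embedded Python VM from the main thread
+                PythonVM vm = PythonVM.start("embedding", args);
+
+                // recommended bracketing for a thread calling into Python
+                vm.acquireThreadState();
+                try {
+                    // instantiate() returns java.lang.Object and must be
+                    // downcast to the extension class actually used
+                    PythonAnalyzer analyzer = (PythonAnalyzer)
+                        vm.instantiate("myanalyzer", "_analyzer");
+                    // ... use the analyzer as any other Lucene Analyzer ...
+                } finally {
+                    vm.releaseThreadState();
+                }
+            }
+        }
+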
+ + +

Pythonic protocols

+
+

+ When generating wrappers for Python, JCC attempts to detect which
+ classes can be made iterable, as illustrated by the sketch after this
+ list:
+

+
    + +
  • + When a class declares that it
+ implements java.lang.Iterable, JCC makes it iterable
+ from Python.
+
  • + +
  • + When a Java class declares a method called next() + with no arguments returning an object type, this class is made + iterable. Its next() method is assumed to terminate + iteration by returning null. +
  • + +
+
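+
+ For example, a wrapped java.util.List, which
+ implements java.lang.Iterable, can be iterated over
+ directly. A minimal sketch, assuming an initialized PyLucene VM and
+ a Document instance doc whose
+ getFields() method returns such a list:
+
+        # getFields() returns a wrapped java.util.List of the document's
+        # fields; implementing java.lang.Iterable, it is iterable here
+        for field in doc.getFields():
+            print field.name(), field.stringValue()
+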

+ JCC generates a Python mapping get method for a class when requested
+ to do so via the --mapping command line option, which
+ takes two arguments: the class to generate the mapping get method
+ for, and the Java method to use. The method is specified with its
+ name followed by ':' and its Java
+ signature.
+

+

+ For example, System.getProperties()['java.class.path'] is + made possible by: +

+
+        --mapping java.util.Properties 
+                  'getProperty:(Ljava/lang/String;)Ljava/lang/String;'
+                              # asking for a Python mapping protocol wrapper
+                              # for get access on the Properties class by
+                              # calling its getProperty method
+      
+

+ JCC generates Python sequence length and get methods for a class
+ when requested to do so via the --sequence command line
+ option, which takes three arguments: the class to generate the
+ sequence length and get methods for, and the two Java methods to use.
+ The methods are specified with their name followed by ':' and their
+ Java signature. For example:
+

+
+      for i in xrange(len(hits)): 
+          doc = hits[i]
+          ...
+      
+

+ is made possible by: +

+
+        --sequence org.apache.lucene.search.Hits
+                   'length:()I' 
+                   'doc:(I)Lorg/apache/lucene/document/Document;'
+      
+
+ +
+ +
 
+
+
diff --git a/doc/jcc/documentation/readme.pdf b/doc/jcc/documentation/readme.pdf
new file mode 100644
index 0000000..30b1aec
[binary PDF rendering of the JCC documentation; stream data omitted]
diff --git a/doc/jcc/index.html b/doc/jcc/index.html
new file mode 100644
index 0000000..45b81a9

Welcome to JCC, PyLucene's code generator

+ + + +

What is JCC ?

+
+

+ JCC is a C++ code generator that produces a C++ object interface
+ wrapping a Java library via Java's Native Interface (JNI). JCC
+ also generates C++ wrappers that conform to Python's C type system,
+ making the instances of Java classes directly available to a Python
+ interpreter.
+

+

+ When generating Python wrappers, JCC produces a complete Python + extension module via the distutils + or setuptools + packages. +

+

+ See here for more + information and documentation about JCC. +

+
+ + +

Requirements

+
+

+ JCC is supported on Mac OS X, Linux, Solaris and Windows. +

+

+ JCC requires Python version 2.x (x >= 3.5), that is, Python 2.3.5 or
+ later, and Java version 1.x (x >= 4), that is, Java 1.4 or later.
+ Building JCC requires a C++ compiler. Use of
+ setuptools
+ is recommended.
+

+

+ See the installation + instructions for more information about building JCC from sources. +

+
+ + +

Source Code

+
+

+ The source code to JCC is part of PyLucene's source tree and can be
+ obtained with a subversion client
+ from here.
+

+
+ + +

Mailing List

+
+

+ If you'd like to contribute to JCC, or have issues or questions
+ with it, please subscribe to the PyLucene developer mailing list.
+

+
+ +
+ +
 
+
diff --git a/doc/jcc/index.pdf b/doc/jcc/index.pdf
new file mode 100644
index 0000000..28fcf80
[binary PDF rendering of the JCC index page; stream data omitted]
diff --git a/doc/linkmap.html b/doc/linkmap.html
new file mode 100644
index 0000000..b00a46d

Site Linkmap Table of Contents

+

+ This is a map of the complete site and its structure. +

+
+
+   • PyLucene  ___________________  site
+       • About  ___________________  about
+           • Index  ___________________  index : Welcome to PyLucene
+       • Documentation  ___________________  documentation
+           • Features  ___________________  readme
+       • Resources  ___________________  resources
+           • Releases  ___________________  releases
+       • JCC  ___________________  jcc
+           • About  ___________________  about
+               • Index  ___________________  index : Welcome to JCC
+           • Documentation  ___________________  documentation
+               • Features  ___________________  readme
+               • Javadoc  ___________________  readme
+
+ +
 
+
diff --git a/doc/linkmap.pdf b/doc/linkmap.pdf
new file mode 100644
index 0000000..8585ee2
[binary PDF rendering of the site linkmap; stream data omitted]
diff --git a/doc/resources/mailing_lists.html b/doc/resources/mailing_lists.html
new file mode 100644
index 0000000..db49f8b

PyLucene Mailing Lists

+
+ +
+ + +

Users

+
+

+ Historically, Lucene user issues or questions have almost always + been best addressed on the Java Lucene User mailing list. PyLucene + users with general Lucene questions should consider contacting + the Java + Lucene User list first. +

+

+ For issues or questions specific to PyLucene, such as build issues,
+ please contact the PyLucene Developer
+ mailing list.
+

+
+
Note
+
+ In order to post to the PyLucene Developer + list, it is necessary to first subscribe to it. See below for more information. +
+
+
+ + +

Developers

+
+

+ If you'd like to contribute to PyLucene or have questions
+ specific to PyLucene or JCC, please subscribe to the PyLucene
+ developer mailing list.
+

+

+ The PyLucene developer mailing list is: + pylucene-dev@lucene.apache.org. +

+ +
+
Note
+
+ In order to post to the list, it is necessary to first subscribe to it. +
+
+
+ + + +

Commits

+
+

+ If you'd like to be notified when source code changes are committed
+ to PyLucene's version control
+ system, then subscribe to the PyLucene commits mailing list.
+

+ +
+ +
+ +
 
+
diff --git a/doc/resources/mailing_lists.pdf b/doc/resources/mailing_lists.pdf
new file mode 100644
index 0000000..b2b9620
[binary PDF rendering of the mailing lists page; stream data omitted]
0 R ] >> +endobj +2 0 obj +<< /Type /Catalog +/Pages 1 0 R + /Outlines 31 0 R + /PageMode /UseOutlines + >> +endobj +3 0 obj +<< +/Font << /F1 35 0 R /F5 36 0 R /F3 37 0 R /F2 38 0 R /F7 39 0 R >> +/ProcSet [ /PDF /ImageC /Text ] >> +endobj +9 0 obj +<< +/S /GoTo +/D [15 0 R /XYZ 85.0 659.0 null] +>> +endobj +11 0 obj +<< +/S /GoTo +/D [15 0 R /XYZ 85.0 498.256 null] +>> +endobj +13 0 obj +<< +/S /GoTo +/D [15 0 R /XYZ 85.0 318.312 null] +>> +endobj +19 0 obj +<< +/S /GoTo +/D [null /XYZ 0.0 0.0 null] +>> +endobj +31 0 obj +<< + /First 32 0 R + /Last 34 0 R +>> endobj +xref +0 40 +0000000000 65535 f +0000007350 00000 n +0000007415 00000 n +0000007507 00000 n +0000000015 00000 n +0000000071 00000 n +0000000592 00000 n +0000000712 00000 n +0000000751 00000 n +0000007630 00000 n +0000000886 00000 n +0000007693 00000 n +0000001023 00000 n +0000007759 00000 n +0000001160 00000 n +0000003681 00000 n +0000003804 00000 n +0000003915 00000 n +0000004140 00000 n +0000007825 00000 n +0000004276 00000 n +0000004415 00000 n +0000004616 00000 n +0000004807 00000 n +0000005006 00000 n +0000005207 00000 n +0000005420 00000 n +0000005620 00000 n +0000005794 00000 n +0000005965 00000 n +0000006168 00000 n +0000007884 00000 n +0000006373 00000 n +0000006494 00000 n +0000006660 00000 n +0000006794 00000 n +0000006902 00000 n +0000007012 00000 n +0000007125 00000 n +0000007241 00000 n +trailer +<< +/Size 40 +/Root 2 0 R +/Info 4 0 R +>> +startxref +7935 +%%EOF diff --git a/doc/resources/version_control.html b/doc/resources/version_control.html new file mode 100644 index 0000000..d138135 --- /dev/null +++ b/doc/resources/version_control.html @@ -0,0 +1,213 @@ + + + + + + + +PyLucene Version Control System + + + + + + + + + +

PyLucene Version Control System

Overview

+ The PyLucene source code resides in the Apache Subversion (SVN) repository (http://subversion.tigris.org). The command-line SVN client can be obtained from http://subversion.tigris.org/project_packages.html.

Web Access (read-only)

+ The source code can be browsed via the Web at http://svn.apache.org/viewcvs.cgi/lucene/pylucene/. No SVN client software is required.

Anonymous Access (read-only)

+ The SVN URL for anonymous users is http://svn.apache.org/repos/asf/lucene/pylucene/. Instructions for anonymous SVN access are at http://www.apache.org/dev/version-control.html#anon-svn.
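
+ For illustration, a read-only working copy could then be created with the stock command-line client; the local directory name "pylucene" is only an example, not something this page mandates:

    svn checkout http://svn.apache.org/repos/asf/lucene/pylucene/ pylucene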

Committer Access (read-write)

+ The SVN URL for committers is https://svn.apache.org/repos/asf/lucene/pylucene/. Instructions for committer SVN access are at http://www.apache.org/dev/version-control.html#https-svn.
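
+ Committers would check out the same tree over https, e.g. (again, the local directory name is illustrative):

    svn checkout https://svn.apache.org/repos/asf/lucene/pylucene/ pylucene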

+ + + diff --git a/doc/resources/version_control.pdf b/doc/resources/version_control.pdf new file mode 100644 index 0000000..edd9272 --- /dev/null +++ b/doc/resources/version_control.pdf @@ -0,0 +1,332 @@ +%PDF-1.3 +%ª«¬­ +4 0 obj +<< /Type /Info +/Producer (FOP 0.20.5) >> +endobj +5 0 obj +<< /Length 511 /Filter [ /ASCII85Decode /FlateDecode ] + >> +stream +Gb!$C4`>s,&;GCX`JlRi[128.9VQ-t.^`8Q4#@r`0aSKNe2W<[pZ3WWSM%9B=u3sbchI8rTm%8VG7,4cLG6Z7KtCE$@G"`dE;L3&\t&4tjSC%<-+r!Yc/Tu/'mF%PK?b*7Z;GY/Z7$'JOKZ)r=M8&#bQt/EIO[/>)B7.`M*8Y;s;G&Sjol`W?J-#p]ImK#A"3D^m?E@;mC@j8-N'\G-bV1ok(lE\FOc.k'"T([NgRZci[A&WDLgh@g.,Gj@2r,j\))"FU:5Le_$ZW+'M/#`t)93*%+a`V1Lj@f;&DnUirWW%T26Q~> +endstream +endobj +6 0 obj +<< /Type /Page +/Parent 1 0 R +/MediaBox [ 0 0 612 792 ] +/Resources 3 0 R +/Contents 5 0 R +/Annots 7 0 R +>> +endobj +7 0 obj +[ +8 0 R +10 0 R +12 0 R +14 0 R +] +endobj +8 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 102.0 559.666 157.316 547.666 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A 9 0 R +/H /I +>> +endobj +10 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 102.0 541.466 226.616 529.466 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A 11 0 R +/H /I +>> +endobj +12 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 102.0 523.266 262.628 511.266 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A 13 0 R +/H /I +>> +endobj +14 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 102.0 505.066 259.292 493.066 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A 15 0 R +/H /I +>> +endobj +16 0 obj +<< /Length 1291 /Filter [ /ASCII85Decode /FlateDecode ] + >> +stream +Gatm;cZ@f"'YaIJcnROK=L)JZHYYF,cL,ec]7kD=boVfu`%tduah[E'mur_^\H1t@*G3+U+haN*-U#5:7DI0Cmno$T-K>6^?UfkMj`)UZdOTG8cd*\0Pf5UO.;/.p\:)(9*0Z%!?(uW::\9)O9caE]G/7u4`Fp!;"o&YYD.O#):XO!2^>0?JMsb7ok2rjqWV9D<\4-Fkfq2Q;Utu".j>::IVQ"VEda0F5KQ^+G%Oo4fFT,lJV0D_/n`-c#lTet0:)AIdW'qindSu.QGXYX0k9dS6DOVGCp5*n]Zis.%`&p[9Xr()g2CO\E<34i#$ZnY_0!j#5gDQ?F\)DCtmcI>3;lTVVj.RR\QsY"6g]/E7?pnBTj[B9"\G\l=mc2Ral_heX;lh-9(g-+*qUN14.T82s*s9I*qAKk`*_RGH]<> jTL@elC@8n/O91?'TOGaEc.`H*BrdRiKX'/sM]7![lo2i4(?ke0a5*.E6J[=n(B7Q+B(V+`H*g`IkDO7W54hAtE^<#sKb#(BUKhm388jX8X^6!Bl=71]`XYTtU9]:=\Z?%uBE@k@o[7KYE]MS9(42o4GfN@@W-4jW[@'*n=+6TVh]=QlU(t0#@7;(7WQYMATPq8:n#O[#OD+iV9?h2KeT@MG5;OO>83p>b;BThI93KG\5>?ZAg#,#((fg?J^!&A3Go:V4=L/pcQBj)<&t$;o)s4@DN&"&cC\qt_^U5"AVMYPY^nPA@(OsK3o?Hm0U))nD9r)NDO+d?g;ai[`TOB.#3QYB/,lJAUN]-/2l7I"Z9bbe*rqDgMmkl,S\h5C7#L$5JdG.j`d5V&CVMe@UiGXZE9;eL^QMu`_e%q,4J#r>V/WTB/g;&BllPT[WbG*(;h?$3eWa,P`Y,ct;ta[T;*/1.f/'LI)3!Phr1A#>A>)jLG_L-06E_[#25<.]>7"cJS2,".Pn,2qI%F<'IW!+1;AIOTr(9l?JZ7`5!TLR7CaUIY/&k=iSWP%SCP*EB/p0%>4AZ/j;huo8@0UeV+RmZDZYk3dg&^9Y6qYkO=#[Yp\pIF9>-/%@l-V?LAlt'n#C6H(VViiG_`Z3rjHgAIjN#n4`0A=GA^FcB6H`HJY\9+;M,/&3=Alt[:_dG0XHXsO+A:.N\l[X]Y#\?c&0PLV?HS+kBH1Yk3jGe#h!:/fVnr#PBcbd,?K;l437tZGH_L+L!kEubDEjLCrW`_C@"&~> +endstream +endobj +17 0 obj +<< /Type /Page +/Parent 1 0 R +/MediaBox [ 0 0 612 792 ] +/Resources 3 0 R +/Contents 16 0 R +/Annots 18 0 R +>> +endobj +18 0 obj +[ +19 0 R +20 0 R +21 0 R +22 0 R +23 0 R +24 0 R +25 0 R +] +endobj +19 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 328.596 629.666 417.588 617.666 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://subversion.tigris.org) +/S /URI >> +/H /I +>> +endobj +20 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 297.3 616.466 317.952 604.466 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://subversion.tigris.org/project_packages.html) +/S /URI >> +/H /I +>> +endobj +21 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 90.0 550.932 335.628 538.932 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI 
(http://svn.apache.org/viewcvs.cgi/lucene/pylucene/) +/S /URI >> +/H /I +>> +endobj +22 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 279.648 498.598 511.608 486.598 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://svn.apache.org/repos/asf/lucene/pylucene/) +/S /URI >> +/H /I +>> +endobj +23 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 303.288 485.398 323.94 473.398 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://www.apache.org/dev/version-control.html#anon-svn) +/S /URI >> +/H /I +>> +endobj +24 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 250.656 433.064 487.284 421.064 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (https://svn.apache.org/repos/asf/lucene/pylucene/) +/S /URI >> +/H /I +>> +endobj +25 0 obj +<< /Type /Annot +/Subtype /Link +/Rect [ 297.288 419.864 317.94 407.864 ] +/C [ 0 0 0 ] +/Border [ 0 0 0 ] +/A << /URI (http://www.apache.org/dev/version-control.html#https-svn) +/S /URI >> +/H /I +>> +endobj +27 0 obj +<< + /Title (\376\377\0\61\0\40\0\117\0\166\0\145\0\162\0\166\0\151\0\145\0\167) + /Parent 26 0 R + /Next 28 0 R + /A 9 0 R +>> endobj +28 0 obj +<< + /Title (\376\377\0\62\0\40\0\127\0\145\0\142\0\40\0\101\0\143\0\143\0\145\0\163\0\163\0\40\0\50\0\162\0\145\0\141\0\144\0\55\0\157\0\156\0\154\0\171\0\51) + /Parent 26 0 R + /Prev 27 0 R + /Next 29 0 R + /A 11 0 R +>> endobj +29 0 obj +<< + /Title (\376\377\0\63\0\40\0\101\0\156\0\157\0\156\0\171\0\155\0\157\0\165\0\163\0\40\0\101\0\143\0\143\0\145\0\163\0\163\0\40\0\50\0\162\0\145\0\141\0\144\0\55\0\157\0\156\0\154\0\171\0\51) + /Parent 26 0 R + /Prev 28 0 R + /Next 30 0 R + /A 13 0 R +>> endobj +30 0 obj +<< + /Title (\376\377\0\64\0\40\0\103\0\157\0\155\0\155\0\151\0\164\0\164\0\145\0\162\0\40\0\101\0\143\0\143\0\145\0\163\0\163\0\40\0\50\0\162\0\145\0\141\0\144\0\55\0\167\0\162\0\151\0\164\0\145\0\51) + /Parent 26 0 R + /Prev 29 0 R + /A 15 0 R +>> endobj +31 0 obj +<< /Type /Font +/Subtype /Type1 +/Name /F1 +/BaseFont /Helvetica +/Encoding /WinAnsiEncoding >> +endobj +32 0 obj +<< /Type /Font +/Subtype /Type1 +/Name /F5 +/BaseFont /Times-Roman +/Encoding /WinAnsiEncoding >> +endobj +33 0 obj +<< /Type /Font +/Subtype /Type1 +/Name /F3 +/BaseFont /Helvetica-Bold +/Encoding /WinAnsiEncoding >> +endobj +34 0 obj +<< /Type /Font +/Subtype /Type1 +/Name /F2 +/BaseFont /Helvetica-Oblique +/Encoding /WinAnsiEncoding >> +endobj +35 0 obj +<< /Type /Font +/Subtype /Type1 +/Name /F7 +/BaseFont /Times-Bold +/Encoding /WinAnsiEncoding >> +endobj +1 0 obj +<< /Type /Pages +/Count 2 +/Kids [6 0 R 17 0 R ] >> +endobj +2 0 obj +<< /Type /Catalog +/Pages 1 0 R + /Outlines 26 0 R + /PageMode /UseOutlines + >> +endobj +3 0 obj +<< +/Font << /F1 31 0 R /F5 32 0 R /F3 33 0 R /F2 34 0 R /F7 35 0 R >> +/ProcSet [ /PDF /ImageC /Text ] >> +endobj +9 0 obj +<< +/S /GoTo +/D [17 0 R /XYZ 85.0 659.0 null] +>> +endobj +11 0 obj +<< +/S /GoTo +/D [17 0 R /XYZ 85.0 593.466 null] +>> +endobj +13 0 obj +<< +/S /GoTo +/D [17 0 R /XYZ 85.0 527.932 null] +>> +endobj +15 0 obj +<< +/S /GoTo +/D [17 0 R /XYZ 85.0 462.398 null] +>> +endobj +26 0 obj +<< + /First 27 0 R + /Last 30 0 R +>> endobj +xref +0 36 +0000000000 65535 f +0000005826 00000 n +0000005891 00000 n +0000005983 00000 n +0000000015 00000 n +0000000071 00000 n +0000000673 00000 n +0000000793 00000 n +0000000839 00000 n +0000006106 00000 n +0000000974 00000 n +0000006169 00000 n +0000001111 00000 n +0000006235 00000 n +0000001248 00000 n +0000006301 00000 n +0000001385 00000 n +0000002769 00000 n +0000002892 00000 n +0000002961 00000 n +0000003143 00000 n +0000003345 
00000 n +0000003546 00000 n +0000003748 00000 n +0000003956 00000 n +0000004159 00000 n +0000006367 00000 n +0000004368 00000 n +0000004507 00000 n +0000004740 00000 n +0000005009 00000 n +0000005270 00000 n +0000005378 00000 n +0000005488 00000 n +0000005601 00000 n +0000005717 00000 n +trailer +<< +/Size 36 +/Root 2 0 R +/Info 4 0 R +>> +startxref +6418 +%%EOF diff --git a/doc/skin/CommonMessages_de.xml b/doc/skin/CommonMessages_de.xml new file mode 100644 index 0000000..bc46119 --- /dev/null +++ b/doc/skin/CommonMessages_de.xml @@ -0,0 +1,23 @@ + + + + Schriftgrösse: + Zuletzt veröffentlicht: + Suche: + Suche auf der Seite mit + diff --git a/doc/skin/CommonMessages_en_US.xml b/doc/skin/CommonMessages_en_US.xml new file mode 100644 index 0000000..88dfe14 --- /dev/null +++ b/doc/skin/CommonMessages_en_US.xml @@ -0,0 +1,23 @@ + + + + Font size: + Last Published: + Search + Search site with + diff --git a/doc/skin/CommonMessages_es.xml b/doc/skin/CommonMessages_es.xml new file mode 100644 index 0000000..63be671 --- /dev/null +++ b/doc/skin/CommonMessages_es.xml @@ -0,0 +1,23 @@ + + + + Tamaño del texto: + Fecha de publicación: + Buscar + Buscar en + diff --git a/doc/skin/CommonMessages_fr.xml b/doc/skin/CommonMessages_fr.xml new file mode 100644 index 0000000..622569a --- /dev/null +++ b/doc/skin/CommonMessages_fr.xml @@ -0,0 +1,23 @@ + + + + Taille : + Dernière publication : + Rechercher + Rechercher sur le site avec + diff --git a/doc/skin/basic.css b/doc/skin/basic.css new file mode 100644 index 0000000..eb24c32 --- /dev/null +++ b/doc/skin/basic.css @@ -0,0 +1,166 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ +/** + * General + */ + +img { border: 0; } + +#content table { + border: 0; + width: 100%; +} +/*Hack to get IE to render the table at 100%*/ +* html #content table { margin-left: -3px; } + +#content th, +#content td { + margin: 0; + padding: 0; + vertical-align: top; +} + +.clearboth { + clear: both; +} + +.note, .warning, .fixme { + border: solid black 1px; + margin: 1em 3em; +} + +.note .label { + background: #369; + color: white; + font-weight: bold; + padding: 5px 10px; +} +.note .content { + background: #F0F0FF; + color: black; + line-height: 120%; + font-size: 90%; + padding: 5px 10px; +} +.warning .label { + background: #C00; + color: white; + font-weight: bold; + padding: 5px 10px; +} +.warning .content { + background: #FFF0F0; + color: black; + line-height: 120%; + font-size: 90%; + padding: 5px 10px; +} +.fixme .label { + background: #C6C600; + color: black; + font-weight: bold; + padding: 5px 10px; +} +.fixme .content { + padding: 5px 10px; +} + +/** + * Typography + */ + +body { + font-family: verdana, "Trebuchet MS", arial, helvetica, sans-serif; + font-size: 100%; +} + +#content { + font-family: Georgia, Palatino, Times, serif; + font-size: 95%; +} +#tabs { + font-size: 70%; +} +#menu { + font-size: 80%; +} +#footer { + font-size: 70%; +} + +h1, h2, h3, h4, h5, h6 { + font-family: "Trebuchet MS", verdana, arial, helvetica, sans-serif; + font-weight: bold; + margin-top: 1em; + margin-bottom: .5em; +} + +h1 { + margin-top: 0; + margin-bottom: 1em; + font-size: 1.4em; +} +#content h1 { + font-size: 160%; + margin-bottom: .5em; +} +#menu h1 { + margin: 0; + padding: 10px; + background: #336699; + color: white; +} +h2 { font-size: 120%; } +h3 { font-size: 100%; } +h4 { font-size: 90%; } +h5 { font-size: 80%; } +h6 { font-size: 75%; } + +p { + line-height: 120%; + text-align: left; + margin-top: .5em; + margin-bottom: 1em; +} + +#content li, +#content th, +#content td, +#content li ul, +#content li ol{ + margin-top: .5em; + margin-bottom: .5em; +} + + +#content li li, +#minitoc-area li{ + margin-top: 0em; + margin-bottom: 0em; +} + +#content .attribution { + text-align: right; + font-style: italic; + font-size: 85%; + margin-top: 1em; +} + +.codefrag { + font-family: "Courier New", Courier, monospace; + font-size: 110%; +} \ No newline at end of file diff --git a/doc/skin/breadcrumbs-optimized.js b/doc/skin/breadcrumbs-optimized.js new file mode 100644 index 0000000..507612a --- /dev/null +++ b/doc/skin/breadcrumbs-optimized.js @@ -0,0 +1,90 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ +var PREPREND_CRUMBS=new Array(); +var link1="@skinconfig.trail.link1.name@"; +var link2="@skinconfig.trail.link2.name@"; +var link3="@skinconfig.trail.link3.name@"; +if(!(link1=="")&&!link1.indexOf( "@" ) == 0){ + PREPREND_CRUMBS.push( new Array( link1, @skinconfig.trail.link1.href@ ) ); } +if(!(link2=="")&&!link2.indexOf( "@" ) == 0){ + PREPREND_CRUMBS.push( new Array( link2, @skinconfig.trail.link2.href@ ) ); } +if(!(link3=="")&&!link3.indexOf( "@" ) == 0){ + PREPREND_CRUMBS.push( new Array( link3, @skinconfig.trail.link3.href@ ) ); } +var DISPLAY_SEPARATOR=" > "; +var DISPLAY_PREPREND=" > "; +var DISPLAY_POSTPREND=":"; +var CSS_CLASS_CRUMB="breadcrumb"; +var CSS_CLASS_TRAIL="breadcrumbTrail"; +var CSS_CLASS_SEPARATOR="crumbSeparator"; +var FILE_EXTENSIONS=new Array( ".html", ".htm", ".jsp", ".php", ".php3", ".php4" ); +var PATH_SEPARATOR="/"; + +function sc(s) { + var l=s.toLowerCase(); + return l.substr(0,1).toUpperCase()+l.substr(1); +} +function getdirs() { + var t=document.location.pathname.split(PATH_SEPARATOR); + var lc=t[t.length-1]; + for(var i=0;i < FILE_EXTENSIONS.length;i++) + { + if(lc.indexOf(FILE_EXTENSIONS[i])) + return t.slice(1,t.length-1); } + return t.slice(1,t.length); +} +function getcrumbs( d ) +{ + var pre = "/"; + var post = "/"; + var c = new Array(); + if( d != null ) + { + for(var i=0;i < d.length;i++) { + pre+=d[i]+postfix; + c.push(new Array(d[i],pre)); } + } + if(PREPREND_CRUMBS.length > 0 ) + return PREPREND_CRUMBS.concat( c ); + return c; +} +function gettrail( c ) +{ + var h=DISPLAY_PREPREND; + for(var i=0;i < c.length;i++) + { + h+=''+sc(c[i][0])+''; + if(i!=(c.length-1)) + h+=DISPLAY_SEPARATOR; } + return h+DISPLAY_POSTPREND; +} + +function gettrailXHTML( c ) +{ + var h=''+DISPLAY_PREPREND; + for(var i=0;i < c.length;i++) + { + h+=''+sc(c[i][0])+''; + if(i!=(c.length-1)) + h+=''+DISPLAY_SEPARATOR+''; } + return h+DISPLAY_POSTPREND+''; +} + +if(document.location.href.toLowerCase().indexOf("http://")==-1) + document.write(gettrail(getcrumbs())); +else + document.write(gettrail(getcrumbs(getdirs()))); + diff --git a/doc/skin/breadcrumbs.js b/doc/skin/breadcrumbs.js new file mode 100644 index 0000000..aea80ec --- /dev/null +++ b/doc/skin/breadcrumbs.js @@ -0,0 +1,237 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ +/** + * This script, when included in a html file, builds a neat breadcrumb trail + * based on its url. That is, if it doesn't contains bugs (I'm relatively + * sure it does). + * + * Typical usage: + * + */ + +/** + * IE 5 on Mac doesn't know Array.push. + * + * Implement it - courtesy to fritz. 
+ */ +var abc = new Array(); +if (!abc.push) { + Array.prototype.push = function(what){this[this.length]=what} +} + +/* ======================================================================== + CONSTANTS + ======================================================================== */ + +/** + * Two-dimensional array containing extra crumbs to place at the front of + * the trail. Specify first the name of the crumb, then the URI that belongs + * to it. You'll need to modify this for every domain or subdomain where + * you use this script (you can leave it as an empty array if you wish) + */ +var PREPREND_CRUMBS = new Array(); + +var link1 = "@skinconfig.trail.link1.name@"; +var link2 = "@skinconfig.trail.link2.name@"; +var link3 = "@skinconfig.trail.link3.name@"; + +var href1 = "@skinconfig.trail.link1.href@"; +var href2 = "@skinconfig.trail.link2.href@"; +var href3 = "@skinconfig.trail.link3.href@"; + + if(!(link1=="")&&!link1.indexOf( "@" ) == 0){ + PREPREND_CRUMBS.push( new Array( link1, href1 ) ); + } + if(!(link2=="")&&!link2.indexOf( "@" ) == 0){ + PREPREND_CRUMBS.push( new Array( link2, href2 ) ); + } + if(!(link3=="")&&!link3.indexOf( "@" ) == 0){ + PREPREND_CRUMBS.push( new Array( link3, href3 ) ); + } + +/** + * String to include between crumbs: + */ +var DISPLAY_SEPARATOR = " > "; +/** + * String to include at the beginning of the trail + */ +var DISPLAY_PREPREND = " > "; +/** + * String to include at the end of the trail + */ +var DISPLAY_POSTPREND = ""; + +/** + * CSS Class to use for a single crumb: + */ +var CSS_CLASS_CRUMB = "breadcrumb"; + +/** + * CSS Class to use for the complete trail: + */ +var CSS_CLASS_TRAIL = "breadcrumbTrail"; + +/** + * CSS Class to use for crumb separator: + */ +var CSS_CLASS_SEPARATOR = "crumbSeparator"; + +/** + * Array of strings containing common file extensions. We use this to + * determine what part of the url to ignore (if it contains one of the + * string specified here, we ignore it). + */ +var FILE_EXTENSIONS = new Array( ".html", ".htm", ".jsp", ".php", ".php3", ".php4" ); + +/** + * String that separates parts of the breadcrumb trail from each other. + * When this is no longer a slash, I'm sure I'll be old and grey. + */ +var PATH_SEPARATOR = "/"; + +/* ======================================================================== + UTILITY FUNCTIONS + ======================================================================== */ +/** + * Capitalize first letter of the provided string and return the modified + * string. 
+ */ +function sentenceCase( string ) +{ return string; + //var lower = string.toLowerCase(); + //return lower.substr(0,1).toUpperCase() + lower.substr(1); +} + +/** + * Returns an array containing the names of all the directories in the + * current document URL + */ +function getDirectoriesInURL() +{ + var trail = document.location.pathname.split( PATH_SEPARATOR ); + + // check whether last section is a file or a directory + var lastcrumb = trail[trail.length-1]; + for( var i = 0; i < FILE_EXTENSIONS.length; i++ ) + { + if( lastcrumb.indexOf( FILE_EXTENSIONS[i] ) ) + { + // it is, remove it and send results + return trail.slice( 1, trail.length-1 ); + } + } + + // it's not; send the trail unmodified + return trail.slice( 1, trail.length ); +} + +/* ======================================================================== + BREADCRUMB FUNCTIONALITY + ======================================================================== */ +/** + * Return a two-dimensional array describing the breadcrumbs based on the + * array of directories passed in. + */ +function getBreadcrumbs( dirs ) +{ + var prefix = "/"; + var postfix = "/"; + + // the array we will return + var crumbs = new Array(); + + if( dirs != null ) + { + for( var i = 0; i < dirs.length; i++ ) + { + prefix += dirs[i] + postfix; + crumbs.push( new Array( dirs[i], prefix ) ); + } + } + + // preprend the PREPREND_CRUMBS + if(PREPREND_CRUMBS.length > 0 ) + { + return PREPREND_CRUMBS.concat( crumbs ); + } + + return crumbs; +} + +/** + * Return a string containing a simple text breadcrumb trail based on the + * two-dimensional array passed in. + */ +function getCrumbTrail( crumbs ) +{ + var xhtml = DISPLAY_PREPREND; + + for( var i = 0; i < crumbs.length; i++ ) + { + xhtml += ''; + xhtml += unescape( crumbs[i][0] ) + ''; + if( i != (crumbs.length-1) ) + { + xhtml += DISPLAY_SEPARATOR; + } + } + + xhtml += DISPLAY_POSTPREND; + + return xhtml; +} + +/** + * Return a string containing an XHTML breadcrumb trail based on the + * two-dimensional array passed in. + */ +function getCrumbTrailXHTML( crumbs ) +{ + var xhtml = ''; + xhtml += DISPLAY_PREPREND; + + for( var i = 0; i < crumbs.length; i++ ) + { + xhtml += ''; + xhtml += unescape( crumbs[i][0] ) + ''; + if( i != (crumbs.length-1) ) + { + xhtml += '' + DISPLAY_SEPARATOR + ''; + } + } + + xhtml += DISPLAY_POSTPREND; + xhtml += ''; + + return xhtml; +} + +/* ======================================================================== + PRINT BREADCRUMB TRAIL + ======================================================================== */ + +// check if we're local; if so, only print the PREPREND_CRUMBS +if( document.location.href.toLowerCase().indexOf( "http://" ) == -1 ) +{ + document.write( getCrumbTrail( getBreadcrumbs() ) ); +} +else +{ + document.write( getCrumbTrail( getBreadcrumbs( getDirectoriesInURL() ) ) ); +} + diff --git a/doc/skin/fontsize.js b/doc/skin/fontsize.js new file mode 100644 index 0000000..11722bf --- /dev/null +++ b/doc/skin/fontsize.js @@ -0,0 +1,166 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. 
You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ +function init() +{ //embedded in the doc + //ndeSetTextSize(); +} + +function checkBrowser(){ + if (!document.getElementsByTagName){ + return true; + } + else{ + return false; + } +} + + +function ndeSetTextSize(chgsize,rs) +{ + var startSize; + var newSize; + + if (!checkBrowser) + { + return; + } + + startSize = parseInt(ndeGetDocTextSize()); + + if (!startSize) + { + startSize = 16; + } + + switch (chgsize) + { + case 'incr': + newSize = startSize + 2; + break; + + case 'decr': + newSize = startSize - 2; + break; + + case 'reset': + if (rs) {newSize = rs;} else {newSize = 16;} + break; + + default: + try{ + newSize = parseInt(ndeReadCookie("nde-textsize")); + } + catch(e){ + alert(e); + } + + if (!newSize || newSize == 'NaN') + { + newSize = startSize; + } + break; + + } + + if (newSize < 10) + { + newSize = 10; + } + + newSize += 'px'; + + document.getElementsByTagName('html')[0].style.fontSize = newSize; + document.getElementsByTagName('body')[0].style.fontSize = newSize; + + ndeCreateCookie("nde-textsize", newSize, 365); +} + +function ndeGetDocTextSize() +{ + if (!checkBrowser) + { + return 0; + } + + var size = 0; + var body = document.getElementsByTagName('body')[0]; + + if (body.style && body.style.fontSize) + { + size = body.style.fontSize; + } + else if (typeof(getComputedStyle) != 'undefined') + { + size = getComputedStyle(body,'').getPropertyValue('font-size'); + } + else if (body.currentStyle) + { + size = body.currentStyle.fontSize; + } + + //fix IE bug + if( isNaN(size)){ + if(size.substring(size.length-1)=="%"){ + return + } + + } + + return size; + +} + + + +function ndeCreateCookie(name,value,days) +{ + var cookie = name + "=" + value + ";"; + + if (days) + { + var date = new Date(); + date.setTime(date.getTime()+(days*24*60*60*1000)); + cookie += " expires=" + date.toGMTString() + ";"; + } + cookie += " path=/"; + + document.cookie = cookie; + +} + +function ndeReadCookie(name) +{ + var nameEQ = name + "="; + var ca = document.cookie.split(';'); + + + for(var i = 0; i < ca.length; i++) + { + var c = ca[i]; + while (c.charAt(0) == ' ') + { + c = c.substring(1, c.length); + } + + ctest = c.substring(0,name.length); + + if(ctest == name){ + return c.substring(nameEQ.length,c.length); + } + } + return null; +} diff --git a/doc/skin/getBlank.js b/doc/skin/getBlank.js new file mode 100644 index 0000000..d9978c0 --- /dev/null +++ b/doc/skin/getBlank.js @@ -0,0 +1,40 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+* See the License for the specific language governing permissions and +* limitations under the License. +*/ +/** + * getBlank script - when included in a html file and called from a form text field, will set the value of this field to "" + * if the text value is still the standard value. + * getPrompt script - when included in a html file and called from a form text field, will set the value of this field to the prompt + * if the text value is empty. + * + * Typical usage: + * + * + */ + diff --git a/doc/skin/getMenu.js b/doc/skin/getMenu.js new file mode 100644 index 0000000..b17aad6 --- /dev/null +++ b/doc/skin/getMenu.js @@ -0,0 +1,45 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ +/** + * This script, when included in a html file, can be used to make collapsible menus + * + * Typical usage: + * + */ + +if (document.getElementById){ + document.write('') +} + + +function SwitchMenu(obj, thePath) +{ +var open = 'url("'+thePath + 'images/chapter_open.gif")'; +var close = 'url("'+thePath + 'images/chapter.gif")'; + if(document.getElementById) { + var el = document.getElementById(obj); + var title = document.getElementById(obj+'Title'); + + if(el.style.display != "block"){ + title.style.backgroundImage = open; + el.style.display = "block"; + }else{ + title.style.backgroundImage = close; + el.style.display = "none"; + } + }// end - if(document.getElementById) +}//end - function SwitchMenu(obj) diff --git a/doc/skin/images/README.txt b/doc/skin/images/README.txt new file mode 100644 index 0000000..e0932f4 --- /dev/null +++ b/doc/skin/images/README.txt @@ -0,0 +1 @@ +The images in this directory are used if the current skin lacks them. 
diff --git a/doc/skin/images/add.jpg b/doc/skin/images/add.jpg new file mode 100644 index 0000000..06831ee Binary files /dev/null and b/doc/skin/images/add.jpg differ diff --git a/doc/skin/images/built-with-forrest-button.png b/doc/skin/images/built-with-forrest-button.png new file mode 100644 index 0000000..4a787ab Binary files /dev/null and b/doc/skin/images/built-with-forrest-button.png differ diff --git a/doc/skin/images/chapter.gif b/doc/skin/images/chapter.gif new file mode 100644 index 0000000..d3d8245 Binary files /dev/null and b/doc/skin/images/chapter.gif differ diff --git a/doc/skin/images/chapter_open.gif b/doc/skin/images/chapter_open.gif new file mode 100644 index 0000000..eecce18 Binary files /dev/null and b/doc/skin/images/chapter_open.gif differ diff --git a/doc/skin/images/current.gif b/doc/skin/images/current.gif new file mode 100644 index 0000000..fd82c08 Binary files /dev/null and b/doc/skin/images/current.gif differ diff --git a/doc/skin/images/error.png b/doc/skin/images/error.png new file mode 100644 index 0000000..b4fe06e Binary files /dev/null and b/doc/skin/images/error.png differ diff --git a/doc/skin/images/external-link.gif b/doc/skin/images/external-link.gif new file mode 100644 index 0000000..ff2f7b2 Binary files /dev/null and b/doc/skin/images/external-link.gif differ diff --git a/doc/skin/images/fix.jpg b/doc/skin/images/fix.jpg new file mode 100644 index 0000000..1d6820b Binary files /dev/null and b/doc/skin/images/fix.jpg differ diff --git a/doc/skin/images/forrest-credit-logo.png b/doc/skin/images/forrest-credit-logo.png new file mode 100644 index 0000000..8a63e42 Binary files /dev/null and b/doc/skin/images/forrest-credit-logo.png differ diff --git a/doc/skin/images/hack.jpg b/doc/skin/images/hack.jpg new file mode 100644 index 0000000..f38d50f Binary files /dev/null and b/doc/skin/images/hack.jpg differ diff --git a/doc/skin/images/header_white_line.gif b/doc/skin/images/header_white_line.gif new file mode 100644 index 0000000..369cae8 Binary files /dev/null and b/doc/skin/images/header_white_line.gif differ diff --git a/doc/skin/images/info.png b/doc/skin/images/info.png new file mode 100644 index 0000000..2e53447 Binary files /dev/null and b/doc/skin/images/info.png differ diff --git a/doc/skin/images/instruction_arrow.png b/doc/skin/images/instruction_arrow.png new file mode 100644 index 0000000..0fbc724 Binary files /dev/null and b/doc/skin/images/instruction_arrow.png differ diff --git a/doc/skin/images/label.gif b/doc/skin/images/label.gif new file mode 100644 index 0000000..c83a389 Binary files /dev/null and b/doc/skin/images/label.gif differ diff --git a/doc/skin/images/page.gif b/doc/skin/images/page.gif new file mode 100644 index 0000000..a144d32 Binary files /dev/null and b/doc/skin/images/page.gif differ diff --git a/doc/skin/images/pdfdoc.gif b/doc/skin/images/pdfdoc.gif new file mode 100644 index 0000000..ec13eb5 Binary files /dev/null and b/doc/skin/images/pdfdoc.gif differ diff --git a/doc/skin/images/poddoc.png b/doc/skin/images/poddoc.png new file mode 100644 index 0000000..a393df7 Binary files /dev/null and b/doc/skin/images/poddoc.png differ diff --git a/doc/skin/images/printer.gif b/doc/skin/images/printer.gif new file mode 100644 index 0000000..a8d0d41 Binary files /dev/null and b/doc/skin/images/printer.gif differ diff --git a/doc/skin/images/rc-b-l-15-1body-2menu-3menu.png b/doc/skin/images/rc-b-l-15-1body-2menu-3menu.png new file mode 100644 index 0000000..cdb460a Binary files /dev/null and 
b/doc/skin/images/rc-b-l-15-1body-2menu-3menu.png differ diff --git a/doc/skin/images/rc-b-r-15-1body-2menu-3menu.png b/doc/skin/images/rc-b-r-15-1body-2menu-3menu.png new file mode 100644 index 0000000..3eff254 Binary files /dev/null and b/doc/skin/images/rc-b-r-15-1body-2menu-3menu.png differ diff --git a/doc/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png b/doc/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png new file mode 100644 index 0000000..b175f27 Binary files /dev/null and b/doc/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png differ diff --git a/doc/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png b/doc/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png new file mode 100644 index 0000000..e9f4440 Binary files /dev/null and b/doc/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png differ diff --git a/doc/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png b/doc/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png new file mode 100644 index 0000000..f1e015b Binary files /dev/null and b/doc/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png differ diff --git a/doc/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png b/doc/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png new file mode 100644 index 0000000..e9f4440 Binary files /dev/null and b/doc/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png differ diff --git a/doc/skin/images/rc-t-r-15-1body-2menu-3menu.png b/doc/skin/images/rc-t-r-15-1body-2menu-3menu.png new file mode 100644 index 0000000..29388b5 Binary files /dev/null and b/doc/skin/images/rc-t-r-15-1body-2menu-3menu.png differ diff --git a/doc/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png b/doc/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png new file mode 100644 index 0000000..944ed73 Binary files /dev/null and b/doc/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png differ diff --git a/doc/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png b/doc/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png new file mode 100644 index 0000000..c4d4a8c Binary files /dev/null and b/doc/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png differ diff --git a/doc/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png b/doc/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png new file mode 100644 index 0000000..944ed73 Binary files /dev/null and b/doc/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png differ diff --git a/doc/skin/images/remove.jpg b/doc/skin/images/remove.jpg new file mode 100644 index 0000000..8c9b9ef Binary files /dev/null and b/doc/skin/images/remove.jpg differ diff --git a/doc/skin/images/rss.png b/doc/skin/images/rss.png new file mode 100644 index 0000000..f0796ac Binary files /dev/null and b/doc/skin/images/rss.png differ diff --git a/doc/skin/images/spacer.gif b/doc/skin/images/spacer.gif new file mode 100644 index 0000000..35d42e8 Binary files /dev/null and b/doc/skin/images/spacer.gif differ diff --git a/doc/skin/images/success.png b/doc/skin/images/success.png new file mode 100644 index 0000000..96fcfea Binary files /dev/null and b/doc/skin/images/success.png differ diff --git a/doc/skin/images/txtdoc.png b/doc/skin/images/txtdoc.png new file mode 100644 index 0000000..bf8b374 Binary files /dev/null and b/doc/skin/images/txtdoc.png differ diff --git a/doc/skin/images/update.jpg b/doc/skin/images/update.jpg new file mode 100644 index 0000000..beb9207 Binary files 
/dev/null and b/doc/skin/images/update.jpg differ diff --git a/doc/skin/images/valid-html401.png b/doc/skin/images/valid-html401.png new file mode 100644 index 0000000..3855210 Binary files /dev/null and b/doc/skin/images/valid-html401.png differ diff --git a/doc/skin/images/vcss.png b/doc/skin/images/vcss.png new file mode 100644 index 0000000..9b2f596 Binary files /dev/null and b/doc/skin/images/vcss.png differ diff --git a/doc/skin/images/warning.png b/doc/skin/images/warning.png new file mode 100644 index 0000000..b81b2ce Binary files /dev/null and b/doc/skin/images/warning.png differ diff --git a/doc/skin/images/xmldoc.gif b/doc/skin/images/xmldoc.gif new file mode 100644 index 0000000..c92d9b9 Binary files /dev/null and b/doc/skin/images/xmldoc.gif differ diff --git a/doc/skin/menu.js b/doc/skin/menu.js new file mode 100644 index 0000000..06ea471 --- /dev/null +++ b/doc/skin/menu.js @@ -0,0 +1,48 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ +/** + * This script, when included in a html file, can be used to make collapsible menus + * + * Typical usage: + * + */ + +if (document.getElementById){ + document.write('') +} + +function SwitchMenu(obj) +{ + if(document.getElementById) { + var el = document.getElementById(obj); + var title = document.getElementById(obj+'Title'); + + if(obj.indexOf("_selected_")==0&&el.style.display == ""){ + el.style.display = "block"; + title.className = "pagegroupselected"; + } + + if(el.style.display != "block"){ + el.style.display = "block"; + title.className = "pagegroupopen"; + } + else{ + el.style.display = "none"; + title.className = "pagegroup"; + } + }// end - if(document.getElementById) +}//end - function SwitchMenu(obj) diff --git a/doc/skin/note.txt b/doc/skin/note.txt new file mode 100644 index 0000000..d34c8db --- /dev/null +++ b/doc/skin/note.txt @@ -0,0 +1,50 @@ +Notes for developer: + +--Legend------------------- +TODO -> blocker +DONE -> blocker +ToDo -> enhancement bug +done -> enhancement bug + +--Issues------------------- +- the corner images should be rendered through svg with the header color. +-> DONE +-> ToDo: get rid of the images and use only divs! + +- the menu points should be displayed "better". +-> DONE +-- Use the krysalis-site menu approach for the overall menu display. +-> DONE +-- Use the old lenya innermenu approch to further enhance the menu . +-> DONE + +- the content area needs some attention. +-> DONE +-- introduce the heading scheme from krysalis () +-> DONE +-> ToDo: make box with round corners +-> done: make underlined with variable border height +-> ToDo: make underline with bottom round corner +-- introduce the toc for each html-page +-> DONE +-- introduce the external-link-images. +-> DONE + +- the publish note should be where now only a border is. +Like
+-> DONE +, but make it configurable. +-> DONE +- footer needs some attention +-> DONE +-- the footer do not have the color profile! Enable it! +-> DONE +-- the footer should as well contain a feedback link. +See http://issues.apache.org/eyebrowse/ReadMsg?listName=forrest-user@xml.apache.org&msgNo=71 +-> DONE + +- introduce credits alternativ location +-> DONE + +- border for published / breadtrail / menu /tab divs +-> ToDo \ No newline at end of file diff --git a/doc/skin/print.css b/doc/skin/print.css new file mode 100644 index 0000000..aaa9931 --- /dev/null +++ b/doc/skin/print.css @@ -0,0 +1,54 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ +body { + font-family: Georgia, Palatino, serif; + font-size: 12pt; + background: white; +} + +#tabs, +#menu, +#content .toc { + display: none; +} + +#content { + width: auto; + padding: 0; + float: none !important; + color: black; + background: inherit; +} + +a:link, a:visited { + color: #336699; + background: inherit; + text-decoration: underline; +} + +#top .logo { + padding: 0; + margin: 0 0 2em 0; +} + +#footer { + margin-top: 4em; +} + +acronym { + border: 0; +} \ No newline at end of file diff --git a/doc/skin/profile.css b/doc/skin/profile.css new file mode 100644 index 0000000..1df706f --- /dev/null +++ b/doc/skin/profile.css @@ -0,0 +1,161 @@ + + +/* ==================== aural ============================ */ + +@media aural { + h1, h2, h3, h4, h5, h6 { voice-family: paul, male; stress: 20; richness: 90 } + h1 { pitch: x-low; pitch-range: 90 } + h2 { pitch: x-low; pitch-range: 80 } + h3 { pitch: low; pitch-range: 70 } + h4 { pitch: medium; pitch-range: 60 } + h5 { pitch: medium; pitch-range: 50 } + h6 { pitch: medium; pitch-range: 40 } + li, dt, dd { pitch: medium; richness: 60 } + dt { stress: 80 } + pre, code, tt { pitch: medium; pitch-range: 0; stress: 0; richness: 80 } + em { pitch: medium; pitch-range: 60; stress: 60; richness: 50 } + strong { pitch: medium; pitch-range: 60; stress: 90; richness: 90 } + dfn { pitch: high; pitch-range: 60; stress: 60 } + s, strike { richness: 0 } + i { pitch: medium; pitch-range: 60; stress: 60; richness: 50 } + b { pitch: medium; pitch-range: 60; stress: 90; richness: 90 } + u { richness: 0 } + + :link { voice-family: harry, male } + :visited { voice-family: betty, female } + :active { voice-family: betty, female; pitch-range: 80; pitch: x-high } +} + +a.external { + padding: 0 20px 0px 0px; + display:inline; + background-repeat: no-repeat; + background-position: center right; + background-image: url(images/external-link.gif); +} + +#top { background-color: #FFFFFF;} + +#top .header .current { background-color: #4C6C8F;} +#top .header .current a:link { color: #ffffff; } +#top .header .current a:visited { color: #ffffff; } +#top .header .current a:hover { color: 
#ffffff; } + +#tabs li { background-color: #E5E4D9 ;} +#tabs li a:link { color: #000000; } +#tabs li a:visited { color: #000000; } +#tabs li a:hover { color: #000000; } + +#level2tabs a.selected { background-color: #4C6C8F ;} +#level2tabs a:link { color: #ffffff; } +#level2tabs a:visited { color: #ffffff; } +#level2tabs a:hover { color: #ffffff; } + +#level2tabs { background-color: #E5E4D9;} +#level2tabs a.unselected:link { color: #000000; } +#level2tabs a.unselected:visited { color: #000000; } +#level2tabs a.unselected:hover { color: #000000; } + +.heading { background-color: #E5E4D9;} + +.boxed { background-color: #E5E4D9;} +.underlined_5 {border-bottom: solid 5px #E5E4D9;} +.underlined_10 {border-bottom: solid 10px #E5E4D9;} +table caption { +background-color: #E5E4D9; +color: #000000; +} + +#feedback { +color: #FFFFFF; +background: #4C6C8F; +text-align: center; +} +#feedback #feedbackto { +color: #FFFFFF; +} + +#publishedStrip { +color: #FFFFFF; +background: #4C6C8F; +} + +#publishedStrip { +color: #000000; +background: #E5E4D9; +} + +#menu .menupagetitle { background-color: #CFDCED; + color: #000000;} + +#menu { border-color: #999999;} +#menu .menupagetitle { border-color: #999999;} +#menu .menupageitemgroup { border-color: #999999;} + +#menu { background-color: #4C6C8F;} +#menu { color: #ffffff;} +#menu a:link { color: #ffffff;} +#menu a:visited { color: #ffffff;} +#menu a:hover { +background-color: #4C6C8F; +color: #ffffff;} + +#menu h1 { +color: #000000; +background-color: #cfdced; +} + +#top .searchbox { +background-color: #E5E4D9 ; +color: #000000; +} + +#menu .menupageitemgroup { +background-color: #E5E4D9; +} +#menu .menupageitem { +color: #000000; +} +#menu .menupageitem a:link { color: #000000;} +#menu .menupageitem a:visited { color: #000000;} +#menu .menupageitem a:hover { +background-color: #E5E4D9; +color: #000000; +} + +body{ +background-color: #ffffff; +color: #000000; +} +a:link { color:#0000ff} +a:visited { color:#009999} +a:hover { color:#6587ff} + + +.ForrestTable { background-color: #ccc;} + +.ForrestTable td { background-color: #ffffff;} + +.highlight { background-color: #ffff00;} + +.fixme { border-color: #c60;} + +.note { border-color: #069;} + +.warning { border-color: #900;} + +.code { border-color: #a5b6c6;} + +#footer { background-color: #E5E4D9;} +/* extra-css */ + + p.quote { + margin-left: 2em; + padding: .5em; + background-color: #f0f0f0; + font-family: monospace; + } + + #footer a { color: #0F3660; } + #footer a:visited { color: #009999; } + \ No newline at end of file diff --git a/doc/skin/prototype.js b/doc/skin/prototype.js new file mode 100644 index 0000000..ed7d920 --- /dev/null +++ b/doc/skin/prototype.js @@ -0,0 +1,1257 @@ +/* Prototype JavaScript framework, version 1.4.0_pre4 + * (c) 2005 Sam Stephenson + * + * THIS FILE IS AUTOMATICALLY GENERATED. When sending patches, please diff + * against the source tree, available from the Prototype darcs repository. + * + * Prototype is freely distributable under the terms of an MIT-style license. 
+ * + * For details, see the Prototype web site: http://prototype.conio.net/ + * +/*--------------------------------------------------------------------------*/ + +var Prototype = { + Version: '1.4.0_pre4', + + emptyFunction: function() {}, + K: function(x) {return x} +} + +var Class = { + create: function() { + return function() { + this.initialize.apply(this, arguments); + } + } +} + +var Abstract = new Object(); + +Object.extend = function(destination, source) { + for (property in source) { + destination[property] = source[property]; + } + return destination; +} + +Function.prototype.bind = function(object) { + var __method = this; + return function() { + return __method.apply(object, arguments); + } +} + +Function.prototype.bindAsEventListener = function(object) { + var __method = this; + return function(event) { + return __method.call(object, event || window.event); + } +} + +Number.prototype.toColorPart = function() { + var digits = this.toString(16); + if (this < 16) return '0' + digits; + return digits; +} + +var Try = { + these: function() { + var returnValue; + + for (var i = 0; i < arguments.length; i++) { + var lambda = arguments[i]; + try { + returnValue = lambda(); + break; + } catch (e) {} + } + + return returnValue; + } +} + +/*--------------------------------------------------------------------------*/ + +var PeriodicalExecuter = Class.create(); +PeriodicalExecuter.prototype = { + initialize: function(callback, frequency) { + this.callback = callback; + this.frequency = frequency; + this.currentlyExecuting = false; + + this.registerCallback(); + }, + + registerCallback: function() { + setInterval(this.onTimerEvent.bind(this), this.frequency * 1000); + }, + + onTimerEvent: function() { + if (!this.currentlyExecuting) { + try { + this.currentlyExecuting = true; + this.callback(); + } finally { + this.currentlyExecuting = false; + } + } + } +} + +/*--------------------------------------------------------------------------*/ + +function $() { + var elements = new Array(); + + for (var i = 0; i < arguments.length; i++) { + var element = arguments[i]; + if (typeof element == 'string') + element = document.getElementById(element); + + if (arguments.length == 1) + return element; + + elements.push(element); + } + + return elements; +} + +if (!Array.prototype.push) { + Array.prototype.push = function() { + var startLength = this.length; + for (var i = 0; i < arguments.length; i++) + this[startLength + i] = arguments[i]; + return this.length; + } +} + +if (!Function.prototype.apply) { + // Based on code from http://www.youngpup.net/ + Function.prototype.apply = function(object, parameters) { + var parameterStrings = new Array(); + if (!object) object = window; + if (!parameters) parameters = new Array(); + + for (var i = 0; i < parameters.length; i++) + parameterStrings[i] = 'parameters[' + i + ']'; + + object.__apply__ = this; + var result = eval('object.__apply__(' + + parameterStrings.join(', ') + ')'); + object.__apply__ = null; + + return result; + } +} + +Object.extend(String.prototype, { + stripTags: function() { + return this.replace(/<\/?[^>]+>/gi, ''); + }, + + escapeHTML: function() { + var div = document.createElement('div'); + var text = document.createTextNode(this); + div.appendChild(text); + return div.innerHTML; + }, + + unescapeHTML: function() { + var div = document.createElement('div'); + div.innerHTML = this.stripTags(); + return div.childNodes[0].nodeValue; + }, + + parseQuery: function() { + var str = this; + if (str.substring(0,1) == '?') { + str = 
this.substring(1); + } + var result = {}; + var pairs = str.split('&'); + for (var i = 0; i < pairs.length; i++) { + var pair = pairs[i].split('='); + result[pair[0]] = pair[1]; + } + return result; + } +}); + + +var _break = new Object(); +var _continue = new Object(); + +var Enumerable = { + each: function(iterator) { + var index = 0; + try { + this._each(function(value) { + try { + iterator(value, index++); + } catch (e) { + if (e != _continue) throw e; + } + }); + } catch (e) { + if (e != _break) throw e; + } + }, + + all: function(iterator) { + var result = true; + this.each(function(value, index) { + if (!(result &= (iterator || Prototype.K)(value, index))) + throw _break; + }); + return result; + }, + + any: function(iterator) { + var result = true; + this.each(function(value, index) { + if (result &= (iterator || Prototype.K)(value, index)) + throw _break; + }); + return result; + }, + + collect: function(iterator) { + var results = []; + this.each(function(value, index) { + results.push(iterator(value, index)); + }); + return results; + }, + + detect: function (iterator) { + var result; + this.each(function(value, index) { + if (iterator(value, index)) { + result = value; + throw _break; + } + }); + return result; + }, + + findAll: function(iterator) { + var results = []; + this.each(function(value, index) { + if (iterator(value, index)) + results.push(value); + }); + return results; + }, + + grep: function(pattern, iterator) { + var results = []; + this.each(function(value, index) { + var stringValue = value.toString(); + if (stringValue.match(pattern)) + results.push((iterator || Prototype.K)(value, index)); + }) + return results; + }, + + include: function(object) { + var found = false; + this.each(function(value) { + if (value == object) { + found = true; + throw _break; + } + }); + return found; + }, + + inject: function(memo, iterator) { + this.each(function(value, index) { + memo = iterator(memo, value, index); + }); + return memo; + }, + + invoke: function(method) { + var args = $A(arguments).slice(1); + return this.collect(function(value) { + return value[method].apply(value, args); + }); + }, + + max: function(iterator) { + var result; + this.each(function(value, index) { + value = (iterator || Prototype.K)(value, index); + if (value >= (result || value)) + result = value; + }); + return result; + }, + + min: function(iterator) { + var result; + this.each(function(value, index) { + value = (iterator || Prototype.K)(value, index); + if (value <= (result || value)) + result = value; + }); + return result; + }, + + partition: function(iterator) { + var trues = [], falses = []; + this.each(function(value, index) { + ((iterator || Prototype.K)(value, index) ? + trues : falses).push(value); + }); + return [trues, falses]; + }, + + pluck: function(property) { + var results = []; + this.each(function(value, index) { + results.push(value[property]); + }); + return results; + }, + + reject: function(iterator) { + var results = []; + this.each(function(value, index) { + if (!iterator(value, index)) + results.push(value); + }); + return results; + }, + + sortBy: function(iterator) { + return this.collect(function(value, index) { + return {value: value, criteria: iterator(value, index)}; + }).sort(function(left, right) { + var a = left.criteria, b = right.criteria; + return a < b ? -1 : a > b ? 
1 : 0;
+ }).pluck('value');
+ },
+
+ toArray: function() {
+ return this.collect(Prototype.K);
+ },
+
+ zip: function() {
+ var iterator = Prototype.K, args = $A(arguments);
+ if (typeof args.last() == 'function')
+ iterator = args.pop();
+
+ var collections = [this].concat(args).map($A);
+ return this.map(function(value, index) {
+ iterator(value = collections.pluck(index));
+ return value;
+ });
+ }
+}
+
+Object.extend(Enumerable, {
+ map: Enumerable.collect,
+ find: Enumerable.detect,
+ select: Enumerable.findAll,
+ member: Enumerable.include,
+ entries: Enumerable.toArray
+});
+
+$A = Array.from = function(iterable) {
+ var results = [];
+ for (var i = 0; i < iterable.length; i++)
+ results.push(iterable[i]);
+ return results;
+}
+
+Object.extend(Array.prototype, {
+ _each: function(iterator) {
+ for (var i = 0; i < this.length; i++)
+ iterator(this[i]);
+ },
+
+ first: function() {
+ return this[0];
+ },
+
+ last: function() {
+ return this[this.length - 1];
+ }
+});
+
+Object.extend(Array.prototype, Enumerable);
+
+
+var Ajax = {
+ getTransport: function() {
+ return Try.these(
+ function() {return new ActiveXObject('Msxml2.XMLHTTP')},
+ function() {return new ActiveXObject('Microsoft.XMLHTTP')},
+ function() {return new XMLHttpRequest()}
+ ) || false;
+ }
+}
+
+Ajax.Base = function() {};
+Ajax.Base.prototype = {
+ setOptions: function(options) {
+ this.options = {
+ method: 'post',
+ asynchronous: true,
+ parameters: ''
+ }
+ Object.extend(this.options, options || {});
+ },
+
+ responseIsSuccess: function() {
+ return this.transport.status == undefined
+ || this.transport.status == 0
+ || (this.transport.status >= 200 && this.transport.status < 300);
+ },
+
+ responseIsFailure: function() {
+ return !this.responseIsSuccess();
+ }
+}
+
+Ajax.Request = Class.create();
+Ajax.Request.Events =
+ ['Uninitialized', 'Loading', 'Loaded', 'Interactive', 'Complete'];
+
+Ajax.Request.prototype = Object.extend(new Ajax.Base(), {
+ initialize: function(url, options) {
+ this.transport = Ajax.getTransport();
+ this.setOptions(options);
+ this.request(url);
+ },
+
+ request: function(url) {
+ var parameters = this.options.parameters || '';
+ if (parameters.length > 0) parameters += '&_=';
+
+ try {
+ if (this.options.method == 'get')
+ url += '?' + parameters;
+
+ this.transport.open(this.options.method, url,
+ this.options.asynchronous);
+
+ if (this.options.asynchronous) {
+ this.transport.onreadystatechange = this.onStateChange.bind(this);
+ setTimeout((function() {this.respondToReadyState(1)}).bind(this), 10);
+ }
+
+ this.setRequestHeaders();
+
+ var body = this.options.postBody ? this.options.postBody : parameters;
+ this.transport.send(this.options.method == 'post' ? body : null);
+
+ } catch (e) {
+ }
+ },
+
+ setRequestHeaders: function() {
+ var requestHeaders =
+ ['X-Requested-With', 'XMLHttpRequest',
+ 'X-Prototype-Version', Prototype.Version];
+
+ if (this.options.method == 'post') {
+ requestHeaders.push('Content-type',
+ 'application/x-www-form-urlencoded');
+
+ /* Force "Connection: close" for Mozilla browsers to work around
+ * a bug where XMLHttpRequest sends an incorrect Content-length
+ * header. See Mozilla Bugzilla #246651.
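+ * (The overrideMimeType check below doubles as a Mozilla/Gecko test,
+ * so the extra header is only sent to the affected browsers.)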
+ */
+ if (this.transport.overrideMimeType)
+ requestHeaders.push('Connection', 'close');
+ }
+
+ if (this.options.requestHeaders)
+ requestHeaders.push.apply(requestHeaders, this.options.requestHeaders);
+
+ for (var i = 0; i < requestHeaders.length; i += 2)
+ this.transport.setRequestHeader(requestHeaders[i], requestHeaders[i+1]);
+ },
+
+ onStateChange: function() {
+ var readyState = this.transport.readyState;
+ if (readyState != 1)
+ this.respondToReadyState(this.transport.readyState);
+ },
+
+ respondToReadyState: function(readyState) {
+ var event = Ajax.Request.Events[readyState];
+
+ if (event == 'Complete')
+ (this.options['on' + this.transport.status]
+ || this.options['on' + (this.responseIsSuccess() ? 'Success' : 'Failure')]
+ || Prototype.emptyFunction)(this.transport);
+
+ (this.options['on' + event] || Prototype.emptyFunction)(this.transport);
+
+ /* Avoid memory leak in MSIE: clean up the oncomplete event handler */
+ if (event == 'Complete')
+ this.transport.onreadystatechange = Prototype.emptyFunction;
+ }
+});
+
+Ajax.Updater = Class.create();
+/* matches embedded <script> blocks so updateContent can strip and eval them */
+Ajax.Updater.ScriptFragment = '(?:<script.*?>)((\n|.)*?)(?:<\/script>)';
+
+Object.extend(Object.extend(Ajax.Updater.prototype, Ajax.Request.prototype), {
+ initialize: function(container, url, options) {
+ this.containers = {
+ success: container.success ? $(container.success) : $(container),
+ failure: container.failure ? $(container.failure) :
+ (container.success ? null : $(container))
+ }
+
+ this.transport = Ajax.getTransport();
+ this.setOptions(options);
+
+ var onComplete = this.options.onComplete || Prototype.emptyFunction;
+ this.options.onComplete = (function() {
+ this.updateContent();
+ onComplete(this.transport);
+ }).bind(this);
+
+ this.request(url);
+ },
+
+ updateContent: function() {
+ var receiver = this.responseIsSuccess() ?
+ this.containers.success : this.containers.failure;
+
+ var match = new RegExp(Ajax.Updater.ScriptFragment, 'img');
+ var response = this.transport.responseText.replace(match, '');
+ var scripts = this.transport.responseText.match(match);
+
+ if (receiver) {
+ if (this.options.insertion) {
+ new this.options.insertion(receiver, response);
+ } else {
+ receiver.innerHTML = response;
+ }
+ }
+
+ if (this.responseIsSuccess()) {
+ if (this.onComplete)
+ setTimeout((function() {this.onComplete(
+ this.transport)}).bind(this), 10);
+ }
+
+ if (this.options.evalScripts && scripts) {
+ match = new RegExp(Ajax.Updater.ScriptFragment, 'im');
+ setTimeout((function() {
+ for (var i = 0; i < scripts.length; i++)
+ eval(scripts[i].match(match)[1]);
+ }).bind(this), 10);
+ }
+ }
+});
+
+Ajax.PeriodicalUpdater = Class.create();
+Ajax.PeriodicalUpdater.prototype = Object.extend(new Ajax.Base(), {
+ initialize: function(container, url, options) {
+ this.setOptions(options);
+ this.onComplete = this.options.onComplete;
+
+ this.frequency = (this.options.frequency || 2);
+ this.decay = 1;
+
+ this.updater = {};
+ this.container = container;
+ this.url = url;
+
+ this.start();
+ },
+
+ start: function() {
+ this.options.onComplete = this.updateComplete.bind(this);
+ this.onTimerEvent();
+ },
+
+ stop: function() {
+ this.updater.onComplete = undefined;
+ clearTimeout(this.timer);
+ (this.onComplete || Prototype.emptyFunction).apply(this, arguments);
+ },
+
+ updateComplete: function(request) {
+ if (this.options.decay) {
+ this.decay = (request.responseText == this.lastText ?
+ this.decay * this.options.decay : 1); + + this.lastText = request.responseText; + } + this.timer = setTimeout(this.onTimerEvent.bind(this), + this.decay * this.frequency * 1000); + }, + + onTimerEvent: function() { + this.updater = new Ajax.Updater(this.container, this.url, this.options); + } +}); + +document.getElementsByClassName = function(className) { + var children = document.getElementsByTagName('*') || document.all; + var elements = new Array(); + + for (var i = 0; i < children.length; i++) { + var child = children[i]; + var classNames = child.className.split(' '); + for (var j = 0; j < classNames.length; j++) { + if (classNames[j] == className) { + elements.push(child); + break; + } + } + } + + return elements; +} + +/*--------------------------------------------------------------------------*/ + +if (!window.Element) { + var Element = new Object(); +} + +Object.extend(Element, { + toggle: function() { + for (var i = 0; i < arguments.length; i++) { + var element = $(arguments[i]); + element.style.display = + (element.style.display == 'none' ? '' : 'none'); + } + }, + + hide: function() { + for (var i = 0; i < arguments.length; i++) { + var element = $(arguments[i]); + element.style.display = 'none'; + } + }, + + show: function() { + for (var i = 0; i < arguments.length; i++) { + var element = $(arguments[i]); + element.style.display = ''; + } + }, + + remove: function(element) { + element = $(element); + element.parentNode.removeChild(element); + }, + + getHeight: function(element) { + element = $(element); + return element.offsetHeight; + }, + + hasClassName: function(element, className) { + element = $(element); + if (!element) + return; + var a = element.className.split(' '); + for (var i = 0; i < a.length; i++) { + if (a[i] == className) + return true; + } + return false; + }, + + addClassName: function(element, className) { + element = $(element); + Element.removeClassName(element, className); + element.className += ' ' + className; + }, + + removeClassName: function(element, className) { + element = $(element); + if (!element) + return; + var newClassName = ''; + var a = element.className.split(' '); + for (var i = 0; i < a.length; i++) { + if (a[i] != className) { + if (i > 0) + newClassName += ' '; + newClassName += a[i]; + } + } + element.className = newClassName; + }, + + // removes whitespace-only text node children + cleanWhitespace: function(element) { + var element = $(element); + for (var i = 0; i < element.childNodes.length; i++) { + var node = element.childNodes[i]; + if (node.nodeType == 3 && !/\S/.test(node.nodeValue)) + Element.remove(node); + } + } +}); + +var Toggle = new Object(); +Toggle.display = Element.toggle; + +/*--------------------------------------------------------------------------*/ + +Abstract.Insertion = function(adjacency) { + this.adjacency = adjacency; +} + +Abstract.Insertion.prototype = { + initialize: function(element, content) { + this.element = $(element); + this.content = content; + + if (this.adjacency && this.element.insertAdjacentHTML) { + this.element.insertAdjacentHTML(this.adjacency, this.content); + } else { + this.range = this.element.ownerDocument.createRange(); + if (this.initializeRange) this.initializeRange(); + this.fragment = this.range.createContextualFragment(this.content); + this.insertContent(); + } + } +} + +var Insertion = new Object(); + +Insertion.Before = Class.create(); +Insertion.Before.prototype = Object.extend(new Abstract.Insertion('beforeBegin'), { + initializeRange: function() { + 
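// W3C Range path (used when insertAdjacentHTML is unavailable):
+ // the range is anchored just before the element, so e.g.
+ // new Insertion.Before('myDiv', '<p>new</p>') renders the markup above #myDiv
+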
this.range.setStartBefore(this.element); + }, + + insertContent: function() { + this.element.parentNode.insertBefore(this.fragment, this.element); + } +}); + +Insertion.Top = Class.create(); +Insertion.Top.prototype = Object.extend(new Abstract.Insertion('afterBegin'), { + initializeRange: function() { + this.range.selectNodeContents(this.element); + this.range.collapse(true); + }, + + insertContent: function() { + this.element.insertBefore(this.fragment, this.element.firstChild); + } +}); + +Insertion.Bottom = Class.create(); +Insertion.Bottom.prototype = Object.extend(new Abstract.Insertion('beforeEnd'), { + initializeRange: function() { + this.range.selectNodeContents(this.element); + this.range.collapse(this.element); + }, + + insertContent: function() { + this.element.appendChild(this.fragment); + } +}); + +Insertion.After = Class.create(); +Insertion.After.prototype = Object.extend(new Abstract.Insertion('afterEnd'), { + initializeRange: function() { + this.range.setStartAfter(this.element); + }, + + insertContent: function() { + this.element.parentNode.insertBefore(this.fragment, + this.element.nextSibling); + } +}); + +var Field = { + clear: function() { + for (var i = 0; i < arguments.length; i++) + $(arguments[i]).value = ''; + }, + + focus: function(element) { + $(element).focus(); + }, + + present: function() { + for (var i = 0; i < arguments.length; i++) + if ($(arguments[i]).value == '') return false; + return true; + }, + + select: function(element) { + $(element).select(); + }, + + activate: function(element) { + $(element).focus(); + $(element).select(); + } +} + +/*--------------------------------------------------------------------------*/ + +var Form = { + serialize: function(form) { + var elements = Form.getElements($(form)); + var queryComponents = new Array(); + + for (var i = 0; i < elements.length; i++) { + var queryComponent = Form.Element.serialize(elements[i]); + if (queryComponent) + queryComponents.push(queryComponent); + } + + return queryComponents.join('&'); + }, + + getElements: function(form) { + var form = $(form); + var elements = new Array(); + + for (tagName in Form.Element.Serializers) { + var tagElements = form.getElementsByTagName(tagName); + for (var j = 0; j < tagElements.length; j++) + elements.push(tagElements[j]); + } + return elements; + }, + + getInputs: function(form, typeName, name) { + var form = $(form); + var inputs = form.getElementsByTagName('input'); + + if (!typeName && !name) + return inputs; + + var matchingInputs = new Array(); + for (var i = 0; i < inputs.length; i++) { + var input = inputs[i]; + if ((typeName && input.type != typeName) || + (name && input.name != name)) + continue; + matchingInputs.push(input); + } + + return matchingInputs; + }, + + disable: function(form) { + var elements = Form.getElements(form); + for (var i = 0; i < elements.length; i++) { + var element = elements[i]; + element.blur(); + element.disabled = 'true'; + } + }, + + enable: function(form) { + var elements = Form.getElements(form); + for (var i = 0; i < elements.length; i++) { + var element = elements[i]; + element.disabled = ''; + } + }, + + focusFirstElement: function(form) { + var form = $(form); + var elements = Form.getElements(form); + for (var i = 0; i < elements.length; i++) { + var element = elements[i]; + if (element.type != 'hidden' && !element.disabled) { + Field.activate(element); + break; + } + } + }, + + reset: function(form) { + $(form).reset(); + } +} + +Form.Element = { + serialize: function(element) { + var element = 
$(element); + var method = element.tagName.toLowerCase(); + var parameter = Form.Element.Serializers[method](element); + + if (parameter) + return encodeURIComponent(parameter[0]) + '=' + + encodeURIComponent(parameter[1]); + }, + + getValue: function(element) { + var element = $(element); + var method = element.tagName.toLowerCase(); + var parameter = Form.Element.Serializers[method](element); + + if (parameter) + return parameter[1]; + } +} + +Form.Element.Serializers = { + input: function(element) { + switch (element.type.toLowerCase()) { + case 'submit': + case 'hidden': + case 'password': + case 'text': + return Form.Element.Serializers.textarea(element); + case 'checkbox': + case 'radio': + return Form.Element.Serializers.inputSelector(element); + } + return false; + }, + + inputSelector: function(element) { + if (element.checked) + return [element.name, element.value]; + }, + + textarea: function(element) { + return [element.name, element.value]; + }, + + select: function(element) { + var value = ''; + if (element.type == 'select-one') { + var index = element.selectedIndex; + if (index >= 0) + value = element.options[index].value || element.options[index].text; + } else { + value = new Array(); + for (var i = 0; i < element.length; i++) { + var opt = element.options[i]; + if (opt.selected) + value.push(opt.value || opt.text); + } + } + return [element.name, value]; + } +} + +/*--------------------------------------------------------------------------*/ + +var $F = Form.Element.getValue; + +/*--------------------------------------------------------------------------*/ + +Abstract.TimedObserver = function() {} +Abstract.TimedObserver.prototype = { + initialize: function(element, frequency, callback) { + this.frequency = frequency; + this.element = $(element); + this.callback = callback; + + this.lastValue = this.getValue(); + this.registerCallback(); + }, + + registerCallback: function() { + setInterval(this.onTimerEvent.bind(this), this.frequency * 1000); + }, + + onTimerEvent: function() { + var value = this.getValue(); + if (this.lastValue != value) { + this.callback(this.element, value); + this.lastValue = value; + } + } +} + +Form.Element.Observer = Class.create(); +Form.Element.Observer.prototype = Object.extend(new Abstract.TimedObserver(), { + getValue: function() { + return Form.Element.getValue(this.element); + } +}); + +Form.Observer = Class.create(); +Form.Observer.prototype = Object.extend(new Abstract.TimedObserver(), { + getValue: function() { + return Form.serialize(this.element); + } +}); + +/*--------------------------------------------------------------------------*/ + +Abstract.EventObserver = function() {} +Abstract.EventObserver.prototype = { + initialize: function(element, callback) { + this.element = $(element); + this.callback = callback; + + this.lastValue = this.getValue(); + if (this.element.tagName.toLowerCase() == 'form') + this.registerFormCallbacks(); + else + this.registerCallback(this.element); + }, + + onElementEvent: function() { + var value = this.getValue(); + if (this.lastValue != value) { + this.callback(this.element, value); + this.lastValue = value; + } + }, + + registerFormCallbacks: function() { + var elements = Form.getElements(this.element); + for (var i = 0; i < elements.length; i++) + this.registerCallback(elements[i]); + }, + + registerCallback: function(element) { + if (element.type) { + switch (element.type.toLowerCase()) { + case 'checkbox': + case 'radio': + element.target = this; + element.prev_onclick = element.onclick || 
Prototype.emptyFunction; + element.onclick = function() { + this.prev_onclick(); + this.target.onElementEvent(); + } + break; + case 'password': + case 'text': + case 'textarea': + case 'select-one': + case 'select-multiple': + element.target = this; + element.prev_onchange = element.onchange || Prototype.emptyFunction; + element.onchange = function() { + this.prev_onchange(); + this.target.onElementEvent(); + } + break; + } + } + } +} + +Form.Element.EventObserver = Class.create(); +Form.Element.EventObserver.prototype = Object.extend(new Abstract.EventObserver(), { + getValue: function() { + return Form.Element.getValue(this.element); + } +}); + +Form.EventObserver = Class.create(); +Form.EventObserver.prototype = Object.extend(new Abstract.EventObserver(), { + getValue: function() { + return Form.serialize(this.element); + } +}); + + +if (!window.Event) { + var Event = new Object(); +} + +Object.extend(Event, { + KEY_BACKSPACE: 8, + KEY_TAB: 9, + KEY_RETURN: 13, + KEY_ESC: 27, + KEY_LEFT: 37, + KEY_UP: 38, + KEY_RIGHT: 39, + KEY_DOWN: 40, + KEY_DELETE: 46, + + element: function(event) { + return event.target || event.srcElement; + }, + + isLeftClick: function(event) { + return (((event.which) && (event.which == 1)) || + ((event.button) && (event.button == 1))); + }, + + pointerX: function(event) { + return event.pageX || (event.clientX + + (document.documentElement.scrollLeft || document.body.scrollLeft)); + }, + + pointerY: function(event) { + return event.pageY || (event.clientY + + (document.documentElement.scrollTop || document.body.scrollTop)); + }, + + stop: function(event) { + if (event.preventDefault) { + event.preventDefault(); + event.stopPropagation(); + } else { + event.returnValue = false; + } + }, + + // find the first node with the given tagName, starting from the + // node the event was triggered on; traverses the DOM upwards + findElement: function(event, tagName) { + var element = Event.element(event); + while (element.parentNode && (!element.tagName || + (element.tagName.toUpperCase() != tagName.toUpperCase()))) + element = element.parentNode; + return element; + }, + + observers: false, + + _observeAndCache: function(element, name, observer, useCapture) { + if (!this.observers) this.observers = []; + if (element.addEventListener) { + this.observers.push([element, name, observer, useCapture]); + element.addEventListener(name, observer, useCapture); + } else if (element.attachEvent) { + this.observers.push([element, name, observer, useCapture]); + element.attachEvent('on' + name, observer); + } + }, + + unloadCache: function() { + if (!Event.observers) return; + for (var i = 0; i < Event.observers.length; i++) { + Event.stopObserving.apply(this, Event.observers[i]); + Event.observers[i][0] = null; + } + Event.observers = false; + }, + + observe: function(element, name, observer, useCapture) { + var element = $(element); + useCapture = useCapture || false; + + if (name == 'keypress' && + ((/Konqueror|Safari|KHTML/.test(navigator.userAgent)) + || element.attachEvent)) + name = 'keydown'; + + this._observeAndCache(element, name, observer, useCapture); + }, + + stopObserving: function(element, name, observer, useCapture) { + var element = $(element); + useCapture = useCapture || false; + + if (name == 'keypress' && + ((/Konqueror|Safari|KHTML/.test(navigator.userAgent)) + || element.detachEvent)) + name = 'keydown'; + + if (element.removeEventListener) { + element.removeEventListener(name, observer, useCapture); + } else if (element.detachEvent) { + 
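// IE path: removeEventListener is unavailable, so the handler
+ // registered with attachEvent('on' + name, ...) is detached here
+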
element.detachEvent('on' + name, observer);
+ }
+ }
+});
+
+/* prevent memory leaks in IE */
+Event.observe(window, 'unload', Event.unloadCache, false);
+
+var Position = {
+
+ // set to true if needed, warning: firefox performance problems
+ // NOT needed for page scrolling, only if draggable contained in
+ // scrollable elements
+ includeScrollOffsets: false,
+
+ // must be called before calling withinIncludingScrolloffsets, every time the
+ // page is scrolled
+ prepare: function() {
+ this.deltaX = window.pageXOffset
+ || document.documentElement.scrollLeft
+ || document.body.scrollLeft
+ || 0;
+ this.deltaY = window.pageYOffset
+ || document.documentElement.scrollTop
+ || document.body.scrollTop
+ || 0;
+ },
+
+ realOffset: function(element) {
+ var valueT = 0, valueL = 0;
+ do {
+ valueT += element.scrollTop || 0;
+ valueL += element.scrollLeft || 0;
+ element = element.parentNode;
+ } while (element);
+ return [valueL, valueT];
+ },
+
+ cumulativeOffset: function(element) {
+ var valueT = 0, valueL = 0;
+ do {
+ valueT += element.offsetTop || 0;
+ valueL += element.offsetLeft || 0;
+ element = element.offsetParent;
+ } while (element);
+ return [valueL, valueT];
+ },
+
+ // caches x/y coordinate pair to use with overlap
+ within: function(element, x, y) {
+ if (this.includeScrollOffsets)
+ return this.withinIncludingScrolloffsets(element, x, y);
+ this.xcomp = x;
+ this.ycomp = y;
+ this.offset = this.cumulativeOffset(element);
+
+ return (y >= this.offset[1] &&
+ y < this.offset[1] + element.offsetHeight &&
+ x >= this.offset[0] &&
+ x < this.offset[0] + element.offsetWidth);
+ },
+
+ withinIncludingScrolloffsets: function(element, x, y) {
+ var offsetcache = this.realOffset(element);
+
+ this.xcomp = x + offsetcache[0] - this.deltaX;
+ this.ycomp = y + offsetcache[1] - this.deltaY;
+ this.offset = this.cumulativeOffset(element);
+
+ return (this.ycomp >= this.offset[1] &&
+ this.ycomp < this.offset[1] + element.offsetHeight &&
+ this.xcomp >= this.offset[0] &&
+ this.xcomp < this.offset[0] + element.offsetWidth);
+ },
+
+ // within must be called directly before
+ overlap: function(mode, element) {
+ if (!mode) return 0;
+ if (mode == 'vertical')
+ return ((this.offset[1] + element.offsetHeight) - this.ycomp) /
+ element.offsetHeight;
+ if (mode == 'horizontal')
+ return ((this.offset[0] + element.offsetWidth) - this.xcomp) /
+ element.offsetWidth;
+ },
+
+ clone: function(source, target) {
+ source = $(source);
+ target = $(target);
+ target.style.position = 'absolute';
+ var offsets = this.cumulativeOffset(source);
+ target.style.top = offsets[1] + 'px';
+ target.style.left = offsets[0] + 'px';
+ target.style.width = source.offsetWidth + 'px';
+ target.style.height = source.offsetHeight + 'px';
+ }
+}
diff --git a/doc/skin/screen.css b/doc/skin/screen.css
new file mode 100644
index 0000000..c6084f8
--- /dev/null
+++ b/doc/skin/screen.css
@@ -0,0 +1,587 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.
You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ +body { margin: 0px 0px 0px 0px; font-family: Verdana, Helvetica, sans-serif; } + +h1 { font-size : 160%; margin: 0px 0px 0px 0px; padding: 0px; } +h2 { font-size : 140%; margin: 1em 0px 0.8em 0px; padding: 0px; font-weight : bold;} +h3 { font-size : 130%; margin: 0.8em 0px 0px 0px; padding: 0px; font-weight : bold; } +.h3 { margin: 22px 0px 3px 0px; } +h4 { font-size : 120%; margin: 0.7em 0px 0px 0px; padding: 0px; font-weight : normal; text-align: left; } +.h4 { margin: 18px 0px 0px 0px; } +h4.faq { font-size : 120%; margin: 18px 0px 0px 0px; padding: 0px; font-weight : bold; text-align: left; } +h5 { font-size : 100%; margin: 14px 0px 0px 0px; padding: 0px; font-weight : normal; text-align: left; } + +/** +* table +*/ +table .title { background-color: #000000; } +.ForrestTable { + color: #ffffff; + background-color: #7099C5; + width: 100%; + font-size : 100%; + empty-cells: show; +} +table caption { + padding-left: 5px; + color: white; + text-align: left; + font-weight: bold; + background-color: #000000; +} +.ForrestTable td { + color: black; + background-color: #f0f0ff; +} +.ForrestTable th { text-align: center; } +/** + * Page Header + */ + +#top { + position: relative; + float: left; + width: 100%; + background: #294563; /* if you want a background in the header, put it here */ +} + +#top .breadtrail { + background: #CFDCED; + color: black; + border-bottom: solid 1px white; + padding: 3px 10px; + font-size: 75%; +} +#top .breadtrail a { color: black; } + +#top .header { + float: left; + width: 100%; + background: url("images/header_white_line.gif") repeat-x bottom; +} + +#top .grouplogo { + padding: 7px 0 10px 10px; + float: left; + text-align: left; +} +#top .projectlogo { + padding: 7px 0 10px 10px; + float: left; + width: 33%; + text-align: right; +} +#top .projectlogoA1 { + padding: 7px 0 10px 10px; + float: right; +} +html>body #top .searchbox { + bottom: 0px; +} +#top .searchbox { + position: absolute; + right: 10px; + height: 42px; + font-size: 70%; + white-space: nowrap; + text-align: right; + color: white; + background-color: #000000; + z-index:0; + background-image: url(images/rc-t-l-5-1header-2searchbox-3searchbox.png); + background-repeat: no-repeat; + background-position: top left; + bottom: -1px; /* compensate for IE rendering issue */ +} + +#top .searchbox form { + padding: 5px 10px; + margin: 0; +} +#top .searchbox p { + padding: 0 0 2px 0; + margin: 0; +} +#top .searchbox input { + font-size: 100%; +} + +#tabs { + clear: both; + padding-left: 10px; + margin: 0; + list-style: none; +} +/* background: #CFDCED url("images/tab-right.gif") no-repeat right top;*/ +#tabs li { + float: left; + background-image: url(images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png); + background-repeat: no-repeat; + background-position: top right; + background-color: #000000; + margin: 0 3px 0 0; + padding: 0; +} + +/*background: url("images/tab-left.gif") no-repeat left top;*/ +#tabs li a { + float: left; + display: block; + font-family: verdana, arial, sans-serif; + text-decoration: none; + color: black; + white-space: nowrap; + background-image: 
url(images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png); + background-repeat: no-repeat; + background-position: top left; + padding: 5px 15px 4px; + width: .1em; /* IE/Win fix */ +} + +#tabs li a:hover { + + cursor: pointer; + text-decoration:underline; +} + +#tabs > li a { width: auto; } /* Rest of IE/Win fix */ + +/* Commented Backslash Hack hides rule from IE5-Mac \*/ +#tabs a { float: none; } +/* End IE5-Mac hack */ + +#top .header .current { + background-color: #4C6C8F; + background-image: url(images/rc-t-r-5-1header-2tab-selected-3tab-selected.png); + background-repeat: no-repeat; + background-position: top right; +} +#top .header .current a { + font-weight: bold; + padding-bottom: 5px; + color: white; + background-image: url(images/rc-t-l-5-1header-2tab-selected-3tab-selected.png); + background-repeat: no-repeat; + background-position: top left; +} +#publishedStrip { + padding-right: 10px; + padding-left: 20px; + padding-top: 3px; + padding-bottom:3px; + color: #ffffff; + font-size : 60%; + font-weight: bold; + background-color: #4C6C8F; + text-align:right; +} + +#level2tabs { +margin: 0; +float:left; +position:relative; + +} + + + +#level2tabs a:hover { + + cursor: pointer; + text-decoration:underline; + +} + +#level2tabs a{ + + cursor: pointer; + text-decoration:none; + background-image: url('images/chapter.gif'); + background-repeat: no-repeat; + background-position: center left; + padding-left: 6px; + margin-left: 6px; +} + +/* +* border-top: solid #4C6C8F 15px; +*/ +#main { + position: relative; + background: white; + clear:both; +} +#main .breadtrail { + clear:both; + position: relative; + background: #CFDCED; + color: black; + border-bottom: solid 1px black; + border-top: solid 1px black; + padding: 0px 180px; + font-size: 75%; + z-index:10; +} +/** +* Round corner +*/ +#roundtop { + background-image: url(images/rc-t-r-15-1body-2menu-3menu.png); + background-repeat: no-repeat; + background-position: top right; +} + +#roundbottom { + background-image: url(images/rc-b-r-15-1body-2menu-3menu.png); + background-repeat: no-repeat; + background-position: top right; +} + +img.corner { + width: 15px; + height: 15px; + border: none; + display: block !important; +} + +.roundtopsmall { + background-image: url(images/rc-t-r-5-1header-2searchbox-3searchbox.png); + background-repeat: no-repeat; + background-position: top right; +} + +#roundbottomsmall { + background-image: url(images/rc-b-r-5-1header-2tab-selected-3tab-selected.png); + background-repeat: no-repeat; + background-position: top right; +} + +img.cornersmall { + width: 5px; + height: 5px; + border: none; + display: block !important; +} +/** + * Side menu + */ +#menu a { font-weight: normal; text-decoration: none;} +#menu a:visited { font-weight: normal; } +#menu a:active { font-weight: normal; } +#menu a:hover { font-weight: normal; text-decoration:underline;} + +#menuarea { width:10em;} +#menu { + position: relative; + float: left; + width: 160px; + padding-top: 0px; + top:-18px; + left:10px; + z-index: 20; + background-color: #f90; + font-size : 70%; + +} + +.menutitle { + cursor:pointer; + padding: 3px 12px; + margin-left: 10px; + background-image: url('images/chapter.gif'); + background-repeat: no-repeat; + background-position: center left; + font-weight : bold; + + +} + +.menutitle:hover{text-decoration:underline;cursor: pointer;} + +#menu .menuitemgroup { + margin: 0px 0px 6px 8px; + padding: 0px; + font-weight : bold; } + +#menu .selectedmenuitemgroup{ + margin: 0px 0px 0px 8px; + padding: 0px; + font-weight : 
normal; + + } + +#menu .menuitem { + padding: 2px 0px 1px 13px; + background-image: url('images/page.gif'); + background-repeat: no-repeat; + background-position: center left; + font-weight : normal; + margin-left: 10px; +} + +#menu .menupage { + margin: 2px 0px 1px 10px; + padding: 0px 3px 0px 12px; + background-image: url('images/page.gif'); + background-repeat: no-repeat; + background-position: center left; + font-style : normal; +} +#menu .menupagetitle { + padding: 0px 0px 0px 1px; + font-style : normal; + border-style: solid; + border-width: 1px; + margin-right: 10px; + +} +#menu .menupageitemgroup { + padding: 3px 0px 4px 6px; + font-style : normal; + border-bottom: 1px solid ; + border-left: 1px solid ; + border-right: 1px solid ; + margin-right: 10px; +} +#menu .menupageitem { + font-style : normal; + font-weight : normal; + border-width: 0px; + font-size : 90%; +} +#menu #credit { + text-align: center; +} +#menu #credit2 { + text-align: center; + padding: 3px 3px 3px 3px; + background-color: #ffffff; +} +#menu .searchbox { + text-align: center; +} +#menu .searchbox form { + padding: 3px 3px; + margin: 0; +} +#menu .searchbox input { + font-size: 100%; +} + +#content { + padding: 20px 20px 20px 180px; + margin: 0; + font : small Verdana, Helvetica, sans-serif; + font-size : 80%; +} + +#content ul { + margin: 0; + padding: 0 25px; +} +#content li { + padding: 0 5px; +} +#feedback { + color: black; + background: #CFDCED; + text-align:center; + margin-top: 5px; +} +#feedback #feedbackto { + font-size: 90%; + color: black; +} +#footer { + clear: both; + position: relative; /* IE bugfix (http://www.dracos.co.uk/web/css/ie6floatbug/) */ + width: 100%; + background: #CFDCED; + border-top: solid 1px #4C6C8F; + color: black; +} +#footer .copyright { + position: relative; /* IE bugfix cont'd */ + padding: 5px; + margin: 0; + width: 45%; +} +#footer .lastmodified { + position: relative; /* IE bugfix cont'd */ + float: right; + width: 45%; + padding: 5px; + margin: 0; + text-align: right; +} +#footer a { color: white; } + +#footer #logos { + text-align: left; +} + + +/** + * Misc Styles + */ + +acronym { cursor: help; } +.boxed { background-color: #a5b6c6;} +.underlined_5 {border-bottom: solid 5px #4C6C8F;} +.underlined_10 {border-bottom: solid 10px #4C6C8F;} +/* ==================== snail trail ============================ */ + +.trail { + position: relative; /* IE bugfix cont'd */ + font-size: 70%; + text-align: right; + float: right; + margin: -10px 5px 0px 5px; + padding: 0; +} + +#motd-area { + position: relative; /* IE bugfix cont'd */ + float: right; + width: 35%; + background-color: #f0f0ff; + border-top: solid 1px #4C6C8F; + border-bottom: solid 1px #4C6C8F; + margin-bottom: 15px; + margin-left: 15px; + margin-right: 10%; + padding-bottom: 5px; + padding-top: 5px; +} + +#minitoc-area { + border-top: solid 1px #4C6C8F; + border-bottom: solid 1px #4C6C8F; + margin: 15px 10% 5px 15px; + /* margin-bottom: 15px; + margin-left: 15px; + margin-right: 10%;*/ + padding-bottom: 7px; + padding-top: 5px; +} +.minitoc { + list-style-image: url('images/current.gif'); + font-weight: normal; +} + +li p { + margin: 0; + padding: 0; +} + +.pdflink { + position: relative; /* IE bugfix cont'd */ + float: right; + margin: 0px 5px; + padding: 0; +} +.pdflink br { + margin-top: -10px; + padding-left: 1px; +} +.pdflink a { + display: block; + font-size: 70%; + text-align: center; + margin: 0; + padding: 0; +} + +.pdflink img { + display: block; + height: 16px; + width: 16px; +} +.xmllink { + position: 
relative; /* IE bugfix cont'd */ + float: right; + margin: 0px 5px; + padding: 0; +} +.xmllink br { + margin-top: -10px; + padding-left: 1px; +} +.xmllink a { + display: block; + font-size: 70%; + text-align: center; + margin: 0; + padding: 0; +} + +.xmllink img { + display: block; + height: 16px; + width: 16px; +} +.podlink { + position: relative; /* IE bugfix cont'd */ + float: right; + margin: 0px 5px; + padding: 0; +} +.podlink br { + margin-top: -10px; + padding-left: 1px; +} +.podlink a { + display: block; + font-size: 70%; + text-align: center; + margin: 0; + padding: 0; +} + +.podlink img { + display: block; + height: 16px; + width: 16px; +} + +.printlink { + position: relative; /* IE bugfix cont'd */ + float: right; +} +.printlink br { + margin-top: -10px; + padding-left: 1px; +} +.printlink a { + display: block; + font-size: 70%; + text-align: center; + margin: 0; + padding: 0; +} +.printlink img { + display: block; + height: 16px; + width: 16px; +} + +p.instruction { + display: list-item; + list-style-image: url('../images/instruction_arrow.png'); + list-style-position: outside; + margin-left: 2em; +} \ No newline at end of file diff --git a/extensions.xml b/extensions.xml new file mode 100644 index 0000000..3388a6d --- /dev/null +++ b/extensions.xml @@ -0,0 +1,27 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/java/org/apache/pylucene/analysis/PythonAnalyzer.java b/java/org/apache/pylucene/analysis/PythonAnalyzer.java new file mode 100644 index 0000000..30cac64 --- /dev/null +++ b/java/org/apache/pylucene/analysis/PythonAnalyzer.java @@ -0,0 +1,47 @@ +/* ==================================================================== + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ==================================================================== + */ + +package org.apache.pylucene.analysis; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import java.io.Reader; + +public class PythonAnalyzer extends Analyzer { + + private long pythonObject; + + public PythonAnalyzer() + { + } + + public void pythonExtension(long pythonObject) + { + this.pythonObject = pythonObject; + } + public long pythonExtension() + { + return this.pythonObject; + } + + public void finalize() + throws Throwable + { + pythonDecRef(); + } + + public native void pythonDecRef(); + public native TokenStream tokenStream(String fieldName, Reader reader); +} diff --git a/java/org/apache/pylucene/analysis/PythonCharTokenizer.java b/java/org/apache/pylucene/analysis/PythonCharTokenizer.java new file mode 100644 index 0000000..397332e --- /dev/null +++ b/java/org/apache/pylucene/analysis/PythonCharTokenizer.java @@ -0,0 +1,50 @@ +/* ==================================================================== + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ==================================================================== + */ + +package org.apache.pylucene.analysis; + +import org.apache.lucene.analysis.CharTokenizer; +import java.io.IOException; +import java.io.Reader; + + +public class PythonCharTokenizer extends CharTokenizer { + + private long pythonObject; + + public PythonCharTokenizer(Reader reader) + { + super(reader); + } + + public void pythonExtension(long pythonObject) + { + this.pythonObject = pythonObject; + } + public long pythonExtension() + { + return this.pythonObject; + } + + public void finalize() + throws Throwable + { + pythonDecRef(); + } + + public native void pythonDecRef(); + public native boolean isTokenChar(char c); + public native char normalize(char c); +} diff --git a/java/org/apache/pylucene/analysis/PythonTokenFilter.java b/java/org/apache/pylucene/analysis/PythonTokenFilter.java new file mode 100644 index 0000000..64137c6 --- /dev/null +++ b/java/org/apache/pylucene/analysis/PythonTokenFilter.java @@ -0,0 +1,51 @@ +/* ==================================================================== + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ==================================================================== + */ + +package org.apache.pylucene.analysis; + +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Token; +import java.io.IOException; + + +public class PythonTokenFilter extends TokenFilter { + + private long pythonObject; + + public PythonTokenFilter(TokenStream tokenStream) + { + super(tokenStream); + } + + public void pythonExtension(long pythonObject) + { + this.pythonObject = pythonObject; + } + public long pythonExtension() + { + return this.pythonObject; + } + + public void finalize() + throws Throwable + { + pythonDecRef(); + } + + public native void pythonDecRef(); + public native boolean incrementToken() + throws IOException; +} diff --git a/java/org/apache/pylucene/analysis/PythonTokenStream.java b/java/org/apache/pylucene/analysis/PythonTokenStream.java new file mode 100644 index 0000000..7effc4d --- /dev/null +++ b/java/org/apache/pylucene/analysis/PythonTokenStream.java @@ -0,0 +1,56 @@ +/* ==================================================================== + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ==================================================================== + */ + +package org.apache.pylucene.analysis; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Token; +import java.io.IOException; + + +public class PythonTokenStream extends TokenStream { + + private long pythonObject; + + public PythonTokenStream() + { + } + + public void pythonExtension(long pythonObject) + { + this.pythonObject = pythonObject; + } + public long pythonExtension() + { + return this.pythonObject; + } + + public void finalize() + throws Throwable + { + pythonDecRef(); + } + + public native void pythonDecRef(); + + public native boolean incrementToken() + throws IOException; + public native void end() + throws IOException; + public native void reset() + throws IOException; + public native void close() + throws IOException; +} diff --git a/java/org/apache/pylucene/analysis/PythonTokenizer.java b/java/org/apache/pylucene/analysis/PythonTokenizer.java new file mode 100644 index 0000000..1849636 --- /dev/null +++ b/java/org/apache/pylucene/analysis/PythonTokenizer.java @@ -0,0 +1,52 @@ +/* ==================================================================== + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ==================================================================== + */ + +package org.apache.pylucene.analysis; + +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.Token; +import java.io.IOException; +import java.io.Reader; + + +public class PythonTokenizer extends Tokenizer { + + private long pythonObject; + + public PythonTokenizer(Reader reader) + { + super(reader); + } + + public void pythonExtension(long pythonObject) + { + this.pythonObject = pythonObject; + } + public long pythonExtension() + { + return this.pythonObject; + } + + public void finalize() + throws Throwable + { + pythonDecRef(); + } + + public native void pythonDecRef(); + + public native boolean incrementToken() + throws IOException; +} diff --git a/java/org/apache/pylucene/queryParser/PythonMultiFieldQueryParser.java b/java/org/apache/pylucene/queryParser/PythonMultiFieldQueryParser.java new file mode 100644 index 0000000..70e2233 --- /dev/null +++ b/java/org/apache/pylucene/queryParser/PythonMultiFieldQueryParser.java @@ -0,0 +1,91 @@ +/* ==================================================================== + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ==================================================================== + */ + +package org.apache.pylucene.queryParser; + +import java.util.List; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.search.Query; +import org.apache.lucene.queryParser.MultiFieldQueryParser; +import org.apache.lucene.queryParser.ParseException; +import org.apache.lucene.util.Version; + + +public class PythonMultiFieldQueryParser extends MultiFieldQueryParser { + + private long pythonObject; + + public PythonMultiFieldQueryParser(Version version, String[] fields, + Analyzer analyzer) + { + super(version, fields, analyzer); + } + + public void pythonExtension(long pythonObject) + { + this.pythonObject = pythonObject; + } + public long pythonExtension() + { + return this.pythonObject; + } + + public void finalize() + throws Throwable + { + pythonDecRef(); + } + + public native void pythonDecRef(); + public native Query getBooleanQuery(List clauses, boolean disableCoord); + + public native Query getFuzzyQuery(String field, String termText, + float minSimilarity); + public native Query getPrefixQuery(String field, String termText); + public native Query getRangeQuery(String field, + String part1, String part2, + boolean inclusive); + public native Query getWildcardQuery(String field, String termText); + + public native Query getFieldQuery_quoted(String field, String queryText, + boolean quoted); + public native Query getFieldQuery_slop(String field, String queryText, + int slop); + + public Query getFieldQuery_quoted_super(String field, String queryText, + boolean quoted) + throws ParseException + { + return super.getFieldQuery(field, queryText, quoted); + } + + public Query getFieldQuery_slop_super(String field, String queryText, + int slop) + throws ParseException + { + return super.getFieldQuery(field, queryText, slop); + } + + public Query getFieldQuery(String field, String queryText, boolean quoted) + { + return getFieldQuery_quoted(field, queryText, quoted); + } + + public Query getFieldQuery(String field, String queryText, int slop) + { + return getFieldQuery_slop(field, queryText, slop); + } +} diff --git a/java/org/apache/pylucene/queryParser/PythonQueryParser.java b/java/org/apache/pylucene/queryParser/PythonQueryParser.java new file mode 100644 index 0000000..739cd6a --- /dev/null +++ b/java/org/apache/pylucene/queryParser/PythonQueryParser.java @@ -0,0 +1,95 @@ +/* ==================================================================== + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ==================================================================== + */ + +package org.apache.pylucene.queryParser; + +import java.util.List; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.search.Query; +import org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.queryParser.CharStream; +import org.apache.lucene.queryParser.ParseException; +import org.apache.lucene.util.Version; + + +public class PythonQueryParser extends QueryParser { + + private long pythonObject; + + public PythonQueryParser(Version version, String field, Analyzer analyzer) + { + super(version, field, analyzer); + } + + public PythonQueryParser(CharStream stream) + { + super(stream); + } + + public void pythonExtension(long pythonObject) + { + this.pythonObject = pythonObject; + } + public long pythonExtension() + { + return this.pythonObject; + } + + public void finalize() + throws Throwable + { + pythonDecRef(); + } + + public native void pythonDecRef(); + public native Query getBooleanQuery(List clauses, boolean disableCoord); + public native Query getFuzzyQuery(String field, String termText, + float minSimilarity); + public native Query getPrefixQuery(String field, String termText); + public native Query getRangeQuery(String field, + String part1, String part2, + boolean inclusive); + public native Query getWildcardQuery(String field, String termText); + + public native Query getFieldQuery_quoted(String field, String queryText, + boolean quoted); + public native Query getFieldQuery_slop(String field, String queryText, + int slop); + + public Query getFieldQuery_quoted_super(String field, String queryText, + boolean quoted) + throws ParseException + { + return super.getFieldQuery(field, queryText, quoted); + } + + public Query getFieldQuery_slop_super(String field, String queryText, + int slop) + throws ParseException + { + return super.getFieldQuery(field, queryText, slop); + } + + public Query getFieldQuery(String field, String queryText, boolean quoted) + { + return getFieldQuery_quoted(field, queryText, quoted); + } + + public Query getFieldQuery(String field, String queryText, int slop) + { + return getFieldQuery_slop(field, queryText, slop); + } +} diff --git a/java/org/apache/pylucene/search/PythonByteParser.java b/java/org/apache/pylucene/search/PythonByteParser.java new file mode 100644 index 0000000..c87d74a --- /dev/null +++ b/java/org/apache/pylucene/search/PythonByteParser.java @@ -0,0 +1,49 @@ +/* ==================================================================== + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ==================================================================== + */ + +package org.apache.pylucene.search; + +import org.apache.lucene.search.FieldCache; + +/** + * @author Andi Vajda + */ + +public class PythonByteParser implements FieldCache.ByteParser { + + private long pythonObject; + + public PythonByteParser() + { + } + + public void pythonExtension(long pythonObject) + { + this.pythonObject = pythonObject; + } + public long pythonExtension() + { + return this.pythonObject; + } + + public void finalize() + throws Throwable + { + pythonDecRef(); + } + + public native void pythonDecRef(); + public native byte parseByte(String ref); +} diff --git a/java/org/apache/pylucene/search/PythonCollector.java b/java/org/apache/pylucene/search/PythonCollector.java new file mode 100644 index 0000000..4900d30 --- /dev/null +++ b/java/org/apache/pylucene/search/PythonCollector.java @@ -0,0 +1,68 @@ +/* ==================================================================== + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ==================================================================== + */ + +package org.apache.pylucene.search; + +import java.io.IOException; + +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.index.IndexReader; + + +public class PythonCollector extends Collector { + + private long pythonObject; + + public PythonCollector() + { + } + + public void pythonExtension(long pythonObject) + { + this.pythonObject = pythonObject; + } + public long pythonExtension() + { + return this.pythonObject; + } + + public void finalize() + throws Throwable + { + pythonDecRef(); + } + + protected Scorer scorer; + + public void setScorer(Scorer scorer) + throws IOException + { + this.scorer = scorer; + } + + public void collect(int doc) + throws IOException + { + collect(doc, scorer.score()); + } + + public native void pythonDecRef(); + public native void collect(int doc, float score) + throws IOException; + public native void setNextReader(IndexReader reader, int docBase) + throws IOException; + public native boolean acceptsDocsOutOfOrder(); +} diff --git a/java/org/apache/pylucene/search/PythonDoubleParser.java b/java/org/apache/pylucene/search/PythonDoubleParser.java new file mode 100644 index 0000000..461b79b --- /dev/null +++ b/java/org/apache/pylucene/search/PythonDoubleParser.java @@ -0,0 +1,49 @@ +/* ==================================================================== + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ==================================================================== + */ + +package org.apache.pylucene.search; + +import org.apache.lucene.search.FieldCache; + +/** + * @author Andi Vajda + */ + +public class PythonDoubleParser implements FieldCache.DoubleParser { + + private long pythonObject; + + public PythonDoubleParser() + { + } + + public void pythonExtension(long pythonObject) + { + this.pythonObject = pythonObject; + } + public long pythonExtension() + { + return this.pythonObject; + } + + public void finalize() + throws Throwable + { + pythonDecRef(); + } + + public native void pythonDecRef(); + public native double parseDouble(String ref); +} diff --git a/java/org/apache/pylucene/search/PythonFieldComparator.java b/java/org/apache/pylucene/search/PythonFieldComparator.java new file mode 100644 index 0000000..7c116d8 --- /dev/null +++ b/java/org/apache/pylucene/search/PythonFieldComparator.java @@ -0,0 +1,60 @@ +/* ==================================================================== + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ==================================================================== + */ + +package org.apache.pylucene.search; + +import java.io.IOException; +import org.apache.lucene.search.FieldComparator; +import org.apache.lucene.index.IndexReader; + +/** + * @author Andi Vajda + */ + +public class PythonFieldComparator extends FieldComparator { + + private long pythonObject; + + public PythonFieldComparator() + { + } + + public void pythonExtension(long pythonObject) + { + this.pythonObject = pythonObject; + } + public long pythonExtension() + { + return this.pythonObject; + } + + public void finalize() + throws Throwable + { + pythonDecRef(); + } + + public native void pythonDecRef(); + + public native int compare(int slot1, int slot2); + public native void setBottom(final int slot); + public native int compareBottom(int doc) + throws IOException; + public native void copy(int slot, int doc) + throws IOException; + public native void setNextReader(IndexReader reader, int docBase) + throws IOException; + public native T value(int slot); +} diff --git a/java/org/apache/pylucene/search/PythonFieldComparatorSource.java b/java/org/apache/pylucene/search/PythonFieldComparatorSource.java new file mode 100644 index 0000000..d7b3506 --- /dev/null +++ b/java/org/apache/pylucene/search/PythonFieldComparatorSource.java @@ -0,0 +1,55 @@ +/* ==================================================================== + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ==================================================================== + */ + +package org.apache.pylucene.search; + +import java.io.IOException; +import org.apache.lucene.search.FieldComparatorSource; +import org.apache.lucene.search.FieldComparator; +import org.apache.lucene.index.IndexReader; + +/** + * @author Andi Vajda + */ + +public class PythonFieldComparatorSource extends FieldComparatorSource { + + private long pythonObject; + + public PythonFieldComparatorSource() + { + } + + public void pythonExtension(long pythonObject) + { + this.pythonObject = pythonObject; + } + public long pythonExtension() + { + return this.pythonObject; + } + + public void finalize() + throws Throwable + { + pythonDecRef(); + } + + public native void pythonDecRef(); + + public native FieldComparator newComparator(String fieldname, int numHits, + int sortPos, boolean reversed) + throws IOException; +} diff --git a/java/org/apache/pylucene/search/PythonFilter.java b/java/org/apache/pylucene/search/PythonFilter.java new file mode 100644 index 0000000..3d12c28 --- /dev/null +++ b/java/org/apache/pylucene/search/PythonFilter.java @@ -0,0 +1,51 @@ +/* ==================================================================== + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ==================================================================== + */ + +package org.apache.pylucene.search; + +import java.io.IOException; + +import org.apache.lucene.search.Filter; +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.index.IndexReader; + + +public class PythonFilter extends Filter { + + private long pythonObject; + + public PythonFilter() + { + } + + public void pythonExtension(long pythonObject) + { + this.pythonObject = pythonObject; + } + public long pythonExtension() + { + return this.pythonObject; + } + + public void finalize() + throws Throwable + { + pythonDecRef(); + } + + public native void pythonDecRef(); + public native DocIdSet getDocIdSet(IndexReader reader) + throws IOException; +} diff --git a/java/org/apache/pylucene/search/PythonFloatParser.java b/java/org/apache/pylucene/search/PythonFloatParser.java new file mode 100644 index 0000000..5f66a2b --- /dev/null +++ b/java/org/apache/pylucene/search/PythonFloatParser.java @@ -0,0 +1,49 @@ +/* ==================================================================== + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
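PythonFilter above reduces a custom filter to a single getDocIdSet() hook. A hypothetical toy
filter follows; whether BitSet and DocIdBitSet are exported by the built lucene module depends
on the build configuration, so treat those imports as assumptions::

    from lucene import PythonFilter, DocIdBitSet, BitSet

    class EvenDocsFilter(PythonFilter):
        """Toy filter that only admits even-numbered documents."""
        def getDocIdSet(self, reader):
            bits = BitSet(reader.maxDoc())
            for doc in xrange(0, reader.maxDoc(), 2):
                bits.set(doc)
            return DocIdBitSet(bits)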
+ * ==================================================================== + */ + +package org.apache.pylucene.search; + +import org.apache.lucene.search.FieldCache; + +/** + * @author Andi Vajda + */ + +public class PythonFloatParser implements FieldCache.FloatParser { + + private long pythonObject; + + public PythonFloatParser() + { + } + + public void pythonExtension(long pythonObject) + { + this.pythonObject = pythonObject; + } + public long pythonExtension() + { + return this.pythonObject; + } + + public void finalize() + throws Throwable + { + pythonDecRef(); + } + + public native void pythonDecRef(); + public native float parseFloat(String ref); +} diff --git a/java/org/apache/pylucene/search/PythonIntParser.java b/java/org/apache/pylucene/search/PythonIntParser.java new file mode 100644 index 0000000..6457e5a --- /dev/null +++ b/java/org/apache/pylucene/search/PythonIntParser.java @@ -0,0 +1,49 @@ +/* ==================================================================== + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ==================================================================== + */ + +package org.apache.pylucene.search; + +import org.apache.lucene.search.FieldCache; + +/** + * @author Andi Vajda + */ + +public class PythonIntParser implements FieldCache.IntParser { + + private long pythonObject; + + public PythonIntParser() + { + } + + public void pythonExtension(long pythonObject) + { + this.pythonObject = pythonObject; + } + public long pythonExtension() + { + return this.pythonObject; + } + + public void finalize() + throws Throwable + { + pythonDecRef(); + } + + public native void pythonDecRef(); + public native int parseInt(String ref); +} diff --git a/java/org/apache/pylucene/search/PythonLongParser.java b/java/org/apache/pylucene/search/PythonLongParser.java new file mode 100644 index 0000000..d9b8296 --- /dev/null +++ b/java/org/apache/pylucene/search/PythonLongParser.java @@ -0,0 +1,49 @@ +/* ==================================================================== + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ==================================================================== + */ + +package org.apache.pylucene.search; + +import org.apache.lucene.search.FieldCache; + +/** + * @author Andi Vajda + */ + +public class PythonLongParser implements FieldCache.LongParser { + + private long pythonObject; + + public PythonLongParser() + { + } + + public void pythonExtension(long pythonObject) + { + this.pythonObject = pythonObject; + } + public long pythonExtension() + { + return this.pythonObject; + } + + public void finalize() + throws Throwable + { + pythonDecRef(); + } + + public native void pythonDecRef(); + public native long parseLong(String ref); +} diff --git a/java/org/apache/pylucene/search/PythonShortParser.java b/java/org/apache/pylucene/search/PythonShortParser.java new file mode 100644 index 0000000..f599c85 --- /dev/null +++ b/java/org/apache/pylucene/search/PythonShortParser.java @@ -0,0 +1,49 @@ +/* ==================================================================== + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ==================================================================== + */ + +package org.apache.pylucene.search; + +import org.apache.lucene.search.FieldCache; + +/** + * @author Andi Vajda + */ + +public class PythonShortParser implements FieldCache.ShortParser { + + private long pythonObject; + + public PythonShortParser() + { + } + + public void pythonExtension(long pythonObject) + { + this.pythonObject = pythonObject; + } + public long pythonExtension() + { + return this.pythonObject; + } + + public void finalize() + throws Throwable + { + pythonDecRef(); + } + + public native void pythonDecRef(); + public native short parseShort(String ref); +} diff --git a/java/org/apache/pylucene/search/PythonSimilarity.java b/java/org/apache/pylucene/search/PythonSimilarity.java new file mode 100644 index 0000000..e799b88 --- /dev/null +++ b/java/org/apache/pylucene/search/PythonSimilarity.java @@ -0,0 +1,79 @@ +/* ==================================================================== + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ====================================================================
+ */
+
+package org.apache.pylucene.search;
+
+import org.apache.lucene.search.Similarity;
+import org.apache.lucene.search.Searcher;
+import org.apache.lucene.search.Explanation.IDFExplanation;
+import org.apache.lucene.index.FieldInvertState;
+import org.apache.lucene.index.Term;
+import java.util.Collection;
+
+
+public class PythonSimilarity extends Similarity {
+
+    private long pythonObject;
+
+    public PythonSimilarity()
+    {
+    }
+
+    public void pythonExtension(long pythonObject)
+    {
+        this.pythonObject = pythonObject;
+    }
+    public long pythonExtension()
+    {
+        return this.pythonObject;
+    }
+
+    public void finalize()
+        throws Throwable
+    {
+        pythonDecRef();
+    }
+
+    public IDFExplanation idfExplain(final Collection<Term> terms,
+                                     final Searcher searcher)
+    {
+        return new IDFExplanation() {
+            public float getIdf()
+            {
+                return idfTerms(terms, searcher);
+            }
+
+            public String explain()
+            {
+                return null;
+            }
+        };
+    }
+
+    public native void pythonDecRef();
+
+    public native float idfTerms(Collection<Term> terms, Searcher searcher);
+
+    public native float coord(int overlap, int maxOverlap);
+    public native float idf(int docFreq, int numDocs);
+    public native float computeNorm(String fieldName, FieldInvertState state);
+    public native float queryNorm(float sumOfSquaredWeights);
+    public native float sloppyFreq(int distance);
+    public native float tf(float freq);
+    public native float scorePayload(int docId, String fieldName,
+                                     int start, int end, byte [] payload,
+                                     int offset, int length);
+
+}
diff --git a/java/org/apache/pylucene/search/highlight/PythonFormatter.java b/java/org/apache/pylucene/search/highlight/PythonFormatter.java
new file mode 100644
index 0000000..908ca7e
--- /dev/null
+++ b/java/org/apache/pylucene/search/highlight/PythonFormatter.java
@@ -0,0 +1,48 @@
+/* ====================================================================
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
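PythonSimilarity keeps idfExplain() on the Java side because it must hand an anonymous
IDFExplanation back to Lucene, and funnels the actual computation to the native idfTerms()
hook; every other scoring method is delegated straight to Python. A hypothetical Python-side
similarity, again assuming the flat lucene module of the built product::

    from lucene import PythonSimilarity

    class FlatSimilarity(PythonSimilarity):
        """Scores by raw term frequency only."""
        def coord(self, overlap, maxOverlap):
            return 1.0
        def idf(self, docFreq, numDocs):
            return 1.0
        def idfTerms(self, terms, searcher):
            return 1.0
        def computeNorm(self, fieldName, state):
            return 1.0
        def queryNorm(self, sumOfSquaredWeights):
            return 1.0
        def sloppyFreq(self, distance):
            return 1.0
        def tf(self, freq):
            return freq
        def scorePayload(self, docId, fieldName, start, end,
                         payload, offset, length):
            return 1.0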
+ * ==================================================================== + */ + +package org.apache.pylucene.search.highlight; + +import org.apache.lucene.search.highlight.Formatter; +import org.apache.lucene.search.highlight.TokenGroup; + + +public class PythonFormatter implements Formatter { + + private long pythonObject; + + public PythonFormatter() + { + } + + public void pythonExtension(long pythonObject) + { + this.pythonObject = pythonObject; + } + public long pythonExtension() + { + return this.pythonObject; + } + + public void finalize() + throws Throwable + { + pythonDecRef(); + } + + public native void pythonDecRef(); + public native String highlightTerm(String originalText, + TokenGroup tokenGroup); +} diff --git a/java/org/apache/pylucene/search/highlight/PythonFragmenter.java b/java/org/apache/pylucene/search/highlight/PythonFragmenter.java new file mode 100644 index 0000000..e89ceb6 --- /dev/null +++ b/java/org/apache/pylucene/search/highlight/PythonFragmenter.java @@ -0,0 +1,47 @@ +/* ==================================================================== + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ==================================================================== + */ + +package org.apache.pylucene.search.highlight; + +import org.apache.lucene.search.highlight.Fragmenter; +import org.apache.lucene.analysis.TokenStream; + +public class PythonFragmenter implements Fragmenter { + + private long pythonObject; + + public PythonFragmenter() + { + } + + public void pythonExtension(long pythonObject) + { + this.pythonObject = pythonObject; + } + public long pythonExtension() + { + return this.pythonObject; + } + + public void finalize() + throws Throwable + { + pythonDecRef(); + } + + public native void pythonDecRef(); + public native boolean isNewFragment(); + public native void start(String originalText, TokenStream tokenStream); +} diff --git a/java/org/apache/pylucene/store/PythonDirectory.java b/java/org/apache/pylucene/store/PythonDirectory.java new file mode 100644 index 0000000..aacd049 --- /dev/null +++ b/java/org/apache/pylucene/store/PythonDirectory.java @@ -0,0 +1,83 @@ +/* ==================================================================== + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ====================================================================
+ */
+
+package org.apache.pylucene.store;
+
+import java.io.IOException;
+import java.util.Collection;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.LockFactory;
+
+
+public class PythonDirectory extends Directory {
+
+    private long pythonObject;
+
+    public PythonDirectory(LockFactory factory)
+        throws IOException
+    {
+        setLockFactory(factory);
+    }
+
+    public void pythonExtension(long pythonObject)
+    {
+        this.pythonObject = pythonObject;
+    }
+    public long pythonExtension()
+    {
+        return this.pythonObject;
+    }
+
+    public void finalize()
+        throws Throwable
+    {
+        pythonDecRef();
+    }
+
+    public void sync(Collection<String> names)
+        throws IOException
+    {
+        for (String name : names)
+            sync(name);
+    }
+
+    public native void pythonDecRef();
+
+    public native void close()
+        throws IOException;
+    public native IndexOutput createOutput(String name)
+        throws IOException;
+    public native void deleteFile(String name)
+        throws IOException;
+    public native boolean fileExists(String name)
+        throws IOException;
+    public native long fileLength(String name)
+        throws IOException;
+    public native long fileModified(String name)
+        throws IOException;
+    public native String[] listAll()
+        throws IOException;
+    public native IndexInput openInput(String name)
+        throws IOException;
+    public native IndexInput openInput(String name, int bufferSize)
+        throws IOException;
+    public native void touchFile(String name)
+        throws IOException;
+    public native void sync(String name)
+        throws IOException;
+}
diff --git a/java/org/apache/pylucene/store/PythonIndexInput.java b/java/org/apache/pylucene/store/PythonIndexInput.java
new file mode 100644
index 0000000..592bf4d
--- /dev/null
+++ b/java/org/apache/pylucene/store/PythonIndexInput.java
@@ -0,0 +1,62 @@
+/* ====================================================================
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ==================================================================== + */ + +package org.apache.pylucene.store; + +import java.io.IOException; +import org.apache.lucene.store.BufferedIndexInput; + + +public class PythonIndexInput extends BufferedIndexInput { + + private long pythonObject; + + public PythonIndexInput() + { + } + + public void pythonExtension(long pythonObject) + { + this.pythonObject = pythonObject; + } + public long pythonExtension() + { + return this.pythonObject; + } + + public void finalize() + throws Throwable + { + pythonDecRef(); + } + + public native void pythonDecRef(); + + public native Object clone(); + public native long length(); + public native void close() + throws IOException; + public native byte[] readInternal(int length, long pos) + throws IOException; + public native void seekInternal(long pos) + throws IOException; + + protected void readInternal(byte[] b, int offset, int length) + throws IOException + { + byte[] data = readInternal(length, getFilePointer()); + System.arraycopy(data, 0, b, offset, data.length); + } +} diff --git a/java/org/apache/pylucene/store/PythonIndexOutput.java b/java/org/apache/pylucene/store/PythonIndexOutput.java new file mode 100644 index 0000000..66fa442 --- /dev/null +++ b/java/org/apache/pylucene/store/PythonIndexOutput.java @@ -0,0 +1,69 @@ +/* ==================================================================== + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ==================================================================== + */ + +package org.apache.pylucene.store; + +import java.io.IOException; +import org.apache.lucene.store.BufferedIndexOutput; + + +public class PythonIndexOutput extends BufferedIndexOutput { + + private long pythonObject; + + public PythonIndexOutput() + { + } + + public void pythonExtension(long pythonObject) + { + this.pythonObject = pythonObject; + } + public long pythonExtension() + { + return this.pythonObject; + } + + public void finalize() + throws Throwable + { + pythonDecRef(); + } + + public void seek(long pos) + throws IOException + { + super.seek(pos); + seekInternal(pos); + } + + public native void pythonDecRef(); + public native long length() + throws IOException; + public native void flushBuffer(byte[] data) + throws IOException; + public native void seekInternal(long pos) + throws IOException; + public native void close() + throws IOException; + + protected void flushBuffer(byte[] b, int offset, int len) + throws IOException + { + byte[] data = new byte[len]; + System.arraycopy(b, offset, data, 0, len); + flushBuffer(data); + } +} diff --git a/java/org/apache/pylucene/store/PythonLock.java b/java/org/apache/pylucene/store/PythonLock.java new file mode 100644 index 0000000..74b9a90 --- /dev/null +++ b/java/org/apache/pylucene/store/PythonLock.java @@ -0,0 +1,48 @@ +/* ==================================================================== + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
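The store classes above bridge Lucene's buffered I/O onto single-argument hooks that are easier
to implement in Python: PythonIndexInput turns readInternal(byte[], offset, length) into a
read-by-position returning a byte[], and PythonIndexOutput copies the buffer slice so that
flushBuffer() receives one array argument. A hypothetical in-memory output, assuming the flat
lucene module and the JArray('byte').string_ property noted in jcc's CHANGES later in this
import::

    from lucene import PythonIndexOutput

    class MemIndexOutput(PythonIndexOutput):
        """Appends flushed buffers to an in-memory, append-only store."""
        def __init__(self, store, name):
            super(MemIndexOutput, self).__init__()
            self.store, self.name = store, name
            self.store[name] = ''
        def length(self):
            return long(len(self.store[self.name]))
        def flushBuffer(self, data):
            # data is a JArray('byte'); its string_ property yields a str
            self.store[self.name] += data.string_
        def seekInternal(self, pos):
            pass  # append-only toy; a real store must honor seeks
        def close(self):
            pass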
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ==================================================================== + */ + +package org.apache.pylucene.store; + +import org.apache.lucene.store.Lock; + + +public class PythonLock extends Lock { + + private long pythonObject; + + public PythonLock() + { + } + + public void pythonExtension(long pythonObject) + { + this.pythonObject = pythonObject; + } + public long pythonExtension() + { + return this.pythonObject; + } + + public void finalize() + throws Throwable + { + pythonDecRef(); + } + + public native void pythonDecRef(); + public native boolean isLocked(); + public native boolean obtain(); + public native void release(); +} diff --git a/java/org/apache/pylucene/store/PythonLockFactory.java b/java/org/apache/pylucene/store/PythonLockFactory.java new file mode 100644 index 0000000..d440d04 --- /dev/null +++ b/java/org/apache/pylucene/store/PythonLockFactory.java @@ -0,0 +1,52 @@ +/* ==================================================================== + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ==================================================================== + */ + +package org.apache.pylucene.store; + +import java.io.IOException; + +import org.apache.lucene.store.Lock; +import org.apache.lucene.store.LockFactory; + + +public class PythonLockFactory extends LockFactory { + + private long pythonObject; + + public PythonLockFactory() + { + } + + public void pythonExtension(long pythonObject) + { + this.pythonObject = pythonObject; + } + public long pythonExtension() + { + return this.pythonObject; + } + + public void finalize() + throws Throwable + { + pythonDecRef(); + } + + public native void pythonDecRef(); + + public native Lock makeLock(String lockName); + public native void clearLock(String lockName) + throws IOException; +} diff --git a/java/org/apache/pylucene/util/PythonComparable.java b/java/org/apache/pylucene/util/PythonComparable.java new file mode 100644 index 0000000..5d473fa --- /dev/null +++ b/java/org/apache/pylucene/util/PythonComparable.java @@ -0,0 +1,44 @@ +/* ==================================================================== + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * ==================================================================== + */ + +package org.apache.pylucene.util; + + +public class PythonComparable implements Comparable { + + private long pythonObject; + + public PythonComparable() + { + } + + public void pythonExtension(long pythonObject) + { + this.pythonObject = pythonObject; + } + public long pythonExtension() + { + return this.pythonObject; + } + + public void finalize() + throws Throwable + { + pythonDecRef(); + } + + public native void pythonDecRef(); + public native int compareTo(Object o); +} diff --git a/java/org/apache/pylucene/util/PythonIterator.java b/java/org/apache/pylucene/util/PythonIterator.java new file mode 100644 index 0000000..df6eb97 --- /dev/null +++ b/java/org/apache/pylucene/util/PythonIterator.java @@ -0,0 +1,53 @@ +/* ==================================================================== + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ==================================================================== + */ + +package org.apache.pylucene.util; + +import java.util.Iterator; + + +public class PythonIterator implements Iterator { + + private long pythonObject; + + public PythonIterator() + { + } + + public void pythonExtension(long pythonObject) + { + this.pythonObject = pythonObject; + } + public long pythonExtension() + { + return this.pythonObject; + } + + public void finalize() + throws Throwable + { + pythonDecRef(); + } + + public native void pythonDecRef(); + + public native boolean hasNext(); + public native Object next(); + + public void remove() + { + throw new UnsupportedOperationException(); + } +} diff --git a/java/org/apache/pylucene/util/PythonSet.java b/java/org/apache/pylucene/util/PythonSet.java new file mode 100644 index 0000000..ceb1c32 --- /dev/null +++ b/java/org/apache/pylucene/util/PythonSet.java @@ -0,0 +1,75 @@ +/* ==================================================================== + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
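PythonComparable, PythonIterator and PythonSet let plain Python objects satisfy the
corresponding java.lang and java.util contracts; remove() is the one Iterator method kept in
Java, as an unconditional UnsupportedOperationException. A minimal iterator sketch (flat
lucene module assumed)::

    from lucene import PythonIterator

    class StringIterator(PythonIterator):
        """Feeds a Python list of strings to Java callers."""
        def __init__(self, strings):
            super(StringIterator, self).__init__()
            self.strings, self.index = strings, 0
        def hasNext(self):
            return self.index < len(self.strings)
        def next(self):
            value = self.strings[self.index]
            self.index += 1
            return value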
+ * ====================================================================
+ */
+
+package org.apache.pylucene.util;
+
+import java.util.Set;
+import java.util.Collection;
+import java.util.Iterator;
+import java.lang.reflect.Array;
+
+
+public class PythonSet implements Set {
+
+    private long pythonObject;
+
+    public PythonSet()
+    {
+    }
+
+    public void pythonExtension(long pythonObject)
+    {
+        this.pythonObject = pythonObject;
+    }
+    public long pythonExtension()
+    {
+        return this.pythonObject;
+    }
+
+    public void finalize()
+        throws Throwable
+    {
+        pythonDecRef();
+    }
+
+    public native void pythonDecRef();
+
+    public native boolean add(Object obj);
+    public native boolean addAll(Collection c);
+    public native void clear();
+    public native boolean contains(Object obj);
+    public native boolean containsAll(Collection c);
+    public native boolean equals(Object obj);
+    public native boolean isEmpty();
+    public native Iterator iterator();
+    public native boolean remove(Object obj);
+    public native boolean removeAll(Collection c);
+    public native boolean retainAll(Collection c);
+    public native int size();
+    public native Object[] toArray();
+
+    public Object[] toArray(Object[] a)
+    {
+        Object[] array = toArray();
+
+        if (a.length < array.length)
+            a = (Object[]) Array.newInstance(a.getClass().getComponentType(),
+                                             array.length);
+
+        System.arraycopy(array, 0, a, 0, array.length);
+
+        return a;
+    }
+}
diff --git a/jcc/CHANGES b/jcc/CHANGES
new file mode 100644
index 0000000..7f41cba
--- /dev/null
+++ b/jcc/CHANGES
@@ -0,0 +1,200 @@
+
+Version 2.10 -> 2.11
+--------------------
+ - improved recovery from toString() failure, displaying class name instead
+ - non public classes listed on command line now wrapped (Lukasz Jancewicz)
+ - fixed bug with generating wrappers for unimplemented interface methods
+ - fixed bug with generating propMethods for void getters
+ - fixed bug with missing code for returning arrays from iterators
+
+Version 2.9 -> 2.10
+-------------------
+ - added javadoc for org.apache.jcc.PythonVM class (Bill Janssen)
+ - fixed bug with Constructor.getGenericParameterTypes() losing first parameter
+
+Version 2.8 -> 2.9
+------------------
+ - fixed bug with excluding inner classes only (Christian Heimes)
+ - fixed bug with Python gc in non-attached JVM thread (Christian Heimes)
+
+Version 2.7 -> 2.8
+------------------
+ - fixed bug with --module using relative paths (Roman Chyla)
+ - made fully qualified C++ class names absolute
+ - fixed bug with parameterized types not requiring classes found in parameters
+ - fixed bug with missing space between >> of nested generics
+
+Version 2.6 -> 2.7
+------------------
+ - added 'IGNORE' to reserved word list
+ - improved --exclude logic to also exclude inner classes of excluded classes
+ - moved --find-jvm-dll logic to __init__.py to accommodate Python 2.7
+ - fixed bug with faulty Java version logic causing crashes with Java 1.5
+ - added logic for finding JavaVM.framework headers on Mac OS X (Bill Janssen)
+
+Version 2.5 -> 2.6
+------------------
+ - added freebsd7 settings to setup.py (Sujan Shakya)
+ - added support for unix-specific --home distutils install parameter
+ - added support for extracting JAVAHOME from Windows registry (Bill Janssen)
+ - updated MANIFEST.in as sdist started creating incomplete source archives
+ - improved support for building on Windows with mingw32 (Bill Janssen)
+ - added support for --find-jvm-dll parameter (Bill Janssen)
+ - fixed bug with not inheriting type parameters to inner parameterized classes
+
- added support for of_() method to set instance type parameters + - fixed bug with not heeding type parameter for --sequence get method + - parameterized return values are now unboxed + - improved auto-boxing of primitive type parameters + - added support for auto-boxing CharSequence from python str and unicode + - added support for auto-boxing Number from python int, long and float + - added 'asm' to list of reserved words + - added JCC_VERSION string to modules using JCC + - added support for --resources + - fixed bug with array Release calls using isCopy instead of 0 mode + - added support for --import + - added read-only env.classpath property + - config.py now written only during build or when missing (Christian Heimes) + - fixed bug with not enforcing Iterable for iterator method detection + +Version 2.4 -> 2.5 +------------------ + - added env.jni_version for the JNI version as returned by JNI's GetVersion() + - added env.java_version for java.lang.System.getProperty('java.version') + - default value to initVM's classpath parameter now is importing module's + - added support for Java generics + - added 'string_' property to JArray('byte') instances to extract byte string + - fixed bug with passing list of extension objects + - reworked iterator and enumeration 'next' templates to shortcut C++ wrappers + - fixed bug in code comparing setuptools versions when >= 0.6c10 + - verified build against setuptools from distribute 0.6.6 + - renamed patch.43 to differentiate setuptools versions + +Version 2.3 -> 2.4 +------------------ + - added 'typeof' to reserved word list + - added Java stacktrace to __str__() of JavaError, fixing half of PYLUCENE-1 + - fixed local string ref leaks in JArray.get/toSequence (Aric Coady) + - added --libpath parameter to specify -Djava.library.path + - classes listed with --exclude are no longer loaded (except for dependencies) + - added --vmarg to add Java VM initialization parameters (Christian Kofler) + - added support for passing a directory to --module + - byte values are now returned as signed numbers as opposed to one-char strs + - added --arch command line flag to filter Mac OS X python config settings + - cast_() and instance_() methods now generated on extension classes as well + - fixed bug with reporting Python error that occurred in an extension class + +Version 2.2 -> 2.3 +------------------ + - fixed Solaris compilation issue with using va_arg() with function pointers + - added --reserved command line arg to extend list of words to mangle + - fixed bug with initJCC not being run when Python VM embedded in JVM + - added --wininst to enable use of bdist_wininst with distutils (Jonas Maurus) + - added --help to describe command line options (Jonas Maurus) + - added support for --rename to workaround python flattened namespace clashes + - fixed bug with Enumeration/Iterator template function instantiation + - removed -framework Python from darwin link flags in setup.py + +Version 2.1 -> 2.2 +------------------ + - JCC now a subproject of the Apache PyLucene project + - fixed bug where thread's JNIEnv was not set before calling findClass() + - unhandled java exception now is printed out via ExceptionDescribe() + - added cast to placate Solaris compiler error in JArray.cpp + - JArray Python wrappers should return None when passed a null array + - added JDK variable to setup.py to better parameterize build configuration + - added support for proxying mapping and sequence protocols on FinalizerProxy + - changed Type suffix to $$Type to avoid clashes 
with like-named Java classes + - added 'bool' and 'operator' to list of reserved words + - added support for packages and classes named with C++ reserved words + - static methods shadowed by non-static methods of same name now '_' suffixed + - added 'java.lang' to --package by default + - added isCurrentThreadAttached() method to VMEnv + - added MANIFEST.in to exclude generated jcc/config.py (Christian Heimes) + - passing strings for byte[] or char[] is no longer supported, use JArray + - failure to call initVM() now reported with error instead of crash + - failure to find class now reported with error instead of crash + - failure to call attachCurrentThread() now reported with error, not crash + +Version 2.0 -> 2.1 +------------------ + - fixed bug with not checking missing module name when attempting build + - increased jcc's java stack to 512k + - added support for iPod/iPhone, with shared mode + - added missing cast to jweak in call to DeleteWeakGlobalRef() + - fixed local string ref leak in JArray (Aaron Lav) + - fixed local ref leak if ref for object already exists in table (Aaron Lav) + - fixed bug with error reporting from class methods (Aaron Lav) + - fixed bug with reporting python errors with RuntimeException when not shared + - removed bogus storage class from template specializations (Joseph Barillari) + +Version 1.9 -> 2.0 +------------------ + - fixed bug with failed findClass() import + - fixed bug http://bugzilla.osafoundation.org/show_bug.cgi?id=12127 + - added -ljvm to linux2 and sunos5 LFLAGS + - added support for using JCC in reverse (starting from Java VM) (shared only) + - using PythonException for reporting Python errors (shared only) + - inserted Apache 2.0 license copyright notices + - fixed bug with declaring array parameters in extension methods + - added support for --module to add individual python files to resulting egg + - JCC in reverse functional on Mac OS X and Linux + - fixed JCC in reverse threading issues + - JCC in reverse usable with Tomcat + - got python stacktrace into PythonException's message + - added 'self' property to get wrapped python object from extension wrapper + - added headless AWT workaround to JCC's own initVM() call + - added DEBUG_CFLAGS to setup.py to improve debug build support + - fixed uninitialized Class class bug (parseArgs) + - added errorName field to PythonException + - added support for excluding stack trace from PythonException + - arrays are now wrapped by JArray() objects instead of expanded into lists + - return by value in arrays now supported + - added support for nested arrays via JArray().cast_() + - included patch to setuptools to support shared mode on Linux + +Version 1.8 -> 1.9 +------------------ + - fixed code generation for clone() broken by finalization proxy work + - added 'union' to the list of reserved words + - fixed castCheck() to work with finalization proxies + - --compile no longer installs by default + - fixed bug in __init__.cpp #include statements for package-less classes + - fixed line ending bug on Windows + - fixed multiple JCC-built extensions in same process problem + - removed env argument from initVM() as it's redundant with the libjcc.dylib + - reimplemented env->setClassPath() in terms of system URLClassLoader hack + - added support for --include option + - added 'NULL' to list of reserved words + - added support for building shared libjcc library on Mac OS X and Linux + - fixed bug with generating wrappers for abstract Enumeration implementations + - added support for --install-dir and 
--use-distutils options
+ - copy jcc runtime sources into extension source tree before compiling
+ - added detection of invalid command line args
+ - fixed double-free bug when passing in vmargs
+ - added defines to enable building with MinGW (Bill Janssen)
+ - added support for --bdist
+ - added support for --compiler
+ - fixed crasher on Windows with virtual JObject.weaken$()
+ - fixed bug not checking return value from initVM()
+ - fixed bug with findClass() not catching C++ exception when class not found
+ - added missing code in parseArgs() to handle double[], float[] and long[]
+
+Version 1.7 -> 1.8
+------------------
+
+ - fixed bug using the wrong field modifiers for setter (Bill Janssen)
+ - added missing calls for generating wrappers for ancestors of Exception
+ - added missing call for generating wrappers for String
+ - added note about --classpath to README
+
+Version 1.6 -> 1.7
+------------------
+ - fixed memory leak when calling inherited methods via callSuper()
+ - added support for building on Solaris with Sun Studio C++ (Solaris 11)
+ - fixed leak of local refs of jstring when converting to an array of String
+ - automated finalization of extensions via proxy for breaking ref cycle
+ - added Py_CLEAR and Py_VISIT macros for Python 2.3.5 compilation
+
+Earlier versions (changes included in PyLucene versions < 2.3)
+--------------------------------------------------------------
+ - see http://svn.osafoundation.org/pylucene/trunk/jcc/CHANGES
diff --git a/jcc/DESCRIPTION b/jcc/DESCRIPTION
new file mode 100644
index 0000000..3c0ed70
--- /dev/null
+++ b/jcc/DESCRIPTION
@@ -0,0 +1,47 @@
+
+For changes since earlier releases, see:
+  http://svn.apache.org/repos/asf/lucene/pylucene/trunk/jcc/CHANGES
+
+JCC is a C++ code generator for producing the code necessary to call
+into Java classes from CPython via the Java Native Interface (JNI).
+
+JCC generates C++ wrapper classes that hide all the gory details of JNI
+access as well as Java memory and object reference management.
+
+JCC generates CPython types that make these C++ classes accessible from a
+Python interpreter. JCC attempts to make these Python types pythonic by
+detecting iterators and property accessors. Iterators and mappings may also
+be declared to JCC.
+
+JCC has been built on Python 2.3, 2.4, 2.5, 2.6 and 2.7 and has been used
+with various Java Runtime Environments such as Sun Java 1.4, 1.5 and 1.6,
+Apple's Java 1.4 and 1.5 on Mac OS X, and open source Java OpenJDK 1.7
+builds.
+
+An experimental port to Python 3 is available from a branch:
+  http://svn.apache.org/repos/asf/lucene/pylucene/branches/python_3/jcc/
+
+JCC is supported on Mac OS X, Linux, Solaris and Windows.
+
+JCC is written in C++ and Python. It uses Java's reflection API to do its
+job and needs a Java Runtime Environment to be present to operate.
+
+JCC is built with distutils or setuptools::
+
+    python setup.py build
+    sudo python setup.py install
+
+Setuptools is required to build JCC on Python 2.3.
+
+Except for Mac OS X - where Apple's Java comes pre-installed in a known
+framework location - JCC's setup.py file needs to be edited before building
+JCC to specify the location of the Java Runtime Environment's header files
+and libraries.
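Once installed, JCC itself is driven from the command line. A representative invocation that
wraps a jar into an installable Python extension follows; the jar and module names are
placeholders, while --jar, --python, --build and --install are stock JCC options (on older
Pythons the module is invoked as python -m jcc.__main__)::

    python -m jcc --jar myproject.jar \
                  --python myproject \
                  --build --install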
+ +The svn sources for JCC are available at: + http://svn.apache.org/repos/asf/lucene/pylucene/trunk/jcc/ + +For more information about JCC see: + http://lucene.apache.org/pylucene/jcc/index.html + +JCC comes with an Apache 2.0 copyright license: + http://www.apache.org/licenses/LICENSE-2.0 diff --git a/jcc/INSTALL b/jcc/INSTALL new file mode 100644 index 0000000..f46ae03 --- /dev/null +++ b/jcc/INSTALL @@ -0,0 +1,3 @@ + +Please see doc/jcc/documentation/install.html or +http://lucene.apache.org/pylucene/jcc/documentation/install.html diff --git a/jcc/LICENSE b/jcc/LICENSE new file mode 100644 index 0000000..6492205 --- /dev/null +++ b/jcc/LICENSE @@ -0,0 +1,12 @@ + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/jcc/MANIFEST.in b/jcc/MANIFEST.in new file mode 100644 index 0000000..c692711 --- /dev/null +++ b/jcc/MANIFEST.in @@ -0,0 +1,11 @@ +exclude jcc/config.py +include jcc/patches/patch.* +recursive-include jcc *.h +recursive-include _jcc *.h +recursive-include java *.java +recursive-include helpers *.py +include CHANGES +include DESCRIPTION +include INSTALL +include LICENSE +include NOTICE diff --git a/jcc/NOTICE b/jcc/NOTICE new file mode 100644 index 0000000..efdba98 --- /dev/null +++ b/jcc/NOTICE @@ -0,0 +1,4 @@ + +Apache PyLucene (JCC) + Copyright 2009 The Apache Software Foundation + Copyright (c) 2007-2008 Open Source Applications Foundation diff --git a/jcc/README b/jcc/README new file mode 100644 index 0000000..c17fe48 --- /dev/null +++ b/jcc/README @@ -0,0 +1,6 @@ + +If you obtained JCC with a PyLucene source archive, please see + doc/jcc/documentation/readme.html + +If you obtained JCC from http://www.python.org/pypi, please see + http://lucene.apache.org/pylucene/jcc/documentation/readme.html diff --git a/jcc/_jcc/boot.cpp b/jcc/_jcc/boot.cpp new file mode 100644 index 0000000..896494d --- /dev/null +++ b/jcc/_jcc/boot.cpp @@ -0,0 +1,63 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include <jni.h>
+#include <Python.h>
+#include "java/lang/Class.h"
+#include "java/lang/RuntimeException.h"
+#include "macros.h"
+
+extern PyTypeObject PY_TYPE(JObject), PY_TYPE(ConstVariableDescriptor);
+
+PyObject *initJCC(PyObject *module);
+PyObject *initVM(PyObject *self, PyObject *args, PyObject *kwds);
+
+namespace java {
+    namespace lang {
+        void __install__(PyObject *m);
+    }
+    namespace io {
+        void __install__(PyObject *m);
+    }
+}
+
+PyObject *__initialize__(PyObject *module, PyObject *args, PyObject *kwds)
+{
+    PyObject *env = initVM(module, args, kwds);
+
+    if (env == NULL)
+        return NULL;
+
+    java::lang::Class::initializeClass();
+    java::lang::RuntimeException::initializeClass();
+
+    return env;
+}
+
+#include "jccfuncs.h"
+
+extern "C" {
+
+    void init_jcc(void)
+    {
+        PyObject *m = Py_InitModule3("_jcc", jcc_funcs, "_jcc");
+
+        initJCC(m);
+
+        INSTALL_TYPE(JObject, m);
+        INSTALL_TYPE(ConstVariableDescriptor, m);
+        java::lang::__install__(m);
+        java::io::__install__(m);
+    }
+}
diff --git a/jcc/_jcc/java/io/PrintWriter.cpp b/jcc/_jcc/java/io/PrintWriter.cpp
new file mode 100644
index 0000000..533ad37
--- /dev/null
+++ b/jcc/_jcc/java/io/PrintWriter.cpp
@@ -0,0 +1,87 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <jni.h>
+#include "JCCEnv.h"
+#include "java/lang/Object.h"
+#include "java/lang/Class.h"
+#include "java/io/PrintWriter.h"
+
+namespace java {
+    namespace io {
+
+        enum {
+            mid__init_,
+            max_mid
+        };
+
+        java::lang::Class *PrintWriter::class$ = NULL;
+        jmethodID *PrintWriter::_mids = NULL;
+
+        jclass PrintWriter::initializeClass()
+        {
+            if (!class$)
+            {
+                jclass cls = env->findClass("java/io/PrintWriter");
+
+                _mids = new jmethodID[max_mid];
+                _mids[mid__init_] =
+                    env->getMethodID(cls, "<init>", "(Ljava/io/Writer;)V");
+
+                class$ = (java::lang::Class *) new JObject(cls);
+            }
+
+            return (jclass) class$->this$;
+        }
+
+        PrintWriter::PrintWriter(Writer writer) : Writer(env->newObject(initializeClass, &_mids, mid__init_, writer.this$)) {
+        }
+    }
+}
+
+
+#include "structmember.h"
+#include "functions.h"
+#include "macros.h"
+
+namespace java {
+    namespace io {
+
+        static int t_PrintWriter_init(t_PrintWriter *self,
+                                      PyObject *args, PyObject *kwds);
+
+        static PyMethodDef t_PrintWriter__methods_[] = {
+            { NULL, NULL, 0, NULL }
+        };
+
+        DECLARE_TYPE(PrintWriter, t_PrintWriter, Writer,
+                     java::io::PrintWriter, t_PrintWriter_init,
+                     0, 0, 0, 0, 0);
+
+        static int t_PrintWriter_init(t_PrintWriter *self,
+                                      PyObject *args, PyObject *kwds)
+        {
+            Writer writer((jobject) NULL);
+
+            if (!parseArgs(args, "j", Writer::class$, &writer))
+            {
+                INT_CALL(self->object = PrintWriter(writer));
+                return 0;
+            }
+
+            PyErr_SetString(PyExc_ValueError, "invalid args");
+            return -1;
+        }
+    }
+}
diff --git a/jcc/_jcc/java/io/PrintWriter.h b/jcc/_jcc/java/io/PrintWriter.h
new file mode 100644
index 0000000..0b29078
--- /dev/null
+++ b/jcc/_jcc/java/io/PrintWriter.h
@@ -0,0 +1,50 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _PrintWriter_H
+#define _PrintWriter_H
+
+#include <Python.h>
+#include "java/lang/Class.h"
+#include "java/io/Writer.h"
+
+namespace java {
+    namespace io {
+
+        class PrintWriter : public Writer {
+        public:
+            static java::lang::Class *class$;
+            static jmethodID *_mids;
+            static jclass initializeClass();
+
+            explicit PrintWriter(jobject obj) : Writer(obj) {
+                initializeClass();
+            }
+            PrintWriter(Writer writer);
+            PrintWriter(const PrintWriter& obj) : Writer(obj) {}
+        };
+
+        extern PyTypeObject PY_TYPE(PrintWriter);
+
+        class t_PrintWriter {
+        public:
+            PyObject_HEAD
+            PrintWriter object;
+            static PyObject *wrap_Object(const PrintWriter& object);
+            static PyObject *wrap_jobject(const jobject& object);
+        };
+    }
+}
+
+#endif /* _PrintWriter_H */
diff --git a/jcc/_jcc/java/io/StringWriter.cpp b/jcc/_jcc/java/io/StringWriter.cpp
new file mode 100644
index 0000000..43bbf4c
--- /dev/null
+++ b/jcc/_jcc/java/io/StringWriter.cpp
@@ -0,0 +1,86 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <jni.h>
+#include "JCCEnv.h"
+#include "java/lang/Object.h"
+#include "java/lang/Class.h"
+#include "java/io/StringWriter.h"
+
+namespace java {
+    namespace io {
+
+        enum {
+            mid__init_,
+            max_mid
+        };
+
+        java::lang::Class *StringWriter::class$ = NULL;
+        jmethodID *StringWriter::_mids = NULL;
+
+        jclass StringWriter::initializeClass()
+        {
+            if (!class$)
+            {
+                jclass cls = env->findClass("java/io/StringWriter");
+
+                _mids = new jmethodID[max_mid];
+                _mids[mid__init_] = env->getMethodID(cls, "<init>", "()V");
+
+                class$ = (java::lang::Class *) new JObject(cls);
+            }
+
+            return (jclass) class$->this$;
+        }
+
+        StringWriter::StringWriter() : Writer(env->newObject(initializeClass, &_mids, mid__init_)) {
+        }
+    }
+}
+
+
+#include "structmember.h"
+#include "functions.h"
+#include "macros.h"
+
+namespace java {
+    namespace io {
+
+        static int t_StringWriter_init(t_StringWriter *self,
+                                       PyObject *args, PyObject *kwds);
+
+        static PyMethodDef t_StringWriter__methods_[] = {
+            { NULL, NULL, 0, NULL }
+        };
+
+        DECLARE_TYPE(StringWriter, t_StringWriter, Writer,
+                     java::io::StringWriter, t_StringWriter_init,
+                     0, 0, 0, 0, 0);
+
+        static int t_StringWriter_init(t_StringWriter *self,
+                                       PyObject *args, PyObject *kwds)
+        {
+            switch (PyTuple_Size(args)) {
+              case 0:
+                INT_CALL(self->object = StringWriter());
+                break;
+              default:
+                PyErr_SetString(PyExc_ValueError, "invalid args");
+                return -1;
+            }
+
+            return 0;
+        }
+    }
+}
diff --git a/jcc/_jcc/java/io/StringWriter.h b/jcc/_jcc/java/io/StringWriter.h
new file mode 100644
index 0000000..4661324
--- /dev/null
+++ b/jcc/_jcc/java/io/StringWriter.h
@@ -0,0 +1,50 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _StringWriter_H
+#define _StringWriter_H
+
+#include <Python.h>
+#include "java/lang/Class.h"
+#include "java/io/Writer.h"
+
+namespace java {
+    namespace io {
+
+        class StringWriter : public Writer {
+        public:
+            static java::lang::Class *class$;
+            static jmethodID *_mids;
+            static jclass initializeClass();
+
+            explicit StringWriter(jobject obj) : Writer(obj) {
+                initializeClass();
+            }
+            StringWriter();
+            StringWriter(const StringWriter& obj) : Writer(obj) {}
+        };
+
+        extern PyTypeObject PY_TYPE(StringWriter);
+
+        class t_StringWriter {
+        public:
+            PyObject_HEAD
+            StringWriter object;
+            static PyObject *wrap_Object(const StringWriter& object);
+            static PyObject *wrap_jobject(const jobject& object);
+        };
+    }
+}
+
+#endif /* _StringWriter_H */
diff --git a/jcc/_jcc/java/io/Writer.cpp b/jcc/_jcc/java/io/Writer.cpp
new file mode 100644
index 0000000..171b495
--- /dev/null
+++ b/jcc/_jcc/java/io/Writer.cpp
@@ -0,0 +1,61 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <jni.h>
+#include "JCCEnv.h"
+#include "java/lang/Object.h"
+#include "java/lang/Class.h"
+#include "java/io/Writer.h"
+
+namespace java {
+    namespace io {
+
+        enum {
+            max_mid
+        };
+
+        java::lang::Class *Writer::class$ = NULL;
+        jmethodID *Writer::_mids = NULL;
+
+        jclass Writer::initializeClass()
+        {
+            if (!class$)
+            {
+                jclass cls = env->findClass("java/io/Writer");
+
+                _mids = NULL;
+                class$ = (java::lang::Class *) new JObject(cls);
+            }
+
+            return (jclass) class$->this$;
+        }
+    }
+}
+
+
+#include "structmember.h"
+#include "functions.h"
+#include "macros.h"
+
+namespace java {
+    namespace io {
+
+        static PyMethodDef t_Writer__methods_[] = {
+            { NULL, NULL, 0, NULL }
+        };
+
+        DECLARE_TYPE(Writer, t_Writer, java::lang::Object, Writer,
+                     abstract_init, 0, 0, 0, 0, 0);
+    }
+}
diff --git a/jcc/_jcc/java/io/Writer.h b/jcc/_jcc/java/io/Writer.h
new file mode 100644
index 0000000..22f8cc1
--- /dev/null
+++ b/jcc/_jcc/java/io/Writer.h
@@ -0,0 +1,49 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _Writer_H
+#define _Writer_H
+
+#include <Python.h>
+#include "java/lang/Object.h"
+#include "java/lang/Class.h"
+#include "JArray.h"
+
+namespace java {
+    namespace io {
+
+        class Writer : public java::lang::Object {
+        public:
+            static java::lang::Class *class$;
+            static jmethodID *_mids;
+            static jclass initializeClass();
+
+            explicit Writer(jobject obj) : Object(obj) {
+                initializeClass();
+            }
+        };
+
+        extern PyTypeObject PY_TYPE(Writer);
+
+        class t_Writer {
+        public:
+            PyObject_HEAD
+            Writer object;
+            static PyObject *wrap_Object(const Writer& object);
+            static PyObject *wrap_jobject(const jobject& object);
+        };
+    }
+}
+
+#endif /* _Writer_H */
diff --git a/jcc/_jcc/java/io/__init__.cpp b/jcc/_jcc/java/io/__init__.cpp
new file mode 100644
index 0000000..317c6d3
--- /dev/null
+++ b/jcc/_jcc/java/io/__init__.cpp
@@ -0,0 +1,36 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include +#include "macros.h" + +namespace java { + namespace io { + + extern PyTypeObject PY_TYPE(Writer); + extern PyTypeObject PY_TYPE(StringWriter); + extern PyTypeObject PY_TYPE(PrintWriter); + + namespace reflect { + void __install__(PyObject *module); + } + + void __install__(PyObject *m) + { + INSTALL_TYPE(Writer, m); + INSTALL_TYPE(StringWriter, m); + INSTALL_TYPE(PrintWriter, m); + } + } +} diff --git a/jcc/_jcc/java/lang/Boolean.cpp b/jcc/_jcc/java/lang/Boolean.cpp new file mode 100644 index 0000000..f21c934 --- /dev/null +++ b/jcc/_jcc/java/lang/Boolean.cpp @@ -0,0 +1,81 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "JCCEnv.h" +#include "java/lang/Object.h" +#include "java/lang/Class.h" +#include "java/lang/Boolean.h" + +namespace java { + namespace lang { + + enum { + mid__init_, + mid_booleanValue, + max_mid + }; + + Class *Boolean::class$ = NULL; + jmethodID *Boolean::_mids = NULL; + + Boolean *Boolean::TRUE = NULL; + Boolean *Boolean::FALSE = NULL; + + jclass Boolean::initializeClass() + { + if (!class$) + { + jclass cls = env->findClass("java/lang/Boolean"); + + _mids = new jmethodID[max_mid]; + _mids[mid__init_] = env->getMethodID(cls, "", "(Z)V"); + _mids[mid_booleanValue] = + env->getMethodID(cls, "booleanValue", "()Z"); + + class$ = (Class *) new JObject(cls); + + FALSE = new Boolean(env->getStaticObjectField(cls, "FALSE", "Ljava/lang/Boolean;")); + TRUE = new Boolean(env->getStaticObjectField(cls, "TRUE", "Ljava/lang/Boolean;")); + } + + return (jclass) class$->this$; + } + + Boolean::Boolean(jboolean b) : Object(env->newObject(initializeClass, &_mids, mid__init_, b)) { + } + + int Boolean::booleanValue() const + { + return (int) env->callBooleanMethod(this$, _mids[mid_booleanValue]); + } + } +} + + +#include "structmember.h" +#include "functions.h" +#include "macros.h" + +namespace java { + namespace lang { + + static PyMethodDef t_Boolean__methods_[] = { + { NULL, NULL, 0, NULL } + }; + + DECLARE_TYPE(Boolean, t_Boolean, Object, java::lang::Boolean, + abstract_init, 0, 0, 0, 0, 0); + } +} diff --git a/jcc/_jcc/java/lang/Boolean.h b/jcc/_jcc/java/lang/Boolean.h new file mode 100644 index 0000000..d182d37 --- /dev/null +++ b/jcc/_jcc/java/lang/Boolean.h @@ -0,0 +1,54 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef _Boolean_H +#define _Boolean_H + +#include +#include "java/lang/Object.h" +#include "java/lang/Class.h" + +namespace java { + namespace lang { + + class Boolean : public Object { + public: + static Class *class$; + static jmethodID *_mids; + static jclass initializeClass(); + + explicit Boolean(jobject obj) : Object(obj) { + initializeClass(); + } + Boolean(jboolean); + + int booleanValue() const; + + static Boolean *TRUE; + static Boolean *FALSE; + }; + + extern PyTypeObject PY_TYPE(Boolean); + + class t_Boolean { + public: + PyObject_HEAD + Boolean object; + static PyObject *wrap_Object(const Boolean& object); + static PyObject *wrap_jobject(const jobject& object); + }; + } +} + +#endif /* _Boolean_H */ diff --git a/jcc/_jcc/java/lang/Byte.cpp b/jcc/_jcc/java/lang/Byte.cpp new file mode 100644 index 0000000..2714f92 --- /dev/null +++ b/jcc/_jcc/java/lang/Byte.cpp @@ -0,0 +1,75 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "JCCEnv.h" +#include "java/lang/Object.h" +#include "java/lang/Class.h" +#include "java/lang/Byte.h" + +namespace java { + namespace lang { + + enum { + mid__init_, + mid_byteValue, + max_mid + }; + + Class *Byte::class$ = NULL; + jmethodID *Byte::_mids = NULL; + + jclass Byte::initializeClass() + { + if (!class$) + { + jclass cls = env->findClass("java/lang/Byte"); + + _mids = new jmethodID[max_mid]; + _mids[mid__init_] = env->getMethodID(cls, "", "(B)V"); + _mids[mid_byteValue] = + env->getMethodID(cls, "byteValue", "()B"); + + class$ = (Class *) new JObject(cls); + } + + return (jclass) class$->this$; + } + + Byte::Byte(jbyte b) : Object(env->newObject(initializeClass, &_mids, mid__init_, b)) { + } + + jbyte Byte::byteValue() const + { + return env->callByteMethod(this$, _mids[mid_byteValue]); + } + } +} + + +#include "structmember.h" +#include "functions.h" +#include "macros.h" + +namespace java { + namespace lang { + + static PyMethodDef t_Byte__methods_[] = { + { NULL, NULL, 0, NULL } + }; + + DECLARE_TYPE(Byte, t_Byte, Object, java::lang::Byte, + abstract_init, 0, 0, 0, 0, 0); + } +} diff --git a/jcc/_jcc/java/lang/Byte.h b/jcc/_jcc/java/lang/Byte.h new file mode 100644 index 0000000..e0690c4 --- /dev/null +++ b/jcc/_jcc/java/lang/Byte.h @@ -0,0 +1,51 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef _Byte_H +#define _Byte_H + +#include +#include "java/lang/Object.h" +#include "java/lang/Class.h" + +namespace java { + namespace lang { + + class Byte : public Object { + public: + static Class *class$; + static jmethodID *_mids; + static jclass initializeClass(); + + explicit Byte(jobject obj) : Object(obj) { + initializeClass(); + } + Byte(jbyte); + + jbyte byteValue() const; + }; + + extern PyTypeObject PY_TYPE(Byte); + + class t_Byte { + public: + PyObject_HEAD + Byte object; + static PyObject *wrap_Object(const Byte& object); + static PyObject *wrap_jobject(const jobject& object); + }; + } +} + +#endif /* _Byte_H */ diff --git a/jcc/_jcc/java/lang/Character.cpp b/jcc/_jcc/java/lang/Character.cpp new file mode 100644 index 0000000..b8ebe39 --- /dev/null +++ b/jcc/_jcc/java/lang/Character.cpp @@ -0,0 +1,75 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "JCCEnv.h" +#include "java/lang/Object.h" +#include "java/lang/Class.h" +#include "java/lang/Character.h" + +namespace java { + namespace lang { + + enum { + mid__init_, + mid_charValue, + max_mid + }; + + Class *Character::class$ = NULL; + jmethodID *Character::_mids = NULL; + + jclass Character::initializeClass() + { + if (!class$) + { + jclass cls = env->findClass("java/lang/Character"); + + _mids = new jmethodID[max_mid]; + _mids[mid__init_] = env->getMethodID(cls, "", "(C)V"); + _mids[mid_charValue] = + env->getMethodID(cls, "charValue", "()C"); + + class$ = (Class *) new JObject(cls); + } + + return (jclass) class$->this$; + } + + Character::Character(jchar c) : Object(env->newObject(initializeClass, &_mids, mid__init_, c)) { + } + + jchar Character::charValue() const + { + return env->callCharMethod(this$, _mids[mid_charValue]); + } + } +} + + +#include "structmember.h" +#include "functions.h" +#include "macros.h" + +namespace java { + namespace lang { + + static PyMethodDef t_Character__methods_[] = { + { NULL, NULL, 0, NULL } + }; + + DECLARE_TYPE(Character, t_Character, Object, java::lang::Character, + abstract_init, 0, 0, 0, 0, 0); + } +} diff --git a/jcc/_jcc/java/lang/Character.h b/jcc/_jcc/java/lang/Character.h new file mode 100644 index 0000000..9a8c6c0 --- /dev/null +++ b/jcc/_jcc/java/lang/Character.h @@ -0,0 +1,51 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef _Character_H +#define _Character_H + +#include +#include "java/lang/Object.h" +#include "java/lang/Class.h" + +namespace java { + namespace lang { + + class Character : public Object { + public: + static Class *class$; + static jmethodID *_mids; + static jclass initializeClass(); + + explicit Character(jobject obj) : Object(obj) { + initializeClass(); + } + Character(jchar); + + jchar charValue() const; + }; + + extern PyTypeObject PY_TYPE(Character); + + class t_Character { + public: + PyObject_HEAD + Character object; + static PyObject *wrap_Object(const Character& object); + static PyObject *wrap_jobject(const jobject& object); + }; + } +} + +#endif /* _Character_H */ diff --git a/jcc/_jcc/java/lang/Class.cpp b/jcc/_jcc/java/lang/Class.cpp new file mode 100644 index 0000000..17ac775 --- /dev/null +++ b/jcc/_jcc/java/lang/Class.cpp @@ -0,0 +1,623 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "JCCEnv.h" + +#include "JArray.h" +#include "java/lang/Object.h" +#include "java/lang/Class.h" +#include "java/lang/String.h" +#include "java/lang/reflect/Method.h" +#include "java/lang/reflect/Constructor.h" +#include "java/lang/reflect/Field.h" +#ifdef _java_generics +#include "java/lang/reflect/Type.h" +#include "java/lang/reflect/TypeVariable.h" +#endif + +namespace java { + namespace lang { + using namespace reflect; + + enum { + mid_forName, + mid_getDeclaredMethods, + mid_getMethods, + mid_getMethod, + mid_getDeclaredMethod, + mid_getDeclaredConstructors, + mid_getDeclaredFields, + mid_getDeclaredClasses, + mid_isArray, + mid_isPrimitive, + mid_isInterface, + mid_isAssignableFrom, + mid_getComponentType, + mid_getSuperclass, + mid_getDeclaringClass, + mid_getEnclosingClass, + mid_getInterfaces, + mid_getName, + mid_getModifiers, + mid_isInstance, +#ifdef _java_generics + mid_getTypeParameters, + mid_getGenericInterfaces, + mid_getGenericSuperclass, +#endif + max_mid + }; + + Class *Class::class$ = NULL; + jmethodID *Class::_mids = NULL; + + jclass Class::initializeClass() + { + if (!class$) + { + jclass cls = env->findClass("java/lang/Class"); + + _mids = new jmethodID[max_mid]; + _mids[mid_forName] = + env->getStaticMethodID(cls, "forName", + "(Ljava/lang/String;)Ljava/lang/Class;"); + _mids[mid_getDeclaredMethods] = + env->getMethodID(cls, "getDeclaredMethods", + "()[Ljava/lang/reflect/Method;"); + _mids[mid_getMethods] = + env->getMethodID(cls, "getMethods", + "()[Ljava/lang/reflect/Method;"); + _mids[mid_getMethod] = + env->getMethodID(cls, "getMethod", + "(Ljava/lang/String;[Ljava/lang/Class;)Ljava/lang/reflect/Method;"); + _mids[mid_getDeclaredMethod] = + env->getMethodID(cls, "getDeclaredMethod", + "(Ljava/lang/String;[Ljava/lang/Class;)Ljava/lang/reflect/Method;"); + _mids[mid_getDeclaredConstructors] = + env->getMethodID(cls, "getDeclaredConstructors", + "()[Ljava/lang/reflect/Constructor;"); + _mids[mid_getDeclaredFields] = + env->getMethodID(cls, "getDeclaredFields", + "()[Ljava/lang/reflect/Field;"); + 
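+                /* The jmethodIDs here are looked up once from their JNI
+                 * name/signature pairs and cached in _mids, indexed by the
+                 * mid_ enum above; this lookup runs only on the first call,
+                 * while class$ is still NULL. */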
+                _mids[mid_getDeclaredClasses] =
+                    env->getMethodID(cls, "getDeclaredClasses",
+                                     "()[Ljava/lang/Class;");
+                _mids[mid_isArray] =
+                    env->getMethodID(cls, "isArray",
+                                     "()Z");
+                _mids[mid_isPrimitive] =
+                    env->getMethodID(cls, "isPrimitive",
+                                     "()Z");
+                _mids[mid_isInterface] =
+                    env->getMethodID(cls, "isInterface",
+                                     "()Z");
+                _mids[mid_isAssignableFrom] =
+                    env->getMethodID(cls, "isAssignableFrom",
+                                     "(Ljava/lang/Class;)Z");
+                _mids[mid_getComponentType] =
+                    env->getMethodID(cls, "getComponentType",
+                                     "()Ljava/lang/Class;");
+                _mids[mid_getSuperclass] =
+                    env->getMethodID(cls, "getSuperclass",
+                                     "()Ljava/lang/Class;");
+                _mids[mid_getDeclaringClass] =
+                    env->getMethodID(cls, "getDeclaringClass",
+                                     "()Ljava/lang/Class;");
+                _mids[mid_getEnclosingClass] =
+                    env->getMethodID(cls, "getEnclosingClass",
+                                     "()Ljava/lang/Class;");
+                _mids[mid_getInterfaces] =
+                    env->getMethodID(cls, "getInterfaces",
+                                     "()[Ljava/lang/Class;");
+                _mids[mid_getName] =
+                    env->getMethodID(cls, "getName",
+                                     "()Ljava/lang/String;");
+                _mids[mid_getModifiers] =
+                    env->getMethodID(cls, "getModifiers",
+                                     "()I");
+                _mids[mid_isInstance] =
+                    env->getMethodID(cls, "isInstance",
+                                     "(Ljava/lang/Object;)Z");
+#ifdef _java_generics
+                _mids[mid_getTypeParameters] =
+                    env->getMethodID(cls, "getTypeParameters",
+                                     "()[Ljava/lang/reflect/TypeVariable;");
+                _mids[mid_getGenericInterfaces] =
+                    env->getMethodID(cls, "getGenericInterfaces",
+                                     "()[Ljava/lang/reflect/Type;");
+                _mids[mid_getGenericSuperclass] =
+                    env->getMethodID(cls, "getGenericSuperclass",
+                                     "()Ljava/lang/reflect/Type;");
+#endif
+                class$ = (Class *) new JObject(cls);
+            }
+
+            return (jclass) class$->this$;
+        }
+
+
+        Class Class::forName(const String& className)
+        {
+            jclass cls = initializeClass();
+            jobject obj = env->callStaticObjectMethod(cls, _mids[mid_forName], className.this$);
+
+            return Class((jclass) obj);
+        }
+
+        JArray<Method> Class::getDeclaredMethods() const
+        {
+            jobjectArray array = (jobjectArray)
+                env->callObjectMethod(this$, _mids[mid_getDeclaredMethods]);
+
+            return JArray<Method>(array);
+        }
+
+        JArray<Method> Class::getMethods() const
+        {
+            jobjectArray array = (jobjectArray)
+                env->callObjectMethod(this$, _mids[mid_getMethods]);
+
+            return JArray<Method>(array);
+        }
+
+        Method Class::getMethod(const String& name, const JArray<Class>& params) const
+        {
+            return Method(env->callObjectMethod(this$, _mids[mid_getMethod], name.this$, params.this$));
+        }
+
+        Method Class::getDeclaredMethod(const String& name, const JArray<Class>& params) const
+        {
+            return Method(env->callObjectMethod(this$, _mids[mid_getDeclaredMethod], name.this$, params.this$));
+        }
+
+        JArray<Constructor> Class::getDeclaredConstructors() const
+        {
+            jobjectArray array = (jobjectArray)
+                env->callObjectMethod(this$, _mids[mid_getDeclaredConstructors]);
+
+            return JArray<Constructor>(array);
+        }
+
+        JArray<Field> Class::getDeclaredFields() const
+        {
+            jobjectArray array = (jobjectArray)
+                env->callObjectMethod(this$, _mids[mid_getDeclaredFields]);
+
+            return JArray<Field>(array);
+        }
+
+        JArray<Class> Class::getDeclaredClasses() const
+        {
+            jobjectArray array = (jobjectArray)
+                env->callObjectMethod(this$, _mids[mid_getDeclaredClasses]);
+
+            return JArray<Class>(array);
+        }
+
+        int Class::isArray() const
+        {
+            return (int) env->callBooleanMethod(this$, _mids[mid_isArray]);
+        }
+
+        int Class::isPrimitive() const
+        {
+            return (int) env->callBooleanMethod(this$, _mids[mid_isPrimitive]);
+        }
+
+        int Class::isInterface() const
+        {
+            return (int) env->callBooleanMethod(this$, _mids[mid_isInterface]);
+        }
+
+        int Class::isAssignableFrom(const Class& obj) const
+        {
+            return (int) env->callBooleanMethod(this$, _mids[mid_isAssignableFrom], obj.this$);
+        }
+
+        Class Class::getComponentType() const
+        {
+            return Class(env->callObjectMethod(this$, _mids[mid_getComponentType]));
+        }
+
+        Class Class::getSuperclass() const
+        {
+            return Class(env->callObjectMethod(this$, _mids[mid_getSuperclass]));
+        }
+
+        Class Class::getDeclaringClass() const
+        {
+            return Class(env->callObjectMethod(this$, _mids[mid_getDeclaringClass]));
+        }
+
+        Class Class::getEnclosingClass() const
+        {
+            return Class(env->callObjectMethod(this$, _mids[mid_getEnclosingClass]));
+        }
+
+        JArray<Class> Class::getInterfaces() const
+        {
+            jobjectArray array = (jobjectArray)
+                env->callObjectMethod(this$, _mids[mid_getInterfaces]);
+
+            return JArray<Class>(array);
+        }
+
+        String Class::getName() const
+        {
+            return String(env->callObjectMethod(this$, _mids[mid_getName]));
+        }
+
+        int Class::getModifiers() const
+        {
+            return env->callIntMethod(this$, _mids[mid_getModifiers]);
+        }
+
+        int Class::isInstance(const Object &obj) const
+        {
+            return env->callBooleanMethod(this$, _mids[mid_isInstance],
+                                          obj.this$);
+        }
+
+#ifdef _java_generics
+        JArray<TypeVariable> Class::getTypeParameters() const
+        {
+            return JArray<TypeVariable>(env->callObjectMethod(this$, _mids[mid_getTypeParameters]));
+        }
+
+        JArray<Type> Class::getGenericInterfaces() const
+        {
+            return JArray<Type>(env->callObjectMethod(this$, _mids[mid_getGenericInterfaces]));
+        }
+
+        Type Class::getGenericSuperclass() const
+        {
+            return Type(env->callObjectMethod(this$, _mids[mid_getGenericSuperclass]));
+        }
+#endif
+    }
+}
+
+
+#include "structmember.h"
+#include "functions.h"
+#include "macros.h"
+
+namespace java {
+    namespace lang {
+        using namespace reflect;
+
+        static PyObject *t_Class_cast_(PyTypeObject *type, PyObject *arg);
+        static PyObject *t_Class_instance_(PyTypeObject *type, PyObject *arg);
+        static PyObject *t_Class_forName(PyTypeObject *type, PyObject *arg);
+        static PyObject *t_Class_getDeclaredConstructors(t_Class *self);
+        static PyObject *t_Class_getDeclaredMethods(t_Class *self);
+        static PyObject *t_Class_getMethods(t_Class *self);
+        static PyObject *t_Class_getMethod(t_Class *self, PyObject *args);
+        static PyObject *t_Class_getDeclaredMethod(t_Class *self, PyObject *args);
+        static PyObject *t_Class_getDeclaredFields(t_Class *self);
+        static PyObject *t_Class_getDeclaredClasses(t_Class *self);
+        static PyObject *t_Class_isArray(t_Class *self);
+        static PyObject *t_Class_isPrimitive(t_Class *self);
+        static PyObject *t_Class_isInterface(t_Class *self);
+        static PyObject *t_Class_isAssignableFrom(t_Class *self, PyObject *arg);
+        static PyObject *t_Class_getComponentType(t_Class *self);
+        static PyObject *t_Class_getSuperclass(t_Class *self);
+        static PyObject *t_Class_getDeclaringClass(t_Class *self);
+        static PyObject *t_Class_getEnclosingClass(t_Class *self);
+        static PyObject *t_Class_getInterfaces(t_Class *self);
+        static PyObject *t_Class_getName(t_Class *self);
+        static PyObject *t_Class_getModifiers(t_Class *self);
+#ifdef _java_generics
+        static PyObject *t_Class_getTypeParameters(t_Class *self);
+        static PyObject *t_Class_getGenericInterfaces(t_Class *self);
+        static PyObject *t_Class_getGenericSuperclass(t_Class *self);
+        static PyObject *t_Class_get__parameters_(t_Class *self, void *data);
+
+        static PyGetSetDef t_Class__fields_[] = {
+            DECLARE_GET_FIELD(t_Class, parameters_),
+            { NULL, NULL, NULL, NULL, NULL }
+        };
+#else
+        static PyGetSetDef t_Class__fields_[] = {
+            { NULL, NULL, NULL, NULL, NULL }
+        };
+#endif
+
+        static PyMethodDef t_Class__methods_[] = {
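+            /* Method table wiring the t_Class_* wrappers below into the
+             * Python type: METH_CLASS entries (cast_, instance_, forName)
+             * become class methods, and METH_NOARGS/METH_O/METH_VARARGS
+             * select the CPython calling convention for each wrapper. */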
+            DECLARE_METHOD(t_Class, cast_, METH_O | METH_CLASS),
+            DECLARE_METHOD(t_Class, instance_, METH_O | METH_CLASS),
+            DECLARE_METHOD(t_Class, forName, METH_O | METH_CLASS),
+            DECLARE_METHOD(t_Class, getDeclaredConstructors, METH_NOARGS),
+            DECLARE_METHOD(t_Class, getDeclaredMethods, METH_NOARGS),
+            DECLARE_METHOD(t_Class, getMethods, METH_NOARGS),
+            DECLARE_METHOD(t_Class, getMethod, METH_VARARGS),
+            DECLARE_METHOD(t_Class, getDeclaredMethod, METH_VARARGS),
+            DECLARE_METHOD(t_Class, getDeclaredFields, METH_NOARGS),
+            DECLARE_METHOD(t_Class, getDeclaredClasses, METH_NOARGS),
+            DECLARE_METHOD(t_Class, isArray, METH_NOARGS),
+            DECLARE_METHOD(t_Class, isPrimitive, METH_NOARGS),
+            DECLARE_METHOD(t_Class, isInterface, METH_NOARGS),
+            DECLARE_METHOD(t_Class, isAssignableFrom, METH_O),
+            DECLARE_METHOD(t_Class, getComponentType, METH_NOARGS),
+            DECLARE_METHOD(t_Class, getSuperclass, METH_NOARGS),
+            DECLARE_METHOD(t_Class, getDeclaringClass, METH_NOARGS),
+            DECLARE_METHOD(t_Class, getEnclosingClass, METH_NOARGS),
+            DECLARE_METHOD(t_Class, getInterfaces, METH_NOARGS),
+            DECLARE_METHOD(t_Class, getName, METH_NOARGS),
+            DECLARE_METHOD(t_Class, getModifiers, METH_NOARGS),
+#ifdef _java_generics
+            DECLARE_METHOD(t_Class, getTypeParameters, METH_NOARGS),
+            DECLARE_METHOD(t_Class, getGenericInterfaces, METH_NOARGS),
+            DECLARE_METHOD(t_Class, getGenericSuperclass, METH_NOARGS),
+#endif
+            { NULL, NULL, 0, NULL }
+        };
+
+        DECLARE_TYPE(Class, t_Class, Object, java::lang::Class,
+                     abstract_init, 0, 0, t_Class__fields_, 0, 0);
+
+#ifdef _java_generics
+        PyObject *t_Class::wrap_Object(const Class& object, PyTypeObject *T)
+        {
+            PyObject *obj = t_Class::wrap_Object(object);
+            if (obj != Py_None)
+            {
+                t_Class *self = (t_Class *) obj;
+                self->parameters[0] = T;
+            }
+            return obj;
+        }
+#endif
+        static PyObject *t_Class_cast_(PyTypeObject *type, PyObject *arg)
+        {
+            if (!(arg = castCheck(arg, Class::initializeClass, 1)))
+                return NULL;
+            return t_Class::wrap_Object(Class(((t_Class *) arg)->object.this$));
+        }
+        static PyObject *t_Class_instance_(PyTypeObject *type, PyObject *arg)
+        {
+            if (!castCheck(arg, Class::initializeClass, 0))
+                Py_RETURN_FALSE;
+            Py_RETURN_TRUE;
+        }
+
+        static PyObject *t_Class_forName(PyTypeObject *type, PyObject *arg)
+        {
+            if (!PyString_Check(arg))
+            {
+                PyErr_SetObject(PyExc_TypeError, arg);
+                return NULL;
+            }
+
+            try {
+                char *className = PyString_AsString(arg);
+                String name = String(env->fromUTF(className));
+
+                return t_Class::wrap_Object(Class::forName(name));
+            } catch (int e) {
+                switch (e) {
+                  case _EXC_JAVA:
+                    return PyErr_SetJavaError();
+                  default:
+                    throw;
+                }
+            }
+        }
+
+        static PyObject *t_Class_getDeclaredConstructors(t_Class *self)
+        {
+            JArray<Constructor> constructors((jobject) NULL);
+
+            OBJ_CALL(constructors = self->object.getDeclaredConstructors());
+            return constructors.toSequence(t_Constructor::wrap_Object);
+        }
+
+        static PyObject *t_Class_getDeclaredMethods(t_Class *self)
+        {
+            JArray<Method> methods((jobject) NULL);
+
+            OBJ_CALL(methods = self->object.getDeclaredMethods());
+            return methods.toSequence(t_Method::wrap_Object);
+        }
+
+        static PyObject *t_Class_getMethods(t_Class *self)
+        {
+            JArray<Method> methods((jobject) NULL);
+
+            OBJ_CALL(methods = self->object.getMethods());
+            return methods.toSequence(t_Method::wrap_Object);
+        }
+
+        static PyObject *t_Class_getMethod(t_Class *self, PyObject *args)
+        {
+            String name((jobject) NULL);
+            JArray<Class> params((jobject) NULL);
+            Method method((jobject) NULL);
+
+            if (!parseArgs(args, "s[j", Class::class$, &name, &params))
+            {
+                OBJ_CALL(method = self->object.getMethod(name, params));
+                return t_Method::wrap_Object(method);
+            }
+
+            return PyErr_SetArgsError((PyObject *) self, "getMethod", args);
+        }
+
+        static PyObject *t_Class_getDeclaredMethod(t_Class *self, PyObject *args)
+        {
+            String name((jobject) NULL);
+            JArray<Class> params((jobject) NULL);
+            Method method((jobject) NULL);
+
+            if (!parseArgs(args, "s[j", Class::class$, &name, &params))
+            {
+                OBJ_CALL(method = self->object.getDeclaredMethod(name, params));
+                return t_Method::wrap_Object(method);
+            }
+
+            return PyErr_SetArgsError((PyObject *) self, "getDeclaredMethod", args);
+        }
+
+        static PyObject *t_Class_getDeclaredFields(t_Class *self)
+        {
+            JArray<Field> fields((jobject) NULL);
+
+            OBJ_CALL(fields = self->object.getDeclaredFields());
+            return fields.toSequence(t_Field::wrap_Object);
+        }
+
+        static PyObject *t_Class_getDeclaredClasses(t_Class *self)
+        {
+            JArray<Class> array((jobject) NULL);
+
+            OBJ_CALL(array = self->object.getDeclaredClasses());
+            return array.toSequence(t_Class::wrap_Object);
+        }
+
+        static PyObject *t_Class_isArray(t_Class *self)
+        {
+            int isArray;
+
+            OBJ_CALL(isArray = self->object.isArray());
+            Py_RETURN_BOOL(isArray);
+        }
+
+        static PyObject *t_Class_isPrimitive(t_Class *self)
+        {
+            int isPrimitive;
+
+            OBJ_CALL(isPrimitive = self->object.isPrimitive());
+            Py_RETURN_BOOL(isPrimitive);
+        }
+
+        static PyObject *t_Class_isInterface(t_Class *self)
+        {
+            int isInterface;
+
+            OBJ_CALL(isInterface = self->object.isInterface());
+            Py_RETURN_BOOL(isInterface);
+        }
+
+        static PyObject *t_Class_isAssignableFrom(t_Class *self, PyObject *arg)
+        {
+            if (!PyObject_TypeCheck(arg, &PY_TYPE(Class)))
+            {
+                PyErr_SetObject(PyExc_TypeError, arg);
+                return NULL;
+            }
+
+            try {
+                Class cls = ((t_Class *) arg)->object;
+                int isAssignableFrom = self->object.isAssignableFrom(cls);
+
+                Py_RETURN_BOOL(isAssignableFrom);
+            } catch (int e) {
+                switch (e) {
+                  case _EXC_JAVA:
+                    return PyErr_SetJavaError();
+                  default:
+                    throw;
+                }
+            }
+        }
+
+        static PyObject *t_Class_getComponentType(t_Class *self)
+        {
+            Class cls((jobject) NULL);
+
+            OBJ_CALL(cls = self->object.getComponentType());
+            return t_Class::wrap_Object(cls);
+        }
+
+        static PyObject *t_Class_getSuperclass(t_Class *self)
+        {
+            Class cls((jobject) NULL);
+
+            OBJ_CALL(cls = self->object.getSuperclass());
+            return t_Class::wrap_Object(cls);
+        }
+
+        static PyObject *t_Class_getDeclaringClass(t_Class *self)
+        {
+            Class cls((jobject) NULL);
+
+            OBJ_CALL(cls = self->object.getDeclaringClass());
+            return t_Class::wrap_Object(cls);
+        }
+
+        static PyObject *t_Class_getEnclosingClass(t_Class *self)
+        {
+            Class cls((jobject) NULL);
+
+            OBJ_CALL(cls = self->object.getEnclosingClass());
+            return t_Class::wrap_Object(cls);
+        }
+
+        static PyObject *t_Class_getInterfaces(t_Class *self)
+        {
+            JArray<Class> interfaces((jobject) NULL);
+
+            OBJ_CALL(interfaces = self->object.getInterfaces());
+            return interfaces.toSequence(t_Class::wrap_Object);
+        }
+
+        static PyObject *t_Class_getName(t_Class *self)
+        {
+            String name((jobject) NULL);
+
+            OBJ_CALL(name = self->object.getName());
+            return j2p(name);
+        }
+
+        static PyObject *t_Class_getModifiers(t_Class *self)
+        {
+            jint modifiers;
+
+            OBJ_CALL(modifiers = self->object.getModifiers());
+            return PyInt_FromLong(modifiers);
+        }
+
+#ifdef _java_generics
+        static PyObject *t_Class_getTypeParameters(t_Class *self)
+        {
+            JArray<TypeVariable> result((jobject) NULL);
+            OBJ_CALL(result = self->object.getTypeParameters());
+
+            return result.toSequence(t_TypeVariable::wrap_Object);
+        }
+
+        static PyObject *t_Class_getGenericInterfaces(t_Class *self)
+        {
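+            /* As in t_Class_forName above, a Java exception raised during
+             * the call is turned into a Python error; OBJ_CALL wraps the
+             * same _EXC_JAVA / PyErr_SetJavaError handling and returns
+             * NULL on failure. */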
JArray result((jobject) NULL); + OBJ_CALL(result = self->object.getGenericInterfaces()); + + return result.toSequence(t_Type::wrap_Object); + } + + static PyObject *t_Class_getGenericSuperclass(t_Class *self) + { + Type result((jobject) NULL); + OBJ_CALL(result = self->object.getGenericSuperclass()); + + return t_Type::wrap_Object(result); + } + + static PyObject *t_Class_get__parameters_(t_Class *self, void *data) + { + return typeParameters(self->parameters, sizeof(self->parameters)); + } +#endif + } +} diff --git a/jcc/_jcc/java/lang/Class.h b/jcc/_jcc/java/lang/Class.h new file mode 100644 index 0000000..1557661 --- /dev/null +++ b/jcc/_jcc/java/lang/Class.h @@ -0,0 +1,96 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _Class_H +#define _Class_H + +#include +#include "JArray.h" +#include "java/lang/Object.h" + +namespace java { + namespace lang { + namespace reflect { + class Method; + class Constructor; + class Field; +#ifdef _java_generics + class Type; + class TypeVariable; +#endif + } + + using namespace reflect; + + class Class : public Object { + public: + static Class *class$; + static jmethodID *_mids; + static jclass initializeClass(); + + explicit Class(jobject obj) : Object(obj) { + initializeClass(); + } + Class(const Class& obj) : Object(obj) {} + + static Class forName(const String& className); + JArray getDeclaredMethods() const; + JArray getMethods() const; + Method getMethod(const String &name, const JArray& params) const; + Method getDeclaredMethod(const String &name, const JArray& params) const; + JArray getDeclaredConstructors() const; + JArray getDeclaredFields() const; + JArray getDeclaredClasses() const; + int isArray() const; + int isPrimitive() const; + int isInterface() const; + int isAssignableFrom(const Class& obj) const; + Class getComponentType() const; + Class getSuperclass() const; + Class getDeclaringClass() const; + Class getEnclosingClass() const; + JArray getInterfaces() const; + String getName() const; + int getModifiers() const; + int isInstance(const Object &obj) const; +#ifdef _java_generics + JArray getTypeParameters() const; + JArray getGenericInterfaces() const; + Type getGenericSuperclass() const; +#endif + }; + + extern PyTypeObject PY_TYPE(Class); + + class t_Class { + public: + PyObject_HEAD + Class object; +#ifdef _java_generics + PyTypeObject *parameters[1]; + static PyTypeObject **parameters_(t_Class *self) + { + return (PyTypeObject **) &(self->parameters); + } +#endif + static PyObject *wrap_Object(const Class& object); +#ifdef _java_generics + static PyObject *wrap_Object(const Class& object, PyTypeObject *T); +#endif + static PyObject *wrap_jobject(const jobject& object); + }; + } +} + +#endif /* _Class_H */ diff --git a/jcc/_jcc/java/lang/Double.cpp b/jcc/_jcc/java/lang/Double.cpp new file mode 100644 index 0000000..e0b204e --- /dev/null +++ b/jcc/_jcc/java/lang/Double.cpp @@ -0,0 +1,75 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in 
compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "JCCEnv.h" +#include "java/lang/Object.h" +#include "java/lang/Class.h" +#include "java/lang/Double.h" + +namespace java { + namespace lang { + + enum { + mid__init_, + mid_doubleValue, + max_mid + }; + + Class *Double::class$ = NULL; + jmethodID *Double::_mids = NULL; + + jclass Double::initializeClass() + { + if (!class$) + { + jclass cls = env->findClass("java/lang/Double"); + + _mids = new jmethodID[max_mid]; + _mids[mid__init_] = env->getMethodID(cls, "", "(D)V"); + _mids[mid_doubleValue] = + env->getMethodID(cls, "doubleValue", "()D"); + + class$ = (Class *) new JObject(cls); + } + + return (jclass) class$->this$; + } + + Double::Double(jdouble n) : Object(env->newObject(initializeClass, &_mids, mid__init_, n)) { + } + + jdouble Double::doubleValue() const + { + return env->callDoubleMethod(this$, _mids[mid_doubleValue]); + } + } +} + + +#include "structmember.h" +#include "functions.h" +#include "macros.h" + +namespace java { + namespace lang { + + static PyMethodDef t_Double__methods_[] = { + { NULL, NULL, 0, NULL } + }; + + DECLARE_TYPE(Double, t_Double, Object, java::lang::Double, + abstract_init, 0, 0, 0, 0, 0); + } +} diff --git a/jcc/_jcc/java/lang/Double.h b/jcc/_jcc/java/lang/Double.h new file mode 100644 index 0000000..3bb5808 --- /dev/null +++ b/jcc/_jcc/java/lang/Double.h @@ -0,0 +1,51 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _Double_H +#define _Double_H + +#include +#include "java/lang/Object.h" +#include "java/lang/Class.h" + +namespace java { + namespace lang { + + class Double : public Object { + public: + static Class *class$; + static jmethodID *_mids; + static jclass initializeClass(); + + explicit Double(jobject obj) : Object(obj) { + initializeClass(); + } + Double(jdouble); + + jdouble doubleValue() const; + }; + + extern PyTypeObject PY_TYPE(Double); + + class t_Double { + public: + PyObject_HEAD + Double object; + static PyObject *wrap_Object(const Double& object); + static PyObject *wrap_jobject(const jobject& object); + }; + } +} + +#endif /* _Double_H */ diff --git a/jcc/_jcc/java/lang/Exception.cpp b/jcc/_jcc/java/lang/Exception.cpp new file mode 100644 index 0000000..8b45f5c --- /dev/null +++ b/jcc/_jcc/java/lang/Exception.cpp @@ -0,0 +1,60 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "JCCEnv.h" +#include "java/lang/Object.h" +#include "java/lang/Class.h" +#include "java/lang/Exception.h" + +namespace java { + namespace lang { + + enum { + max_mid + }; + + Class *Exception::class$ = NULL; + jmethodID *Exception::_mids = NULL; + + jclass Exception::initializeClass() + { + if (!class$) + { + jclass cls = env->findClass("java/lang/Exception"); + + class$ = (Class *) new JObject(cls); + } + + return (jclass) class$->this$; + } + } +} + + +#include "structmember.h" +#include "functions.h" +#include "macros.h" + +namespace java { + namespace lang { + + static PyMethodDef t_Exception__methods_[] = { + { NULL, NULL, 0, NULL } + }; + + DECLARE_TYPE(Exception, t_Exception, Throwable, Exception, + abstract_init, 0, 0, 0, 0, 0); + } +} diff --git a/jcc/_jcc/java/lang/Exception.h b/jcc/_jcc/java/lang/Exception.h new file mode 100644 index 0000000..2f214a4 --- /dev/null +++ b/jcc/_jcc/java/lang/Exception.h @@ -0,0 +1,49 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _Exception_H +#define _Exception_H + +#include +#include "java/lang/Class.h" +#include "java/lang/Throwable.h" +#include "JArray.h" + +namespace java { + namespace lang { + + class Exception : public Throwable { + public: + static Class *class$; + static jmethodID *_mids; + static jclass initializeClass(); + + explicit Exception(jobject obj) : Throwable(obj) { + initializeClass(); + } + }; + + extern PyTypeObject PY_TYPE(Exception); + + class t_Exception { + public: + PyObject_HEAD + Exception object; + static PyObject *wrap_Object(const Exception& object); + static PyObject *wrap_jobject(const jobject& object); + }; + } +} + +#endif /* _Exception_H */ diff --git a/jcc/_jcc/java/lang/Float.cpp b/jcc/_jcc/java/lang/Float.cpp new file mode 100644 index 0000000..18b9925 --- /dev/null +++ b/jcc/_jcc/java/lang/Float.cpp @@ -0,0 +1,75 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include "JCCEnv.h" +#include "java/lang/Object.h" +#include "java/lang/Class.h" +#include "java/lang/Float.h" + +namespace java { + namespace lang { + + enum { + mid__init_, + mid_floatValue, + max_mid + }; + + Class *Float::class$ = NULL; + jmethodID *Float::_mids = NULL; + + jclass Float::initializeClass() + { + if (!class$) + { + jclass cls = env->findClass("java/lang/Float"); + + _mids = new jmethodID[max_mid]; + _mids[mid__init_] = env->getMethodID(cls, "", "(F)V"); + _mids[mid_floatValue] = + env->getMethodID(cls, "floatValue", "()F"); + + class$ = (Class *) new JObject(cls); + } + + return (jclass) class$->this$; + } + + Float::Float(jfloat f) : Object(env->newObject(initializeClass, &_mids, mid__init_, f)) { + } + + jfloat Float::floatValue() const + { + return env->callFloatMethod(this$, _mids[mid_floatValue]); + } + } +} + + +#include "structmember.h" +#include "functions.h" +#include "macros.h" + +namespace java { + namespace lang { + + static PyMethodDef t_Float__methods_[] = { + { NULL, NULL, 0, NULL } + }; + + DECLARE_TYPE(Float, t_Float, Object, java::lang::Float, + abstract_init, 0, 0, 0, 0, 0); + } +} diff --git a/jcc/_jcc/java/lang/Float.h b/jcc/_jcc/java/lang/Float.h new file mode 100644 index 0000000..4848f55 --- /dev/null +++ b/jcc/_jcc/java/lang/Float.h @@ -0,0 +1,51 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _Float_H +#define _Float_H + +#include +#include "java/lang/Object.h" +#include "java/lang/Class.h" + +namespace java { + namespace lang { + + class Float : public Object { + public: + static Class *class$; + static jmethodID *_mids; + static jclass initializeClass(); + + explicit Float(jobject obj) : Object(obj) { + initializeClass(); + } + Float(jfloat); + + jfloat floatValue() const; + }; + + extern PyTypeObject PY_TYPE(Float); + + class t_Float { + public: + PyObject_HEAD + Float object; + static PyObject *wrap_Object(const Float& object); + static PyObject *wrap_jobject(const jobject& object); + }; + } +} + +#endif /* _Float_H */ diff --git a/jcc/_jcc/java/lang/Integer.cpp b/jcc/_jcc/java/lang/Integer.cpp new file mode 100644 index 0000000..6c728d5 --- /dev/null +++ b/jcc/_jcc/java/lang/Integer.cpp @@ -0,0 +1,75 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include "JCCEnv.h" +#include "java/lang/Object.h" +#include "java/lang/Class.h" +#include "java/lang/Integer.h" + +namespace java { + namespace lang { + + enum { + mid__init_, + mid_intValue, + max_mid + }; + + Class *Integer::class$ = NULL; + jmethodID *Integer::_mids = NULL; + + jclass Integer::initializeClass() + { + if (!class$) + { + jclass cls = env->findClass("java/lang/Integer"); + + _mids = new jmethodID[max_mid]; + _mids[mid__init_] = env->getMethodID(cls, "", "(I)V"); + _mids[mid_intValue] = + env->getMethodID(cls, "intValue", "()I"); + + class$ = (Class *) new JObject(cls); + } + + return (jclass) class$->this$; + } + + Integer::Integer(jint n) : Object(env->newObject(initializeClass, &_mids, mid__init_, n)) { + } + + jint Integer::intValue() const + { + return env->callIntMethod(this$, _mids[mid_intValue]); + } + } +} + + +#include "structmember.h" +#include "functions.h" +#include "macros.h" + +namespace java { + namespace lang { + + static PyMethodDef t_Integer__methods_[] = { + { NULL, NULL, 0, NULL } + }; + + DECLARE_TYPE(Integer, t_Integer, Object, java::lang::Integer, + abstract_init, 0, 0, 0, 0, 0); + } +} diff --git a/jcc/_jcc/java/lang/Integer.h b/jcc/_jcc/java/lang/Integer.h new file mode 100644 index 0000000..da40608 --- /dev/null +++ b/jcc/_jcc/java/lang/Integer.h @@ -0,0 +1,51 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _Integer_H +#define _Integer_H + +#include +#include "java/lang/Object.h" +#include "java/lang/Class.h" + +namespace java { + namespace lang { + + class Integer : public Object { + public: + static Class *class$; + static jmethodID *_mids; + static jclass initializeClass(); + + explicit Integer(jobject obj) : Object(obj) { + initializeClass(); + } + Integer(jint); + + jint intValue() const; + }; + + extern PyTypeObject PY_TYPE(Integer); + + class t_Integer { + public: + PyObject_HEAD + Integer object; + static PyObject *wrap_Object(const Integer& object); + static PyObject *wrap_jobject(const jobject& object); + }; + } +} + +#endif /* _Integer_H */ diff --git a/jcc/_jcc/java/lang/Long.cpp b/jcc/_jcc/java/lang/Long.cpp new file mode 100644 index 0000000..434b95a --- /dev/null +++ b/jcc/_jcc/java/lang/Long.cpp @@ -0,0 +1,75 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include "JCCEnv.h" +#include "java/lang/Object.h" +#include "java/lang/Class.h" +#include "java/lang/Long.h" + +namespace java { + namespace lang { + + enum { + mid__init_, + mid_longValue, + max_mid + }; + + Class *Long::class$ = NULL; + jmethodID *Long::_mids = NULL; + + jclass Long::initializeClass() + { + if (!class$) + { + jclass cls = env->findClass("java/lang/Long"); + + _mids = new jmethodID[max_mid]; + _mids[mid__init_] = env->getMethodID(cls, "", "(J)V"); + _mids[mid_longValue] = + env->getMethodID(cls, "longValue", "()J"); + + class$ = (Class *) new JObject(cls); + } + + return (jclass) class$->this$; + } + + Long::Long(jlong n) : Object(env->newObject(initializeClass, &_mids, mid__init_, n)) { + } + + jlong Long::longValue() const + { + return env->callLongMethod(this$, _mids[mid_longValue]); + } + } +} + + +#include "structmember.h" +#include "functions.h" +#include "macros.h" + +namespace java { + namespace lang { + + static PyMethodDef t_Long__methods_[] = { + { NULL, NULL, 0, NULL } + }; + + DECLARE_TYPE(Long, t_Long, Object, java::lang::Long, + abstract_init, 0, 0, 0, 0, 0); + } +} diff --git a/jcc/_jcc/java/lang/Long.h b/jcc/_jcc/java/lang/Long.h new file mode 100644 index 0000000..15eaf5d --- /dev/null +++ b/jcc/_jcc/java/lang/Long.h @@ -0,0 +1,51 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _Long_H +#define _Long_H + +#include +#include "java/lang/Object.h" +#include "java/lang/Class.h" + +namespace java { + namespace lang { + + class Long : public Object { + public: + static Class *class$; + static jmethodID *_mids; + static jclass initializeClass(); + + explicit Long(jobject obj) : Object(obj) { + initializeClass(); + } + Long(jlong); + + jlong longValue() const; + }; + + extern PyTypeObject PY_TYPE(Long); + + class t_Long { + public: + PyObject_HEAD + Long object; + static PyObject *wrap_Object(const Long& object); + static PyObject *wrap_jobject(const jobject& object); + }; + } +} + +#endif /* _Long_H */ diff --git a/jcc/_jcc/java/lang/Object.cpp b/jcc/_jcc/java/lang/Object.cpp new file mode 100644 index 0000000..858a3ff --- /dev/null +++ b/jcc/_jcc/java/lang/Object.cpp @@ -0,0 +1,145 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include "JCCEnv.h" + +#include "java/lang/Object.h" +#include "java/lang/Class.h" +#include "java/lang/String.h" + +namespace java { + namespace lang { + enum { + mid__init_, + mid_toString, + mid_getClass, + mid_hashCode, + mid_equals, + max_mid + }; + + Class *Object::class$ = NULL; + jmethodID *Object::mids$ = NULL; + + jclass Object::initializeClass() + { + if (!class$) + { + jclass cls = env->findClass("java/lang/Object"); + + mids$ = new jmethodID[max_mid]; + mids$[mid__init_] = env->getMethodID(cls, "", + "()V"); + mids$[mid_toString] = env->getMethodID(cls, "toString", + "()Ljava/lang/String;"); + mids$[mid_getClass] = env->getMethodID(cls, "getClass", + "()Ljava/lang/Class;"); + mids$[mid_hashCode] = env->getMethodID(cls, "hashCode", + "()I"); + mids$[mid_equals] = env->getMethodID(cls, "equals", + "(Ljava/lang/Object;)Z"); + + class$ = (Class *) new JObject(cls); + } + + return (jclass) class$->this$; + } + + Object::Object() : JObject(env->newObject(initializeClass, &mids$, mid__init_)) { + } + + String Object::toString() const + { + return String(env->callObjectMethod(this$, mids$[mid_toString])); + } + + Class Object::getClass() const + { + return Class(env->callObjectMethod(this$, mids$[mid_getClass])); + } + + int Object::hashCode() const + { + return env->callIntMethod(this$, mids$[mid_hashCode]); + } + + jboolean Object::equals(const Object& a0) const + { + return env->callBooleanMethod(this$, mids$[mid_equals], a0.this$); + } + } +} + + +#include "structmember.h" +#include "functions.h" +#include "macros.h" + +namespace java { + namespace lang { + + static int t_Object_init(t_Object *self, + PyObject *args, PyObject *kwds); + static PyObject *t_Object_getClass(t_Object *self); + static PyObject *t_Object_equals(t_Object *self, PyObject *arg); + + static PyMethodDef t_Object__methods_[] = { + DECLARE_METHOD(t_Object, getClass, METH_NOARGS), + DECLARE_METHOD(t_Object, equals, METH_O), + { NULL, NULL, 0, NULL } + }; + + DECLARE_TYPE(Object, t_Object, JObject, java::lang::Object, + t_Object_init, 0, 0, 0, 0, 0); + + static int t_Object_init(t_Object *self, + PyObject *args, PyObject *kwds) + { + switch (PyTuple_Size(args)) { + case 0: + INT_CALL(self->object = Object()); + break; + default: + PyErr_SetString(PyExc_ValueError, "invalid args"); + return -1; + } + + return 0; + } + + static PyObject *t_Object_getClass(t_Object *self) + { + Class cls((jobject) NULL); + + OBJ_CALL(cls = self->object.getClass()); + return t_Class::wrap_Object(cls); + } + + static PyObject *t_Object_equals(t_Object *self, PyObject *arg) + { + Object a0((jobject) NULL); + jboolean result; + + if (!parseArg(arg, "o", &a0)) + { + OBJ_CALL(result = self->object.equals(a0)); + Py_RETURN_BOOL(result); + } + + PyErr_SetArgsError((PyObject *) self, "equals", arg); + return NULL; + } + } +} diff --git a/jcc/_jcc/java/lang/Object.h b/jcc/_jcc/java/lang/Object.h new file mode 100644 index 0000000..c36bca8 --- /dev/null +++ b/jcc/_jcc/java/lang/Object.h @@ -0,0 +1,56 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _Object_H +#define _Object_H + +#include +#include "JObject.h" + +namespace java { + namespace lang { + class Class; + class String; + + class Object : public JObject { + public: + static Class *class$; + static jmethodID *mids$; + static jclass initializeClass(); + + explicit Object(); + explicit Object(jobject obj) : JObject(obj) { + initializeClass(); + } + + String toString() const; + Class getClass() const; + int hashCode() const; + jboolean equals(const Object& obj) const; + }; + + extern PyTypeObject PY_TYPE(Object); + + class t_Object { + public: + PyObject_HEAD + Object object; + static PyObject *wrap_Object(const Object& object); + static PyObject *wrap_jobject(const jobject& object); + }; + } +} + + +#endif /* _Object_H */ diff --git a/jcc/_jcc/java/lang/RuntimeException.cpp b/jcc/_jcc/java/lang/RuntimeException.cpp new file mode 100644 index 0000000..cdbfb90 --- /dev/null +++ b/jcc/_jcc/java/lang/RuntimeException.cpp @@ -0,0 +1,63 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "JCCEnv.h" +#include "java/lang/Object.h" +#include "java/lang/Class.h" +#include "java/lang/RuntimeException.h" + +namespace java { + namespace lang { + + enum { + mid_printStackTrace, + max_mid + }; + + Class *RuntimeException::class$ = NULL; + jmethodID *RuntimeException::_mids = NULL; + + jclass RuntimeException::initializeClass() + { + if (!class$) + { + jclass cls = env->findClass("java/lang/RuntimeException"); + + class$ = (Class *) new JObject(cls); + } + + return (jclass) class$->this$; + } + } +} + + +#include "structmember.h" +#include "functions.h" +#include "macros.h" + +namespace java { + namespace lang { + + + static PyMethodDef t_RuntimeException__methods_[] = { + { NULL, NULL, 0, NULL } + }; + + DECLARE_TYPE(RuntimeException, t_RuntimeException, Object, + java::lang::RuntimeException, abstract_init, + 0, 0, 0, 0, 0); + } +} diff --git a/jcc/_jcc/java/lang/RuntimeException.h b/jcc/_jcc/java/lang/RuntimeException.h new file mode 100644 index 0000000..9a13d90 --- /dev/null +++ b/jcc/_jcc/java/lang/RuntimeException.h @@ -0,0 +1,49 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef _RuntimeException_H +#define _RuntimeException_H + +#include +#include "java/lang/Class.h" +#include "java/lang/Exception.h" +#include "JArray.h" + +namespace java { + namespace lang { + + class RuntimeException : public Exception { + public: + static Class *class$; + static jmethodID *_mids; + static jclass initializeClass(); + + explicit RuntimeException(jobject obj) : Exception(obj) { + initializeClass(); + } + }; + + extern PyTypeObject PY_TYPE(RuntimeException); + + class t_RuntimeException { + public: + PyObject_HEAD + RuntimeException object; + static PyObject *wrap_Object(const RuntimeException& object); + static PyObject *wrap_jobject(const jobject& object); + }; + } +} + +#endif /* _RuntimeException_H */ diff --git a/jcc/_jcc/java/lang/Short.cpp b/jcc/_jcc/java/lang/Short.cpp new file mode 100644 index 0000000..eab1537 --- /dev/null +++ b/jcc/_jcc/java/lang/Short.cpp @@ -0,0 +1,75 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "JCCEnv.h" +#include "java/lang/Object.h" +#include "java/lang/Class.h" +#include "java/lang/Short.h" + +namespace java { + namespace lang { + + enum { + mid__init_, + mid_shortValue, + max_mid + }; + + Class *Short::class$ = NULL; + jmethodID *Short::_mids = NULL; + + jclass Short::initializeClass() + { + if (!class$) + { + jclass cls = env->findClass("java/lang/Short"); + + _mids = new jmethodID[max_mid]; + _mids[mid__init_] = env->getMethodID(cls, "", "(S)V"); + _mids[mid_shortValue] = + env->getMethodID(cls, "shortValue", "()S"); + + class$ = (Class *) new JObject(cls); + } + + return (jclass) class$->this$; + } + + Short::Short(jshort n) : Object(env->newObject(initializeClass, &_mids, mid__init_, n)) { + } + + jshort Short::shortValue() const + { + return env->callShortMethod(this$, _mids[mid_shortValue]); + } + } +} + + +#include "structmember.h" +#include "functions.h" +#include "macros.h" + +namespace java { + namespace lang { + + static PyMethodDef t_Short__methods_[] = { + { NULL, NULL, 0, NULL } + }; + + DECLARE_TYPE(Short, t_Short, Object, java::lang::Short, + abstract_init, 0, 0, 0, 0, 0); + } +} diff --git a/jcc/_jcc/java/lang/Short.h b/jcc/_jcc/java/lang/Short.h new file mode 100644 index 0000000..51db909 --- /dev/null +++ b/jcc/_jcc/java/lang/Short.h @@ -0,0 +1,51 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
diff --git a/jcc/_jcc/java/lang/Short.h b/jcc/_jcc/java/lang/Short.h
new file mode 100644
index 0000000..51db909
--- /dev/null
+++ b/jcc/_jcc/java/lang/Short.h
@@ -0,0 +1,51 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _Short_H
+#define _Short_H
+
+#include <Python.h>
+#include "java/lang/Object.h"
+#include "java/lang/Class.h"
+
+namespace java {
+    namespace lang {
+
+        class Short : public Object {
+        public:
+            static Class *class$;
+            static jmethodID *_mids;
+            static jclass initializeClass();
+
+            explicit Short(jobject obj) : Object(obj) {
+                initializeClass();
+            }
+            Short(jshort);
+
+            jshort shortValue() const;
+        };
+
+        extern PyTypeObject PY_TYPE(Short);
+
+        class t_Short {
+        public:
+            PyObject_HEAD
+            Short object;
+            static PyObject *wrap_Object(const Short& object);
+            static PyObject *wrap_jobject(const jobject& object);
+        };
+    }
+}
+
+#endif /* _Short_H */
diff --git a/jcc/_jcc/java/lang/String.cpp b/jcc/_jcc/java/lang/String.cpp
new file mode 100644
index 0000000..b2636e1
--- /dev/null
+++ b/jcc/_jcc/java/lang/String.cpp
@@ -0,0 +1,118 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <jni.h>
+#include <stdio.h>
+#include "JCCEnv.h"
+#include "java/lang/Object.h"
+#include "java/lang/Class.h"
+#include "java/lang/String.h"
+
+namespace java {
+    namespace lang {
+
+        enum {
+            mid__init_,
+            mid_toString,
+            mid_length,
+            max_mid
+        };
+
+        Class *String::class$ = NULL;
+        jmethodID *String::_mids = NULL;
+
+        jclass String::initializeClass()
+        {
+            if (!class$)
+            {
+                jclass cls = env->findClass("java/lang/String");
+
+                _mids = new jmethodID[max_mid];
+                _mids[mid__init_] =
+                    env->getMethodID(cls, "<init>",
+                                     "()V");
+                _mids[mid_toString] =
+                    env->getMethodID(cls, "toString",
+                                     "()Ljava/lang/String;");
+                _mids[mid_length] =
+                    env->getMethodID(cls, "length",
+                                     "()I");
+
+                class$ = (Class *) new JObject(cls);
+            }
+
+            return (jclass) class$->this$;
+        }
+
+        String::String() : Object(env->newObject(initializeClass, &_mids, mid__init_)) {
+        }
+
+        int String::length() const
+        {
+            return env->callIntMethod(this$, _mids[mid_length]);
+        }
+    }
+}
+
+
+#include "structmember.h"
+#include "functions.h"
+#include "macros.h"
+
+namespace java {
+    namespace lang {
+
+        static int t_String_init(t_String *self,
+                                 PyObject *args, PyObject *kwds);
+        static PyObject *t_String_length(t_String *self);
+
+        static PyMethodDef t_String__methods_[] = {
+            DECLARE_METHOD(t_String, length, METH_NOARGS),
+            { NULL, NULL, 0, NULL }
+        };
+
+        DECLARE_TYPE(String, t_String, Object, java::lang::String,
+                     t_String_init, 0, 0, 0, 0, 0);
+
+        static int t_String_init(t_String *self,
+                                 PyObject *args, PyObject *kwds)
+        {
+            char *bytes;
+
+            switch (PyTuple_Size(args)) {
+              case 0:
+                INT_CALL(self->object = String());
+                break;
+              case 1:
+                if (!PyArg_ParseTuple(args, "s", &bytes))
+                    return -1;
+                INT_CALL(self->object = String(env->fromUTF(bytes)));
+                break;
+              default:
+                PyErr_SetString(PyExc_ValueError, "invalid args");
+                return -1;
+            }
+
+            return 0;
+        }
+
+        static PyObject *t_String_length(t_String *self)
+        {
+            jint length;
+
+            OBJ_CALL(length = self->object.length());
+            return PyInt_FromLong(length);
+        }
+    }
+}
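On the C++ side, a java.lang.String can be built from UTF-8 bytes with env->fromUTF(), the same call t_String_init uses above, and queried with length(). A minimal sketch under the same assumptions (helper name hypothetical):

    #include "JCCEnv.h"
    #include "java/lang/String.h"

    // Wraps a fresh jstring created from UTF-8 bytes and asks Java for its
    // length in UTF-16 code units (which may differ from strlen(bytes)).
    static int javaLength(const char *bytes)
    {
        java::lang::String s(env->fromUTF(bytes));
        return s.length();
    }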
diff --git a/jcc/_jcc/java/lang/String.h b/jcc/_jcc/java/lang/String.h
new file mode 100644
index 0000000..d9d432b
--- /dev/null
+++ b/jcc/_jcc/java/lang/String.h
@@ -0,0 +1,56 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _String_H
+#define _String_H
+
+#include <Python.h>
+#include "java/lang/Object.h"
+#include "java/lang/Class.h"
+#include "JArray.h"
+
+namespace java {
+    namespace lang {
+
+        class String : public Object {
+        public:
+            static Class *class$;
+            static jmethodID *_mids;
+            static jclass initializeClass();
+
+            explicit String(jobject obj) : Object(obj) {
+                initializeClass();
+            }
+            String();
+            String(const String& obj) : Object(obj) {}
+
+            String toString() const {
+                return *this;
+            }
+            int length() const;
+        };
+
+        extern PyTypeObject PY_TYPE(String);
+
+        class t_String {
+        public:
+            PyObject_HEAD
+            String object;
+            static PyObject *wrap_Object(const String& object);
+            static PyObject *wrap_jobject(const jobject& object);
+        };
+    }
+}
+
+#endif /* _String_H */
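Because String::toString() simply returns *this, any wrapper can be rendered through the Object::toString() declared earlier without a special case for strings. A sketch using only the API declared in these headers (helper name hypothetical):

    #include "java/lang/Object.h"
    #include "java/lang/String.h"

    // Uses Object::toString() and String::length(); when obj is itself a
    // String, toString() is the identity.
    static int toStringLength(const java::lang::Object& obj)
    {
        java::lang::String s(obj.toString());
        return s.length();
    }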
diff --git a/jcc/_jcc/java/lang/Throwable.cpp b/jcc/_jcc/java/lang/Throwable.cpp
new file mode 100644
index 0000000..a24d4d3
--- /dev/null
+++ b/jcc/_jcc/java/lang/Throwable.cpp
@@ -0,0 +1,120 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <jni.h>
+#include "JCCEnv.h"
+#include "java/lang/Object.h"
+#include "java/lang/Class.h"
+#include "java/lang/String.h"
+#include "java/lang/Throwable.h"
+#include "java/io/PrintWriter.h"
+
+namespace java {
+    namespace lang {
+
+        enum {
+            mid_printStackTrace_0,
+            mid_printStackTrace_1,
+            mid_getMessage,
+            max_mid
+        };
+
+        Class *Throwable::class$ = NULL;
+        jmethodID *Throwable::_mids = NULL;
+
+        jclass Throwable::initializeClass()
+        {
+            if (!class$)
+            {
+                jclass cls = env->findClass("java/lang/Throwable");
+
+                _mids = new jmethodID[max_mid];
+                _mids[mid_printStackTrace_0] =
+                    env->getMethodID(cls, "printStackTrace",
+                                     "()V");
+                _mids[mid_printStackTrace_1] =
+                    env->getMethodID(cls, "printStackTrace",
+                                     "(Ljava/io/PrintWriter;)V");
+                _mids[mid_getMessage] =
+                    env->getMethodID(cls, "getMessage",
+                                     "()Ljava/lang/String;");
+
+                class$ = (Class *) new JObject(cls);
+            }
+
+            return (jclass) class$->this$;
+        }
+
+        void Throwable::printStackTrace() const
+        {
+            env->callVoidMethod(this$, _mids[mid_printStackTrace_0]);
+        }
+
+        void Throwable::printStackTrace(java::io::PrintWriter writer) const
+        {
+            env->callVoidMethod(this$, _mids[mid_printStackTrace_1],
+                                writer.this$);
+        }
+
+        String Throwable::getMessage() const
+        {
+            return String(env->callObjectMethod(this$, _mids[mid_getMessage]));
+        }
+    }
+}
+
+
+#include "structmember.h"
+#include "functions.h"
+#include "macros.h"
+
+namespace java {
+    namespace lang {
+
+        static PyObject *t_Throwable_printStackTrace(t_Throwable *self,
+                                                     PyObject *args);
+
+        static PyMethodDef t_Throwable__methods_[] = {
+            DECLARE_METHOD(t_Throwable, printStackTrace, METH_VARARGS),
+            { NULL, NULL, 0, NULL }
+        };
+
+        DECLARE_TYPE(Throwable, t_Throwable, Object, Throwable,
+                     abstract_init, 0, 0, 0, 0, 0);
+
+        static PyObject *t_Throwable_printStackTrace(t_Throwable *self,
+                                                     PyObject *args)
+        {
+            switch (PyTuple_Size(args)) {
+              case 0:
+                OBJ_CALL(self->object.printStackTrace());
+                Py_RETURN_NONE;
+              case 1:
+              {
+                  java::io::PrintWriter writer((jobject) NULL);
+
+                  if (!parseArgs(args, "j", java::io::PrintWriter::class$,
+                                 &writer))
+                  {
+                      OBJ_CALL(self->object.printStackTrace(writer));
+                      Py_RETURN_NONE;
+                  }
+              }
+              default:
+                PyErr_SetString(PyExc_ValueError, "invalid args");
+                return NULL;
+            }
+        }
+    }
+}
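The two printStackTrace() overloads mirror the Java signatures: no arguments for the JVM's default stderr stream, or a java.io.PrintWriter target. A hedged sketch of the no-argument form (helper name hypothetical; assumes the caller already holds a wrapped throwable):

    #include "java/lang/Throwable.h"

    // Dumps the Java stack trace of a wrapped throwable through
    // Throwable.printStackTrace()V.
    static void dumpThrowable(const java::lang::Throwable& t)
    {
        t.printStackTrace();
    }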
diff --git a/jcc/_jcc/java/lang/Throwable.h b/jcc/_jcc/java/lang/Throwable.h
new file mode 100644
index 0000000..7132bb5
--- /dev/null
+++ b/jcc/_jcc/java/lang/Throwable.h
@@ -0,0 +1,59 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _Throwable_H
+#define _Throwable_H
+
+#include <Python.h>
+#include "java/lang/Object.h"
+#include "java/lang/Class.h"
+#include "JArray.h"
+
+namespace java {
+
+    namespace io {
+        class PrintWriter;
+    }
+
+    namespace lang {
+        class String;
+
+        class Throwable : public Object {
+        public:
+            static Class *class$;
+            static jmethodID *_mids;
+            static jclass initializeClass();
+
+            explicit Throwable(jobject obj) : Object(obj) {
+                initializeClass();
+            }
+
+            void printStackTrace() const;
+            void printStackTrace(java::io::PrintWriter) const;
+            String getMessage() const;
+        };
+
+        extern PyTypeObject PY_TYPE(Throwable);
+
+        class t_Throwable {
+        public:
+            PyObject_HEAD
+            Throwable object;
+            static PyObject *wrap_Object(const Throwable& object);
+            static PyObject *wrap_jobject(const jobject& object);
+        };
+    }
+}
+
+#endif /* _Throwable_H */
diff --git a/jcc/_jcc/java/lang/__init__.cpp b/jcc/_jcc/java/lang/__init__.cpp
new file mode 100644
index 0000000..8ead7f0
--- /dev/null
+++ b/jcc/_jcc/java/lang/__init__.cpp
@@ -0,0 +1,59 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <Python.h>
+#include "macros.h"
+
+namespace java {
+    namespace lang {
+
+        extern PyTypeObject PY_TYPE(Object);
+        extern PyTypeObject PY_TYPE(String);
+        extern PyTypeObject PY_TYPE(Class);
+        extern PyTypeObject PY_TYPE(Throwable);
+        extern PyTypeObject PY_TYPE(Exception);
+        extern PyTypeObject PY_TYPE(RuntimeException);
+        extern PyTypeObject PY_TYPE(Boolean);
+        extern PyTypeObject PY_TYPE(Byte);
+        extern PyTypeObject PY_TYPE(Character);
+        extern PyTypeObject PY_TYPE(Integer);
+        extern PyTypeObject PY_TYPE(Double);
+        extern PyTypeObject PY_TYPE(Float);
+        extern PyTypeObject PY_TYPE(Long);
+        extern PyTypeObject PY_TYPE(Short);
+
+        namespace reflect {
+            void __install__(PyObject *module);
+        }
+
+        void __install__(PyObject *m)
+        {
+            INSTALL_TYPE(Object, m);
+            INSTALL_TYPE(String, m);
+            INSTALL_TYPE(Class, m);
+            INSTALL_TYPE(Throwable, m);
+            INSTALL_TYPE(Exception, m);
+            INSTALL_TYPE(RuntimeException, m);
+            INSTALL_TYPE(Boolean, m);
+            INSTALL_TYPE(Byte, m);
+            INSTALL_TYPE(Character, m);
+            INSTALL_TYPE(Double, m);
+            INSTALL_TYPE(Float, m);
+            INSTALL_TYPE(Integer, m);
+            INSTALL_TYPE(Long, m);
+            INSTALL_TYPE(Short, m);
+            reflect::__install__(m);
+        }
+    }
+}
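__install__() is the single entry point through which the extension module registers every java.lang wrapper type, fanning out to the reflect subpackage at the end. A sketch of a call site (the wrapper function and module object are hypothetical; only the __install__ declaration comes from the file above):

    #include <Python.h>

    namespace java { namespace lang { void __install__(PyObject *); } }

    // Registers Object, String, Class, the Throwable hierarchy, the boxed
    // primitive types and then java.lang.reflect on the module object m.
    static void installJavaLang(PyObject *m)
    {
        java::lang::__install__(m);
    }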
diff --git a/jcc/_jcc/java/lang/reflect/Constructor.cpp b/jcc/_jcc/java/lang/reflect/Constructor.cpp
new file mode 100644
index 0000000..0849d27
--- /dev/null
+++ b/jcc/_jcc/java/lang/reflect/Constructor.cpp
@@ -0,0 +1,206 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <jni.h>
+#include "JCCEnv.h"
+#include "JArray.h"
+
+#include "java/lang/Class.h"
+#include "java/lang/Object.h"
+#include "java/lang/String.h"
+#include "java/lang/reflect/Constructor.h"
+#ifdef _java_generics
+#include "java/lang/reflect/Type.h"
+#include "java/lang/reflect/TypeVariable.h"
+#endif
+
+namespace java {
+    namespace lang {
+        namespace reflect {
+
+            enum {
+                mid_getModifiers,
+                mid_getSignature,
+                mid_getParameterTypes,
+                mid_getExceptionTypes,
+#ifdef _java_generics
+                mid_getTypeParameters,
+                mid_getGenericExceptionTypes,
+                mid_getGenericParameterTypes,
+#endif
+                max_mid
+            };
+
+            Class *Constructor::class$ = NULL;
+            jmethodID *Constructor::_mids = NULL;
+
+            jclass Constructor::initializeClass()
+            {
+                if (!class$)
+                {
+                    jclass cls = env->findClass("java/lang/reflect/Constructor");
+
+                    _mids = new jmethodID[max_mid];
+                    _mids[mid_getModifiers] =
+                        env->getMethodID(cls, "getModifiers",
+                                         "()I");
+                    _mids[mid_getParameterTypes] =
+                        env->getMethodID(cls, "getParameterTypes",
+                                         "()[Ljava/lang/Class;");
+                    _mids[mid_getExceptionTypes] =
+                        env->getMethodID(cls, "getExceptionTypes",
+                                         "()[Ljava/lang/Class;");
+
+#ifdef _java_generics
+                    _mids[mid_getTypeParameters] =
+                        env->getMethodID(cls, "getTypeParameters",
+                                         "()[Ljava/lang/reflect/TypeVariable;");
+                    _mids[mid_getGenericExceptionTypes] =
+                        env->getMethodID(cls, "getGenericExceptionTypes",
+                                         "()[Ljava/lang/reflect/Type;");
+                    _mids[mid_getGenericParameterTypes] =
+                        env->getMethodID(cls, "getGenericParameterTypes",
+                                         "()[Ljava/lang/reflect/Type;");
+#endif
+
+                    class$ = (Class *) new JObject(cls);
+                }
+
+                return (jclass) class$->this$;
+            }
+
+            int Constructor::getModifiers() const
+            {
+                return env->callIntMethod(this$, _mids[mid_getModifiers]);
+            }
+
+            JArray<Class> Constructor::getParameterTypes() const
+            {
+                jobjectArray array = (jobjectArray)
+                    env->callObjectMethod(this$, _mids[mid_getParameterTypes]);
+
+                return JArray<Class>(array);
+            }
+
+            JArray<Class> Constructor::getExceptionTypes() const
+            {
+                jobjectArray array = (jobjectArray)
+                    env->callObjectMethod(this$, _mids[mid_getExceptionTypes]);
+
+                return JArray<Class>(array);
+            }
+
+#ifdef _java_generics
+            JArray<TypeVariable> Constructor::getTypeParameters() const
+            {
+                return JArray<TypeVariable>(env->callObjectMethod(this$, _mids[mid_getTypeParameters]));
+            }
+
+            JArray<Type> Constructor::getGenericExceptionTypes() const
+            {
+                return JArray<Type>(env->callObjectMethod(this$, _mids[mid_getGenericExceptionTypes]));
+            }
+
+            JArray<Type> Constructor::getGenericParameterTypes() const
+            {
+                return JArray<Type>(env->callObjectMethod(this$, _mids[mid_getGenericParameterTypes]));
+            }
+#endif
+        }
+    }
+}
+
+
+#include "structmember.h"
+#include "functions.h"
+#include "macros.h"
+
+namespace java {
+    namespace lang {
+        namespace reflect {
+
+            static PyObject *t_Constructor_getModifiers(t_Constructor *self);
+            static PyObject *t_Constructor_getParameterTypes(t_Constructor *self);
+            static PyObject *t_Constructor_getExceptionTypes(t_Constructor *self);
+#ifdef _java_generics
+            static PyObject *t_Constructor_getTypeParameters(t_Constructor *self);
+            static PyObject *t_Constructor_getGenericExceptionTypes(t_Constructor *self);
+            static PyObject *t_Constructor_getGenericParameterTypes(t_Constructor *self);
+#endif
+
+            static PyMethodDef t_Constructor__methods_[] = {
+                DECLARE_METHOD(t_Constructor, getModifiers, METH_NOARGS),
+                DECLARE_METHOD(t_Constructor, getParameterTypes, METH_NOARGS),
+                DECLARE_METHOD(t_Constructor, getExceptionTypes, METH_NOARGS),
+#ifdef _java_generics + DECLARE_METHOD(t_Constructor, getTypeParameters, METH_NOARGS), + DECLARE_METHOD(t_Constructor, getGenericExceptionTypes, METH_NOARGS), + DECLARE_METHOD(t_Constructor, getGenericParameterTypes, METH_NOARGS), +#endif + { NULL, NULL, 0, NULL } + }; + + DECLARE_TYPE(Constructor, t_Constructor, Object, Constructor, + abstract_init, 0, 0, 0, 0, 0); + + static PyObject *t_Constructor_getModifiers(t_Constructor *self) + { + jint modifiers; + + OBJ_CALL(modifiers = self->object.getModifiers()); + return PyInt_FromLong(modifiers); + } + + static PyObject *t_Constructor_getParameterTypes(t_Constructor *self) + { + JArray types((jobject) NULL); + OBJ_CALL(types = self->object.getParameterTypes()); + return types.toSequence(t_Class::wrap_Object); + } + + static PyObject *t_Constructor_getExceptionTypes(t_Constructor *self) + { + JArray types((jobject) NULL); + OBJ_CALL(types = self->object.getExceptionTypes()); + return types.toSequence(t_Class::wrap_Object); + } + +#ifdef _java_generics + static PyObject *t_Constructor_getTypeParameters(t_Constructor *self) + { + JArray result((jobject) NULL); + OBJ_CALL(result = self->object.getTypeParameters()); + + return result.toSequence(t_TypeVariable::wrap_Object); + } + + static PyObject *t_Constructor_getGenericExceptionTypes(t_Constructor *self) + { + JArray result((jobject) NULL); + OBJ_CALL(result = self->object.getGenericExceptionTypes()); + + return result.toSequence(t_Type::wrap_Object); + } + + static PyObject *t_Constructor_getGenericParameterTypes(t_Constructor *self) + { + JArray result((jobject) NULL); + OBJ_CALL(result = self->object.getGenericParameterTypes()); + + return result.toSequence(t_Type::wrap_Object); + } +#endif + } + } +} diff --git a/jcc/_jcc/java/lang/reflect/Constructor.h b/jcc/_jcc/java/lang/reflect/Constructor.h new file mode 100644 index 0000000..a992675 --- /dev/null +++ b/jcc/_jcc/java/lang/reflect/Constructor.h @@ -0,0 +1,66 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef _Constructor_H +#define _Constructor_H + +#include +#include "JArray.h" + +namespace java { + namespace lang { + class Class; + class String; + + namespace reflect { +#ifdef _java_generics + class Type; + class TypeVariable; +#endif + + class Constructor : public Object { + public: + static Class *class$; + static jmethodID *_mids; + static jclass initializeClass(); + + explicit Constructor(jobject obj) : Object(obj) { + initializeClass(); + } + Constructor(const Constructor& obj) : Object(obj) {} + + int getModifiers() const; + JArray getParameterTypes() const; + JArray getExceptionTypes() const; +#ifdef _java_generics + JArray getTypeParameters() const; + JArray getGenericExceptionTypes() const; + JArray getGenericParameterTypes() const; +#endif + }; + + extern PyTypeObject PY_TYPE(Constructor); + + class t_Constructor { + public: + PyObject_HEAD + Constructor object; + static PyObject *wrap_Object(const Constructor& object); + static PyObject *wrap_jobject(const jobject& object); + }; + } + } +} + +#endif /* _Constructor_H */ diff --git a/jcc/_jcc/java/lang/reflect/Field.cpp b/jcc/_jcc/java/lang/reflect/Field.cpp new file mode 100644 index 0000000..0718156 --- /dev/null +++ b/jcc/_jcc/java/lang/reflect/Field.cpp @@ -0,0 +1,159 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include "JCCEnv.h" +#include "java/lang/Class.h" +#include "java/lang/Object.h" +#include "java/lang/String.h" +#include "java/lang/reflect/Field.h" +#ifdef _java_generics +#include "java/lang/reflect/Type.h" +#endif + +namespace java { + namespace lang { + namespace reflect { + + enum { + mid_getModifiers, + mid_getType, + mid_getName, +#ifdef _java_generics + mid_getGenericType, +#endif + max_mid + }; + + Class *Field::class$ = NULL; + jmethodID *Field::_mids = NULL; + + jclass Field::initializeClass() + { + if (!class$) + { + jclass cls = env->findClass("java/lang/reflect/Field"); + + _mids = new jmethodID[max_mid]; + _mids[mid_getModifiers] = + env->getMethodID(cls, "getModifiers", + "()I"); + _mids[mid_getType] = + env->getMethodID(cls, "getType", + "()Ljava/lang/Class;"); + _mids[mid_getName] = + env->getMethodID(cls, "getName", + "()Ljava/lang/String;"); +#ifdef _java_generics + _mids[mid_getGenericType] = + env->getMethodID(cls, "getGenericType", + "()Ljava/lang/reflect/Type;"); +#endif + + class$ = (Class *) new JObject(cls); + } + + return (jclass) class$->this$; + } + + int Field::getModifiers() const + { + return env->callIntMethod(this$, _mids[mid_getModifiers]); + } + + Class Field::getType() const + { + return Class(env->callObjectMethod(this$, _mids[mid_getType])); + } + + String Field::getName() const + { + return String(env->callObjectMethod(this$, _mids[mid_getName])); + } + +#ifdef _java_generics + Type Field::getGenericType() const + { + return Type(env->callObjectMethod(this$, _mids[mid_getGenericType])); + } +#endif + } + } +} + + +#include "structmember.h" +#include "functions.h" +#include "macros.h" + +namespace java { + namespace lang { + namespace reflect { + + static PyObject *t_Field_getModifiers(t_Field *self); + static PyObject *t_Field_getType(t_Field *self); + static PyObject *t_Field_getName(t_Field *self); +#ifdef _java_generics + static PyObject *t_Field_getGenericType(t_Field *self); +#endif + + static PyMethodDef t_Field__methods_[] = { + DECLARE_METHOD(t_Field, getModifiers, METH_NOARGS), + DECLARE_METHOD(t_Field, getType, METH_NOARGS), + DECLARE_METHOD(t_Field, getName, METH_NOARGS), +#ifdef _java_generics + DECLARE_METHOD(t_Field, getGenericType, METH_NOARGS), +#endif + { NULL, NULL, 0, NULL } + }; + + DECLARE_TYPE(Field, t_Field, Object, Field, + abstract_init, 0, 0, 0, 0, 0); + + static PyObject *t_Field_getModifiers(t_Field *self) + { + jint modifiers; + + OBJ_CALL(modifiers = self->object.getModifiers()); + return PyInt_FromLong(modifiers); + } + + static PyObject *t_Field_getType(t_Field *self) + { + Class cls((jobject) NULL); + + OBJ_CALL(cls = self->object.getType()); + return t_Class::wrap_Object(cls); + } + + static PyObject *t_Field_getName(t_Field *self) + { + String name((jobject) NULL); + + OBJ_CALL(name = self->object.getName()); + return j2p(name); + } + +#ifdef _java_generics + static PyObject *t_Field_getGenericType(t_Field *self) + { + Type result((jobject) NULL); + OBJ_CALL(result = self->object.getGenericType()); + + return t_Type::wrap_Object(result); + } +#endif + } + } +} diff --git a/jcc/_jcc/java/lang/reflect/Field.h b/jcc/_jcc/java/lang/reflect/Field.h new file mode 100644 index 0000000..478afba --- /dev/null +++ b/jcc/_jcc/java/lang/reflect/Field.h @@ -0,0 +1,62 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _Field_H +#define _Field_H + +#include + +namespace java { + namespace lang { + class Class; + class String; + + namespace reflect { +#ifdef _java_generics + class Type; +#endif + + class Field : public Object { + public: + static Class *class$; + static jmethodID *_mids; + static jclass initializeClass(); + + explicit Field(jobject obj) : Object(obj) { + initializeClass(); + } + Field(const Field& obj) : Object(obj) {} + + int getModifiers() const; + Class getType() const; + String getName() const; +#ifdef _java_generics + Type getGenericType() const; +#endif + }; + + extern PyTypeObject PY_TYPE(Field); + + class t_Field { + public: + PyObject_HEAD + Field object; + static PyObject *wrap_Object(const Field& object); + static PyObject *wrap_jobject(const jobject& object); + }; + } + } +} + +#endif /* _Field_H */ diff --git a/jcc/_jcc/java/lang/reflect/GenericArrayType.cpp b/jcc/_jcc/java/lang/reflect/GenericArrayType.cpp new file mode 100644 index 0000000..b725421 --- /dev/null +++ b/jcc/_jcc/java/lang/reflect/GenericArrayType.cpp @@ -0,0 +1,82 @@ +#ifdef _java_generics + +#include +#include "JCCEnv.h" +#include "java/lang/reflect/GenericArrayType.h" +#include "java/lang/Class.h" +#include "JArray.h" + +namespace java { + namespace lang { + namespace reflect { + + java::lang::Class *GenericArrayType::class$ = NULL; + jmethodID *GenericArrayType::mids$ = NULL; + + jclass GenericArrayType::initializeClass() + { + if (!class$) + { + + jclass cls = (jclass) env->findClass("java/lang/reflect/GenericArrayType"); + + mids$ = new jmethodID[max_mid]; + mids$[mid_getGenericComponentType_86037cf0] = env->getMethodID(cls, "getGenericComponentType", "()Ljava/lang/reflect/Type;"); + + class$ = (java::lang::Class *) new JObject(cls); + } + return (jclass) class$->this$; + } + + java::lang::reflect::Type GenericArrayType::getGenericComponentType() const + { + return java::lang::reflect::Type(env->callObjectMethod(this$, mids$[mid_getGenericComponentType_86037cf0])); + } + } + } +} + +#include "structmember.h" +#include "functions.h" +#include "macros.h" + +namespace java { + namespace lang { + namespace reflect { + static PyObject *t_GenericArrayType_cast_(PyTypeObject *type, PyObject *arg); + static PyObject *t_GenericArrayType_instance_(PyTypeObject *type, PyObject *arg); + static PyObject *t_GenericArrayType_getGenericComponentType(t_GenericArrayType *self); + + static PyMethodDef t_GenericArrayType__methods_[] = { + DECLARE_METHOD(t_GenericArrayType, cast_, METH_O | METH_CLASS), + DECLARE_METHOD(t_GenericArrayType, instance_, METH_O | METH_CLASS), + DECLARE_METHOD(t_GenericArrayType, getGenericComponentType, METH_NOARGS), + { NULL, NULL, 0, NULL } + }; + + DECLARE_TYPE(GenericArrayType, t_GenericArrayType, java::lang::reflect::Type, GenericArrayType, abstract_init, 0, 0, 0, 0, 0); + + static PyObject *t_GenericArrayType_cast_(PyTypeObject *type, PyObject *arg) + { + if (!(arg = castCheck(arg, GenericArrayType::initializeClass, 1))) + return NULL; + return t_GenericArrayType::wrap_Object(GenericArrayType(((t_GenericArrayType *) arg)->object.this$)); + } + static 
PyObject *t_GenericArrayType_instance_(PyTypeObject *type, PyObject *arg) + { + if (!castCheck(arg, GenericArrayType::initializeClass, 0)) + Py_RETURN_FALSE; + Py_RETURN_TRUE; + } + + static PyObject *t_GenericArrayType_getGenericComponentType(t_GenericArrayType *self) + { + java::lang::reflect::Type result((jobject) NULL); + OBJ_CALL(result = self->object.getGenericComponentType()); + return java::lang::reflect::t_Type::wrap_Object(result); + } + } + } +} + +#endif /* _java_generics */ diff --git a/jcc/_jcc/java/lang/reflect/GenericArrayType.h b/jcc/_jcc/java/lang/reflect/GenericArrayType.h new file mode 100644 index 0000000..658b2d1 --- /dev/null +++ b/jcc/_jcc/java/lang/reflect/GenericArrayType.h @@ -0,0 +1,62 @@ +#ifdef _java_generics + +#ifndef java_lang_reflect_GenericArrayType_H +#define java_lang_reflect_GenericArrayType_H + +#include "java/lang/reflect/Type.h" + +namespace java { + namespace lang { + class Class; + } +} +template class JArray; + +namespace java { + namespace lang { + namespace reflect { + + class GenericArrayType : public java::lang::reflect::Type { + public: + enum { + mid_getGenericComponentType_86037cf0, + max_mid + }; + + static java::lang::Class *class$; + static jmethodID *mids$; + static jclass initializeClass(); + + explicit GenericArrayType(jobject obj) : java::lang::reflect::Type(obj) { + if (obj != NULL) + initializeClass(); + } + GenericArrayType(const GenericArrayType& obj) : java::lang::reflect::Type(obj) {} + + java::lang::reflect::Type getGenericComponentType() const; + }; + } + } +} + +#include + +namespace java { + namespace lang { + namespace reflect { + extern PyTypeObject PY_TYPE(GenericArrayType); + + class t_GenericArrayType { + public: + PyObject_HEAD + GenericArrayType object; + static PyObject *wrap_Object(const GenericArrayType&); + static PyObject *wrap_jobject(const jobject&); + }; + } + } +} + +#endif + +#endif /* _java_generics */ diff --git a/jcc/_jcc/java/lang/reflect/GenericDeclaration.cpp b/jcc/_jcc/java/lang/reflect/GenericDeclaration.cpp new file mode 100644 index 0000000..9708751 --- /dev/null +++ b/jcc/_jcc/java/lang/reflect/GenericDeclaration.cpp @@ -0,0 +1,84 @@ +#ifdef _java_generics + +#include +#include "JCCEnv.h" +#include "java/lang/reflect/GenericDeclaration.h" +#include "java/lang/Class.h" +#include "java/lang/reflect/TypeVariable.h" +#include "JArray.h" + +namespace java { + namespace lang { + namespace reflect { + + java::lang::Class *GenericDeclaration::class$ = NULL; + jmethodID *GenericDeclaration::mids$ = NULL; + + jclass GenericDeclaration::initializeClass() + { + if (!class$) + { + + jclass cls = (jclass) env->findClass("java/lang/reflect/GenericDeclaration"); + + mids$ = new jmethodID[max_mid]; + mids$[mid_getTypeParameters_837d3468] = env->getMethodID(cls, "getTypeParameters", "()[Ljava/lang/reflect/TypeVariable;"); + + class$ = (java::lang::Class *) new JObject(cls); + } + return (jclass) class$->this$; + } + + JArray GenericDeclaration::getTypeParameters() const + { + return JArray(env->callObjectMethod(this$, mids$[mid_getTypeParameters_837d3468])); + } + } + } +} + +#include "structmember.h" +#include "functions.h" +#include "macros.h" + +namespace java { + namespace lang { + namespace reflect { + static PyObject *t_GenericDeclaration_cast_(PyTypeObject *type, PyObject *arg); + static PyObject *t_GenericDeclaration_instance_(PyTypeObject *type, PyObject *arg); + static PyObject *t_GenericDeclaration_getTypeParameters(t_GenericDeclaration *self); + + static PyMethodDef t_GenericDeclaration__methods_[] = 
{ + DECLARE_METHOD(t_GenericDeclaration, cast_, METH_O | METH_CLASS), + DECLARE_METHOD(t_GenericDeclaration, instance_, METH_O | METH_CLASS), + DECLARE_METHOD(t_GenericDeclaration, getTypeParameters, METH_NOARGS), + { NULL, NULL, 0, NULL } + }; + + DECLARE_TYPE(GenericDeclaration, t_GenericDeclaration, java::lang::Object, GenericDeclaration, abstract_init, 0, 0, 0, 0, 0); + + static PyObject *t_GenericDeclaration_cast_(PyTypeObject *type, PyObject *arg) + { + if (!(arg = castCheck(arg, GenericDeclaration::initializeClass, 1))) + return NULL; + return t_GenericDeclaration::wrap_Object(GenericDeclaration(((t_GenericDeclaration *) arg)->object.this$)); + } + static PyObject *t_GenericDeclaration_instance_(PyTypeObject *type, PyObject *arg) + { + if (!castCheck(arg, GenericDeclaration::initializeClass, 0)) + Py_RETURN_FALSE; + Py_RETURN_TRUE; + } + + static PyObject *t_GenericDeclaration_getTypeParameters(t_GenericDeclaration *self) + { + JArray result((jobject) NULL); + OBJ_CALL(result = self->object.getTypeParameters()); + + return result.toSequence(t_TypeVariable::wrap_Object); + } + } + } +} + +#endif /* _java_generics */ diff --git a/jcc/_jcc/java/lang/reflect/GenericDeclaration.h b/jcc/_jcc/java/lang/reflect/GenericDeclaration.h new file mode 100644 index 0000000..d436877 --- /dev/null +++ b/jcc/_jcc/java/lang/reflect/GenericDeclaration.h @@ -0,0 +1,65 @@ +#ifdef _java_generics + +#ifndef java_lang_reflect_GenericDeclaration_H +#define java_lang_reflect_GenericDeclaration_H + +#include "java/lang/Object.h" + +namespace java { + namespace lang { + namespace reflect { + class TypeVariable; + } + class Class; + } +} +template class JArray; + +namespace java { + namespace lang { + namespace reflect { + + class GenericDeclaration : public java::lang::Object { + public: + enum { + mid_getTypeParameters_837d3468, + max_mid + }; + + static java::lang::Class *class$; + static jmethodID *mids$; + static jclass initializeClass(); + + explicit GenericDeclaration(jobject obj) : java::lang::Object(obj) { + if (obj != NULL) + initializeClass(); + } + GenericDeclaration(const GenericDeclaration& obj) : java::lang::Object(obj) {} + + JArray getTypeParameters() const; + }; + } + } +} + +#include + +namespace java { + namespace lang { + namespace reflect { + extern PyTypeObject PY_TYPE(GenericDeclaration); + + class t_GenericDeclaration { + public: + PyObject_HEAD + GenericDeclaration object; + static PyObject *wrap_Object(const GenericDeclaration&); + static PyObject *wrap_jobject(const jobject&); + }; + } + } +} + +#endif + +#endif /* _java_generics */ diff --git a/jcc/_jcc/java/lang/reflect/Method.cpp b/jcc/_jcc/java/lang/reflect/Method.cpp new file mode 100644 index 0000000..de8be9b --- /dev/null +++ b/jcc/_jcc/java/lang/reflect/Method.cpp @@ -0,0 +1,301 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include "JCCEnv.h" +#include "JArray.h" + +#include "java/lang/Class.h" +#include "java/lang/Object.h" +#include "java/lang/String.h" +#include "java/lang/reflect/Method.h" +#ifdef _java_generics +#include "java/lang/reflect/Type.h" +#include "java/lang/reflect/TypeVariable.h" +#endif + +namespace java { + namespace lang { + namespace reflect { + + enum { + mid_getModifiers, + mid_getReturnType, + mid_getName, + mid_getParameterTypes, + mid_getExceptionTypes, + mid_getDeclaringClass, +#ifdef _java_generics + mid_getTypeParameters, + mid_getGenericExceptionTypes, + mid_getGenericParameterTypes, + mid_getGenericReturnType, +#endif + max_mid + }; + + Class *Method::class$ = NULL; + jmethodID *Method::_mids = NULL; + + jclass Method::initializeClass() + { + if (!class$) + { + jclass cls = env->findClass("java/lang/reflect/Method"); + + _mids = new jmethodID[max_mid]; + _mids[mid_getModifiers] = + env->getMethodID(cls, "getModifiers", + "()I"); + _mids[mid_getReturnType] = + env->getMethodID(cls, "getReturnType", + "()Ljava/lang/Class;"); + _mids[mid_getName] = + env->getMethodID(cls, "getName", + "()Ljava/lang/String;"); + + _mids[mid_getParameterTypes] = + env->getMethodID(cls, "getParameterTypes", + "()[Ljava/lang/Class;"); + _mids[mid_getExceptionTypes] = + env->getMethodID(cls, "getExceptionTypes", + "()[Ljava/lang/Class;"); + _mids[mid_getDeclaringClass] = + env->getMethodID(cls, "getDeclaringClass", + "()Ljava/lang/Class;"); +#ifdef _java_generics + _mids[mid_getTypeParameters] = + env->getMethodID(cls, "getTypeParameters", + "()[Ljava/lang/reflect/TypeVariable;"); + _mids[mid_getGenericExceptionTypes] = + env->getMethodID(cls, "getGenericExceptionTypes", + "()[Ljava/lang/reflect/Type;"); + _mids[mid_getGenericParameterTypes] = + env->getMethodID(cls, "getGenericParameterTypes", + "()[Ljava/lang/reflect/Type;"); + _mids[mid_getGenericReturnType] = + env->getMethodID(cls, "getGenericReturnType", + "()Ljava/lang/reflect/Type;"); +#endif + + class$ = (Class *) new JObject(cls); + } + + return (jclass) class$->this$; + } + + int Method::getModifiers() const + { + return env->callIntMethod(this$, _mids[mid_getModifiers]); + } + + Class Method::getReturnType() const + { + return Class(env->callObjectMethod(this$, _mids[mid_getReturnType])); + } + + String Method::getName() const + { + return String(env->callObjectMethod(this$, _mids[mid_getName])); + } + + JArray Method::getParameterTypes() const + { + jobjectArray array = (jobjectArray) + env->callObjectMethod(this$, _mids[mid_getParameterTypes]); + + return JArray(array); + } + + JArray Method::getExceptionTypes() const + { + jobjectArray array = (jobjectArray) + env->callObjectMethod(this$, _mids[mid_getExceptionTypes]); + + return JArray(array); + } + + Class Method::getDeclaringClass() const + { + return Class(env->callObjectMethod(this$, _mids[mid_getDeclaringClass])); + } + +#ifdef _java_generics + JArray Method::getTypeParameters() const + { + return JArray(env->callObjectMethod(this$, _mids[mid_getTypeParameters])); + } + + JArray Method::getGenericExceptionTypes() const + { + return JArray(env->callObjectMethod(this$, _mids[mid_getGenericExceptionTypes])); + } + + JArray Method::getGenericParameterTypes() const + { + return JArray(env->callObjectMethod(this$, _mids[mid_getGenericParameterTypes])); + } + + Type Method::getGenericReturnType() const + { + return Type(env->callObjectMethod(this$, _mids[mid_getGenericReturnType])); + } +#endif + } + } +} + + +#include "structmember.h" +#include "functions.h" +#include 
"macros.h" + +namespace java { + namespace lang { + namespace reflect { + + static PyObject *t_Method_cast_(PyTypeObject *type, PyObject *arg); + static PyObject *t_Method_instance_(PyTypeObject *type, PyObject *arg); + static PyObject *t_Method_getModifiers(t_Method *self); + static PyObject *t_Method_getReturnType(t_Method *self); + static PyObject *t_Method_getName(t_Method *self); + static PyObject *t_Method_getParameterTypes(t_Method *self); + static PyObject *t_Method_getExceptionTypes(t_Method *self); + static PyObject *t_Method_getDeclaringClass(t_Method *self); +#ifdef _java_generics + static PyObject *t_Method_getTypeParameters(t_Method *self); + static PyObject *t_Method_getGenericExceptionTypes(t_Method *self); + static PyObject *t_Method_getGenericParameterTypes(t_Method *self); + static PyObject *t_Method_getGenericReturnType(t_Method *self); +#endif + + static PyMethodDef t_Method__methods_[] = { + DECLARE_METHOD(t_Method, cast_, METH_O | METH_CLASS), + DECLARE_METHOD(t_Method, instance_, METH_O | METH_CLASS), + DECLARE_METHOD(t_Method, getModifiers, METH_NOARGS), + DECLARE_METHOD(t_Method, getReturnType, METH_NOARGS), + DECLARE_METHOD(t_Method, getName, METH_NOARGS), + DECLARE_METHOD(t_Method, getParameterTypes, METH_NOARGS), + DECLARE_METHOD(t_Method, getExceptionTypes, METH_NOARGS), + DECLARE_METHOD(t_Method, getDeclaringClass, METH_NOARGS), +#ifdef _java_generics + DECLARE_METHOD(t_Method, getTypeParameters, METH_NOARGS), + DECLARE_METHOD(t_Method, getGenericExceptionTypes, METH_NOARGS), + DECLARE_METHOD(t_Method, getGenericParameterTypes, METH_NOARGS), + DECLARE_METHOD(t_Method, getGenericReturnType, METH_NOARGS), +#endif + { NULL, NULL, 0, NULL } + }; + + DECLARE_TYPE(Method, t_Method, Object, Method, + abstract_init, 0, 0, 0, 0, 0); + + static PyObject *t_Method_cast_(PyTypeObject *type, PyObject *arg) + { + if (!(arg = castCheck(arg, Method::initializeClass, 1))) + return NULL; + return t_Method::wrap_Object(Method(((t_Method *) arg)->object.this$)); + } + + static PyObject *t_Method_instance_(PyTypeObject *type, PyObject *arg) + { + if (!castCheck(arg, Method::initializeClass, 0)) + Py_RETURN_FALSE; + Py_RETURN_TRUE; + } + + static PyObject *t_Method_getModifiers(t_Method *self) + { + jint modifiers; + + OBJ_CALL(modifiers = self->object.getModifiers()); + return PyInt_FromLong(modifiers); + } + + static PyObject *t_Method_getReturnType(t_Method *self) + { + Class cls((jobject) NULL); + + OBJ_CALL(cls = self->object.getReturnType()); + return t_Class::wrap_Object(cls); + } + + static PyObject *t_Method_getName(t_Method *self) + { + String name((jobject) NULL); + + OBJ_CALL(name = self->object.getName()); + return j2p(name); + } + + static PyObject *t_Method_getParameterTypes(t_Method *self) + { + JArray types((jobject) NULL); + + OBJ_CALL(types = self->object.getParameterTypes()); + return types.toSequence(t_Class::wrap_Object); + } + + static PyObject *t_Method_getExceptionTypes(t_Method *self) + { + JArray types((jobject) NULL); + + OBJ_CALL(types = self->object.getExceptionTypes()); + return types.toSequence(t_Class::wrap_Object); + } + + static PyObject *t_Method_getDeclaringClass(t_Method *self) + { + Class cls((jobject) NULL); + + OBJ_CALL(cls = self->object.getDeclaringClass()); + return t_Class::wrap_Object(cls); + } + +#ifdef _java_generics + static PyObject *t_Method_getTypeParameters(t_Method *self) + { + JArray result((jobject) NULL); + OBJ_CALL(result = self->object.getTypeParameters()); + + return result.toSequence(t_TypeVariable::wrap_Object); + } + + 
static PyObject *t_Method_getGenericExceptionTypes(t_Method *self) + { + JArray result((jobject) NULL); + OBJ_CALL(result = self->object.getGenericExceptionTypes()); + + return result.toSequence(t_Type::wrap_Object); + } + + static PyObject *t_Method_getGenericParameterTypes(t_Method *self) + { + JArray result((jobject) NULL); + OBJ_CALL(result = self->object.getGenericParameterTypes()); + + return result.toSequence(t_Type::wrap_Object); + } + + static PyObject *t_Method_getGenericReturnType(t_Method *self) + { + Type result((jobject) NULL); + OBJ_CALL(result = self->object.getGenericReturnType()); + + return t_Type::wrap_Object(result); + } +#endif + } + } +} diff --git a/jcc/_jcc/java/lang/reflect/Method.h b/jcc/_jcc/java/lang/reflect/Method.h new file mode 100644 index 0000000..4555378 --- /dev/null +++ b/jcc/_jcc/java/lang/reflect/Method.h @@ -0,0 +1,70 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _Method_H +#define _Method_H + +#include +#include "JArray.h" + +namespace java { + namespace lang { + class Class; + class String; + + namespace reflect { +#ifdef _java_generics + class Type; + class TypeVariable; +#endif + + class Method : public Object { + public: + static Class *class$; + static jmethodID *_mids; + static jclass initializeClass(); + + explicit Method(jobject obj) : Object(obj) { + initializeClass(); + } + Method(const Method& obj) : Object(obj) {} + + int getModifiers() const; + Class getReturnType() const; + String getName() const; + JArray getParameterTypes() const; + JArray getExceptionTypes() const; + Class getDeclaringClass() const; +#ifdef _java_generics + JArray getTypeParameters() const; + JArray getGenericExceptionTypes() const; + JArray getGenericParameterTypes() const; + Type getGenericReturnType() const; +#endif + }; + + extern PyTypeObject PY_TYPE(Method); + + class t_Method { + public: + PyObject_HEAD + Method object; + static PyObject *wrap_Object(const Method& object); + static PyObject *wrap_jobject(const jobject& object); + }; + } + } +} + +#endif /* _Method_H */ diff --git a/jcc/_jcc/java/lang/reflect/Modifier.cpp b/jcc/_jcc/java/lang/reflect/Modifier.cpp new file mode 100644 index 0000000..4d76992 --- /dev/null +++ b/jcc/_jcc/java/lang/reflect/Modifier.cpp @@ -0,0 +1,257 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include "JCCEnv.h" +#include "java/lang/Class.h" +#include "java/lang/Object.h" +#include "java/lang/String.h" +#include "java/lang/reflect/Modifier.h" + +namespace java { + namespace lang { + namespace reflect { + + enum { + mid_isPublic, + mid_isStatic, + mid_isNative, + mid_isFinal, + mid_isAbstract, + mid_isPrivate, + mid_isProtected, + max_mid + }; + + Class *Modifier::class$ = NULL; + jmethodID *Modifier::_mids = NULL; + + jclass Modifier::initializeClass() + { + if (!class$) + { + jclass cls = env->findClass("java/lang/reflect/Modifier"); + + _mids = new jmethodID[max_mid]; + _mids[mid_isPublic] = + env->getStaticMethodID(cls, "isPublic", + "(I)Z"); + _mids[mid_isStatic] = + env->getStaticMethodID(cls, "isStatic", + "(I)Z"); + _mids[mid_isNative] = + env->getStaticMethodID(cls, "isNative", + "(I)Z"); + _mids[mid_isFinal] = + env->getStaticMethodID(cls, "isFinal", + "(I)Z"); + _mids[mid_isAbstract] = + env->getStaticMethodID(cls, "isAbstract", + "(I)Z"); + _mids[mid_isPrivate] = + env->getStaticMethodID(cls, "isPrivate", + "(I)Z"); + _mids[mid_isProtected] = + env->getStaticMethodID(cls, "isProtected", + "(I)Z"); + + class$ = (Class *) new JObject(cls); + } + + return (jclass) class$->this$; + } + + int Modifier::isPublic(int mod) + { + jclass cls = initializeClass(); + return (int) env->callStaticBooleanMethod(cls, _mids[mid_isPublic], mod); + } + + int Modifier::isStatic(int mod) + { + jclass cls = initializeClass(); + return (int) env->callStaticBooleanMethod(cls, _mids[mid_isStatic], mod); + } + + int Modifier::isNative(int mod) + { + jclass cls = initializeClass(); + return (int) env->callStaticBooleanMethod(cls, _mids[mid_isNative], mod); + } + + int Modifier::isFinal(int mod) + { + jclass cls = initializeClass(); + return (int) env->callStaticBooleanMethod(cls, _mids[mid_isFinal], mod); + } + + int Modifier::isAbstract(int mod) + { + jclass cls = initializeClass(); + return (int) env->callStaticBooleanMethod(cls, _mids[mid_isAbstract], mod); + } + + int Modifier::isPrivate(int mod) + { + jclass cls = initializeClass(); + return (int) env->callStaticBooleanMethod(cls, _mids[mid_isPrivate], mod); + } + + int Modifier::isProtected(int mod) + { + jclass cls = initializeClass(); + return (int) env->callStaticBooleanMethod(cls, _mids[mid_isProtected], mod); + } + } + } +} + + +#include "structmember.h" +#include "functions.h" +#include "macros.h" + +namespace java { + namespace lang { + namespace reflect { + + static PyObject *t_Modifier_isPublic(PyTypeObject *type, PyObject *arg); + static PyObject *t_Modifier_isStatic(PyTypeObject *type, PyObject *arg); + static PyObject *t_Modifier_isNative(PyTypeObject *type, PyObject *arg); + static PyObject *t_Modifier_isFinal(PyTypeObject *type, PyObject *arg); + static PyObject *t_Modifier_isAbstract(PyTypeObject *type, PyObject *arg); + static PyObject *t_Modifier_isPrivate(PyTypeObject *type, PyObject *arg); + static PyObject *t_Modifier_isProtected(PyTypeObject *type, PyObject *arg); + + static PyMethodDef t_Modifier__methods_[] = { + DECLARE_METHOD(t_Modifier, isPublic, METH_O | METH_CLASS), + DECLARE_METHOD(t_Modifier, isStatic, METH_O | METH_CLASS), + DECLARE_METHOD(t_Modifier, isNative, METH_O | METH_CLASS), + DECLARE_METHOD(t_Modifier, isFinal, METH_O | METH_CLASS), + DECLARE_METHOD(t_Modifier, isAbstract, METH_O | METH_CLASS), + DECLARE_METHOD(t_Modifier, isPrivate, METH_O | METH_CLASS), + DECLARE_METHOD(t_Modifier, isProtected, METH_O | METH_CLASS), + { NULL, NULL, 0, NULL } + }; + + DECLARE_TYPE(Modifier, 
t_Modifier, Object, Modifier, + abstract_init, 0, 0, 0, 0, 0); + + static PyObject *t_Modifier_isPublic(PyTypeObject *type, PyObject *arg) + { + if (!PyInt_Check(arg)) + { + PyErr_SetObject(PyExc_TypeError, arg); + return NULL; + } + + int mod = PyInt_AsLong(arg); + int isPublic; + + OBJ_CALL(isPublic = Modifier::isPublic(mod)); + Py_RETURN_BOOL(isPublic); + } + + static PyObject *t_Modifier_isStatic(PyTypeObject *type, PyObject *arg) + { + if (!PyInt_Check(arg)) + { + PyErr_SetObject(PyExc_TypeError, arg); + return NULL; + } + + int mod = PyInt_AsLong(arg); + int isStatic; + + OBJ_CALL(isStatic = Modifier::isStatic(mod)); + Py_RETURN_BOOL(isStatic); + } + + static PyObject *t_Modifier_isNative(PyTypeObject *type, PyObject *arg) + { + if (!PyInt_Check(arg)) + { + PyErr_SetObject(PyExc_TypeError, arg); + return NULL; + } + + int mod = PyInt_AsLong(arg); + int isNative; + + OBJ_CALL(isNative = Modifier::isNative(mod)); + Py_RETURN_BOOL(isNative); + } + + static PyObject *t_Modifier_isFinal(PyTypeObject *type, PyObject *arg) + { + if (!PyInt_Check(arg)) + { + PyErr_SetObject(PyExc_TypeError, arg); + return NULL; + } + + int mod = PyInt_AsLong(arg); + int isFinal; + + OBJ_CALL(isFinal = Modifier::isFinal(mod)); + Py_RETURN_BOOL(isFinal); + } + + static PyObject *t_Modifier_isAbstract(PyTypeObject *type, PyObject *arg) + { + if (!PyInt_Check(arg)) + { + PyErr_SetObject(PyExc_TypeError, arg); + return NULL; + } + + int mod = PyInt_AsLong(arg); + int isAbstract; + + OBJ_CALL(isAbstract = Modifier::isAbstract(mod)); + Py_RETURN_BOOL(isAbstract); + } + + static PyObject *t_Modifier_isPrivate(PyTypeObject *type, PyObject *arg) + { + if (!PyInt_Check(arg)) + { + PyErr_SetObject(PyExc_TypeError, arg); + return NULL; + } + + int mod = PyInt_AsLong(arg); + int isPrivate; + + OBJ_CALL(isPrivate = Modifier::isPrivate(mod)); + Py_RETURN_BOOL(isPrivate); + } + + static PyObject *t_Modifier_isProtected(PyTypeObject *type, PyObject *arg) + { + if (!PyInt_Check(arg)) + { + PyErr_SetObject(PyExc_TypeError, arg); + return NULL; + } + + int mod = PyInt_AsLong(arg); + int isProtected; + + OBJ_CALL(isProtected = Modifier::isProtected(mod)); + Py_RETURN_BOOL(isProtected); + } + } + } +} diff --git a/jcc/_jcc/java/lang/reflect/Modifier.h b/jcc/_jcc/java/lang/reflect/Modifier.h new file mode 100644 index 0000000..de317b8 --- /dev/null +++ b/jcc/_jcc/java/lang/reflect/Modifier.h @@ -0,0 +1,60 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#ifndef _Modifier_H
+#define _Modifier_H
+
+#include <Python.h>
+#include "JArray.h"
+
+namespace java {
+    namespace lang {
+        class Class;
+        class String;
+
+        namespace reflect {
+            class Modifier : public Object {
+            private:
+                explicit Modifier();
+            public:
+                explicit Modifier(jobject obj) : Object(obj) {
+                    initializeClass();
+                }
+                static Class *class$;
+                static jmethodID *_mids;
+                static jclass initializeClass();
+
+                static int isPublic(int mod);
+                static int isStatic(int mod);
+                static int isNative(int mod);
+                static int isFinal(int mod);
+                static int isAbstract(int mod);
+                static int isPrivate(int mod);
+                static int isProtected(int mod);
+            };
+
+            extern PyTypeObject PY_TYPE(Modifier);
+
+            class t_Modifier {
+            public:
+                PyObject_HEAD
+                Modifier object;
+                static PyObject *wrap_Object(const Modifier& object);
+                static PyObject *wrap_jobject(const jobject& object);
+            };
+        }
+    }
+}
+
+#endif /* _Modifier_H */
diff --git a/jcc/_jcc/java/lang/reflect/ParameterizedType.cpp b/jcc/_jcc/java/lang/reflect/ParameterizedType.cpp
new file mode 100644
index 0000000..fc40e82
--- /dev/null
+++ b/jcc/_jcc/java/lang/reflect/ParameterizedType.cpp
@@ -0,0 +1,113 @@
+#ifdef _java_generics
+
+#include <jni.h>
+#include "JCCEnv.h"
+#include "java/lang/reflect/ParameterizedType.h"
+#include "java/lang/Class.h"
+#include "JArray.h"
+
+namespace java {
+    namespace lang {
+        namespace reflect {
+
+            java::lang::Class *ParameterizedType::class$ = NULL;
+            jmethodID *ParameterizedType::mids$ = NULL;
+
+            jclass ParameterizedType::initializeClass()
+            {
+                if (!class$)
+                {
+
+                    jclass cls = (jclass) env->findClass("java/lang/reflect/ParameterizedType");
+
+                    mids$ = new jmethodID[max_mid];
+                    mids$[mid_getActualTypeArguments_6f565a00] = env->getMethodID(cls, "getActualTypeArguments", "()[Ljava/lang/reflect/Type;");
+                    mids$[mid_getOwnerType_86037cf0] = env->getMethodID(cls, "getOwnerType", "()Ljava/lang/reflect/Type;");
+                    mids$[mid_getRawType_86037cf0] = env->getMethodID(cls, "getRawType", "()Ljava/lang/reflect/Type;");
+
+                    class$ = (java::lang::Class *) new JObject(cls);
+                }
+                return (jclass) class$->this$;
+            }
+
+            JArray<Type> ParameterizedType::getActualTypeArguments() const
+            {
+                return JArray<Type>(env->callObjectMethod(this$, mids$[mid_getActualTypeArguments_6f565a00]));
+            }
+
+            java::lang::reflect::Type ParameterizedType::getOwnerType() const
+            {
+                return java::lang::reflect::Type(env->callObjectMethod(this$, mids$[mid_getOwnerType_86037cf0]));
+            }
+
+            java::lang::reflect::Type ParameterizedType::getRawType() const
+            {
+                return java::lang::reflect::Type(env->callObjectMethod(this$, mids$[mid_getRawType_86037cf0]));
+            }
+        }
+    }
+}
+
+#include "structmember.h"
+#include "functions.h"
+#include "macros.h"
+
+namespace java {
+    namespace lang {
+        namespace reflect {
+            static PyObject *t_ParameterizedType_cast_(PyTypeObject *type, PyObject *arg);
+            static PyObject *t_ParameterizedType_instance_(PyTypeObject *type, PyObject *arg);
+            static PyObject *t_ParameterizedType_getActualTypeArguments(t_ParameterizedType *self);
+            static PyObject *t_ParameterizedType_getOwnerType(t_ParameterizedType *self);
+            static PyObject *t_ParameterizedType_getRawType(t_ParameterizedType *self);
+
+            static PyMethodDef t_ParameterizedType__methods_[] = {
+                DECLARE_METHOD(t_ParameterizedType, cast_, METH_O | METH_CLASS),
+                DECLARE_METHOD(t_ParameterizedType, instance_, METH_O | METH_CLASS),
+                DECLARE_METHOD(t_ParameterizedType, getActualTypeArguments, METH_NOARGS),
+                DECLARE_METHOD(t_ParameterizedType, getOwnerType, METH_NOARGS),
+                DECLARE_METHOD(t_ParameterizedType, getRawType, METH_NOARGS),
+                { NULL, NULL, 0, NULL }
+            };
+
+            DECLARE_TYPE(ParameterizedType, t_ParameterizedType, java::lang::reflect::Type, ParameterizedType, abstract_init, 0, 0, 0, 0, 0);
+
+            static PyObject *t_ParameterizedType_cast_(PyTypeObject *type, PyObject *arg)
+            {
+                if (!(arg = castCheck(arg, ParameterizedType::initializeClass, 1)))
+                    return NULL;
+                return t_ParameterizedType::wrap_Object(ParameterizedType(((t_ParameterizedType *) arg)->object.this$));
+            }
+            static PyObject *t_ParameterizedType_instance_(PyTypeObject *type, PyObject *arg)
+            {
+                if (!castCheck(arg, ParameterizedType::initializeClass, 0))
+                    Py_RETURN_FALSE;
+                Py_RETURN_TRUE;
+            }
+
+            static PyObject *t_ParameterizedType_getActualTypeArguments(t_ParameterizedType *self)
+            {
+                JArray<Type> result((jobject) NULL);
+                OBJ_CALL(result = self->object.getActualTypeArguments());
+
+                return result.toSequence(t_Type::wrap_Object);
+            }
+
+            static PyObject *t_ParameterizedType_getOwnerType(t_ParameterizedType *self)
+            {
+                java::lang::reflect::Type result((jobject) NULL);
+                OBJ_CALL(result = self->object.getOwnerType());
+                return java::lang::reflect::t_Type::wrap_Object(result);
+            }
+
+            static PyObject *t_ParameterizedType_getRawType(t_ParameterizedType *self)
+            {
+                java::lang::reflect::Type result((jobject) NULL);
+                OBJ_CALL(result = self->object.getRawType());
+                return java::lang::reflect::t_Type::wrap_Object(result);
+            }
+        }
+    }
+}
+
+#endif /* _java_generics */
diff --git a/jcc/_jcc/java/lang/reflect/ParameterizedType.h b/jcc/_jcc/java/lang/reflect/ParameterizedType.h
new file mode 100644
index 0000000..69472fe
--- /dev/null
+++ b/jcc/_jcc/java/lang/reflect/ParameterizedType.h
@@ -0,0 +1,66 @@
+#ifdef _java_generics
+
+#ifndef java_lang_reflect_ParameterizedType_H
+#define java_lang_reflect_ParameterizedType_H
+
+#include "java/lang/reflect/Type.h"
+
+namespace java {
+    namespace lang {
+        class Class;
+    }
+}
+template<class T> class JArray;
+
+namespace java {
+    namespace lang {
+        namespace reflect {
+
+            class ParameterizedType : public java::lang::reflect::Type {
+            public:
+                enum {
+                    mid_getActualTypeArguments_6f565a00,
+                    mid_getOwnerType_86037cf0,
+                    mid_getRawType_86037cf0,
+                    max_mid
+                };
+
+                static java::lang::Class *class$;
+                static jmethodID *mids$;
+                static jclass initializeClass();
+
+                explicit ParameterizedType(jobject obj) : java::lang::reflect::Type(obj) {
+                    if (obj != NULL)
+                        initializeClass();
+                }
+                ParameterizedType(const ParameterizedType& obj) : java::lang::reflect::Type(obj) {}
+
+                JArray<Type> getActualTypeArguments() const;
+                java::lang::reflect::Type getOwnerType() const;
+                java::lang::reflect::Type getRawType() const;
+            };
+        }
+    }
+}
+
+#include <Python.h>
+
+namespace java {
+    namespace lang {
+        namespace reflect {
+            extern PyTypeObject PY_TYPE(ParameterizedType);
+
+            class t_ParameterizedType {
+            public:
+                PyObject_HEAD
+                ParameterizedType object;
+                static PyObject *wrap_Object(const ParameterizedType&);
+                static PyObject *wrap_jobject(const jobject&);
+            };
+        }
+    }
+}
+
+#endif
+
+#endif /* _java_generics */
diff --git a/jcc/_jcc/java/lang/reflect/Type.cpp b/jcc/_jcc/java/lang/reflect/Type.cpp
new file mode 100644
index 0000000..2bfd72c
--- /dev/null
+++ b/jcc/_jcc/java/lang/reflect/Type.cpp
@@ -0,0 +1,65 @@
+#ifdef _java_generics
+
+#include <jni.h>
+#include "JCCEnv.h"
+#include "java/lang/reflect/Type.h"
+#include "java/lang/Class.h"
+#include "JArray.h"
+
+namespace java {
+    namespace lang {
+        namespace reflect {
+
+            java::lang::Class *Type::class$ = NULL;
+            jmethodID *Type::mids$ = NULL;
+
+            jclass Type::initializeClass()
+            {
+                if (!class$)
+                {
+
+                    jclass cls = (jclass) env->findClass("java/lang/reflect/Type");
+
+                    class$ = (java::lang::Class *) new JObject(cls);
+                }
+                return (jclass) class$->this$;
+            }
+        }
+    }
+}
+
+#include "structmember.h"
+#include "functions.h"
+#include "macros.h"
+
+namespace java {
+    namespace lang {
+        namespace reflect {
+            static PyObject *t_Type_cast_(PyTypeObject *type, PyObject *arg);
+            static PyObject *t_Type_instance_(PyTypeObject *type, PyObject *arg);
+
+            static PyMethodDef t_Type__methods_[] = {
+                DECLARE_METHOD(t_Type, cast_, METH_O | METH_CLASS),
+                DECLARE_METHOD(t_Type, instance_, METH_O | METH_CLASS),
+                { NULL, NULL, 0, NULL }
+            };
+
+            DECLARE_TYPE(Type, t_Type, java::lang::Object, Type, abstract_init, 0, 0, 0, 0, 0);
+
+            static PyObject *t_Type_cast_(PyTypeObject *type, PyObject *arg)
+            {
+                if (!(arg = castCheck(arg, Type::initializeClass, 1)))
+                    return NULL;
+                return t_Type::wrap_Object(Type(((t_Type *) arg)->object.this$));
+            }
+            static PyObject *t_Type_instance_(PyTypeObject *type, PyObject *arg)
+            {
+                if (!castCheck(arg, Type::initializeClass, 0))
+                    Py_RETURN_FALSE;
+                Py_RETURN_TRUE;
+            }
+        }
+    }
+}
+
+#endif /* _java_generics */
diff --git a/jcc/_jcc/java/lang/reflect/Type.h b/jcc/_jcc/java/lang/reflect/Type.h
new file mode 100644
index 0000000..6f57d3f
--- /dev/null
+++ b/jcc/_jcc/java/lang/reflect/Type.h
@@ -0,0 +1,55 @@
+#ifdef _java_generics
+
+#ifndef java_lang_reflect_Type_H
+#define java_lang_reflect_Type_H
+
+#include "java/lang/Object.h"
+
+namespace java {
+    namespace lang {
+        class Class;
+    }
+}
+template<class T> class JArray;
+
+namespace java {
+    namespace lang {
+        namespace reflect {
+
+            class Type : public java::lang::Object {
+            public:
+
+                static java::lang::Class *class$;
+                static jmethodID *mids$;
+                static jclass initializeClass();
+
+                explicit Type(jobject obj) : java::lang::Object(obj) {
+                    if (obj != NULL)
+                        initializeClass();
+                }
+                Type(const Type& obj) : java::lang::Object(obj) {}
+            };
+        }
+    }
+}
+
+#include <Python.h>
+
+namespace java {
+    namespace lang {
+        namespace reflect {
+            extern PyTypeObject PY_TYPE(Type);
+
+            class t_Type {
+            public:
+                PyObject_HEAD
+                Type object;
+                static PyObject *wrap_Object(const Type&);
+                static PyObject *wrap_jobject(const jobject&);
+            };
+        }
+    }
+}
+
+#endif
+#endif /* _java_generics */
diff --git a/jcc/_jcc/java/lang/reflect/TypeVariable.cpp b/jcc/_jcc/java/lang/reflect/TypeVariable.cpp
new file mode 100644
index 0000000..d4e3dea
--- /dev/null
+++ b/jcc/_jcc/java/lang/reflect/TypeVariable.cpp
@@ -0,0 +1,115 @@
+#ifdef _java_generics
+
+#include <jni.h>
+#include "JCCEnv.h"
+#include "java/lang/reflect/TypeVariable.h"
+#include "java/lang/reflect/GenericDeclaration.h"
+#include "java/lang/Class.h"
+#include "java/lang/String.h"
+#include "JArray.h"
+
+namespace java {
+    namespace lang {
+        namespace reflect {
+
+            java::lang::Class *TypeVariable::class$ = NULL;
+            jmethodID *TypeVariable::mids$ = NULL;
+
+            jclass TypeVariable::initializeClass()
+            {
+                if (!class$)
+                {
+
+                    jclass cls = (jclass) env->findClass("java/lang/reflect/TypeVariable");
+
+                    mids$ = new jmethodID[max_mid];
+                    mids$[mid_getBounds_6f565a00] = env->getMethodID(cls, "getBounds", "()[Ljava/lang/reflect/Type;");
+                    mids$[mid_getGenericDeclaration_2dc62edd] = env->getMethodID(cls, "getGenericDeclaration", "()Ljava/lang/reflect/GenericDeclaration;");
+                    mids$[mid_getName_14c7b5c5] = env->getMethodID(cls, "getName", "()Ljava/lang/String;");
+
+                    class$ = (java::lang::Class *) new JObject(cls);
+                }
+                return (jclass) class$->this$;
+            }
+
+            JArray<Type> TypeVariable::getBounds() const
+            {
+                return JArray<Type>(env->callObjectMethod(this$, mids$[mid_getBounds_6f565a00]));
+            }
+
+            java::lang::reflect::GenericDeclaration TypeVariable::getGenericDeclaration() const
+            {
+                return java::lang::reflect::GenericDeclaration(env->callObjectMethod(this$, mids$[mid_getGenericDeclaration_2dc62edd]));
+            }
+
+            java::lang::String TypeVariable::getName() const
+            {
+                return java::lang::String(env->callObjectMethod(this$, mids$[mid_getName_14c7b5c5]));
+            }
+        }
+    }
+}
+
+#include "structmember.h"
+#include "functions.h"
+#include "macros.h"
+
+namespace java {
+    namespace lang {
+        namespace reflect {
+            static PyObject *t_TypeVariable_cast_(PyTypeObject *type, PyObject *arg);
+            static PyObject *t_TypeVariable_instance_(PyTypeObject *type, PyObject *arg);
+            static PyObject *t_TypeVariable_getBounds(t_TypeVariable *self);
+            static PyObject *t_TypeVariable_getGenericDeclaration(t_TypeVariable *self);
+            static PyObject *t_TypeVariable_getName(t_TypeVariable *self);
+
+            static PyMethodDef t_TypeVariable__methods_[] = {
+                DECLARE_METHOD(t_TypeVariable, cast_, METH_O | METH_CLASS),
+                DECLARE_METHOD(t_TypeVariable, instance_, METH_O | METH_CLASS),
+                DECLARE_METHOD(t_TypeVariable, getBounds, METH_NOARGS),
+                DECLARE_METHOD(t_TypeVariable, getGenericDeclaration, METH_NOARGS),
+                DECLARE_METHOD(t_TypeVariable, getName, METH_NOARGS),
+                { NULL, NULL, 0, NULL }
+            };
+
+            DECLARE_TYPE(TypeVariable, t_TypeVariable, java::lang::reflect::Type, TypeVariable, abstract_init, 0, 0, 0, 0, 0);
+
+            static PyObject *t_TypeVariable_cast_(PyTypeObject *type, PyObject *arg)
+            {
+                if (!(arg = castCheck(arg, TypeVariable::initializeClass, 1)))
+                    return NULL;
+                return t_TypeVariable::wrap_Object(TypeVariable(((t_TypeVariable *) arg)->object.this$));
+            }
+            static PyObject *t_TypeVariable_instance_(PyTypeObject *type, PyObject *arg)
+            {
+                if (!castCheck(arg, TypeVariable::initializeClass, 0))
+                    Py_RETURN_FALSE;
+                Py_RETURN_TRUE;
+            }
+
+            static PyObject *t_TypeVariable_getBounds(t_TypeVariable *self)
+            {
+                JArray<Type> result((jobject) NULL);
+                OBJ_CALL(result = self->object.getBounds());
+
+                return result.toSequence(t_Type::wrap_Object);
+            }
+
+            static PyObject *t_TypeVariable_getGenericDeclaration(t_TypeVariable *self)
+            {
+                java::lang::reflect::GenericDeclaration result((jobject) NULL);
+                OBJ_CALL(result = self->object.getGenericDeclaration());
+                return java::lang::reflect::t_GenericDeclaration::wrap_Object(result);
+            }
+
+            static PyObject *t_TypeVariable_getName(t_TypeVariable *self)
+            {
+                java::lang::String result((jobject) NULL);
+                OBJ_CALL(result = self->object.getName());
+                return j2p(result);
+            }
+        }
+    }
+}
+
+#endif /* _java_generics */
diff --git a/jcc/_jcc/java/lang/reflect/TypeVariable.h b/jcc/_jcc/java/lang/reflect/TypeVariable.h
new file mode 100644
index 0000000..c2cf334
--- /dev/null
+++ b/jcc/_jcc/java/lang/reflect/TypeVariable.h
@@ -0,0 +1,70 @@
+#ifdef _java_generics
+
+#ifndef java_lang_reflect_TypeVariable_H
+#define java_lang_reflect_TypeVariable_H
+
+#include "java/lang/reflect/Type.h"
+
+namespace java {
+    namespace lang {
+        namespace reflect {
+            class GenericDeclaration;
+        }
+        class Class;
+        class String;
+    }
+}
+template<class T> class JArray;
+
+namespace java {
+    namespace lang {
+        namespace reflect {
+
+            class TypeVariable : public java::lang::reflect::Type {
+            public:
+                enum {
+                    mid_getBounds_6f565a00,
+                    mid_getGenericDeclaration_2dc62edd,
+                    mid_getName_14c7b5c5,
+                    max_mid
+                };
+
+                static java::lang::Class *class$;
+                static jmethodID *mids$;
+                static jclass initializeClass();
+
+                explicit TypeVariable(jobject obj) : java::lang::reflect::Type(obj) {
+                    if (obj != NULL)
+                        initializeClass();
+                }
+                TypeVariable(const TypeVariable& obj) : java::lang::reflect::Type(obj) {}
+
+                JArray<Type> getBounds() const;
+                java::lang::reflect::GenericDeclaration getGenericDeclaration() const;
+                java::lang::String getName() const;
+            };
+        }
+    }
+}
+
+#include <Python.h>
+
+namespace java {
+    namespace lang {
+        namespace reflect {
+            extern PyTypeObject PY_TYPE(TypeVariable);
+
+            class t_TypeVariable {
+            public:
+                PyObject_HEAD
+                TypeVariable object;
+                static PyObject *wrap_Object(const TypeVariable&);
+                static PyObject *wrap_jobject(const jobject&);
+            };
+        }
+    }
+}
+
+#endif
+
+#endif /* _java_generics */
diff --git a/jcc/_jcc/java/lang/reflect/WildcardType.cpp b/jcc/_jcc/java/lang/reflect/WildcardType.cpp
new file mode 100644
index 0000000..498d22e
--- /dev/null
+++ b/jcc/_jcc/java/lang/reflect/WildcardType.cpp
@@ -0,0 +1,99 @@
+#ifdef _java_generics
+
+#include <jni.h>
+#include "JCCEnv.h"
+#include "java/lang/reflect/WildcardType.h"
+#include "java/lang/Class.h"
+#include "JArray.h"
+
+namespace java {
+    namespace lang {
+        namespace reflect {
+
+            java::lang::Class *WildcardType::class$ = NULL;
+            jmethodID *WildcardType::mids$ = NULL;
+
+            jclass WildcardType::initializeClass()
+            {
+                if (!class$)
+                {
+
+                    jclass cls = (jclass) env->findClass("java/lang/reflect/WildcardType");
+
+                    mids$ = new jmethodID[max_mid];
+                    mids$[mid_getLowerBounds_6f565a00] = env->getMethodID(cls, "getLowerBounds", "()[Ljava/lang/reflect/Type;");
+                    mids$[mid_getUpperBounds_6f565a00] = env->getMethodID(cls, "getUpperBounds", "()[Ljava/lang/reflect/Type;");
+
+                    class$ = (java::lang::Class *) new JObject(cls);
+                }
+                return (jclass) class$->this$;
+            }
+
+            JArray<Type> WildcardType::getLowerBounds() const
+            {
+                return JArray<Type>(env->callObjectMethod(this$, mids$[mid_getLowerBounds_6f565a00]));
+            }
+
+            JArray<Type> WildcardType::getUpperBounds() const
+            {
+                return JArray<Type>(env->callObjectMethod(this$, mids$[mid_getUpperBounds_6f565a00]));
+            }
+        }
+    }
+}
+
+#include "structmember.h"
+#include "functions.h"
+#include "macros.h"
+
+namespace java {
+    namespace lang {
+        namespace reflect {
+            static PyObject *t_WildcardType_cast_(PyTypeObject *type, PyObject *arg);
+            static PyObject *t_WildcardType_instance_(PyTypeObject *type, PyObject *arg);
+            static PyObject *t_WildcardType_getLowerBounds(t_WildcardType *self);
+            static PyObject *t_WildcardType_getUpperBounds(t_WildcardType *self);
+
+            static PyMethodDef t_WildcardType__methods_[] = {
+                DECLARE_METHOD(t_WildcardType, cast_, METH_O | METH_CLASS),
+                DECLARE_METHOD(t_WildcardType, instance_, METH_O | METH_CLASS),
+                DECLARE_METHOD(t_WildcardType, getLowerBounds, METH_NOARGS),
+                DECLARE_METHOD(t_WildcardType, getUpperBounds, METH_NOARGS),
+                { NULL, NULL, 0, NULL }
+            };
+
+            DECLARE_TYPE(WildcardType, t_WildcardType, java::lang::reflect::Type, WildcardType, abstract_init, 0, 0, 0, 0, 0);
+
+            static PyObject *t_WildcardType_cast_(PyTypeObject *type, PyObject *arg)
+            {
+                if (!(arg = castCheck(arg, WildcardType::initializeClass, 1)))
+                    return NULL;
+                return t_WildcardType::wrap_Object(WildcardType(((t_WildcardType *) arg)->object.this$));
+            }
+            static PyObject *t_WildcardType_instance_(PyTypeObject *type, PyObject *arg)
+            {
+                if (!castCheck(arg, WildcardType::initializeClass, 0))
+                    Py_RETURN_FALSE;
+                Py_RETURN_TRUE;
+            }
+
+            static PyObject *t_WildcardType_getLowerBounds(t_WildcardType *self)
+            {
+                JArray<Type> result((jobject) NULL);
+                OBJ_CALL(result = self->object.getLowerBounds());
+
+                return result.toSequence(java::lang::reflect::t_Type::wrap_Object);
+            }
+
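+            // t_WildcardType_getUpperBounds below mirrors getLowerBounds
+            // above: the JArray<Type> coming back from the JVM is turned by
+            // toSequence() into a Python sequence of Type wrappers, so a
+            // Python caller can iterate the bounds directly, e.g.
+            // (usage sketch only, assuming a JCC-built module exposing
+            // these classes; 't' stands for any wrapped reflect type):
+            //
+            //     wc = WildcardType.cast_(t)
+            //     for bound in wc.getUpperBounds():
+            //         print bound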
+            static PyObject *t_WildcardType_getUpperBounds(t_WildcardType *self)
+            {
+                JArray<Type> result((jobject) NULL);
+                OBJ_CALL(result = self->object.getUpperBounds());
+
+                return result.toSequence(java::lang::reflect::t_Type::wrap_Object);
+            }
+        }
+    }
+}
+
+#endif /* _java_generics */
diff --git a/jcc/_jcc/java/lang/reflect/WildcardType.h b/jcc/_jcc/java/lang/reflect/WildcardType.h
new file mode 100644
index 0000000..b9fcc45
--- /dev/null
+++ b/jcc/_jcc/java/lang/reflect/WildcardType.h
@@ -0,0 +1,63 @@
+#ifdef _java_generics
+
+#ifndef java_lang_reflect_WildcardType_H
+#define java_lang_reflect_WildcardType_H
+
+#include "java/lang/reflect/Type.h"
+
+namespace java {
+    namespace lang {
+        class Class;
+    }
+}
+template<class T> class JArray;
+
+namespace java {
+    namespace lang {
+        namespace reflect {
+
+            class WildcardType : public java::lang::reflect::Type {
+            public:
+                enum {
+                    mid_getLowerBounds_6f565a00,
+                    mid_getUpperBounds_6f565a00,
+                    max_mid
+                };
+
+                static java::lang::Class *class$;
+                static jmethodID *mids$;
+                static jclass initializeClass();
+
+                explicit WildcardType(jobject obj) : java::lang::reflect::Type(obj) {
+                    if (obj != NULL)
+                        initializeClass();
+                }
+                WildcardType(const WildcardType& obj) : java::lang::reflect::Type(obj) {}
+
+                JArray<Type> getLowerBounds() const;
+                JArray<Type> getUpperBounds() const;
+            };
+        }
+    }
+}
+
+#include <Python.h>
+
+namespace java {
+    namespace lang {
+        namespace reflect {
+            extern PyTypeObject PY_TYPE(WildcardType);
+
+            class t_WildcardType {
+            public:
+                PyObject_HEAD
+                WildcardType object;
+                static PyObject *wrap_Object(const WildcardType&);
+                static PyObject *wrap_jobject(const jobject&);
+            };
+        }
+    }
+}
+
+#endif
+#endif /* _java_generics */
diff --git a/jcc/_jcc/java/lang/reflect/__init__.cpp b/jcc/_jcc/java/lang/reflect/__init__.cpp
new file mode 100644
index 0000000..20c0cf2
--- /dev/null
+++ b/jcc/_jcc/java/lang/reflect/__init__.cpp
@@ -0,0 +1,52 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <Python.h>
+#include "macros.h"
+
+namespace java {
+    namespace lang {
+        namespace reflect {
+
+            extern PyTypeObject PY_TYPE(Constructor);
+            extern PyTypeObject PY_TYPE(Method);
+            extern PyTypeObject PY_TYPE(Modifier);
+            extern PyTypeObject PY_TYPE(Field);
+#ifdef _java_generics
+            extern PyTypeObject PY_TYPE(Type);
+            extern PyTypeObject PY_TYPE(ParameterizedType);
+            extern PyTypeObject PY_TYPE(TypeVariable);
+            extern PyTypeObject PY_TYPE(GenericArrayType);
+            extern PyTypeObject PY_TYPE(WildcardType);
+            extern PyTypeObject PY_TYPE(GenericDeclaration);
+#endif
+
+            void __install__(PyObject *m)
+            {
+                INSTALL_TYPE(Constructor, m);
+                INSTALL_TYPE(Method, m);
+                INSTALL_TYPE(Modifier, m);
+                INSTALL_TYPE(Field, m);
+#ifdef _java_generics
+                INSTALL_TYPE(Type, m);
+                INSTALL_TYPE(ParameterizedType, m);
+                INSTALL_TYPE(TypeVariable, m);
+                INSTALL_TYPE(GenericArrayType, m);
+                INSTALL_TYPE(WildcardType, m);
+                INSTALL_TYPE(GenericDeclaration, m);
+#endif
+            }
+        }
+    }
+}
diff --git a/jcc/_jcc/java/util/Enumeration.cpp b/jcc/_jcc/java/util/Enumeration.cpp
new file mode 100644
index 0000000..b555e26
--- /dev/null
+++ b/jcc/_jcc/java/util/Enumeration.cpp
@@ -0,0 +1,110 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <jni.h>
+#include "JCCEnv.h"
+
+#include "java/lang/Object.h"
+#include "java/lang/Class.h"
+#include "java/util/Enumeration.h"
+
+namespace java {
+    namespace util {
+        enum {
+            mid_hasMoreElements,
+            mid_nextElement,
+            max_mid
+        };
+
+        Class *Enumeration::class$ = NULL;
+        jmethodID *Enumeration::mids$ = NULL;
+
+        jclass Enumeration::initializeClass()
+        {
+            if (!class$)
+            {
+                jclass cls = env->findClass("java/util/Enumeration");
+
+                mids$ = new jmethodID[max_mid];
+                mids$[mid_hasMoreElements] = env->getMethodID(cls, "hasMoreElements", "()Z");
+                mids$[mid_nextElement] = env->getMethodID(cls, "nextElement", "()Ljava/lang/Object;");
+
+                class$ = (Class *) new JObject(cls);
+            }
+
+            return (jclass) class$->this$;
+        }
+
+        jboolean Enumeration::hasMoreElements() const
+        {
+            return env->callBooleanMethod(this$, mids$[mid_hasMoreElements]);
+        }
+
+        Object Enumeration::nextElement() const
+        {
+            return Object(env->callObjectMethod(this$, mids$[mid_nextElement]));
+        }
+    }
+}
+
+
+#include "structmember.h"
+#include "functions.h"
+#include "macros.h"
+
+namespace java {
+    namespace util {
+
+        static PyObject *t_Enumeration_hasMoreElements(t_Enumeration *self);
+        static PyObject *t_Enumeration_nextElement(t_Enumeration *self);
+
+        static PyMethodDef t_Enumeration__methods_[] = {
+            DECLARE_METHOD(t_Enumeration, hasMoreElements, METH_NOARGS),
+            DECLARE_METHOD(t_Enumeration, nextElement, METH_NOARGS),
+            { NULL, NULL, 0, NULL }
+        };
+
+        DECLARE_TYPE(Enumeration, t_Enumeration, JObject,
+                     java::util::Enumeration, abstract_init, 0, 0, 0, 0, 0);
+
+#ifdef _java_generics
+        PyObject *t_Enumeration::wrap_Object(const Enumeration& object,
+                                             PyTypeObject *T)
+        {
+            PyObject *obj = t_Enumeration::wrap_Object(object);
+            if (obj != Py_None)
+            {
+                t_Enumeration *self = (t_Enumeration *) obj;
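+                // remember the element wrapper type: parameters[0]
+                // parameterizes this Enumeration wrapper when Java
+                // generics support is enabled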
+                self->parameters[0] = T;
+            }
+            return obj;
+        }
+#endif
+        static PyObject *t_Enumeration_hasMoreElements(t_Enumeration *self)
+        {
+            jboolean b;
+
+            OBJ_CALL(b = self->object.hasMoreElements());
+            Py_RETURN_BOOL(b);
+        }
+
+        static PyObject *t_Enumeration_nextElement(t_Enumeration *self)
+        {
+            Object nextElement((jobject) NULL);
+
+            OBJ_CALL(nextElement = self->object.nextElement());
+            return t_Object::wrap_Object(nextElement);
+        }
+    }
+}
diff --git a/jcc/_jcc/java/util/Enumeration.h b/jcc/_jcc/java/util/Enumeration.h
new file mode 100644
index 0000000..001e4e4
--- /dev/null
+++ b/jcc/_jcc/java/util/Enumeration.h
@@ -0,0 +1,66 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _Enumeration_H
+#define _Enumeration_H
+
+#include <Python.h>
+#include "JObject.h"
+
+namespace java {
+    namespace lang {
+        class Class;
+        class Object;
+    }
+    namespace util {
+        using namespace java::lang;
+
+        class Enumeration : public JObject {
+        public:
+            static Class *class$;
+            static jmethodID *mids$;
+            static jclass initializeClass();
+
+            explicit Enumeration(jobject obj) : JObject(obj) {
+                initializeClass();
+            }
+
+            jboolean hasMoreElements() const;
+            Object nextElement() const;
+        };
+
+        extern PyTypeObject PY_TYPE(Enumeration);
+
+        class t_Enumeration {
+        public:
+            PyObject_HEAD
+            Enumeration object;
+#ifdef _java_generics
+            PyTypeObject *parameters[1];
+            static PyTypeObject **parameters_(t_Enumeration *self)
+            {
+                return (PyTypeObject **) &(self->parameters);
+            }
+#endif
+            static PyObject *wrap_Object(const Enumeration& object);
+#ifdef _java_generics
+            static PyObject *wrap_Object(const Enumeration& object,
+                                         PyTypeObject *T);
+#endif
+            static PyObject *wrap_jobject(const jobject& object);
+        };
+    }
+}
+
+#endif /* _Enumeration_H */
diff --git a/jcc/_jcc/java/util/Iterator.cpp b/jcc/_jcc/java/util/Iterator.cpp
new file mode 100644
index 0000000..acb8b6e
--- /dev/null
+++ b/jcc/_jcc/java/util/Iterator.cpp
@@ -0,0 +1,124 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <jni.h>
+#include "JCCEnv.h"
+
+#include "java/lang/Object.h"
+#include "java/lang/Class.h"
+#include "java/util/Iterator.h"
+
+namespace java {
+    namespace util {
+        enum {
+            mid_hasNext,
+            mid_next,
+            max_mid
+        };
+
+        Class *Iterator::class$ = NULL;
+        jmethodID *Iterator::mids$ = NULL;
+
+        jclass Iterator::initializeClass()
+        {
+            if (!class$)
+            {
+                jclass cls = env->findClass("java/util/Iterator");
+
+                mids$ = new jmethodID[max_mid];
+                mids$[mid_hasNext] = env->getMethodID(cls, "hasNext",
+                                                      "()Z");
+                mids$[mid_next] = env->getMethodID(cls, "next",
+                                                   "()Ljava/lang/Object;");
+
+                class$ = (Class *) new JObject(cls);
+            }
+
+            return (jclass) class$->this$;
+        }
+
+        jboolean Iterator::hasNext() const
+        {
+            return env->callBooleanMethod(this$, mids$[mid_hasNext]);
+        }
+
+        Object Iterator::next() const
+        {
+            return Object(env->callObjectMethod(this$, mids$[mid_next]));
+        }
+    }
+}
+
+
+#include "structmember.h"
+#include "functions.h"
+#include "macros.h"
+
+namespace java {
+    namespace util {
+
+        static PyObject *t_Iterator_hasNext(t_Iterator *self);
+        static PyObject *t_Iterator_next(t_Iterator *self);
+
+        static PyMethodDef t_Iterator__methods_[] = {
+            DECLARE_METHOD(t_Iterator, hasNext, METH_NOARGS),
+            DECLARE_METHOD(t_Iterator, next, METH_NOARGS),
+            { NULL, NULL, 0, NULL }
+        };
+
+        DECLARE_TYPE(Iterator, t_Iterator, JObject, java::util::Iterator,
+                     abstract_init, 0, 0, 0, 0, 0);
+
+#ifdef _java_generics
+        PyObject *t_Iterator::wrap_Object(const Iterator& object,
+                                          PyTypeObject *T)
+        {
+            PyObject *obj = t_Iterator::wrap_Object(object);
+            if (obj != Py_None)
+            {
+                t_Iterator *self = (t_Iterator *) obj;
+                self->parameters[0] = T;
+            }
+            return obj;
+        }
+
+        PyObject *t_Iterator::wrap_jobject(const jobject& object,
+                                           PyTypeObject *T)
+        {
+            PyObject *obj = t_Iterator::wrap_jobject(object);
+            if (obj != Py_None)
+            {
+                t_Iterator *self = (t_Iterator *) obj;
+                self->parameters[0] = T;
+            }
+            return obj;
+        }
+#endif
+        static PyObject *t_Iterator_hasNext(t_Iterator *self)
+        {
+            jboolean b;
+
+            OBJ_CALL(b = self->object.hasNext());
+            Py_RETURN_BOOL(b);
+        }
+
+        static PyObject *t_Iterator_next(t_Iterator *self)
+        {
+            Object next((jobject) NULL);
+
+            OBJ_CALL(next = self->object.next());
+            return t_Object::wrap_Object(next);
+        }
+    }
+}
diff --git a/jcc/_jcc/java/util/Iterator.h b/jcc/_jcc/java/util/Iterator.h
new file mode 100644
index 0000000..0e3b4be
--- /dev/null
+++ b/jcc/_jcc/java/util/Iterator.h
@@ -0,0 +1,68 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _Iterator_H
+#define _Iterator_H
+
+#include <Python.h>
+#include "JObject.h"
+
+namespace java {
+    namespace lang {
+        class Class;
+        class Object;
+    }
+    namespace util {
+        using namespace java::lang;
+
+        class Iterator : public JObject {
+        public:
+            static Class *class$;
+            static jmethodID *mids$;
+            static jclass initializeClass();
+
+            explicit Iterator(jobject obj) : JObject(obj) {
+                initializeClass();
+            }
+
+            jboolean hasNext() const;
+            Object next() const;
+        };
+
+        extern PyTypeObject PY_TYPE(Iterator);
+
+        class t_Iterator {
+        public:
+            PyObject_HEAD
+            Iterator object;
+#ifdef _java_generics
+            PyTypeObject *parameters[1];
+            static PyTypeObject **parameters_(t_Iterator *self)
+            {
+                return (PyTypeObject **) &(self->parameters);
+            }
+#endif
+            static PyObject *wrap_Object(const Iterator& object);
+            static PyObject *wrap_jobject(const jobject& object);
+#ifdef _java_generics
+            static PyObject *wrap_Object(const Iterator& object,
+                                         PyTypeObject *T);
+            static PyObject *wrap_jobject(const jobject& object,
+                                          PyTypeObject *T);
+#endif
+        };
+    }
+}
+
+#endif /* _Iterator_H */
diff --git a/jcc/helpers/__init__.py b/jcc/helpers/__init__.py
new file mode 100644
index 0000000..5cfb789
--- /dev/null
+++ b/jcc/helpers/__init__.py
@@ -0,0 +1 @@
+# helpers package
diff --git a/jcc/helpers/build.py b/jcc/helpers/build.py
new file mode 100644
index 0000000..038395a
--- /dev/null
+++ b/jcc/helpers/build.py
@@ -0,0 +1,46 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+from distutils.command.build_py import build_py
+from distutils import log
+
+class jcc_build_py(build_py):
+    config_text = None
+    config_file = None
+
+    def run(self):
+        self.write_jcc_config()
+        return build_py.run(self)
+
+    def write_jcc_config(self):
+        # only write jcc's config.py file if it doesn't exist or a build
+        # command is given
+        write = False
+        if not os.path.isfile(self.config_file):
+            write = True
+        else:
+            for command in self.distribution.commands:
+                if command.startswith("build"):
+                    write = True
+                    break
+
+        if write:
+            log.info("writing %s" %(self.config_file))
+            config = open(self.config_file, 'w')
+            try:
+                config.write(self.config_text)
+            finally:
+                config.close()
+        else:
+            log.info("not writing %s" %(self.config_file))
diff --git a/jcc/helpers/darwin.py b/jcc/helpers/darwin.py
new file mode 100644
index 0000000..77eb4dd
--- /dev/null
+++ b/jcc/helpers/darwin.py
@@ -0,0 +1,33 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
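+
+# JAVAHOME discovery for Mac OS X: the checks below probe for jni.h under
+# the JavaVM.framework Headers directory, first at the path Apple documents
+# and then under the matching MacOSX SDK in /Developer.  A hypothetical
+# consumer of this module (names assumed, not part of this file) might do:
+#
+#     from jcc.helpers.darwin import JAVAHOME
+#     if JAVAHOME is not None:
+#         jni_include = os.path.join(JAVAHOME, "Headers")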
+
+import sys, os
+
+global JAVAHOME
+JAVAHOME = None
+
+if sys.platform == "darwin":
+
+    # figure out where the JDK lives
+    import platform, re
+    _os_version = re.match("[0-9]+\.[0-9]+", platform.mac_ver()[0]).group(0)
+
+    # this is where Apple says we should look for headers
+    _path = "/System/Library/Frameworks/JavaVM.framework"
+    if os.path.exists(os.path.join(_path, "Headers", "jni.h")):
+        JAVAHOME = _path
+    else:
+        # but their updates don't always match their documentation,
+        # so look up the same path in the OS's /Developer tree
+        _path = "/Developer/SDKs/MacOSX%s.sdk%s" %(_os_version, _path)
+        if os.path.exists(os.path.join(_path, "Headers", "jni.h")):
+            JAVAHOME = _path
diff --git a/jcc/helpers/linux.py b/jcc/helpers/linux.py
new file mode 100644
index 0000000..5d65f2e
--- /dev/null
+++ b/jcc/helpers/linux.py
@@ -0,0 +1,69 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+
+def patch_st_dir(patch_version, st_egg, jccdir):
+    return '''
+
+Shared mode is disabled, setuptools patch.43.%s must be applied to enable it
+or the NO_SHARED environment variable must be set to turn off this error.
+
+    sudo patch -d %s -Nup0 < %s/jcc/patches/patch.43.%s
+
+See %s/INSTALL for more information about shared mode.
+''' %(patch_version, st_egg, jccdir, patch_version, jccdir)
+
+
+def patch_st_zip(patch_version, st_egg, jccdir):
+    return '''
+
+Shared mode is disabled, setuptools patch.43.%s must be applied to enable it
+or the NO_SHARED environment variable must be set to turn off this error.
+
+    mkdir tmp
+    cd tmp
+    unzip -q %s
+    patch -Nup0 < %s/jcc/patches/patch.43.%s
+    sudo zip %s -f
+    cd ..
+    rm -rf tmp
+
+See %s/INSTALL for more information about shared mode.
+''' %(patch_version, st_egg, jccdir, patch_version, st_egg, jccdir)
+
+
+def patch_setuptools(with_setuptools):
+
+    with_setuptools_c11 = ('00000000', '00000006', '*c', '00000011', '*final')
+
+    try:
+        from setuptools.command.build_ext import sh_link_shared_object
+        enable_shared = True  # jcc/patches/patch.43 was applied
+    except ImportError:
+        import setuptools
+        jccdir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+        st_egg = os.path.dirname(setuptools.__path__[0])
+        if with_setuptools < with_setuptools_c11:
+            patch_version = '0.6c7'
+        else:
+            patch_version = '0.6c11'
+
+        if os.path.isdir(st_egg):
+            raise NotImplementedError, patch_st_dir(patch_version, st_egg,
+                                                    jccdir)
+        else:
+            raise NotImplementedError, patch_st_zip(patch_version, st_egg,
+                                                    jccdir)
+
+    return enable_shared
diff --git a/jcc/helpers/mingw32.py b/jcc/helpers/mingw32.py
new file mode 100644
index 0000000..ebc00d6
--- /dev/null
+++ b/jcc/helpers/mingw32.py
@@ -0,0 +1,40 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os, copy
+from distutils.cygwinccompiler import Mingw32CCompiler
+
+class JCCMinGW32CCompiler(Mingw32CCompiler):
+
+    def link(self, target_desc, objects, output_filename, output_dir=None,
+             libraries=None, library_dirs=None, runtime_library_dirs=None,
+             export_symbols=None, debug=0, extra_preargs=None,
+             extra_postargs=None, build_temp=None, target_lang=None):
+
+        # use separate copies, so we can modify the lists
+        extra_preargs = copy.copy(extra_preargs or [])
+
+        (dll_name, dll_extension) = os.path.splitext(output_filename)
+        if dll_extension.lower() == ".dll":
+            extra_preargs.extend(["-Wl,--out-implib,%s" %(os.path.join(os.path.dirname(dll_name), "jcc", "jcc.lib"))])
+
+        Mingw32CCompiler.link(self, target_desc=target_desc,
+                              objects=objects,
+                              output_filename=output_filename,
+                              output_dir=output_dir, libraries=libraries,
+                              library_dirs=library_dirs,
+                              runtime_library_dirs=runtime_library_dirs,
+                              export_symbols=export_symbols, debug=debug,
+                              extra_preargs=extra_preargs,
+                              extra_postargs=extra_postargs,
+                              build_temp=build_temp,
+                              target_lang=target_lang)
diff --git a/jcc/helpers/windows.py b/jcc/helpers/windows.py
new file mode 100644
index 0000000..57b91ae
--- /dev/null
+++ b/jcc/helpers/windows.py
@@ -0,0 +1,56 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
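+
+# JAVAHOME discovery for Windows: the code below reads the JDK location
+# from the registry under HKEY_LOCAL_MACHINE.  An equivalent lookup,
+# sketched with the keys actually queried here (error handling omitted):
+#
+#     import _winreg
+#     jdk = _winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE,
+#                           r"SOFTWARE\JavaSoft\Java Development Kit")
+#     version = _winreg.QueryValueEx(jdk, "CurrentVersion")[0]
+#     home = _winreg.QueryValueEx(
+#         _winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE,
+#                         r"SOFTWARE\JavaSoft\Java Development Kit\%s" % version),
+#         "JavaHome")[0]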
+
+import sys
+
+global JAVAHOME
+JAVAHOME = None
+
+if sys.platform == "win32":
+    # figure out where the JDK lives
+
+    try:
+        import _winreg as wreg
+
+        class WindowsRegistry:
+            # see the Python Cookbook, #146305, Dirk Holtwick
+
+            def __init__(self, keyname):
+                " handle registry access "
+                self.reg = wreg.ConnectRegistry(None, wreg.HKEY_LOCAL_MACHINE)
+                self.key = wreg.OpenKey(self.reg, keyname)
+
+            def get(self, name):
+                " get value out of registry "
+                v, t = wreg.QueryValueEx(self.key, name)
+                return v, t
+
+            def close(self):
+                " close the key finally "
+                if hasattr(self, 'key'):
+                    self.key.Close()
+                if hasattr(self, 'reg'):
+                    self.reg.Close()
+
+            def __del__(self):
+                self.close()
+
+        def get_registry_value(vname, subname):
+            r = WindowsRegistry(vname)
+            v, t = r.get(subname)
+            return v
+
+        javaversion = get_registry_value(r"SOFTWARE\JavaSoft\Java Development Kit", "CurrentVersion")
+        JAVAHOME = get_registry_value(r"SOFTWARE\JavaSoft\Java Development Kit\%s" % javaversion, "JavaHome")
+
+    except:
+        JAVAHOME = 'c:/Program Files/Java/jdk1.6.0_18'
diff --git a/jcc/java/org/apache/jcc/PythonException.java b/jcc/java/org/apache/jcc/PythonException.java
new file mode 100644
index 0000000..4eae86b
--- /dev/null
+++ b/jcc/java/org/apache/jcc/PythonException.java
@@ -0,0 +1,57 @@
+/* ====================================================================
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ====================================================================
+ */
+
+package org.apache.jcc;
+
+
+public class PythonException extends RuntimeException {
+    public boolean withTrace = true;
+    protected String message, errorName, traceback;
+
+    public PythonException(String message)
+    {
+        super(message);
+        getErrorInfo();  // sets errorName, message and traceback
+    }
+
+    public String getMessage(boolean trace)
+    {
+        if (message == null)
+            message = super.getMessage();
+
+        if (trace)
+            return message + "\n" + traceback;
+
+        return message;
+    }
+
+    public String getMessage()
+    {
+        return getMessage(withTrace);
+    }
+
+    public String getErrorName()
+    {
+        return errorName;
+    }
+
+    public String getTraceback()
+    {
+        return traceback;
+    }
+
+    protected native void getErrorInfo();
+    public native void clear();
+}
diff --git a/jcc/java/org/apache/jcc/PythonVM.java b/jcc/java/org/apache/jcc/PythonVM.java
new file mode 100644
index 0000000..88020e8
--- /dev/null
+++ b/jcc/java/org/apache/jcc/PythonVM.java
@@ -0,0 +1,112 @@
+/* ====================================================================
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ====================================================================
+ */
+
+package org.apache.jcc;
+
+
+public class PythonVM {
+    static protected PythonVM vm;
+
+    static {
+        System.loadLibrary("jcc");
+    }
+
+    /**
+     * Start the embedded Python interpreter.  The specified
+     * program name and args are set into the Python variable sys.argv.
+     * This returns an instance of the Python VM; it may be called
+     * multiple times, and will return the same VM instance each time.
+     *
+     * @param programName the name of the Python program, typically
+     * /usr/bin/python.  This is informational; the program is not
+     * actually executed.
+     * @param args additional arguments to be put into sys.argv.
+     * @return a singleton instance of PythonVM
+     */
+    static public PythonVM start(String programName, String[] args)
+    {
+        if (vm == null)
+        {
+            vm = new PythonVM();
+            vm.init(programName, args);
+        }
+
+        return vm;
+    }
+
+    /**
+     * Start the embedded Python interpreter.  The specified
+     * program name is set into the Python variable sys.argv[0].
+     * This returns an instance of the Python VM; it may be called
+     * multiple times, and will return the same VM instance each time.
+     *
+     * @param programName the name of the Python program, typically
+     * /usr/bin/python.  This is informational; the program is not
+     * actually executed.
+     * @return a singleton instance of PythonVM
+     */
+    static public PythonVM start(String programName)
+    {
+        return start(programName, null);
+    }
+
+    /**
+     * Obtain the PythonVM instance, or null if the Python VM
+     * has not yet been started.
+     *
+     * @return a singleton instance of PythonVM, or null
+     */
+    static public PythonVM get()
+    {
+        return vm;
+    }
+
+    protected PythonVM()
+    {
+    }
+
+    protected native void init(String programName, String[] args);
+
+    /**
+     * Instantiate the specified Python class, and return the instance.
+     *
+     * @param moduleName the Python module the class is defined in
+     * @param className the Python class to instantiate.
+     * @return a handle on the Python instance.
+     */
+    public native Object instantiate(String moduleName, String className)
+        throws PythonException;
+
+    /**
+     * Bump the Python thread state counter.  Every thread should
+     * do this before calling into Python, to prevent the Python
+     * thread state from being inadvertently collected (and causing loss
+     * of thread-local variables).
+     *
+     * @return the Python thread state counter.  A return value less
+     * than zero signals an error.
+     */
+    public native int acquireThreadState();
+
+    /**
+     * Release the Python thread state counter.  Every thread that has
+     * called acquireThreadState() should call this before
+     * terminating.
+     *
+     * @return the Python thread state counter.  A return value less
+     * than zero signals an error.
+     */
+    public native int releaseThreadState();
+}
diff --git a/jcc/jcc/__init__.py b/jcc/jcc/__init__.py
new file mode 100644
index 0000000..4822fcb
--- /dev/null
+++ b/jcc/jcc/__init__.py
@@ -0,0 +1,37 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# jcc package
+
+import os, sys
+
+if sys.platform == 'win32':
+
+    if '--find-jvm-dll' in sys.argv:
+        from windows import add_jvm_dll_directory_to_path
+        add_jvm_dll_directory_to_path()
+
+    from jcc.config import SHARED
+    if SHARED:
+        path = os.environ['Path'].split(os.pathsep)
+        eggpath = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
+        if eggpath not in path:
+            path.insert(0, eggpath)
+            os.environ['Path'] = os.pathsep.join(path)
+
+if __name__ == '__main__':
+    import jcc.__main__
+else:
+    from _jcc import initVM
+
+CLASSPATH=os.path.join(os.path.abspath(os.path.dirname(__file__)), "classes")
+_jcc.CLASSPATH = CLASSPATH
diff --git a/jcc/jcc/__main__.py b/jcc/jcc/__main__.py
new file mode 100644
index 0000000..90f4854
--- /dev/null
+++ b/jcc/jcc/__main__.py
@@ -0,0 +1,98 @@
+
+import sys
+
+from jcc import cpp
+
+if len(sys.argv) == 1 or '--help' in sys.argv:
+    help = '''
+  JCC - C++/Python Java Native Interface Code Generator
+
+  Usage: python -m jcc.__main__ [options] [actions]
+
+  Input options:
+    --jar JARFILE          - make JCC wrap all public classes found in
+                             JARFILE, add it to the module's CLASSPATH and
+                             include it in the distribution
+    --include JARFILE      - include JARFILE in the distribution and add
+                             it to the module's CLASSPATH
+    --import MODULE        - link against the wrappers to classes shared
+                             with MODULE instead of generating duplicate
+                             and incompatible wrappers
+    --exclude CLASS        - explicitly don't wrap CLASS
+    --package PACKAGE      - add PACKAGE to the list of packages from
+                             which dependencies are automatically wrapped
+    --classpath [PATH|JAR] - add [PATH|JAR] to CLASSPATH while generating
+                             wrappers
+    --libpath [PATH]       - add [PATH] to java.library.path while generating
+                             wrappers
+    --module MODULE        - include Python MODULE in the distribution
+    --reserved SYMBOL      - mark SYMBOL as a reserved word that will be
+                             mangled in the generated C++ code to avoid
+                             clashes with C/C++ reserved words or header
+                             file definitions
+    --vmarg                - add extra Java VM initialization parameter
+    --resources            - include resource directory in distribution as
+                             package data
+
+  Python wrapper generation options:
+    --python NAME          - generate wrappers for use from Python in a module
+                             called NAME
+    --version VERSION      - the generated module's version number
+    --shared               - generate a module that links against a shared
+                             library version of the JCC runtime so that
+                             multiple JCC-wrapped modules can be used within
+                             the same Python runtime
+    --sequence CLASS METHODSIGNATURE
+                           - generate a pythonic sequence protocol wrapper for
+                             CLASS
+    --mapping CLASS METHODSIGNATURE1 METHODSIGNATURE2
+                           - generate a pythonic map protocol wrapper for CLASS
+    --rename CLASS1=NAME1,CLASS2=NAME2,...
+                           - rename one or more Python wrapper classes to
+                             avoid name clashes due to the flattening of
+                             the Java package namespaces as mapped into
+                             Python
+    --no-generics          - disable support for Java generics
+
+  If you're planning to use pythonic wrappers you should read the relevant
+  documentation first:
+    http://lucene.apache.org/pylucene/jcc/documentation/readme.html#python
+
+  Output options:
+    --debug                - generate a module using the C++ compiler's
+                             debug options
+    --output OUTPUTDIR     - the wrapper will be generated in OUTPUTDIR,
+                             'build' by default
+    --files N              - split the generated wrapper file into at least
+                             N files to workaround C++ compiler file size
+                             limitations
+    --arch                 - Mac OS X only: filter the -arch parameters
+                             Python was configured with to build leaner
+                             binaries, faster
+    --find-jvm-dll         - Windows only: extract the directory containing
+                             jvm.dll from the registry and append it to the
+                             Path at runtime
+
+  Actions:
+    --build                - generate the wrapper and compile it
+    --compile              - recompile the (previously generated) module
+    --install              - install the wrapper in the local site-packages
+
+  Distribution actions:
+    --use-distutils        - use distutils even when setuptools is available
+    --bdist                - generate a binary distutils-based distribution
+                             or a setuptools-based .egg
+    --wininst              - create an installer application for Microsoft
+                             Windows
+
+  Other distutils/setuptools options (these are passed right through):
+    --compiler COMPILER    - use COMPILER instead of the platform default
+    --root ROOTDIR
+    --install-dir INSTALLDIR
+    --prefix PREFIX
+    --home HOMEDIR
+'''
+    print help
+    sys.exit(0)
+
+cpp.jcc(sys.argv)
diff --git a/jcc/jcc/cpp.py b/jcc/jcc/cpp.py
new file mode 100644
index 0000000..7550e59
--- /dev/null
+++ b/jcc/jcc/cpp.py
@@ -0,0 +1,1152 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
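+
+# cpp.py drives wrapper generation: jcc() below parses the command-line
+# options documented in __main__.py above, wraps the requested classes and,
+# when asked, builds the resulting extension.  A typical invocation (the jar
+# and module names here are hypothetical) would be:
+#
+#     python -m jcc.__main__ --jar sample.jar --package java.lang \
+#         --python sample --build --install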
+ +import os, sys, zipfile, _jcc + +python_ver = '%d.%d.%d' %(sys.version_info[0:3]) +if python_ver < '2.4': + from sets import Set as set + + def split_pkg(string, sep): + parts = string.split(sep) + if len(parts) > 1: + return sep.join(parts[:-1]), parts[-1] + return parts + + def sort(list, fn=None, key=None): + if fn: + list.sort(fn) + elif key: + def fn(x, y): + return cmp(key(x), key(y)) + list.sort(fn) + else: + list.sort() + +else: + def split_pkg(string, sep): + return string.rsplit(sep, 1) + + def sort(list, fn=None, key=None): + if fn: + list.sort(cmp=fn) + elif key: + list.sort(key=key) + else: + list.sort() + + +class JavaError(Exception): + + def getJavaException(self): + return self.args[0] + + def __str__(self): + writer = StringWriter() + self.getJavaException().printStackTrace(PrintWriter(writer)) + + return '\n'.join((super(JavaError, self).__str__(), + "Java stacktrace:", str(writer))) + + +class InvalidArgsError(Exception): + pass + + +_jcc._set_exception_types(JavaError, InvalidArgsError) +from _jcc import findClass as _findClass +from _jcc import * + + +def findClass(className): + + try: + cls = _findClass(className) + except: + print >>sys.stderr, "While loading", className + raise + + if cls is None: + raise ValueError, className + + return cls + + +INDENT = ' ' +HALF_INDENT = ' ' + +PRIMITIVES = { 'boolean': 'Z', + 'byte': 'B', + 'char': 'C', + 'double': 'D', + 'float': 'F', + 'int': 'I', + 'long': 'J', + 'short': 'S', + 'void': 'V' } + +RESERVED = set(['delete', 'and', 'or', 'not', 'xor', 'union', 'register', + 'const', 'bool', 'operator', 'typeof', 'asm', + 'NULL', 'DOMAIN', 'IGNORE']) + +def cppname(name): + + if name in RESERVED: + return name + '$' + + return name + + +def cppnames(names): + + return [cppname(name) for name in names] + + +def absname(names): + + if names: + return "::%s" %('::'.join(names)) + + return '' + + +def typename(cls, current, const): + + if cls.isArray(): + componentType = cls.getComponentType() + name = 'JArray< %s >' %(typename(componentType, current, False)) + + elif cls.isPrimitive(): + name = cls.getName() + if name != 'void': + name = 'j' + name + const = False + + elif cls == current: + name = cppname(cls.getName().split('.')[-1]) + + else: + name = absname([cppname(name) for name in cls.getName().split('.')]) + + if const: + return "const %s &" %(name) + + return name + + +def argnames(params, cls): + + if not params: + return '', '' + + count = len(params) + decls = ', '.join(["%s a%d" %(typename(params[i], cls, True), i) + for i in xrange(count)]) + args = ', '.join(['a%d%s' %(i, not params[i].isPrimitive() and '.this$' or '') + for i in xrange(count)]) + + return decls, ', ' + args + + +def line(out, indent=0, string='', *args): + + out.write(INDENT * indent) + out.write(string % args) + out.write('\n') + + +def known(cls, typeset, declares, packages, excludes, generics): + + if generics: + if Class.instance_(cls): + cls = Class.cast_(cls) + elif ParameterizedType.instance_(cls): + pt = ParameterizedType.cast_(cls) + if not known(pt.getRawType(), typeset, declares, packages, excludes, + True): + return False + for ta in pt.getActualTypeArguments(): + if TypeVariable.instance_(ta): + continue + if not known(ta, typeset, declares, packages, excludes, True): + return False + return True + elif WildcardType.instance_(cls): + wc = WildcardType.cast_(cls) + for ub in wc.getUpperBounds(): + if not known(ub, typeset, declares, packages, excludes, True): + return False + return True + elif TypeVariable.instance_(cls): + for bounds 
in TypeVariable.cast_(cls).getBounds(): + if not known(bounds, typeset, declares, packages, excludes, + True): + return False + return True + elif GenericArrayType.instance_(cls): + return known(GenericArrayType.cast_(cls).getGenericComponentType(), + typeset, declares, packages, excludes, True) + else: + raise TypeError, (cls, cls.getClass()) + + while cls.isArray(): + cls = cls.getComponentType() + + className = cls.getName() + if className.split('$', 1)[0] in excludes or className in excludes: + return False + + if cls.isPrimitive(): + return True + + if cls in typeset: + declares.add(cls) + return True + + if split_pkg(className, '.')[0] in packages: + typeset.add(cls) + declares.add(cls) + cls = cls.getSuperclass() + while cls and cls not in typeset: + typeset.add(cls) + cls = cls.getSuperclass() + return True + + return False + + +def addRequiredTypes(cls, typeset, generics): + + if generics: + if Class.instance_(cls): + cls = Class.cast_(cls) + if not (cls.isPrimitive() or cls in typeset): + typeset.add(cls) + cls = cls.getGenericSuperclass() + if cls is not None: + addRequiredTypes(cls, typeset, True) + elif ParameterizedType.instance_(cls): + pt = ParameterizedType.cast_(cls) + addRequiredTypes(pt.getRawType(), typeset, True) + for ta in pt.getActualTypeArguments(): + addRequiredTypes(ta, typeset, True) + elif GenericArrayType.instance_(cls): + gat = GenericArrayType.cast_(cls) + addRequiredTypes(gat.getGenericComponentType(), typeset, True) + elif not (TypeVariable.instance_(cls) or WildcardType.instance_(cls)): + raise NotImplementedError, repr(cls) + else: + if cls not in typeset: + typeset.add(cls) + cls = cls.getSuperclass() + if cls is not None: + addRequiredTypes(cls, typeset, False) + + +def find_method(cls, name, params): + + declared = False + while True: + try: + if declared: + method = cls.getDeclaredMethod(name, params) + else: + method = cls.getMethod(name, params) + break + except JavaError, e: + if (e.getJavaException().getClass().getName() == 'java.lang.NoSuchMethodException'): + if not declared: + declared = True + else: + cls = cls.getSuperclass() + if not cls: + return None + continue + raise + + modifiers = method.getModifiers() + if Modifier.isAbstract(modifiers): + return None + if Modifier.isPrivate(modifiers): + return None + + return method + + +def signature(fn, argsOnly=False): + + def typename(cls): + array = '' + while cls.isArray(): + array += '[' + cls = cls.getComponentType() + if cls.isPrimitive(): + return array + PRIMITIVES[cls.getName()] + return '%sL%s;' %(array, cls.getName().replace('.', '/')) + + if isinstance(fn, Constructor): + returnType = 'V' + elif isinstance(fn, Method): + returnType = typename(fn.getReturnType()) + elif isinstance(fn, Field): + return typename(fn.getType()) + + if argsOnly: + return '(%s)' %(''.join([typename(param) + for param in fn.getParameterTypes()])) + + return '(%s)%s' %(''.join([typename(param) + for param in fn.getParameterTypes()]), + returnType) + + +def forward(out, namespace, indent): + + for name, entries in namespace.iteritems(): + if entries is True: + line(out, indent, 'class %s;', cppname(name)) + else: + line(out, indent, 'namespace %s {', cppname(name)) + forward(out, entries, indent + 1) + line(out, indent, '}') + + +def expandjar(path): + + jar = zipfile.ZipFile(path, 'r') + + for member in jar.infolist(): + f = member.filename + if f.endswith('.class'): + yield f.split('.')[0].replace('/', '.') + + jar.close() + + +def jcc(args): + + classNames = set() + listedClassNames = set() + packages = 
set() + jars = [] + classpath = [_jcc.CLASSPATH] + libpath = [] + vmargs = ['-Djava.awt.headless=true'] + moduleName = None + modules = [] + build = False + install = False + recompile = False + output = 'build' + debug = False + excludes = [] + version = '' + mappings = {} + sequences = {} + renames = {} + env = None + wrapperFiles = 1 + prefix = None + root = None + install_dir = None + home_dir = None + use_distutils = False + shared = False + dist = False + wininst = False + find_jvm_dll = False + compiler = None + generics = hasattr(_jcc, "Type") + arch = [] + resources = [] + imports = {} + + i = 1 + while i < len(args): + arg = args[i] + if arg.startswith('-'): + if arg == '--jar': + i += 1 + classpath.append(args[i]) + classNames.update(expandjar(args[i])) + jars.append(args[i]) + elif arg == '--include': + i += 1 + classpath.append(args[i]) + jars.append(args[i]) + elif arg == '--package': + i += 1 + packages.add(args[i]) + elif arg == '--classpath': + i += 1 + classpath.append(args[i]) + elif arg == '--libpath': + i += 1 + libpath.append(args[i]) + elif arg == '--vmarg': + i += 1 + vmargs.append(args[i]) + elif arg == '--python': + from python import python, module + i += 1 + moduleName = args[i] + elif arg == '--module': + i += 1 + modules.append(args[i]) + elif arg == '--build': + from python import compile + build = True + elif arg == '--install': + from python import compile + install = True + elif arg == '--compile': + from python import compile + recompile = True + elif arg == '--output': + i += 1 + output = args[i] + elif arg == '--debug': + debug = True + elif arg == '--exclude': + i += 1 + excludes.append(args[i]) + elif arg == '--version': + i += 1 + version = args[i] + elif arg == '--mapping': + mappings[args[i + 1]] = args[i + 2] + i += 2 + elif arg == '--sequence': + sequences[args[i + 1]] = (args[i + 2], args[i + 3]) + i += 3 + elif arg == '--rename': + i += 1 + renames.update(dict([arg.split('=') + for arg in args[i].split(',')])) + elif arg == '--files': + i += 1 + wrapperFiles = args[i] + if wrapperFiles != 'separate': + wrapperFiles = int(wrapperFiles) + elif arg == '--prefix': + i += 1 + prefix = args[i] + elif arg == '--root': + i += 1 + root = args[i] + elif arg == '--install-dir': + i += 1 + install_dir = args[i] + elif arg == '--home': + i += 1 + home_dir = args[i] + elif arg == '--use-distutils': + use_distutils = True + elif arg == '--shared': + shared = True + elif arg == '--bdist': + from python import compile + dist = True + elif arg == '--wininst': + from python import compile + wininst = True + dist = True + elif arg == '--compiler': + i += 1 + compiler = args[i] + elif arg == '--reserved': + i += 1 + RESERVED.update(args[i].split(',')) + elif arg == '--arch': + i += 1 + arch.append(args[i]) + elif arg == '--no-generics': + generics = False + elif arg == '--find-jvm-dll': + find_jvm_dll = True + elif arg == '--resources': + i += 1 + resources.append(args[i]) + elif arg == '--import': + i += 1 + imports[args[i]] = () + else: + raise ValueError, "Invalid argument: %s" %(arg) + else: + classNames.add(arg) + listedClassNames.add(arg) + i += 1 + + if libpath: + vmargs.append('-Djava.library.path=' + os.pathsep.join(libpath)) + + env = initVM(os.pathsep.join(classpath) or None, + maxstack='512k', vmargs=' '.join(vmargs)) + + typeset = set() + excludes = set(excludes) + + if imports: + if shared: + imports = dict((__import__(import_), set()) for import_ in imports) + else: + raise ValueError, "--shared must be used when using --import" + + if recompile or 
not build and (install or dist): + if moduleName is None: + raise ValueError, 'module name not specified (use --python)' + else: + compile(env, os.path.dirname(args[0]), output, moduleName, + install, dist, debug, jars, version, + prefix, root, install_dir, home_dir, use_distutils, + shared, compiler, modules, wininst, find_jvm_dll, + arch, generics, resources, imports) + else: + if imports: + def walk((include, importset), dirname, names): + for name in names: + if name.endswith('.h'): + className = os.path.join(dirname[len(include) + 1:], + name[:-2]) + if os.path.sep != '/': + className = className.replace(os.path.sep, '/') + importset.add(findClass(className)) + for import_, importset in imports.iteritems(): + env._addClassPath(import_.CLASSPATH) + include = os.path.join(import_.__dir__, 'include') + os.path.walk(include, walk, (include, importset)) + typeset.update(importset) + typeset.add(findClass('java/lang/Object')) + typeset.add(findClass('java/lang/Class')) + typeset.add(findClass('java/lang/String')) + typeset.add(findClass('java/lang/Throwable')) + typeset.add(findClass('java/lang/Exception')) + typeset.add(findClass('java/lang/RuntimeException')) + if moduleName: + typeset.add(findClass('java/lang/Number')) + typeset.add(findClass('java/lang/Boolean')) + typeset.add(findClass('java/lang/Byte')) + typeset.add(findClass('java/lang/Character')) + typeset.add(findClass('java/lang/Double')) + typeset.add(findClass('java/lang/Float')) + typeset.add(findClass('java/lang/Integer')) + typeset.add(findClass('java/lang/Long')) + typeset.add(findClass('java/lang/Short')) + typeset.add(findClass('java/util/Iterator')) + typeset.add(findClass('java/util/Enumeration')) + typeset.add(findClass('java/io/StringWriter')) + typeset.add(findClass('java/io/PrintWriter')) + typeset.add(findClass('java/io/Writer')) + packages.add('java.lang') + + for className in classNames: + if className.split('$', 1)[0] in excludes or className in excludes: + continue + cls = findClass(className.replace('.', '/')) + if (Modifier.isPublic(cls.getModifiers()) or + className in listedClassNames): + addRequiredTypes(cls, typeset, generics) + + _dll_export = '' + if moduleName: + cppdir = os.path.join(output, '_%s' %(moduleName)) + if shared and sys.platform == 'win32': + _dll_export = "_dll_%s " %(moduleName) + else: + cppdir = output + + allInOne = wrapperFiles != 'separate' + if allInOne: + if not os.path.isdir(cppdir): + os.makedirs(cppdir) + if wrapperFiles <= 1: + out_cpp = file(os.path.join(cppdir, '__wrap__.cpp'), 'w') + else: + fileCount = 1 + fileName = '__wrap%02d__.cpp' %(fileCount) + out_cpp = file(os.path.join(cppdir, fileName), 'w') + + done = set() + for importset in imports.itervalues(): + done.update(importset) + + todo = typeset - done + if allInOne and wrapperFiles > 1: + classesPerFile = max(1, len(todo) / wrapperFiles) + classCount = 0 + while todo: + for cls in todo: + classCount += 1 + className = cls.getName() + names = className.split('.') + dir = os.path.join(cppdir, *names[:-1]) + if not os.path.isdir(dir): + os.makedirs(dir) + + fileName = os.path.join(dir, names[-1]) + out_h = file(fileName + '.h', "w") + line(out_h, 0, '#ifndef %s_H', '_'.join(names)) + line(out_h, 0, '#define %s_H', '_'.join(names)) + + (superCls, constructors, methods, protectedMethods, + fields, instanceFields, declares) = \ + header(env, out_h, cls, typeset, packages, excludes, + generics, _dll_export) + + if not allInOne: + out_cpp = file(fileName + '.cpp', 'w') + names, superNames = code(env, out_cpp, + cls, 
superCls, constructors, + methods, protectedMethods, + fields, instanceFields, + declares, typeset) + if moduleName: + python(env, out_h, out_cpp, + cls, superCls, names, superNames, + constructors, methods, protectedMethods, + fields, instanceFields, + mappings.get(className), sequences.get(className), + renames.get(className), + declares, typeset, moduleName, generics, + _dll_export) + + line(out_h) + line(out_h, 0, '#endif') + out_h.close() + + if not allInOne: + out_cpp.close() + elif wrapperFiles > 1: + if classCount >= classesPerFile: + out_cpp.close() + fileCount += 1 + fileName = '__wrap%02d__.cpp' %(fileCount) + out_cpp = file(os.path.join(cppdir, fileName), 'w') + classCount = 0 + + done.update(todo) + todo = typeset - done + + if allInOne: + out_cpp.close() + + if moduleName: + out = file(os.path.join(cppdir, moduleName) + '.cpp', 'w') + module(out, allInOne, done, imports, cppdir, moduleName, + shared, generics) + out.close() + if build or install or dist: + compile(env, os.path.dirname(args[0]), output, moduleName, + install, dist, debug, jars, version, + prefix, root, install_dir, home_dir, use_distutils, + shared, compiler, modules, wininst, find_jvm_dll, + arch, generics, resources, imports) + + +def header(env, out, cls, typeset, packages, excludes, generics, _dll_export): + + names = cls.getName().split('.') + superCls = cls.getSuperclass() + declares = set([cls.getClass()]) + + interfaces = [] + if generics: + for interface in cls.getGenericInterfaces(): + if Class.instance_(interface): + pt = None + interface = Class.cast_(interface) + elif ParameterizedType.instance_(interface): + pt = ParameterizedType.cast_(interface) + interface = Class.cast_(pt.getRawType()) + else: + raise NotImplementedError, repr(interface) + if superCls and interface.isAssignableFrom(superCls): + continue + if known(interface, typeset, declares, packages, excludes, False): + interfaces.append(interface) + if pt is not None: + for ta in pt.getActualTypeArguments(): + addRequiredTypes(ta, typeset, True) + else: + for interface in cls.getInterfaces(): + if superCls and interface.isAssignableFrom(superCls): + continue + if known(interface, typeset, declares, packages, excludes, False): + interfaces.append(interface) + + if cls.isInterface(): + if interfaces: + superCls = interfaces.pop(0) + else: + superCls = findClass('java/lang/Object') + superClsName = superCls.getName() + elif superCls: + superClsName = superCls.getName() + else: + superClsName = 'JObject' + + constructors = [] + for constructor in cls.getDeclaredConstructors(): + if Modifier.isPublic(constructor.getModifiers()): + if generics: + genericParams = constructor.getGenericParameterTypes() + params = constructor.getParameterTypes() + # It appears that the implicit instance-of-the-declaring-class + # parameter of a non-static inner class is missing from + # getGenericParameterTypes() + if len(params) == len(genericParams) + 1: + params[1:] = genericParams + else: + params = genericParams + if len(params) == 1: + if params[0] == cls: + continue + if ParameterizedType.instance_(params[0]): + param = ParameterizedType.cast_(params[0]) + if param.getRawType() == cls: + continue + else: + params = constructor.getParameterTypes() + if len(params) == 1 and params[0] == cls: + continue + for param in params: + if not known(param, typeset, declares, packages, excludes, + generics): + break + else: + constructors.append(constructor) + sort(constructors, key=lambda x: len(x.getParameterTypes())) + + methods = {} + protectedMethods = [] + for 
method in cls.getDeclaredMethods(): + modifiers = method.getModifiers() + if Modifier.isPublic(modifiers): + if generics: + returnType = method.getGenericReturnType() + else: + returnType = method.getReturnType() + if not known(returnType, typeset, declares, packages, excludes, + generics): + continue + sig = "%s:%s" %(method.getName(), signature(method, True)) + if sig in methods and returnType != cls: + continue + if generics: + params = method.getGenericParameterTypes() + else: + params = method.getParameterTypes() + for param in params: + if not known(param, typeset, declares, packages, excludes, + generics): + break + else: + methods[sig] = method + elif Modifier.isProtected(modifiers): + protectedMethods.append(method) + + def _compare(m0, m1): + value = cmp(m0.getName(), m1.getName()) + if value == 0: + value = len(m0.getParameterTypes()) - len(m1.getParameterTypes()) + return value + + methods = methods.values() + sort(methods, fn=_compare) + + for constructor in constructors: + if generics: + exceptions = constructor.getGenericExceptionTypes() + else: + exceptions = constructor.getExceptionTypes() + for exception in exceptions: + known(exception, typeset, declares, packages, excludes, generics) + for method in methods: + if generics: + exceptions = method.getGenericExceptionTypes() + else: + exceptions = method.getExceptionTypes() + for exception in exceptions: + known(exception, typeset, declares, packages, excludes, generics) + + fields = [] + instanceFields = [] + for field in cls.getDeclaredFields(): + modifiers = field.getModifiers() + if Modifier.isPublic(modifiers): + if generics: + fieldType = field.getGenericType() + else: + fieldType = field.getType() + if not known(fieldType, typeset, declares, packages, excludes, + generics): + continue + if Modifier.isStatic(modifiers): + fields.append(field) + else: + instanceFields.append(field) + sort(fields, key=lambda x: x.getName()) + sort(instanceFields, key=lambda x: x.getName()) + + line(out) + superNames = superClsName.split('.') + line(out, 0, '#include "%s.h"', '/'.join(superNames)) + + line(out, 0) + namespaces = {} + for declare in declares: + namespace = namespaces + if declare not in (cls, superCls): + declareNames = declare.getName().split('.') + for declareName in declareNames[:-1]: + namespace = namespace.setdefault(declareName, {}) + namespace[declareNames[-1]] = True + forward(out, namespaces, 0) + line(out, 0, 'template class JArray;') + + indent = 0; + line(out) + for name in names[:-1]: + line(out, indent, 'namespace %s {', cppname(name)) + indent += 1 + + line(out) + if superClsName == 'JObject': + line(out, indent, 'class %s%s : public JObject {', + _dll_export, cppname(names[-1])) + else: + line(out, indent, 'class %s%s : public %s {', + _dll_export, cppname(names[-1]), absname(cppnames(superNames))) + + line(out, indent, 'public:') + indent += 1 + + if methods or protectedMethods or constructors: + line(out, indent, 'enum {') + for constructor in constructors: + line(out, indent + 1, 'mid_init$_%s,', + env.strhash(signature(constructor))) + for method in methods: + line(out, indent + 1, 'mid_%s_%s,', method.getName(), + env.strhash(signature(method))) + for method in protectedMethods: + line(out, indent + 1, 'mid_%s_%s,', method.getName(), + env.strhash(signature(method))) + line(out, indent + 1, 'max_mid') + line(out, indent, '};') + + if instanceFields: + line(out) + line(out, indent, 'enum {') + for field in instanceFields: + line(out, indent + 1, 'fid_%s,', field.getName()) + line(out, indent + 1, 
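Methods are de-duplicated by a "name:signature" key before the cmp-style `_compare` above orders them by name and then by parameter count, giving the generated dispatch a deterministic overload order. A toy run of the same comparator over (name, arity) pairs, using Python 2's `cmp` and `list.sort(cmpfunc)`:

    def _compare(m0, m1):
        value = cmp(m0[0], m1[0])
        if value == 0:
            value = m0[1] - m1[1]
        return value

    methods = [('add', 2), ('clear', 0), ('add', 1)]
    methods.sort(_compare)
    assert methods == [('add', 1), ('add', 2), ('clear', 0)]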
'max_fid') + line(out, indent, '};') + + line(out) + line(out, indent, 'static ::java::lang::Class *class$;'); + line(out, indent, 'static jmethodID *mids$;'); + if instanceFields: + line(out, indent, 'static jfieldID *fids$;'); + line(out, indent, 'static jclass initializeClass();'); + line(out) + + line(out, indent, 'explicit %s(jobject obj) : %s(obj) {', + cppname(names[-1]), absname(cppnames(superNames))) + line(out, indent + 1, 'if (obj != NULL)'); + line(out, indent + 2, 'initializeClass();') + line(out, indent, '}') + line(out, indent, '%s(const %s& obj) : %s(obj) {}', + cppname(names[-1]), cppname(names[-1]), + absname(cppnames(superNames))) + + if fields: + line(out) + for field in fields: + fieldType = field.getType() + fieldName = cppname(field.getName()) + if fieldType.isPrimitive(): + line(out, indent, 'static %s %s;', + typename(fieldType, cls, False), fieldName) + else: + line(out, indent, 'static %s *%s;', + typename(fieldType, cls, False), fieldName) + + if instanceFields: + line(out) + for field in instanceFields: + fieldType = field.getType() + fieldName = field.getName() + modifiers = field.getModifiers() + line(out, indent, '%s _get_%s() const;', + typename(fieldType, cls, False), fieldName) + if not Modifier.isFinal(modifiers): + line(out, indent, 'void _set_%s(%s) const;', + fieldName, typename(fieldType, cls, True)) + + if constructors: + line(out) + for constructor in constructors: + params = [typename(param, cls, True) + for param in constructor.getParameterTypes()] + line(out, indent, '%s(%s);', cppname(names[-1]), ', '.join(params)) + + if methods: + line(out) + for method in methods: + modifiers = method.getModifiers() + if Modifier.isStatic(modifiers): + prefix = 'static ' + const = '' + else: + prefix = '' + const = ' const' + params = [typename(param, cls, True) + for param in method.getParameterTypes()] + methodName = cppname(method.getName()) + line(out, indent, '%s%s %s(%s)%s;', + prefix, typename(method.getReturnType(), cls, False), + methodName, ', '.join(params), const) + + indent -= 1 + line(out, indent, '};') + + while indent: + indent -= 1 + line(out, indent, '}') + + return (superCls, constructors, methods, protectedMethods, + fields, instanceFields, declares) + + +def code(env, out, cls, superCls, constructors, methods, protectedMethods, + fields, instanceFields, declares, typeset): + + className = cls.getName() + names = className.split('.') + + if superCls: + superClsName = superCls.getName() + else: + superClsName = 'JObject' + superNames = superClsName.split('.') + + line(out, 0, '#include ') + line(out, 0, '#include "JCCEnv.h"') + line(out, 0, '#include "%s.h"', className.replace('.', '/')) + for declare in declares: + if declare not in (cls, superCls): + line(out, 0, '#include "%s.h"', declare.getName().replace('.', '/')) + line(out, 0, '#include "JArray.h"') + + indent = 0 + line(out) + for name in names[:-1]: + line(out, indent, 'namespace %s {', cppname(name)) + indent += 1 + + line(out) + line(out, indent, '::java::lang::Class *%s::class$ = NULL;', + cppname(names[-1])) + line(out, indent, 'jmethodID *%s::mids$ = NULL;', cppname(names[-1])) + if instanceFields: + line(out, indent, 'jfieldID *%s::fids$ = NULL;', cppname(names[-1])) + + for field in fields: + fieldType = field.getType() + fieldName = cppname(field.getName()) + typeName = typename(fieldType, cls, False) + if fieldType.isPrimitive(): + line(out, indent, '%s %s::%s = (%s) 0;', + typeName, cppname(names[-1]), fieldName, typeName) + else: + line(out, indent, '%s *%s::%s = 
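Every emitted C++ statement in this file goes through the `line()` helper imported from cpp.py. Judging from its call sites, `line(out)` prints an empty line and `line(out, indent, fmt, *args)` prints an indented, %-formatted one; a reconstruction along those lines (not the verbatim cpp.py definition) would be:

    INDENT = '    '  # assumed value; cpp.py exports INDENT and HALF_INDENT

    def line(out, indent=0, string='', *args):
        out.write(INDENT * indent)  # indentation of the generated C++
        out.write(string % args)    # printf-style interpolation
        out.write('\n')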
NULL;', + typeName, cppname(names[-1]), fieldName) + + line(out) + line(out, indent, 'jclass %s::initializeClass()', cppname(names[-1])) + line(out, indent, '{') + line(out, indent + 1, 'if (!class$)') + line(out, indent + 1, '{') + line(out) + line(out, indent + 2, 'jclass cls = (jclass) env->findClass("%s");', + className.replace('.', '/')) + + if methods or protectedMethods or constructors: + line(out) + line(out, indent + 2, 'mids$ = new jmethodID[max_mid];') + for constructor in constructors: + sig = signature(constructor) + line(out, indent + 2, + 'mids$[mid_init$_%s] = env->getMethodID(cls, "", "%s");', + env.strhash(sig), sig) + isExtension = False + for method in methods: + methodName = method.getName() + if methodName == 'pythonExtension': + isExtension = True + sig = signature(method) + line(out, indent + 2, + 'mids$[mid_%s_%s] = env->get%sMethodID(cls, "%s", "%s");', + methodName, env.strhash(sig), + Modifier.isStatic(method.getModifiers()) and 'Static' or '', + methodName, sig) + for method in protectedMethods: + methodName = method.getName() + sig = signature(method) + line(out, indent + 2, + 'mids$[mid_%s_%s] = env->get%sMethodID(cls, "%s", "%s");', + methodName, env.strhash(sig), + Modifier.isStatic(method.getModifiers()) and 'Static' or '', + methodName, sig) + + if instanceFields: + line(out) + line(out, indent + 2, 'fids$ = new jfieldID[max_fid];') + for field in instanceFields: + fieldName = field.getName() + line(out, indent + 2, + 'fids$[fid_%s] = env->getFieldID(cls, "%s", "%s");', + fieldName, fieldName, signature(field)) + + line(out) + line(out, indent + 2, 'class$ = (::java::lang::Class *) new JObject(cls);') + + if fields: + line(out, indent + 2, 'cls = (jclass) class$->this$;') + line(out) + for field in fields: + fieldType = field.getType() + fieldName = field.getName() + if fieldType.isPrimitive(): + line(out, indent + 2, + '%s = env->getStatic%sField(cls, "%s");', + cppname(fieldName), fieldType.getName().capitalize(), + fieldName) + else: + line(out, indent + 2, + '%s = new %s(env->getStaticObjectField(cls, "%s", "%s"));', + cppname(fieldName), typename(fieldType, cls, False), + fieldName, signature(field)) + + line(out, indent + 1, '}') + line(out, indent + 1, 'return (jclass) class$->this$;') + line(out, indent, '}') + + for constructor in constructors: + line(out) + sig = signature(constructor) + decls, args = argnames(constructor.getParameterTypes(), cls) + + line(out, indent, "%s::%s(%s) : %s(env->newObject(initializeClass, &mids$, mid_init$_%s%s)) {}", + cppname(names[-1]), cppname(names[-1]), decls, + absname(cppnames(superNames)), + env.strhash(sig), args) + + for method in methods: + modifiers = method.getModifiers() + returnType = method.getReturnType() + params = method.getParameterTypes() + methodName = method.getName() + superMethod = None + isStatic = Modifier.isStatic(modifiers) + + if (isExtension and not isStatic and superCls and + Modifier.isNative(modifiers)): + superMethod = find_method(superCls, methodName, params) + if superMethod is None: + continue + + if isStatic: + qualifier = 'Static' + this = 'cls' + midns = '' + const = '' + else: + isStatic = False + if superMethod is not None: + qualifier = 'Nonvirtual' + this = 'this$, (jclass) %s::class$->this$' %(absname(cppnames(superNames))) + declaringClass = superMethod.getDeclaringClass() + midns = '%s::' %(typename(declaringClass, cls, False)) + else: + qualifier = '' + this = 'this$' + midns = '' + const = ' const' + + sig = signature(method) + decls, args = argnames(params, cls) + + 
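The generated `initializeClass()` runs its body only once per class: it resolves the `jclass`, fills `mids$` with every constructor and method ID, `fids$` with field IDs, and snapshots static field values, so later calls are simple array lookups instead of repeated JNI queries. The same memoization pattern, sketched in Python with a hypothetical `lookup` standing in for the JNI query:

    _mids = {}

    def get_method_id(cls, name, sig):
        # resolve each (class, name, signature) once, then reuse the ID
        key = (cls, name, sig)
        if key not in _mids:
            _mids[key] = lookup(cls, name, sig)  # hypothetical JNI call
        return _mids[key]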
line(out) + line(out, indent, '%s %s::%s(%s)%s', + typename(returnType, cls, False), cppname(names[-1]), + cppname(methodName), decls, const) + line(out, indent, '{') + if isStatic: + line(out, indent + 1, 'jclass cls = initializeClass();'); + if returnType.isPrimitive(): + line(out, indent + 1, + '%senv->call%s%sMethod(%s, %smids$[%smid_%s_%s]%s);', + not returnType.getName() == 'void' and 'return ' or '', + qualifier, returnType.getName().capitalize(), this, + midns, midns, methodName, env.strhash(sig), args) + else: + line(out, indent + 1, + 'return %s(env->call%sObjectMethod(%s, %smids$[%smid_%s_%s]%s));', + typename(returnType, cls, False), qualifier, this, + midns, midns, methodName, env.strhash(sig), args) + line(out, indent, '}') + + if instanceFields: + for field in instanceFields: + fieldType = field.getType() + fieldName = field.getName() + line(out) + line(out, indent, '%s %s::_get_%s() const', + typename(fieldType, cls, False), cppname(names[-1]), fieldName) + line(out, indent, '{') + if fieldType.isPrimitive(): + line(out, indent + 1, + 'return env->get%sField(this$, fids$[fid_%s]);', + fieldType.getName().capitalize(), fieldName) + else: + line(out, indent + 1, + 'return %s(env->getObjectField(this$, fids$[fid_%s]));', + typename(fieldType, cls, False), fieldName) + line(out, indent, '}') + + if not Modifier.isFinal(field.getModifiers()): + line(out) + line(out, indent, 'void %s::_set_%s(%s a0) const', + cppname(names[-1]), fieldName, + typename(fieldType, cls, True)) + line(out, indent, '{') + if fieldType.isPrimitive(): + line(out, indent + 1, + 'env->set%sField(this$, fids$[fid_%s], a0);', + fieldType.getName().capitalize(), fieldName) + else: + line(out, indent + 1, + 'env->setObjectField(this$, fids$[fid_%s], a0.this$);', + fieldName) + line(out, indent, '}') + + while indent: + indent -= 1 + line(out, indent, '}') + + return names, superNames + + +if __name__ == '__main__': + jcc(sys.argv) diff --git a/jcc/jcc/patches/patch.4195 b/jcc/jcc/patches/patch.4195 new file mode 100644 index 0000000..88b8088 --- /dev/null +++ b/jcc/jcc/patches/patch.4195 @@ -0,0 +1,18 @@ +--- runpy.py.old 2008-10-19 16:02:18.000000000 -0700 ++++ runpy.py 2008-10-19 16:13:44.000000000 -0700 +@@ -79,10 +79,13 @@ + loader = get_loader(mod_name) + if loader is None: + raise ImportError("No module named %s" % mod_name) + if loader.is_package(mod_name): +- raise ImportError(("%s is a package and cannot " + +- "be directly executed") % mod_name) ++ try: ++ return _get_module_details('.'.join((mod_name, '__main__'))) ++ except ImportError, e: ++ raise ImportError(("%s; %s is a package and cannot " + ++ "be directly executed") %(e, mod_name)) + code = loader.get_code(mod_name) + if code is None: + raise ImportError("No code object available for %s" % mod_name) + filename = _get_filename(loader, mod_name) diff --git a/jcc/jcc/patches/patch.43.0.6c11 b/jcc/jcc/patches/patch.43.0.6c11 new file mode 100644 index 0000000..a81cbfe --- /dev/null +++ b/jcc/jcc/patches/patch.43.0.6c11 @@ -0,0 +1,135 @@ +Index: setuptools/extension.py +=================================================================== +--- setuptools/extension.py (revision 75864) ++++ setuptools/extension.py (working copy) +@@ -28,6 +28,11 @@ + class Library(Extension): + """Just like a regular Extension, but built as a library instead""" + ++ def __init__(self, *args, **kwds): ++ self.force_shared = kwds.pop('force_shared', False) ++ Extension.__init__(self, *args, **kwds) ++ ++ + import sys, distutils.core, distutils.extension + 
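patch.4195 above backports the later CPython behavior in which `python -m somepackage` falls back to the package's `__main__` submodule instead of raising ImportError; it is what makes `python -m jcc` usable on interpreters that predate this feature. The same effect through the runpy API:

    import runpy

    # with the patch applied (or on a Python that already has the
    # fallback), running a package executes its __main__ submodule:
    runpy.run_module('jcc', run_name='__main__', alter_sys=True)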
distutils.core.Extension = Extension + distutils.extension.Extension = Extension +Index: setuptools/command/build_ext.py +=================================================================== +--- setuptools/command/build_ext.py (revision 75864) ++++ setuptools/command/build_ext.py (working copy) +@@ -85,8 +85,12 @@ + if fullname in self.ext_map: + ext = self.ext_map[fullname] + if isinstance(ext,Library): ++ if ext.force_shared and not use_stubs: ++ _libtype = 'shared' ++ else: ++ _libtype = libtype + fn, ext = os.path.splitext(filename) +- return self.shlib_compiler.library_filename(fn,libtype) ++ return self.shlib_compiler.library_filename(fn,_libtype) + elif use_stubs and ext._links_to_dynamic: + d,fn = os.path.split(filename) + return os.path.join(d,'dl-'+fn) +@@ -170,14 +174,22 @@ + def build_extension(self, ext): + _compiler = self.compiler + try: ++ force_shared = False + if isinstance(ext,Library): + self.compiler = self.shlib_compiler ++ force_shared = ext.force_shared and not use_stubs ++ if force_shared: ++ self.compiler.link_shared_object = \ ++ sh_link_shared_object.__get__(self.compiler) + _build_ext.build_extension(self,ext) + if ext._needs_stub: + self.write_stub( + self.get_finalized_command('build_py').build_lib, ext + ) + finally: ++ if force_shared: ++ self.compiler.link_shared_object = \ ++ link_shared_object.__get__(self.compiler) + self.compiler = _compiler + + def links_to_dynamic(self, ext): +@@ -244,44 +256,41 @@ + os.unlink(stub_file) + + +-if use_stubs or os.name=='nt': +- # Build shared libraries +- # +- def link_shared_object(self, objects, output_libname, output_dir=None, +- libraries=None, library_dirs=None, runtime_library_dirs=None, +- export_symbols=None, debug=0, extra_preargs=None, +- extra_postargs=None, build_temp=None, target_lang=None +- ): self.link( +- self.SHARED_LIBRARY, objects, output_libname, +- output_dir, libraries, library_dirs, runtime_library_dirs, +- export_symbols, debug, extra_preargs, extra_postargs, +- build_temp, target_lang +- ) +-else: +- # Build static libraries everywhere else +- libtype = 'static' ++def sh_link_shared_object(self, objects, output_libname, output_dir=None, ++ libraries=None, library_dirs=None, runtime_library_dirs=None, ++ export_symbols=None, debug=0, extra_preargs=None, ++ extra_postargs=None, build_temp=None, target_lang=None ++): self.link(self.SHARED_LIBRARY, objects, output_libname, ++ output_dir, libraries, library_dirs, runtime_library_dirs, ++ export_symbols, debug, extra_preargs, extra_postargs, ++ build_temp, target_lang) + +- def link_shared_object(self, objects, output_libname, output_dir=None, +- libraries=None, library_dirs=None, runtime_library_dirs=None, +- export_symbols=None, debug=0, extra_preargs=None, +- extra_postargs=None, build_temp=None, target_lang=None +- ): +- # XXX we need to either disallow these attrs on Library instances, +- # or warn/abort here if set, or something... +- #libraries=None, library_dirs=None, runtime_library_dirs=None, +- #export_symbols=None, extra_preargs=None, extra_postargs=None, +- #build_temp=None ++def st_link_shared_object(self, objects, output_libname, output_dir=None, ++ libraries=None, library_dirs=None, runtime_library_dirs=None, ++ export_symbols=None, debug=0, extra_preargs=None, ++ extra_postargs=None, build_temp=None, target_lang=None ++): ++ # XXX we need to either disallow these attrs on Library instances, ++ # or warn/abort here if set, or something... 
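Both setuptools patches add a `force_shared` keyword to `Library` and teach `build_ext` to honor it, so a jcc-built `Library` can be linked as a genuine shared object even where setuptools would otherwise emit a static library (`use_stubs` false). Hypothetical use in a setup script:

    from setuptools import setup
    from setuptools.extension import Library

    setup(name='example',
          ext_modules=[Library('jcc', sources=['jcc.cpp'],
                               force_shared=True)])  # keyword added by this patch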
++ #libraries=None, library_dirs=None, runtime_library_dirs=None, ++ #export_symbols=None, extra_preargs=None, extra_postargs=None, ++ #build_temp=None + +- assert output_dir is None # distutils build_ext doesn't pass this +- output_dir,filename = os.path.split(output_libname) +- basename, ext = os.path.splitext(filename) +- if self.library_filename("x").startswith('lib'): +- # strip 'lib' prefix; this is kludgy if some platform uses +- # a different prefix +- basename = basename[3:] ++ assert output_dir is None # distutils build_ext doesn't pass this ++ output_dir,filename = os.path.split(output_libname) ++ basename, ext = os.path.splitext(filename) ++ if self.library_filename("x").startswith('lib'): ++ # strip 'lib' prefix; this is kludgy if some platform uses ++ # a different prefix ++ basename = basename[3:] + +- self.create_static_lib( +- objects, basename, output_dir, debug, target_lang +- ) ++ self.create_static_lib(objects, basename, output_dir, debug, target_lang) + + ++if use_stubs or os.name=='nt': ++ # Build shared libraries ++ link_shared_object = sh_link_shared_object ++else: ++ # Build static libraries everywhere else (unless force_shared) ++ libtype = 'static' ++ link_shared_object = st_link_shared_object diff --git a/jcc/jcc/patches/patch.43.0.6c7 b/jcc/jcc/patches/patch.43.0.6c7 new file mode 100644 index 0000000..f34d7c2 --- /dev/null +++ b/jcc/jcc/patches/patch.43.0.6c7 @@ -0,0 +1,135 @@ +Index: setuptools/extension.py +=================================================================== +--- setuptools/extension.py (revision 66382) ++++ setuptools/extension.py (working copy) +@@ -28,6 +28,11 @@ + class Library(Extension): + """Just like a regular Extension, but built as a library instead""" + ++ def __init__(self, *args, **kwds): ++ self.force_shared = kwds.pop('force_shared', False) ++ Extension.__init__(self, *args, **kwds) ++ ++ + import sys, distutils.core, distutils.extension + distutils.core.Extension = Extension + distutils.extension.Extension = Extension +Index: setuptools/command/build_ext.py +=================================================================== +--- setuptools/command/build_ext.py (revision 66382) ++++ setuptools/command/build_ext.py (working copy) +@@ -84,8 +84,12 @@ + filename = _build_ext.get_ext_filename(self,fullname) + ext = self.ext_map[fullname] + if isinstance(ext,Library): ++ if ext.force_shared and not use_stubs: ++ _libtype = 'shared' ++ else: ++ _libtype = libtype + fn, ext = os.path.splitext(filename) +- return self.shlib_compiler.library_filename(fn,libtype) ++ return self.shlib_compiler.library_filename(fn,_libtype) + elif use_stubs and ext._links_to_dynamic: + d,fn = os.path.split(filename) + return os.path.join(d,'dl-'+fn) +@@ -170,14 +174,22 @@ + def build_extension(self, ext): + _compiler = self.compiler + try: ++ force_shared = False + if isinstance(ext,Library): + self.compiler = self.shlib_compiler ++ force_shared = ext.force_shared and not use_stubs ++ if force_shared: ++ self.compiler.link_shared_object = \ ++ sh_link_shared_object.__get__(self.compiler) + _build_ext.build_extension(self,ext) + if ext._needs_stub: + self.write_stub( + self.get_finalized_command('build_py').build_lib, ext + ) + finally: ++ if force_shared: ++ self.compiler.link_shared_object = \ ++ link_shared_object.__get__(self.compiler) + self.compiler = _compiler + + def links_to_dynamic(self, ext): +@@ -244,44 +256,41 @@ + os.unlink(stub_file) + + +-if use_stubs or os.name=='nt': +- # Build shared libraries +- # +- def link_shared_object(self, 
objects, output_libname, output_dir=None, +- libraries=None, library_dirs=None, runtime_library_dirs=None, +- export_symbols=None, debug=0, extra_preargs=None, +- extra_postargs=None, build_temp=None, target_lang=None +- ): self.link( +- self.SHARED_LIBRARY, objects, output_libname, +- output_dir, libraries, library_dirs, runtime_library_dirs, +- export_symbols, debug, extra_preargs, extra_postargs, +- build_temp, target_lang +- ) +-else: +- # Build static libraries everywhere else +- libtype = 'static' ++def sh_link_shared_object(self, objects, output_libname, output_dir=None, ++ libraries=None, library_dirs=None, runtime_library_dirs=None, ++ export_symbols=None, debug=0, extra_preargs=None, ++ extra_postargs=None, build_temp=None, target_lang=None ++): self.link(self.SHARED_LIBRARY, objects, output_libname, ++ output_dir, libraries, library_dirs, runtime_library_dirs, ++ export_symbols, debug, extra_preargs, extra_postargs, ++ build_temp, target_lang) + +- def link_shared_object(self, objects, output_libname, output_dir=None, +- libraries=None, library_dirs=None, runtime_library_dirs=None, +- export_symbols=None, debug=0, extra_preargs=None, +- extra_postargs=None, build_temp=None, target_lang=None +- ): +- # XXX we need to either disallow these attrs on Library instances, +- # or warn/abort here if set, or something... +- #libraries=None, library_dirs=None, runtime_library_dirs=None, +- #export_symbols=None, extra_preargs=None, extra_postargs=None, +- #build_temp=None ++def st_link_shared_object(self, objects, output_libname, output_dir=None, ++ libraries=None, library_dirs=None, runtime_library_dirs=None, ++ export_symbols=None, debug=0, extra_preargs=None, ++ extra_postargs=None, build_temp=None, target_lang=None ++): ++ # XXX we need to either disallow these attrs on Library instances, ++ # or warn/abort here if set, or something... ++ #libraries=None, library_dirs=None, runtime_library_dirs=None, ++ #export_symbols=None, extra_preargs=None, extra_postargs=None, ++ #build_temp=None + +- assert output_dir is None # distutils build_ext doesn't pass this +- output_dir,filename = os.path.split(output_libname) +- basename, ext = os.path.splitext(filename) +- if self.library_filename("x").startswith('lib'): +- # strip 'lib' prefix; this is kludgy if some platform uses +- # a different prefix +- basename = basename[3:] ++ assert output_dir is None # distutils build_ext doesn't pass this ++ output_dir,filename = os.path.split(output_libname) ++ basename, ext = os.path.splitext(filename) ++ if self.library_filename("x").startswith('lib'): ++ # strip 'lib' prefix; this is kludgy if some platform uses ++ # a different prefix ++ basename = basename[3:] + +- self.create_static_lib( +- objects, basename, output_dir, debug, target_lang +- ) ++ self.create_static_lib(objects, basename, output_dir, debug, target_lang) + + ++if use_stubs or os.name=='nt': ++ # Build shared libraries ++ link_shared_object = sh_link_shared_object ++else: ++ # Build static libraries everywhere else (unless force_shared) ++ libtype = 'static' ++ link_shared_object = st_link_shared_object diff --git a/jcc/jcc/python.py b/jcc/jcc/python.py new file mode 100644 index 0000000..e74ce55 --- /dev/null +++ b/jcc/jcc/python.py @@ -0,0 +1,1850 @@ +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os, sys, platform, shutil, _jcc +from itertools import izip + +from cpp import PRIMITIVES, INDENT, HALF_INDENT +from cpp import cppname, cppnames, absname, typename, findClass +from cpp import line, signature, find_method, split_pkg, sort +from cpp import Modifier, Class, Method +from config import INCLUDES, CFLAGS, DEBUG_CFLAGS, LFLAGS, IMPLIB_LFLAGS, \ + SHARED, VERSION as JCC_VER + +try: + from cpp import ParameterizedType, TypeVariable +except ImportError: + pass + +python_ver = '%d.%d.%d' %(sys.version_info[0:3]) +if python_ver < '2.4': + from sets import Set as set + + +RESULTS = { 'boolean': 'Py_RETURN_BOOL(%s);', + 'byte': 'return PyInt_FromLong((long) %s);', + 'char': 'return PyUnicode_FromUnicode((Py_UNICODE *) &%s, 1);', + 'double': 'return PyFloat_FromDouble((double) %s);', + 'float': 'return PyFloat_FromDouble((double) %s);', + 'int': 'return PyInt_FromLong((long) %s);', + 'long': 'return PyLong_FromLongLong((PY_LONG_LONG) %s);', + 'short': 'return PyInt_FromLong((long) %s);', + 'java.lang.String': 'return j2p(%s);' } + +CALLARGS = { 'boolean': ('O', '(%s ? Py_True : Py_False)', False), + 'byte': ('O', 'PyInt_FromLong(%s)', True), + 'char': ('O', 'PyUnicode_FromUnicode((Py_UNICODE *) &%s, 1)', True), + 'double': ('d', '(double) %s', False), + 'float': ('f', '(float) %s', False), + 'int': ('i', '(int) %s', False), + 'long': ('L', '(long long) %s', False), + 'short': ('i', '(int) %s', False), + 'java.lang.String': ('O', 'env->fromJString((jstring) %s, 0)', True) } + +BOXED = { 'java.lang.Boolean': (True, True), + 'java.lang.Byte': (True, True), + 'java.lang.Character': (True, True), + 'java.lang.CharSequence': (True, False), + 'java.lang.Double': (True, True), + 'java.lang.Float': (True, True), + 'java.lang.Integer': (True, True), + 'java.lang.Long': (True, True), + 'java.lang.Number': (True, False), + 'java.lang.Short': (True, True), + 'java.lang.String': (True, True) } + + +def is_boxed(clsName): + return BOXED.get(clsName, (False, False))[0] + +def is_unboxed(clsName): + return BOXED.get(clsName, (False, False))[1] + + +def getTypeParameters(cls): + + while True: + parameters = cls.getTypeParameters() + if parameters: + return parameters + cls = cls.getDeclaringClass() + if cls is None: + return [] + + +def getActualTypeArguments(pt): + + while True: + arguments = pt.getActualTypeArguments() + if arguments: + return arguments + pt = pt.getOwnerType() + if pt is None or not ParameterizedType.instance_(pt): + return [] + pt = ParameterizedType.cast_(pt) + + +def parseArgs(params, current, generics, genericParams=None): + + def signature(cls, genericPT=None): + if generics and TypeVariable.instance_(genericPT): + if cls.getName() == 'java.lang.Object': + gd = TypeVariable.cast_(genericPT).getGenericDeclaration() + if gd == current: + for clsParam in getTypeParameters(gd): + if genericPT == clsParam: + return 'O' + array = '' + while cls.isArray(): + array += '[' + cls = cls.getComponentType() + clsName = cls.getName() + if cls.isPrimitive(): + return array + PRIMITIVES[clsName] + if clsName == 'java.lang.String': + return array + 's' + if clsName == 
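Inside `parseArgs()`, `signature()` assigns one format letter per Java parameter for the generated `parseArgs`/`parseArg` calls: primitives use their letters from `cpp.PRIMITIVES`, `'s'` marks java.lang.String, `'o'` java.lang.Object, `'O'` a boxed or type-variable value, `'k'`/`'K'` any other known class, and each array dimension prefixes `'['`. A toy encoder for the simple, non-generic cases, assuming PRIMITIVES carries the usual JNI letters:

    PRIMITIVES = {'boolean': 'Z', 'byte': 'B', 'char': 'C', 'double': 'D',
                  'float': 'F', 'int': 'I', 'long': 'J', 'short': 'S'}

    def toy_letter(clsName, array=0):
        if clsName in PRIMITIVES:
            letter = PRIMITIVES[clsName]
        elif clsName == 'java.lang.String':
            letter = 's'
        elif clsName == 'java.lang.Object':
            letter = 'o'
        else:
            letter = 'k'  # 'O'/'K' variants for boxed/generic types omitted
        return '[' * array + letter

    # a (String, int, Object[]) parameter list encodes as "sI[o"
    assert toy_letter('java.lang.String') + toy_letter('int') + \
           toy_letter('java.lang.Object', array=1) == 'sI[o'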
'java.lang.Object': + return array + 'o' + if is_boxed(clsName): + return array + 'O' + if generics and getTypeParameters(cls): + return array + 'K' + else: + return array + 'k' + + def checkarg(cls, genericPT=None): + if generics and TypeVariable.instance_(genericPT): + if cls.getName() == 'java.lang.Object': + gd = TypeVariable.cast_(genericPT).getGenericDeclaration() + if gd == current: + i = 0 + for clsParam in getTypeParameters(gd): + if genericPT == clsParam: + return ', self->parameters[%d]' %(i) + i += 1 + while cls.isArray(): + cls = cls.getComponentType() + clsName = cls.getName() + if (cls.isPrimitive() or + clsName in ('java.lang.String', 'java.lang.Object')): + return '' + if is_boxed(clsName): + clsNames = clsName.split('.') + return ', &%s::PY_TYPE(%s)' %(absname(cppnames(clsNames[:-1])), cppname(clsNames[-1])) + return ', %s::initializeClass' %(typename(cls, current, False)) + + def callarg(cls, i): + if generics: + while cls.isArray(): + cls = cls.getComponentType() + if getTypeParameters(cls): + ns, sep, n = rpartition(typename(cls, current, False), '::') + return ', &a%d, &p%d, %s%st_%s::parameters_' %(i, i, ns, sep, n) + return ', &a%d' %(i) + + if genericParams: + sig = ''.join([signature(param, genericParam) + for param, genericParam in izip(params, genericParams)]) + chk = ''.join([checkarg(param, genericParam) + for param, genericParam in izip(params, genericParams)]) + else: + sig = ''.join([signature(param) for param in params]) + chk = ''.join([checkarg(param) for param in params]) + + return (sig, chk, + ''.join([callarg(params[i], i) for i in xrange(len(params))])) + + +def declareVars(out, indent, params, current, generics, typeParams): + + for i in xrange(len(params)): + param = params[i] + line(out, indent, '%s a%d%s;', + typename(param, current, False), i, + not param.isPrimitive() and '((jobject) NULL)' or '') + if generics: + while param.isArray(): + param = param.getComponentType() + if getTypeParameters(param): + line(out, indent, 'PyTypeObject **p%d;', i) + typeParams.add(i) + + +def construct(out, indent, cls, inCase, constructor, names, generics): + + if inCase: + line(out, indent, '{') + indent += 1 + + params = constructor.getParameterTypes() + if generics: + typeParams = set() + else: + typeParams = None + + count = len(params) + + declareVars(out, indent, params, cls, generics, typeParams) + line(out, indent, '%s object((jobject) NULL);', cppname(names[-1])) + + line(out) + if count: + line(out, indent, 'if (!parseArgs(args, "%s"%s%s))', + *parseArgs(params, cls, generics)) + line(out, indent, '{') + indent += 1 + + line(out, indent, 'INT_CALL(object = %s(%s));', + cppname(names[-1]), ', '.join(['a%d' %(i) for i in xrange(count)])) + line(out, indent, 'self->object = object;') + if inCase: + line(out, indent, 'break;') + + if count: + indent -= 1 + line(out, indent, '}') + + if inCase: + indent -= 1 + line(out, indent, '}') + + +def rpartition(string, sep): + + if python_ver >= '2.5.0': + return string.rpartition(sep) + else: + parts = split_pkg(string, sep) + if len(parts) == 1: + return ('', '', parts[0]) + return (parts[0], sep, parts[1]) + + +def fieldValue(cls, value, fieldType): + + if fieldType.isArray(): + fieldType = fieldType.getComponentType() + if fieldType.isArray(): + result = 'JArray(%s->this$).wrap(NULL)' + elif fieldType.isPrimitive(): + result = '%s->wrap()' + elif fieldType.getName() == 'java.lang.String': + result = 'JArray(%s->this$).wrap()' + else: + parts = rpartition(typename(fieldType, cls, False), '::') + result = 
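`rpartition()` above backports `str.rpartition()` (added in Python 2.5) for the C++ namespace splits used throughout this module; splitting a qualified name on '::' yields (namespace, separator, leaf), with empty strings when no separator occurs:

    assert 'java::lang::String'.rpartition('::') == ('java::lang', '::', 'String')
    assert 'String'.rpartition('::') == ('', '', 'String')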
'JArray(%%s->this$).wrap(%s%st_%s::wrap_jobject)' %(parts) + + elif fieldType.getName() == 'java.lang.String': + result = 'j2p(*%s)' + + elif not fieldType.isPrimitive(): + parts = rpartition(typename(fieldType, cls, False), '::') + result = '%s%st_%s::wrap_Object(*%%s)' %(parts) + + else: + return value + + return result %(value) + + +def returnValue(cls, returnType, value, genericRT=None, typeParams=None): + + result = RESULTS.get(returnType.getName()) + if result: + return result %(value) + + if returnType.isArray(): + returnType = returnType.getComponentType() + depth = 1 + while returnType.isArray(): + returnType = returnType.getComponentType() + depth += 1 + if depth > 1: + return 'return JArray(%s.this$).wrap(NULL);' %(value) + elif returnType.isPrimitive(): + return 'return %s.wrap();' %(value) + elif returnType.getName() == 'java.lang.String': + return 'return JArray(%s.this$).wrap();' %(value) + + ns, sep, n = rpartition(typename(returnType, cls, False), '::') + return 'return JArray(%s.this$).wrap(%s%st_%s::wrap_jobject);' %(value, ns, sep, n) + + ns, sep, n = rpartition(typename(returnType, cls, False), '::') + if genericRT is not None: + if ParameterizedType.instance_(genericRT): + genericRT = ParameterizedType.cast_(genericRT) + clsArgs = [] + for clsArg in getActualTypeArguments(genericRT): + if Class.instance_(clsArg): + clsNames = Class.cast_(clsArg).getName().split('.') + clsArg = '&%s::PY_TYPE(%s)' %(absname(cppnames(clsNames[:-1])), cppname(clsNames[-1])) + clsArgs.append(clsArg) + elif TypeVariable.instance_(clsArg): + gd = TypeVariable.cast_(clsArg).getGenericDeclaration() + if Class.instance_(gd): + i = 0 + for clsParam in getTypeParameters(gd): + if clsArg == clsParam: + clsArgs.append('self->parameters[%d]' %(i)) + break + i += 1 + else: + break + else: + break + else: + break + else: + return 'return %s%st_%s::wrap_Object(%s, %s);' %(ns, sep, n, value, ', '.join(clsArgs)) + + elif TypeVariable.instance_(genericRT): + gd = TypeVariable.cast_(genericRT).getGenericDeclaration() + i = 0 + if Class.instance_(gd): + for clsParam in getTypeParameters(gd): + if genericRT == clsParam: + return 'return self->parameters[%d] != NULL ? wrapType(self->parameters[%d], %s.this$) : %s%st_%s::wrap_Object(%s);' %(i, i, value, ns, sep, n, value) + i += 1 + elif Method.instance_(gd): + for clsParam in getTypeParameters(gd): + if genericRT == clsParam and i in typeParams: + return 'return p%d != NULL && p%d[0] != NULL ? 
wrapType(p%d[0], %s.this$) : %s%st_%s::wrap_Object(%s);' %(i, i, i, value, ns, sep, n, value) + i += 1 + + return 'return %s%st_%s::wrap_Object(%s);' %(ns, sep, n, value) + + +def call(out, indent, cls, inCase, method, names, cardinality, isExtension, + generics): + + if inCase: + line(out, indent, '{') + indent += 1 + + name = method.getName() + modifiers = method.getModifiers() + params = method.getParameterTypes() + returnType = method.getReturnType() + if generics: + genericRT = method.getGenericReturnType() + genericParams = method.getGenericParameterTypes() + typeParams = set() + else: + genericRT = None + genericParams = None + typeParams = None + count = len(params) + + declareVars(out, indent, params, cls, generics, typeParams) + + returnName = returnType.getName() + if returnName != 'void': + line(out, indent, '%s result%s;', + typename(returnType, cls, False), + not returnType.isPrimitive() and '((jobject) NULL)' or '') + result = 'result = ' + else: + result = '' + + if cardinality and (count or not inCase): + s = cardinality > 1 and 's' or '' + line(out) + if isExtension and name == 'clone' and Modifier.isNative(modifiers): + line(out, indent, 'if (arg)') + else: + line(out, indent, 'if (!parseArg%s(arg%s, "%s"%s%s))', + s, s, *parseArgs(params, cls, generics, genericParams)) + line(out, indent, '{') + indent += 1 + + name = cppname(name) + if Modifier.isStatic(modifiers): + line(out, indent, 'OBJ_CALL(%s%s::%s(%s));', + result, absname(cppnames(names)), name, + ', '.join(['a%d' %(i) for i in xrange(count)])) + else: + line(out, indent, 'OBJ_CALL(%sself->object.%s(%s));', + result, name, ', '.join(['a%d' %(i) for i in xrange(count)])) + + if isExtension and name == 'clone' and Modifier.isNative(modifiers): + line(out) + line(out, indent, '%s object(result.this$);', typename(cls, cls, False)) + line(out, indent, 'if (PyObject_TypeCheck(arg, &PY_TYPE(FinalizerProxy)) &&') + line(out, indent, ' PyObject_TypeCheck(((t_fp *) arg)->object, self->ob_type))') + line(out, indent, '{') + line(out, indent + 1, 'PyObject *_arg = ((t_fp *) arg)->object;') + line(out, indent + 1, '((t_JObject *) _arg)->object = object;') + line(out, indent + 1, 'Py_INCREF(_arg);') + line(out, indent + 1, 'object.pythonExtension((jlong) (Py_intptr_t) (void *) _arg);') + line(out, indent + 1, 'Py_INCREF(arg);') + line(out, indent + 1, 'return arg;') + line(out, indent, '}') + line(out, indent, 'return PyErr_SetArgsError("%s", arg);' %(name)) + elif returnName != 'void': + line(out, indent, returnValue(cls, returnType, 'result', + genericRT, typeParams)) + else: + line(out, indent, 'Py_RETURN_NONE;') + if cardinality and (count or not inCase): + indent -= 1 + line(out, indent, '}') + + if inCase: + indent -= 1 + line(out, indent, '}') + + +def methodargs(methods, superMethods): + + if len(methods) == 1 and methods[0].getName() not in superMethods: + count = len(methods[0].getParameterTypes()) + if count == 0: + return '', '', 0 + elif count == 1: + return ', PyObject *arg', ', arg', 1 + + return ', PyObject *args', ', args', 2 + + +def jniname(cls): + + if cls.isPrimitive(): + name = cls.getName() + if name != 'void': + name = 'j' + name + else: + name = 'jobject' + + return name + + +def jniargs(params): + + count = len(params) + decls = ', '.join(['%s a%d' %(jniname(params[i]), i) + for i in xrange(count)]) + if decls: + return ', ' + decls + + return '' + + +def extension(env, out, indent, cls, names, name, count, method, generics): + + line(out, indent, 'jlong ptr = jenv->CallLongMethod(jobj, 
%s::mids$[%s::mid_pythonExtension_%s]);', + cppname(names[-1]), cppname(names[-1]), env.strhash('()J')) + line(out, indent, 'PyObject *obj = (PyObject *) (Py_intptr_t) ptr;') + + if name == 'pythonDecRef': + line(out) + line(out, indent, 'if (obj != NULL)') + line(out, indent, '{') + line(out, indent + 1, 'jenv->CallVoidMethod(jobj, %s::mids$[%s::mid_pythonExtension_%s], (jlong) 0);', + cppname(names[-1]), cppname(names[-1]), env.strhash('(J)V')) + line(out, indent + 1, 'env->finalizeObject(jenv, obj);') + line(out, indent, '}') + return + + line(out, indent, 'PythonGIL gil(jenv);') + + returnType = method.getReturnType() + returnName = returnType.getName() + if returnName != 'void': + line(out, indent, '%s value%s;', + typename(returnType, cls, False), + not returnType.isPrimitive() and '((jobject) NULL)' or '') + + sigs = [] + decrefs = [] + args = [] + i = 0 + for param in method.getParameterTypes(): + typeName = param.getName() + if typeName in CALLARGS: + sig, code, decref = CALLARGS[typeName] + elif param.isArray(): + param = param.getComponentType() + if param.isPrimitive(): + code = 'JArray(%%s).wrap()' %(param.getName()) + elif param.isArray(): + code = 'JArray(%s).wrap(NULL)' + elif param.getName() == 'java.lang.String': + code = 'JArray(%s).wrap()' + else: + parts = rpartition(typename(param, cls, False), '::') + code = 'JArray(%%s).wrap(%s%st_%s::wrap_jobject)' %(parts) + sig, decref = 'O', True + elif param.getName() == 'java.lang.String': + sig, code, decref = 'O', 'j2p(%%s))', True + else: + parts = rpartition(typename(param, cls, False), '::') + sig, code, decref = 'O', '%s%st_%s::wrap_Object(%s%s%s(%%s))' %(parts*2), True + if sig == 'O': + line(out, indent, 'PyObject *o%d = %s;', i, code %('a%d' %(i))) + args.append('o%d' %(i)) + else: + args.append(code %('a%d' %(i))) + sigs.append(sig) + decrefs.append(decref) + i += 1 + + args = ', '.join(args) + if args: + args = ', ' + args + line(out, indent, 'PyObject *result = PyObject_CallMethod(obj, "%s", "%s"%s);', + name, ''.join(sigs), args) + i = 0 + for decref in decrefs: + if decref: + line(out, indent, 'Py_DECREF(o%d);', i) + i += 1 + line(out, indent, 'if (!result)') + line(out, indent + 1, 'throwPythonError();') + if returnName == 'void': + line(out, indent, 'else') + line(out, indent + 1, 'Py_DECREF(result);') + else: + signature, check, x = parseArgs([returnType], cls, False) + line(out, indent, 'else if (parseArg(result, "%s"%s, &value))', + signature, check) + line(out, indent, '{') + line(out, indent + 1, 'throwTypeError("%s", result);', name) + line(out, indent + 1, 'Py_DECREF(result);') + line(out, indent, '}') + line(out, indent, 'else') + line(out, indent, '{') + if not returnType.isPrimitive(): + line(out, indent + 1, 'jobj = jenv->NewLocalRef(value.this$);') + line(out, indent + 1, 'Py_DECREF(result);') + if returnType.isPrimitive(): + line(out, indent + 1, 'return value;') + else: + line(out, indent + 1, 'return jobj;') + line(out, indent, '}') + line(out) + if returnType.isPrimitive(): + line(out, indent, 'return (j%s) 0;', returnName) + else: + line(out, indent, 'return (jobject) NULL;') + + +def python(env, out_h, out, cls, superCls, names, superNames, + constructors, methods, protectedMethods, fields, instanceFields, + mapping, sequence, rename, declares, typeset, moduleName, generics, + _dll_export): + + line(out_h) + line(out_h, 0, '#include ') + line(out_h) + + indent = 0 + for name in names[:-1]: + line(out_h, indent, 'namespace %s {', cppname(name)) + indent += 1 + line(out_h, indent, '%sextern 
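The `extension()` stubs above are the native half of the `Python*` extension classes: they recover the PyObject pointer that `pythonExtension()` stored in the Java object, take the GIL, convert the JNI arguments, and invoke the Python method through `PyObject_CallMethod`. On the Python side, that is what lets a subclass supply Java behavior; a hedged sketch after the PythonIterator pattern, with all names illustrative and the import depending on the actual jcc build:

    from lucene import PythonIterator  # assumed module and class

    class StringsIterator(PythonIterator):
        def __init__(self, strings):
            super(StringsIterator, self).__init__()
            self.strings, self.index = strings, 0

        def hasNext(self):
            # called from Java through the generated native stub
            return self.index < len(self.strings)

        def next(self):
            value = self.strings[self.index]
            self.index += 1
            return value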
PyTypeObject PY_TYPE(%s);', + _dll_export, names[-1]) + + if generics: + clsParams = getTypeParameters(cls) + else: + clsParams = None + + line(out_h) + line(out_h, indent, 'class %st_%s {', _dll_export, names[-1]) + line(out_h, indent, 'public:') + line(out_h, indent + 1, 'PyObject_HEAD') + line(out_h, indent + 1, '%s object;', cppname(names[-1])) + if clsParams: + line(out_h, indent + 1, 'PyTypeObject *parameters[%d];', len(clsParams)) + line(out_h, indent + 1, 'static PyTypeObject **parameters_(t_%s *self)', + cppname(names[-1])) + line(out_h, indent + 1, '{') + line(out_h, indent + 2, 'return (PyTypeObject **) &(self->parameters);') + line(out_h, indent + 1, '}') + + line(out_h, indent + 1, 'static PyObject *wrap_Object(const %s&);', + cppname(names[-1])) + line(out_h, indent + 1, 'static PyObject *wrap_jobject(const jobject&);') + if clsParams: + _clsParams = ', '.join(['PyTypeObject *'] * len(clsParams)) + line(out_h, indent + 1, 'static PyObject *wrap_Object(const %s&, %s);', + cppname(names[-1]), _clsParams) + line(out_h, indent + 1, 'static PyObject *wrap_jobject(const jobject&, %s);', _clsParams) + line(out_h, indent + 1, 'static void install(PyObject *module);') + line(out_h, indent + 1, 'static void initialize(PyObject *module);') + line(out_h, indent, '};') + + if env.java_version >= '1.5': + iterable = findClass('java/lang/Iterable') + iterator = findClass('java/util/Iterator') + else: + iterable = iterator = None + + enumeration = findClass('java/util/Enumeration') + + while indent: + indent -= 1 + line(out_h, indent, '}') + + line(out) + line(out, 0, '#include "structmember.h"') + line(out, 0, '#include "functions.h"') + line(out, 0, '#include "macros.h"') + + for inner in cls.getDeclaredClasses(): + if inner in typeset and not inner in declares: + if Modifier.isStatic(inner.getModifiers()): + line(out, 0, '#include "%s.h"', + inner.getName().replace('.', '/')) + + for method in methods: + if method.getName() == 'pythonExtension': + isExtension = True + break + else: + isExtension = False + + line(out) + indent = 0 + for name in names[:-1]: + line(out, indent, 'namespace %s {', cppname(name)) + indent += 1 + + line(out, indent, 'static PyObject *t_%s_cast_(PyTypeObject *type, PyObject *arg);', names[-1]) + line(out, indent, 'static PyObject *t_%s_instance_(PyTypeObject *type, PyObject *arg);', names[-1]) + if clsParams: + line(out, indent, + 'static PyObject *t_%s_of_(t_%s *self, PyObject *args);', + names[-1], names[-1]) + + if constructors: + line(out, indent, 'static int t_%s_init_(t_%s *self, PyObject *args, PyObject *kwds);', names[-1], names[-1]) + constructorName = 't_%s_init_' %(names[-1]) + else: + constructorName = 'abstract_init' + + if superCls: + superMethods = set([method.getName() + for method in superCls.getMethods()]) + else: + superMethods = () + + allMethods = {} + extMethods = {} + propMethods = {} + + if methods: + for method in methods: + modifiers = method.getModifiers() + name = method.getName() + params = method.getParameterTypes() + superMethod = None + isNative = Modifier.isNative(modifiers) + isStatic = Modifier.isStatic(modifiers) + + if (isExtension and not isStatic and superCls and isNative): + superMethod = find_method(superCls, name, params) + + if isExtension and isNative and not isStatic: + extMethods.setdefault(name, []).append(method) + + if superMethod or not (isExtension and isNative and not isStatic): + if isStatic: + if name in allMethods: + if Modifier.isStatic(allMethods[name][0].getModifiers()): + allMethods[name].append(method) + 
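The bookkeeping here resolves collisions between static and instance methods of the same name: the instance method keeps the plain name and the static one is re-registered under a trailing underscore, with a warning printed (see just below). In use, for a hypothetical class declaring both variants of `of`:

    # names illustrative
    Widget.of_(arg)   # static method, renamed to avoid the shadowing
    widget.of(arg)    # instance method keeps the original name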
elif name + '_' in allMethods: + allMethods[name + '_'].append(method) + else: + print >>sys.stderr, " Warning: renaming static method '%s' on class %s to '%s_' since it is shadowed by non-static method of same name." %(name, '.'.join(names), name) + allMethods[name + '_'] = [method] + else: + allMethods[name] = [method] + else: + if name in allMethods: + if Modifier.isStatic(allMethods[name][0].getModifiers()): + print >>sys.stderr, " Warning: renaming static method '%s' on class %s to '%s_' since it is shadowed by non-static method of same name." %(name, '.'.join(names), name) + allMethods[name + '_'] = allMethods[name] + allMethods[name] = [method] + else: + allMethods[name].append(method) + else: + allMethods[name] = [method] + + if not (isExtension and isNative): + nameLen = len(name) + paramsLen = len(params) + if nameLen > 3 and paramsLen == 0 and name.startswith('get'): + if method.getReturnType().getName() != 'void': + propMethods.setdefault(name[3].lower() + name[4:], + []).append(method) + elif nameLen > 3 and paramsLen == 1 and name.startswith('set'): + propMethods.setdefault(name[3].lower() + name[4:], + []).append(method) + elif nameLen > 2 and paramsLen == 0 and name.startswith('is'): + if method.getReturnType().getName() != 'void': + propMethods.setdefault(name[2].lower() + name[3:], + []).append(method) + + properties = set([name for name in propMethods.iterkeys() + if name not in allMethods]) + propMethods = [(name, propMethods[name]) for name in properties] + sort(propMethods, key=lambda x: x[0]) + + extMethods = extMethods.items() + sort(extMethods, key=lambda x: x[0]) + allMethods = allMethods.items() + sort(allMethods, key=lambda x: x[0]) + + iteratorMethod = None + iteratorExt = False + nextMethod = None + nextExt = False + nextElementMethod = None + nextElementExt = False + + mappingMethod = None + if mapping: + mappingName, mappingSig = mapping.split(':') + + sequenceLenMethod = None + sequenceGetMethod = None + if sequence: + sequenceLenName, sequenceLenSig = sequence[0].split(':') + sequenceGetName, sequenceGetSig = sequence[1].split(':') + + for name, methods in allMethods: + args, x, cardinality = methodargs(methods, superMethods) + sort(methods, key=lambda x: len(x.getParameterTypes())) + method = methods[0] + modifiers = method.getModifiers() + if name == 'iterator' and iteratorMethod is None: + if (iterable is not None and + not method.getParameterTypes() and + iterable.isAssignableFrom(cls) and + iterator.isAssignableFrom(method.getReturnType())): + iteratorMethod = method + elif name == 'next' and nextMethod is None: + if (not method.getParameterTypes() and + not method.getReturnType().isPrimitive()): + nextMethod = method + elif name == 'nextElement' and nextElementMethod is None: + if (not method.getParameterTypes() and + not method.getReturnType().isPrimitive()): + nextElementMethod = method + elif mapping and name == mappingName and mappingMethod is None: + if signature(method) == mappingSig: + mappingMethod = (method, cardinality) + elif sequence and name == sequenceLenName and sequenceLenMethod is None: + if signature(method) == sequenceLenSig: + sequenceLenMethod = (method, cardinality) + elif sequence and name == sequenceGetName and sequenceGetMethod is None: + if signature(method) == sequenceGetSig: + sequenceGetMethod = (method, cardinality) + elif isExtension and name == 'clone' and Modifier.isNative(modifiers): + args, x, cardinality = ', PyObject *arg', ', arg', 1 + + if Modifier.isStatic(modifiers): + line(out, indent, 'static PyObject 
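Zero-argument `getX()`/`isX()` and one-argument `setX()` methods whose derived property name does not collide with a real method are collected into `propMethods` above and exposed through a PyGetSetDef table as Python properties. For a Java class with getName/setName/isEmpty, the wrapper reads naturally (object name illustrative):

    doc.name            # invokes getName()
    doc.name = 'title'  # invokes setName("title")
    doc.empty           # invokes isEmpty()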
*t_%s_%s(PyTypeObject *type%s);', + names[-1], name, args) + else: + line(out, indent, 'static PyObject *t_%s_%s(t_%s *self%s);', + names[-1], name, names[-1], args) + + for name, methods in extMethods: + args, x, cardinality = methodargs(methods, superMethods) + sort(methods, key=lambda x: len(x.getParameterTypes())) + method = methods[0] + modifiers = method.getModifiers() + if name == 'iterator' and iteratorMethod is None: + if (iterable is not None and + not method.getParameterTypes() and + iterable.isAssignableFrom(cls) and + iterator.isAssignableFrom(method.getReturnType())): + iteratorMethod = method + iteratorExt = True + elif name == 'next' and nextMethod is None: + if (not method.getParameterTypes() and + not method.getReturnType().isPrimitive()): + nextMethod = method + nextExt = True + elif name == 'nextElement' and nextElementMethod is None: + if (not method.getParameterTypes() and + not method.getReturnType().isPrimitive()): + nextElementMethod = method + nextElementExt = True + + if isExtension: + count = 0 + for name, methods in extMethods: + for method in methods: + line(out, indent, + 'static %s JNICALL t_%s_%s%d(JNIEnv *jenv, jobject jobj%s);', + jniname(method.getReturnType()), names[-1], name, count, + jniargs(method.getParameterTypes())) + count += 1 + line(out, indent, 'static PyObject *t_%s_get__self(t_%s *self, void *data);', names[-1], names[-1]) + + if instanceFields: + for field in instanceFields: + fieldName = field.getName() + if fieldName not in properties: + line(out, indent, 'static PyObject *t_%s_get__%s(t_%s *self, void *data);', + names[-1], fieldName, names[-1]) + if not Modifier.isFinal(field.getModifiers()): + line(out, indent, 'static int t_%s_set__%s(t_%s *self, PyObject *arg, void *data);', + names[-1], field.getName(), names[-1]) + line(out) + + for fieldName, methods in propMethods: + getter = False + setter = False + for method in methods: + methodName = method.getName() + if not getter and (methodName.startswith('get') or + methodName.startswith('is')): + getter = True + line(out, indent, 'static PyObject *t_%s_get__%s(t_%s *self, void *data);', + names[-1], fieldName, names[-1]) + elif not setter and methodName.startswith('set'): + setter = True + line(out, indent, 'static int t_%s_set__%s(t_%s *self, PyObject *arg, void *data);', + names[-1], fieldName, names[-1]) + if clsParams: + line(out, indent, 'static PyObject *t_%s_get__parameters_(t_%s *self, void *data);', names[-1], names[-1]) + + if instanceFields or propMethods or isExtension or clsParams: + line(out, indent, 'static PyGetSetDef t_%s__fields_[] = {', names[-1]) + for field in instanceFields: + fieldName = field.getName() + if fieldName not in properties: + if Modifier.isFinal(field.getModifiers()): + line(out, indent + 1, 'DECLARE_GET_FIELD(t_%s, %s),', + names[-1], fieldName) + else: + line(out, indent + 1, 'DECLARE_GETSET_FIELD(t_%s, %s),', + names[-1], fieldName) + for fieldName, methods in propMethods: + getter = False + setter = False + for method in methods: + methodName = method.getName() + if not getter and (methodName.startswith('get') or + methodName.startswith('is')): + getter = True + elif not setter and methodName.startswith('set'): + setter = True + if getter and setter: + op = 'GETSET' + elif getter: + op = 'GET' + elif setter: + op = 'SET' + line(out, indent + 1, 'DECLARE_%s_FIELD(t_%s, %s),', + op, names[-1], fieldName) + if isExtension: + line(out, indent + 1, 'DECLARE_GET_FIELD(t_%s, self),', names[-1]) + if clsParams: + line(out, indent + 1, 
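The PyMethodDef table emitted just below picks each method's calling convention from its arity: a single non-shadowed overload with zero parameters gets METH_NOARGS, with one parameter METH_O, anything else METH_VARARGS, and static methods add METH_CLASS. The same decision, restated as a small Python function (the native `clone` special case is omitted):

    def meth_flags(arities, is_static, shadowed):
        # arities: parameter counts of the method's overloads
        if len(arities) == 1 and not shadowed:
            flags = {0: 'METH_NOARGS', 1: 'METH_O'}.get(arities[0],
                                                        'METH_VARARGS')
        else:
            flags = 'METH_VARARGS'
        if is_static:
            flags += ' | METH_CLASS'
        return flags

    assert meth_flags([0], False, False) == 'METH_NOARGS'
    assert meth_flags([1, 3], True, False) == 'METH_VARARGS | METH_CLASS'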
'DECLARE_GET_FIELD(t_%s, parameters_),', + names[-1]) + + line(out, indent + 1, '{ NULL, NULL, NULL, NULL, NULL }') + line(out, indent, '};') + + line(out) + line(out, indent, 'static PyMethodDef t_%s__methods_[] = {', names[-1]) + + line(out, indent + 1, + 'DECLARE_METHOD(t_%s, cast_, METH_O | METH_CLASS),', names[-1]) + line(out, indent + 1, + 'DECLARE_METHOD(t_%s, instance_, METH_O | METH_CLASS),', names[-1]) + if clsParams: + line(out, indent + 1, + 'DECLARE_METHOD(t_%s, of_, METH_VARARGS),', names[-1]) + + for name, methods in allMethods: + modifiers = methods[0].getModifiers() + if len(methods) == 1 and not name in superMethods: + count = len(methods[0].getParameterTypes()) + if count == 0: + args = 'METH_NOARGS' + elif count == 1: + args = 'METH_O' + else: + args = 'METH_VARARGS' + elif isExtension and name == 'clone' and Modifier.isNative(modifiers): + args = 'METH_O' + else: + args = 'METH_VARARGS' + if Modifier.isStatic(modifiers): + args += ' | METH_CLASS' + + line(out, indent + 1, 'DECLARE_METHOD(t_%s, %s, %s),', + names[-1], name, args) + line(out, indent + 1, '{ NULL, NULL, 0, NULL }') + line(out, indent, '};') + + if instanceFields or propMethods or isExtension or clsParams: + tp_getset = 't_%s__fields_' %(names[-1]) + else: + tp_getset = '0' + + if iteratorMethod: + if iteratorExt: + tp_iter = 'get_extension_iterator' + else: + tp_iter = '((PyObject *(*)(t_%s *)) get_%siterator< t_%s >)' %(names[-1], clsParams and 'generic_' or '', names[-1]) + tp_iternext = '0' + elif nextMethod and iterable is not None and iterator.isAssignableFrom(cls): + tp_iter = 'PyObject_SelfIter' + returnName = typename(nextMethod.getReturnType(), cls, False) + ns, sep, n = rpartition(returnName, '::') + if nextExt: + tp_iternext = 'get_extension_next' + else: + tp_iternext = '((PyObject *(*)(::java::util::t_Iterator *)) get_%siterator_next< ::java::util::t_Iterator,%s%st_%s >)' %(clsParams and 'generic_' or '', ns, sep, n) + elif nextElementMethod and enumeration.isAssignableFrom(cls): + tp_iter = 'PyObject_SelfIter' + returnName = typename(nextElementMethod.getReturnType(), cls, False) + ns, sep, n = rpartition(returnName, '::') + if nextElementExt: + tp_iternext = 'get_extension_nextElement' + else: + tp_iternext = '((PyObject *(*)(::java::util::t_Enumeration *)) get_%senumeration_next< ::java::util::t_Enumeration,%s%st_%s >)' %(clsParams and 'generic_' or '', ns, sep, n) + elif nextMethod: + tp_iter = 'PyObject_SelfIter' + returnName = typename(nextMethod.getReturnType(), cls, False) + ns, sep, n = rpartition(returnName, '::') + if nextExt: + tp_iternext = 'get_extension_next' + else: + tp_iternext = '((PyObject *(*)(t_%s *)) get_%snext< t_%s,%s%st_%s,%s >)' %(names[-1], clsParams and 'generic_' or '', names[-1], ns, sep, n, returnName) + else: + tp_iter = '0' + tp_iternext = '0' + + if mappingMethod: + method, cardinality = mappingMethod + if cardinality > 1: + getName = 't_%s_%s_map_' %(names[-1], method.getName()) + line(out, indent, 'static PyObject *%s(t_%s *self, PyObject *key);', + getName, names[-1]) + else: + getName = 't_%s_%s' %(names[-1], method.getName()) + line(out) + line(out, indent, 'static PyMappingMethods t_%s_as_mapping = {', + names[-1]) + line(out, indent + 1, '0,') + line(out, indent + 1, '(binaryfunc) %s,', getName) + line(out, indent + 1, '0,') + line(out, indent, '};') + tp_as_mapping = '&t_%s_as_mapping' %(names[-1]) + else: + tp_as_mapping = '0' + + if sequenceLenMethod or sequenceGetMethod: + if sequenceLenMethod: + method, cardinality = sequenceLenMethod + lenName = 
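Because `iterator()`, `next()` and `nextElement()` are wired to tp_iter/tp_iternext above, wrapped java.lang.Iterable, java.util.Iterator and java.util.Enumeration instances plug straight into Python's iteration protocol (variable names illustrative):

    for entry in java_list:  # drives iterator()/hasNext()/next() in Java
        print entry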
't_%s_%s_seq_' %(names[-1], method.getName()) + line(out, indent, 'static int %s(t_%s *self);', lenName, names[-1]) + else: + lenName = '0' + + if sequenceGetMethod: + method, cardinality = sequenceGetMethod + getName = 't_%s_%s_seq_' %(names[-1], method.getName()) + line(out, indent, 'static PyObject *%s(t_%s *self, int n);', + getName, names[-1]) + else: + getName = '0' + + line(out) + line(out, indent, 'static PySequenceMethods t_%s_as_sequence = {', + names[-1]) + if python_ver < '2.5.0': + line(out, indent + 1, '(inquiry) %s,', lenName) + line(out, indent + 1, '0,') + line(out, indent + 1, '0,') + line(out, indent + 1, '(intargfunc) %s', getName) + line(out, indent, '};') + else: + line(out, indent + 1, '(lenfunc) %s,', lenName) + line(out, indent + 1, '0,') + line(out, indent + 1, '0,') + line(out, indent + 1, '(ssizeargfunc) %s', getName) + line(out, indent, '};') + tp_as_sequence = '&t_%s_as_sequence' %(names[-1]) + else: + tp_as_sequence = '0' + + if len(superNames) > 1: + base = '::'.join((absname(cppnames(superNames[:-1])), superNames[-1])) + else: + base = superNames[-1] + line(out) + line(out, indent, 'DECLARE_TYPE(%s, t_%s, %s, %s, %s, %s, %s, %s, %s, %s);', + names[-1], names[-1], base, cppname(names[-1]), constructorName, + tp_iter, tp_iternext, tp_getset, tp_as_mapping, tp_as_sequence) + + if clsParams: + clsArgs = [] + for clsParam in clsParams: + clsArgs.append("PyTypeObject *%s" %(clsParam.getName())) + line(out, indent, + "PyObject *t_%s::wrap_Object(const %s& object, %s)", + cppname(names[-1]), names[-1], ', '.join(clsArgs)) + line(out, indent, "{") + line(out, indent + 1, "PyObject *obj = t_%s::wrap_Object(object);", + names[-1]) + line(out, indent + 1, "if (obj != NULL && obj != Py_None)") + line(out, indent + 1, "{") + line(out, indent + 2, "t_%s *self = (t_%s *) obj;", + names[-1], names[-1]) + i = 0; + for clsParam in clsParams: + line(out, indent + 2, "self->parameters[%d] = %s;", + i, clsParam.getName()) + i += 1 + line(out, indent + 1, "}") + line(out, indent + 1, "return obj;"); + line(out, indent, "}") + + line(out) + line(out, indent, + "PyObject *t_%s::wrap_jobject(const jobject& object, %s)", + cppname(names[-1]), ', '.join(clsArgs)) + line(out, indent, "{") + line(out, indent + 1, "PyObject *obj = t_%s::wrap_jobject(object);", + names[-1]) + line(out, indent + 1, "if (obj != NULL && obj != Py_None)") + line(out, indent + 1, "{") + line(out, indent + 2, "t_%s *self = (t_%s *) obj;", + names[-1], names[-1]) + i = 0; + for clsParam in clsParams: + line(out, indent + 2, "self->parameters[%d] = %s;", + i, clsParam.getName()) + i += 1 + line(out, indent + 1, "}") + line(out, indent + 1, "return obj;"); + line(out, indent, "}") + + line(out) + line(out, indent, 'void t_%s::install(PyObject *module)', names[-1]) + line(out, indent, '{') + line(out, indent + 1, 'installType(&PY_TYPE(%s), module, "%s", %d);', + names[-1], rename or names[-1], isExtension and 1 or 0) + for inner in cls.getDeclaredClasses(): + if inner in typeset: + if Modifier.isStatic(inner.getModifiers()): + innerName = inner.getName().split('.')[-1] + line(out, indent + 1, 'PyDict_SetItemString(PY_TYPE(%s).tp_dict, "%s", make_descriptor(&PY_TYPE(%s)));', + names[-1], innerName[len(names[-1])+1:], innerName) + line(out, indent, '}') + + line(out) + line(out, indent, 'void t_%s::initialize(PyObject *module)', names[-1]) + line(out, indent, '{') + line(out, indent + 1, 'PyDict_SetItemString(PY_TYPE(%s).tp_dict, "class_", make_descriptor(%s::initializeClass, %s));', + names[-1], cppname(names[-1]), 
generics and 1 or 0) + + if is_unboxed(cls.getName()): + wrapfn_ = "unbox%s" %(names[-1]) + boxfn_ = "box%s" %(names[-1]) + else: + wrapfn_ = "t_%s::wrap_jobject" %(names[-1]) + boxfn_ = "boxObject" + + line(out, indent + 1, 'PyDict_SetItemString(PY_TYPE(%s).tp_dict, "wrapfn_", make_descriptor(%s));', names[-1], wrapfn_) + line(out, indent + 1, 'PyDict_SetItemString(PY_TYPE(%s).tp_dict, "boxfn_", make_descriptor(%s));', names[-1], boxfn_) + + if isExtension: + line(out, indent + 1, 'jclass cls = %s::initializeClass();', + cppname(names[-1])) + elif fields: + line(out, indent + 1, '%s::initializeClass();', cppname(names[-1])) + + if isExtension: + count = 0 + line(out, indent + 1, 'JNINativeMethod methods[] = {') + for name, methods in extMethods: + for method in methods: + line(out, indent + 2, '{ "%s", "%s", (void *) t_%s_%s%d },', + name, signature(method), names[-1], name, count) + count += 1 + line(out, indent + 1, '};') + line(out, indent + 1, 'env->registerNatives(cls, methods, %d);', + count) + + for field in fields: + fieldType = field.getType() + fieldName = field.getName() + value = '%s::%s' %(cppname(names[-1]), cppname(fieldName)) + value = fieldValue(cls, value, fieldType) + line(out, indent + 1, 'PyDict_SetItemString(PY_TYPE(%s).tp_dict, "%s", make_descriptor(%s));', + names[-1], fieldName, value) + line(out, indent, '}') + + line(out) + line(out, indent, 'static PyObject *t_%s_cast_(PyTypeObject *type, PyObject *arg)', names[-1]) + line(out, indent, '{') + line(out, indent + 1, 'if (!(arg = castCheck(arg, %s::initializeClass, 1)))', cppname(names[-1])) + line(out, indent + 2, 'return NULL;') + line(out, indent + 1, 'return t_%s::wrap_Object(%s(((t_%s *) arg)->object.this$));', names[-1], cppname(names[-1]), names[-1]) + line(out, indent, '}') + + line(out, indent, 'static PyObject *t_%s_instance_(PyTypeObject *type, PyObject *arg)', names[-1]) + line(out, indent, '{') + line(out, indent + 1, 'if (!castCheck(arg, %s::initializeClass, 0))', cppname(names[-1])) + line(out, indent + 2, 'Py_RETURN_FALSE;') + line(out, indent + 1, 'Py_RETURN_TRUE;') + line(out, indent, '}') + + if clsParams: + line(out) + line(out, indent, + 'static PyObject *t_%s_of_(t_%s *self, PyObject *args)', + names[-1], names[-1]) + line(out, indent, '{') + line(out, indent + 1, + 'if (!parseArg(args, "T", %d, &(self->parameters)))', + len(clsParams)) + line(out, indent + 2, 'Py_RETURN_SELF;'); + line(out, indent + 1, + 'return PyErr_SetArgsError((PyObject *) self, "of_", args);') + line(out, indent, '}') + + if constructors: + line(out) + line(out, indent, 'static int t_%s_init_(t_%s *self, PyObject *args, PyObject *kwds)', names[-1], names[-1]) + line(out, indent, '{') + if len(constructors) > 1: + currLen = -1 + line(out, indent + 1, 'switch (PyTuple_GET_SIZE(args)) {') + withErr = False + for constructor in constructors: + params = constructor.getParameterTypes() + if len(params) != currLen: + if currLen >= 0: + withErr = True + line(out, indent + 2, 'goto err;') + currLen = len(params) + line(out, indent + 1, '%scase %d:', HALF_INDENT, currLen) + construct(out, indent + 2, cls, True, constructor, names, + generics) + line(out, indent + 1, '%sdefault:', HALF_INDENT) + if withErr: + line(out, indent + 1, '%serr:', HALF_INDENT) + line(out, indent + 2, 'PyErr_SetArgsError((PyObject *) self, "__init__", args);') + line(out, indent + 2, 'return -1;') + line(out, indent + 1, '}') + else: + construct(out, indent + 1, cls, False, constructors[0], names, + generics) + if constructors[0].getParameterTypes(): + 
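+            # For orientation, a hedged sketch (shape only, not verbatim
+            # generator output) of the C++ the branch above emits for a
+            # hypothetical wrapped class Foo with overloaded constructors:
+            #
+            #   static int t_Foo_init_(t_Foo *self, PyObject *args, PyObject *kwds)
+            #   {
+            #       switch (PyTuple_GET_SIZE(args)) {
+            #        case 1:
+            #          ... parseArgs / INT_CALL(new Foo(...)) ...
+            #        case 2:
+            #          ... parseArgs / INT_CALL(new Foo(..., ...)) ...
+            #        default:
+            #          PyErr_SetArgsError((PyObject *) self, "__init__", args);
+            #          return -1;
+            #       }
+            #   }
+            #
+            # i.e. overloads are first dispatched on argument count, then on
+            # argument types via parseArg inside each case.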
line(out, indent + 1, 'else') + line(out, indent + 1, '{') + line(out, indent + 2, 'PyErr_SetArgsError((PyObject *) self, "__init__", args);') + line(out, indent + 2, 'return -1;') + line(out, indent + 1, '}') + + if isExtension: + line(out) + line(out, indent + 1, 'Py_INCREF((PyObject *) self);') + line(out, indent + 1, 'self->object.pythonExtension((jlong) (Py_intptr_t) (void *) self);') + + line(out) + line(out, indent + 1, 'return 0;') + line(out, indent , '}') + + for name, methods in allMethods: + line(out) + modifiers = methods[0].getModifiers() + + if isExtension and name == 'clone' and Modifier.isNative(modifiers): + declargs, args, cardinality = ', PyObject *arg', ', arg', 1 + else: + declargs, args, cardinality = methodargs(methods, superMethods) + + static = Modifier.isStatic(modifiers) + if static: + line(out, indent, 'static PyObject *t_%s_%s(PyTypeObject *type%s)', + names[-1], name, declargs) + else: + line(out, indent, 'static PyObject *t_%s_%s(t_%s *self%s)', + names[-1], name, names[-1], declargs) + + line(out, indent, '{') + if len(methods) > 1: + currLen = -1 + line(out, indent + 1, 'switch (PyTuple_GET_SIZE(args)) {') + for method in methods: + params = method.getParameterTypes() + if len(params) != currLen: + if currLen >= 0: + line(out, indent + 2, 'break;') + currLen = len(params) + line(out, indent + 1, '%scase %d:', HALF_INDENT, currLen) + call(out, indent + 2, cls, True, method, names, cardinality, + isExtension, generics) + line(out, indent + 1, '}') + else: + call(out, indent + 1, cls, False, methods[0], names, cardinality, + isExtension, generics) + + if args: + line(out) + if name in superMethods: + if static: + line(out, indent + 1, 'return callSuper(type, "%s"%s, %d);', + name, args, cardinality) + else: + line(out, indent + 1, 'return callSuper(&PY_TYPE(%s), (PyObject *) self, "%s"%s, %d);', + names[-1], name, args, cardinality) + else: + line(out, indent + 1, 'PyErr_SetArgsError(%s, "%s"%s);', + static and 'type' or '(PyObject *) self', name, args) + line(out, indent + 1, 'return NULL;') + + line(out, indent, '}') + + if isExtension: + count = 0 + for name, methods in extMethods: + for method in methods: + line(out) + line(out, indent, + 'static %s JNICALL t_%s_%s%d(JNIEnv *jenv, jobject jobj%s)', + jniname(method.getReturnType()), names[-1], name, count, + jniargs(method.getParameterTypes())) + count += 1 + line(out, indent, '{') + extension(env, out, indent + 1, cls, names, name, count, method, + generics) + line(out, indent, '}') + line(out) + line(out, indent, 'static PyObject *t_%s_get__self(t_%s *self, void *data)', + names[-1], names[-1]) + line(out, indent, '{') + indent += 1 + line(out, indent, 'jlong ptr;') + line(out, indent, 'OBJ_CALL(ptr = self->object.pythonExtension());') + line(out, indent, 'PyObject *obj = (PyObject *) (Py_intptr_t) ptr;') + line(out) + line(out, indent, 'if (obj != NULL)') + line(out, indent, '{') + line(out, indent + 1, 'Py_INCREF(obj);') + line(out, indent + 1, 'return obj;') + line(out, indent, '}') + line(out, indent, 'else') + line(out, indent + 1, 'Py_RETURN_NONE;') + indent -= 1 + line(out, indent, '}') + + if clsParams: + line(out, indent, 'static PyObject *t_%s_get__parameters_(t_%s *self, void *data)', names[-1], names[-1]) + line(out, indent, '{') + line(out, indent + 1, 'return typeParameters(self->parameters, sizeof(self->parameters));') + line(out, indent, '}') + + if instanceFields: + for field in instanceFields: + fieldName = field.getName() + if fieldName not in properties: + line(out) + fieldType = 
field.getType() + typeName = typename(fieldType, cls, False) + line(out, indent, 'static PyObject *t_%s_get__%s(t_%s *self, void *data)', + names[-1], fieldName, names[-1]) + line(out, indent, '{') + line(out, indent + 1, '%s value%s;', typeName, + not fieldType.isPrimitive() and '((jobject) NULL)' or '') + line(out, indent + 1, 'OBJ_CALL(value = self->object._get_%s());', + fieldName) + line(out, indent + 1, returnValue(cls, fieldType, 'value')) + line(out, indent, '}') + + if not Modifier.isFinal(field.getModifiers()): + line(out, indent, 'static int t_%s_set__%s(t_%s *self, PyObject *arg, void *data)', + names[-1], fieldName, names[-1]) + line(out, indent, '{') + line(out, indent + 1, '%s value%s;', typeName, + not fieldType.isPrimitive() and '((jobject) NULL)' or '') + sig, check, x = parseArgs([fieldType], cls, False) + line(out, indent + 1, 'if (!parseArg(arg, "%s"%s, &value))', + sig, check) + line(out, indent + 1, '{') + line(out, indent + 2, 'INT_CALL(self->object._set_%s(value));', + fieldName) + line(out, indent + 2, 'return 0;') + line(out, indent + 1, '}') + line(out, indent + 1, 'PyErr_SetArgsError((PyObject *) self, "%s", arg);', + fieldName) + line(out, indent + 1, 'return -1;') + line(out, indent, '}') + + if propMethods: + for fieldName, methods in propMethods: + line(out) + getter = None + setters = [] + sort(methods, key=lambda x: x.getName()) + for method in methods: + methodName = method.getName() + if not getter and (methodName.startswith('get') or + methodName.startswith('is')): + getter = method + elif methodName.startswith('set'): + setters.append(method) + + if getter: + methodName = getter.getName() + returnType = getter.getReturnType() + typeName = typename(returnType, cls, False) + line(out, indent, 'static PyObject *t_%s_get__%s(t_%s *self, void *data)', + names[-1], fieldName, names[-1]) + line(out, indent, '{') + line(out, indent + 1, '%s value%s;', typeName, + not returnType.isPrimitive() and '((jobject) NULL)' or '') + line(out, indent + 1, 'OBJ_CALL(value = self->object.%s());', + methodName) + line(out, indent + 1, returnValue(cls, returnType, 'value')) + line(out, indent, '}') + + if setters: + line(out, indent, 'static int t_%s_set__%s(t_%s *self, PyObject *arg, void *data)', + names[-1], fieldName, names[-1]) + line(out, indent, '{') + methodName = setters[0].getName() + for method in setters: + argType = method.getParameterTypes()[0] + typeName = typename(argType, cls, False) + line(out, indent + 1, '{') + line(out, indent + 2, '%s value%s;', typeName, + not argType.isPrimitive() and '((jobject) NULL)' or '') + sig, check, x = parseArgs([argType], cls, False) + line(out, indent + 2, 'if (!parseArg(arg, "%s"%s, &value))', + sig, check) + line(out, indent + 2, '{') + line(out, indent + 3, 'INT_CALL(self->object.%s(value));', + methodName) + line(out, indent + 3, 'return 0;') + line(out, indent + 2, '}') + line(out, indent + 1, '}') + line(out, indent + 1, 'PyErr_SetArgsError((PyObject *) self, "%s", arg);', + fieldName) + line(out, indent + 1, 'return -1;') + line(out, indent, '}') + + if mappingMethod: + method, cardinality = mappingMethod + if cardinality > 1: + methodName = method.getName() + getName = 't_%s_%s_map_' %(names[-1], methodName) + line(out) + line(out, indent, 'static PyObject *%s(t_%s *self, PyObject *arg)', + getName, names[-1]) + line(out, indent, '{') + call(out, indent + 1, cls, False, method, names, 1, isExtension, + generics) + line(out) + line(out, indent + 1, 'PyErr_SetArgsError((PyObject *) self, "%s", arg);', + methodName) + 
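+        # The PyMappingMethods struct emitted earlier installs this _map_
+        # wrapper in the mp_subscript slot (the middle field of the struct),
+        # so the selected Java accessor becomes Python indexing syntax.
+        # Illustrative use, assuming m is a wrapped instance whose mapping
+        # method is get(Object):
+        #
+        #   value = m['key']    # routed through t_<Class>_<name>_map_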
line(out, indent + 1, 'return NULL;') + line(out, indent, '}') + + if sequenceLenMethod: + method, cardinality = sequenceLenMethod + methodName = method.getName() + lenName = 't_%s_%s_seq_' %(names[-1], methodName) + line(out) + line(out, indent, 'static int %s(t_%s *self)', lenName, names[-1]) + line(out, indent, '{') + line(out, indent + 1, '%s len;', + typename(method.getReturnType(), cls, False)) + line(out, indent + 1, 'INT_CALL(len = self->object.%s());', methodName) + line(out, indent + 1, 'return (int) len;') + line(out, indent, '}') + + if sequenceGetMethod: + method, cardinality = sequenceGetMethod + methodName = method.getName() + returnType = method.getReturnType() + getName = 't_%s_%s_seq_' %(names[-1], methodName) + line(out) + line(out, indent, 'static PyObject *%s(t_%s *self, int n)', getName, names[-1]) + line(out, indent, '{') + line(out, indent + 1, '%s result%s;', + typename(returnType, cls, False), + not returnType.isPrimitive() and '((jobject) NULL)' or '') + line(out, indent + 1, 'OBJ_CALL(result = self->object.%s((%s) n));', + methodName, typename(method.getParameterTypes()[0], cls, False)) + if generics: + line(out, indent + 1, returnValue(cls, returnType, 'result', + method.getGenericReturnType())) + else: + line(out, indent + 1, returnValue(cls, returnType, 'result')) + line(out, indent, '}') + + while indent: + indent -= 1 + line(out, indent, '}') + + +def package(out, allInOne, cppdir, namespace, names): + + if not allInOne: + out = file(os.path.join(os.path.join(cppdir, *names), + '__init__.cpp'), 'w') + + if allInOne and not names or not allInOne: + line(out, 0, '#include ') + line(out, 0, '#include ') + line(out, 0, '#include "JCCEnv.h"') + line(out, 0, '#include "functions.h"') + + if not names: + line(out) + line(out, 0, 'PyObject *initVM(PyObject *module, PyObject *args, PyObject *kwds);') + + packages = [] + types = [] + + namespaces = namespace.items() + sort(namespaces, key=lambda x: x[0]) + for name, entries in namespaces: + if entries is True: + if names: + line(out, 0, '#include "%s/%s.h"', '/'.join(names), name) + else: + line(out, 0, '#include "%s.h"', name) + types.append(name) + else: + packages.append((name, entries)) + + indent = 0 + if names: + line(out) + for name in names: + line(out, indent, 'namespace %s {', cppname(name)) + indent += 1 + + line(out); + for name, entries in packages: + line(out, indent, 'namespace %s {', cppname(name)) + line(out, indent + 1, 'void __install__(PyObject *module);') + line(out, indent + 1, 'void __initialize__(PyObject *module);') + line(out, indent, '}') + + line(out) + line(out, indent, 'void __install__(PyObject *module)') + line(out, indent, '{') + for name in types: + line(out, indent + 1, 't_%s::install(module);', name) + for name, entries in packages: + line(out, indent + 1, '%s::__install__(module);', cppname(name)) + line(out, indent, '}') + + line(out) + if not names: + line(out, indent, 'PyObject *__initialize__(PyObject *module, PyObject *args, PyObject *kwds)') + line(out, indent, '{') + line(out, indent + 1, 'PyObject *env = initVM(module, args, kwds);') + line(out) + line(out, indent + 1, 'if (env == NULL)') + line(out, indent + 2, 'return NULL;') + line(out) + line(out, indent + 1, 'try {'); + indent += 1 + else: + line(out, indent, 'void __initialize__(PyObject *module)') + line(out, indent, '{') + for name in types: + line(out, indent + 1, 't_%s::initialize(module);', name) + for name, entries in packages: + line(out, indent + 1, '%s::__initialize__(module);', cppname(name)) + if not 
names: + line(out, indent + 1, 'return env;') + indent -= 1 + line(out, indent + 1, '} catch (int e) {') + line(out, indent + 2, 'switch(e) {') + line(out, indent + 2, ' case _EXC_JAVA:') + line(out, indent + 3, 'return PyErr_SetJavaError();') + line(out, indent + 2, ' default:') + line(out, indent + 3, 'throw;') + line(out, indent + 2, '}') + line(out, indent + 1, '}') + + line(out, indent, '}') + + while indent: + indent -= 1 + line(out, indent, '}') + + if not allInOne: + out.close() + else: + line(out) + + for name, entries in packages: + package(out, allInOne, cppdir, entries, names + (name,)) + + +def module(out, allInOne, classes, imports, cppdir, moduleName, + shared, generics): + + extname = '_%s' %(moduleName) + line(out, 0, '#include ') + line(out, 0, '#include "macros.h"') + line(out, 0, '#include "jccfuncs.h"') + + if allInOne: + out_init = file(os.path.join(cppdir, '__init__.cpp'), 'w') + namespaces = {} + for cls in classes: + for importset in imports.itervalues(): + if cls in importset: + break + else: + namespace = namespaces + classNames = cls.getName().split('.') + for className in classNames[:-1]: + namespace = namespace.setdefault(className, {}) + namespace[classNames[-1]] = True + if allInOne: + package(out_init, True, cppdir, namespaces, ()) + out_init.close() + else: + package(None, False, cppdir, namespaces, ()) + + line(out) + line(out, 0, 'PyObject *initJCC(PyObject *module);') + line(out, 0, 'void __install__(PyObject *module);') + line(out, 0, 'extern PyTypeObject PY_TYPE(JObject), PY_TYPE(ConstVariableDescriptor), PY_TYPE(FinalizerClass), PY_TYPE(FinalizerProxy);') + line(out, 0, 'extern void _install_jarray(PyObject *);') + line(out) + line(out, 0, 'extern "C" {') + + line(out) + line(out, 1, 'void init%s(void)', extname) + line(out, 1, '{') + line(out, 2, 'PyObject *module = Py_InitModule3("%s", jcc_funcs, "");', + extname); + line(out) + line(out, 2, 'initJCC(module);') + line(out) + line(out, 2, 'INSTALL_TYPE(JObject, module);') + line(out, 2, 'INSTALL_TYPE(ConstVariableDescriptor, module);') + line(out, 2, 'INSTALL_TYPE(FinalizerClass, module);') + line(out, 2, 'INSTALL_TYPE(FinalizerProxy, module);') + line(out, 2, '_install_jarray(module);') + line(out, 2, '__install__(module);') + line(out, 1, '}') + line(out, 0, '}') + + +def compile(env, jccPath, output, moduleName, install, dist, debug, jars, + version, prefix, root, install_dir, home_dir, use_distutils, + shared, compiler, modules, wininst, find_jvm_dll, arch, generics, + resources, imports): + + try: + if use_distutils: + raise ImportError + from setuptools import setup, Extension + with_setuptools = True + if shared and not SHARED: + raise NotImplementedError, "JCC was not built with --shared mode support, see JCC's INSTALL file for more information" + except ImportError: + if python_ver < '2.4': + raise ImportError, 'setuptools is required when using Python 2.3' + if shared: + raise ImportError, 'setuptools is required when using --shared' + from distutils.core import setup, Extension + with_setuptools = False + + extname = '_%s' %(moduleName) + + modulePath = os.path.join(output, moduleName) + if not os.path.isdir(modulePath): + os.makedirs(modulePath) + + out = file(os.path.join(modulePath, '__init__.py'), 'w') + line(out) + if shared: + line(out, 0, "import os, sys") + line(out) + line(out, 0, "if sys.platform == 'win32':") + if find_jvm_dll: + line(out, 1, "from jcc.windows import add_jvm_dll_directory_to_path") + line(out, 1, "add_jvm_dll_directory_to_path()") + line(out, 1, "import jcc, 
%s", extname) + line(out, 0, "else:") + line(out, 1, "import %s", extname) + else: + line(out, 0, 'import os, %s', extname) + line(out) + line(out, 0, '__dir__ = os.path.abspath(os.path.dirname(__file__))') + + package_data = [] + for jar in jars: + shutil.copy2(jar, modulePath) + package_data.append(os.path.basename(jar)) + if resources: + def copytree(src, dst): + _dst = os.path.join(modulePath, dst) + if not os.path.exists(_dst): + os.mkdir(_dst) + for name in os.listdir(src): + if name.startswith('.'): + continue + _src = os.path.join(src, name) + if os.path.islink(_src): + continue + _dst = os.path.join(dst, name) + if os.path.isdir(_src): + copytree(_src, _dst) + else: + shutil.copy2(_src, os.path.join(modulePath, _dst)) + package_data.append(_dst) + for resource in resources: + copytree(resource, os.path.split(resource)[-1]) + + packages = [moduleName] + package = [moduleName] + if modules: + for module in modules: + if os.path.isdir(module): + def copytree(src, dst, is_package): + if is_package: + packages.append('.'.join(package)) + if not os.path.exists(dst): + os.mkdir(dst) + for name in os.listdir(src): + if name.startswith('.'): + continue + _src = os.path.join(src, name) + if os.path.islink(_src): + continue + _dst = os.path.join(dst, name) + if os.path.isdir(_src): + package.append(os.path.basename(_src)) + copytree(_src, _dst, os.path.exists(os.path.join(_src, '__init__.py'))) + package.pop() + elif not is_package or name.endswith('.py'): + shutil.copy2(_src, _dst) + dst = modulePath + if os.path.exists(os.path.join(module, '__init__.py')): + dst = os.path.join(modulePath, os.path.basename(module)) + package.append(os.path.basename(module)) + copytree(module, dst, True) + package.pop() + else: + copytree(module, dst, False) + else: + shutil.copy2(module.split('.')[0] + '.py', modulePath) + + line(out) + line(out, 0, 'class JavaError(Exception):') + line(out, 1, 'def getJavaException(self):') + line(out, 2, 'return self.args[0]') + line(out, 1, 'def __str__(self):') + line(out, 2, 'writer = StringWriter()') + line(out, 2, 'self.getJavaException().printStackTrace(PrintWriter(writer))') + line(out, 2, 'return "\\n".join((super(JavaError, self).__str__(), " Java stacktrace:", str(writer)))') + line(out) + line(out, 0, 'class InvalidArgsError(Exception):') + line(out, 1, 'pass') + line(out) + line(out, 0, '%s._set_exception_types(JavaError, InvalidArgsError)', + extname) + + if version: + line(out) + line(out, 0, 'VERSION = "%s"', version) + + line(out, 0, 'CLASSPATH = [%s]' %(', '.join(['os.path.join(__dir__, "%s")' %(os.path.basename(jar)) for jar in jars]))) + line(out, 0, 'CLASSPATH = os.pathsep.join(CLASSPATH)') + line(out, 0, '%s.CLASSPATH = CLASSPATH', extname) + line(out, 0, '%s._set_function_self(%s.initVM, %s)', + extname, extname, extname) + + line(out) + for import_ in imports: + line(out, 0, 'from %s._%s import *', import_.__name__, import_.__name__) + line(out, 0, 'from %s import *', extname) + out.close() + + includes = [os.path.join(output, extname), + os.path.join(jccPath, 'sources')] + for import_ in imports: + includes.append(os.path.join(import_.__dir__, 'include')) + + sources = ['JObject.cpp', 'JArray.cpp', 'functions.cpp', 'types.cpp'] + if not shared: + sources.append('jcc.cpp') + sources.append('JCCEnv.cpp') + for source in sources: + shutil.copy2(os.path.join(jccPath, 'sources', source), + os.path.join(output, extname)) + + if shared: + def copytree(src, dst): + _dst = os.path.join(modulePath, dst) + if not os.path.exists(_dst): + os.mkdir(_dst) + for 
name in os.listdir(src): + if name.startswith('.'): + continue + _src = os.path.join(src, name) + if os.path.islink(_src): + continue + _dst = os.path.join(dst, name) + if os.path.isdir(_src): + copytree(_src, _dst) + elif name.endswith('.h'): + shutil.copy2(_src, os.path.join(modulePath, _dst)) + package_data.append(_dst) + copytree(os.path.join(output, extname), 'include') + + sources = [] + for path, dirs, names in os.walk(os.path.join(output, extname)): + for name in names: + if name.endswith('.cpp'): + sources.append(os.path.join(path, name)) + + script_args = ['build_ext'] + + includes[0:0] = INCLUDES + compile_args = CFLAGS + link_args = LFLAGS + + defines=[('PYTHON', None), + ('JCC_VER', '"%s"' %(JCC_VER))] + if shared: + defines.append(('_jcc_shared', None)) + if generics: + defines.append(('_java_generics', None)) + + if compiler: + script_args.append('--compiler=%s' %(compiler)) + + if debug: + script_args.append('--debug') + compile_args += DEBUG_CFLAGS + elif sys.platform == 'win32': + pass + elif sys.platform == 'sunos5': + link_args.append('-Wl,-s') + else: + link_args.append('-Wl,-S') + + if install: + script_args.append('install') + if prefix: + script_args.append('--prefix=%s' % prefix) + if root: + script_args.append('--root=%s' % root) + if install_dir: + script_args.append('--install-lib=%s' % install_dir) + if home_dir: + script_args.append('--home=%s' % home_dir) + + if dist: + if wininst: + script_args.append('bdist_wininst') + elif with_setuptools: + script_args.append('bdist_egg') + else: + script_args.append('bdist') + + args = { + 'extra_compile_args': compile_args, + 'extra_link_args': link_args, + 'include_dirs': includes, + 'sources': sources, + 'define_macros': defines + } + + if shared: + shlibdir = os.path.dirname(os.path.dirname(_jcc.__file__)) + if sys.platform == 'darwin': # distutils no good with -R + machine = platform.machine() + if machine.startswith('iPod') or machine.startswith('iPhone'): + args['extra_link_args'] += ['-L' + shlibdir] + else: + args['extra_link_args'] += ['-Wl,-rpath', shlibdir] + args['library_dirs'] = [shlibdir] + args['libraries'] = ['jcc'] + elif sys.platform == 'linux2': # distutils no good with -R + args['extra_link_args'] += ['-Wl,-rpath', shlibdir] + args['library_dirs'] = [shlibdir] + args['libraries'] = ['jcc'] + args['extra_link_args'] += [ + getattr(import_, "_%s" %(import_.__name__)).__file__ + for import_ in imports + ] + elif sys.platform == 'win32': + _d = debug and '_d' or '' + libdir = os.path.join(modulePath, 'lib') + if not os.path.exists(libdir): + os.mkdir(libdir) + extlib = os.path.join('lib', "%s%s.lib" %(extname, _d)) + package_data.append(extlib) + args['extra_link_args'] += [ + os.path.join(shlibdir, 'jcc', 'jcc%s.lib' %(_d)), + ' '.join(IMPLIB_LFLAGS) %(os.path.join(modulePath, extlib)) + ] + args['libraries'] = [ + os.path.join(import_.__dir__, 'lib', + '_%s%s' %(import_.__name__, _d)) + for import_ in imports + ] + args['define_macros'] += [ + ("_dll_%s" %(import_.__name__), '__declspec(dllimport)') + for import_ in imports + ] + [("_dll_%s" %(moduleName), '__declspec(dllexport)')] + else: + raise NotImplementedError, "shared mode on %s" %(sys.platform) + + if arch and sys.platform == 'darwin': + from distutils import sysconfig + + config_vars = sysconfig.get_config_vars() + cflags = config_vars['CFLAGS'].split(' ') + count = len(cflags) + i = 0 + while i < count - 1: + if cflags[i] == '-arch' and cflags[i + 1] not in arch: + del cflags[i:i+2] + count -= 2 + else: + i += 1 + config_vars['CFLAGS'] = ' 
'.join(cflags) + + extensions = [Extension('.'.join([moduleName, extname]), **args)] + + args = { + 'name': moduleName, + 'packages': packages, + 'package_dir': {moduleName: modulePath}, + 'package_data': {moduleName: package_data}, + 'version': version, + 'ext_modules': extensions, + 'script_args': script_args + } + if with_setuptools: + args['zip_safe'] = False + + setup(**args) diff --git a/jcc/jcc/sources/JArray.cpp b/jcc/jcc/sources/JArray.cpp new file mode 100644 index 0000000..dd73bd1 --- /dev/null +++ b/jcc/jcc/sources/JArray.cpp @@ -0,0 +1,1209 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifdef PYTHON + +#include +#include +#include "structmember.h" + +#include "JArray.h" +#include "functions.h" +#include "java/lang/Class.h" + +using namespace java::lang; + + +template class _t_JArray : public t_JArray { +public: + static PyObject *format; +}; + +template +static PyObject *get(U *self, int n) +{ + return self->array.get(n); +} + +template +static PyObject *toSequence(U *self) +{ + return self->array.toSequence(); +} + +template +static PyObject *toSequence(U *self, int lo, int hi) +{ + return self->array.toSequence(lo, hi); +} + +template class _t_iterator { +public: + PyObject_HEAD + U *obj; + Py_ssize_t position; + + static void dealloc(_t_iterator *self) + { + Py_XDECREF(self->obj); + self->ob_type->tp_free((PyObject *) self); + } + + static PyObject *iternext(_t_iterator *self) + { + if (self->position < (Py_ssize_t) self->obj->array.length) + return get(self->obj, self->position++); + + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + + static PyTypeObject *JArrayIterator; +}; + +template +static int init(U *self, PyObject *args, PyObject *kwds) +{ + PyObject *obj; + + if (!PyArg_ParseTuple(args, "O", &obj)) + return -1; + + if (PySequence_Check(obj)) + { + self->array = JArray(obj); + if (PyErr_Occurred()) + return -1; + } + else if (PyGen_Check(obj)) + { + PyObject *tuple = + PyObject_CallFunctionObjArgs((PyObject *) &PyTuple_Type, obj, NULL); + + if (!tuple) + return -1; + + self->array = JArray(tuple); + Py_DECREF(tuple); + if (PyErr_Occurred()) + return -1; + } + else if (PyInt_Check(obj)) + { + int n = PyInt_AsLong(obj); + + if (n < 0) + { + PyErr_SetObject(PyExc_ValueError, obj); + return -1; + } + + self->array = JArray(n); + } + else + { + PyErr_SetObject(PyExc_TypeError, obj); + return -1; + } + + return 0; +} + +template +static void dealloc(U *self) +{ + self->array = JArray((jobject) NULL); + self->ob_type->tp_free((PyObject *) self); +} + +template +static PyObject *_format(U *self, PyObject *(*fn)(PyObject *)) +{ + if (self->array.this$) + { + PyObject *list = toSequence(self); + + if (list) + { + PyObject *result = (*fn)(list); + + Py_DECREF(list); + if (result) + { + PyObject *args = PyTuple_New(1); + + PyTuple_SET_ITEM(args, 0, result); + result = PyString_Format(U::format, args); + Py_DECREF(args); + + return result; + } + } + + return NULL; + } + + return PyString_FromString(""); +} + +template +static PyObject *repr(U *self) 
+{ + return _format(self, (PyObject *(*)(PyObject *)) PyObject_Repr); +} + +template +static PyObject *str(U *self) +{ + return _format(self, (PyObject *(*)(PyObject *)) PyObject_Str); +} + +template +static int _compare(U *self, PyObject *value, int i0, int i1, int op, int *cmp) +{ + PyObject *v0 = get(self, i0); + PyObject *v1 = PySequence_Fast_GET_ITEM(value, i1); /* borrowed */ + + if (!v0) + return -1; + + if (!v1) + { + Py_DECREF(v0); + return -1; + } + + *cmp = PyObject_RichCompareBool(v0, v1, op); + Py_DECREF(v0); + + if (*cmp < 0) + return -1; + + return 0; +} + +template +static PyObject *richcompare(U *self, PyObject *value, int op) +{ + PyObject *result = NULL; + int s0, s1; + + if (!PySequence_Check(value)) + { + Py_INCREF(Py_NotImplemented); + return Py_NotImplemented; + } + + value = PySequence_Fast(value, "not a sequence"); + if (!value) + return NULL; + + s0 = PySequence_Fast_GET_SIZE(value); + s1 = self->array.length; + + if (s1 < 0) + { + Py_DECREF(value); + return NULL; + } + + if (s0 != s1) + { + switch (op) { + case Py_EQ: result = Py_False; break; + case Py_NE: result = Py_True; break; + } + } + + if (!result) + { + int i0, i1, cmp = 1; + + for (i0 = 0, i1 = 0; i0 < s0 && i1 < s1 && cmp; i0++, i1++) { + if (_compare(self, value, i0, i1, Py_EQ, &cmp) < 0) + { + Py_DECREF(value); + return NULL; + } + } + + if (cmp) + { + switch (op) { + case Py_LT: cmp = s0 < s1; break; + case Py_LE: cmp = s0 <= s1; break; + case Py_EQ: cmp = s0 == s1; break; + case Py_NE: cmp = s0 != s1; break; + case Py_GT: cmp = s0 > s1; break; + case Py_GE: cmp = s0 >= s1; break; + default: cmp = 0; + } + + result = cmp ? Py_True : Py_False; + } + else if (op == Py_EQ) + result = Py_False; + else if (op == Py_NE) + result = Py_True; + else if (_compare(self, value, i0, i1, op, &cmp) < 0) + { + Py_DECREF(value); + return NULL; + } + else + result = cmp ? 
Py_True : Py_False; + } + Py_DECREF(value); + + Py_INCREF(result); + return result; +} + +template +static PyObject *iter(U *self) +{ + _t_iterator *it = + PyObject_New(_t_iterator, _t_iterator::JArrayIterator); + + if (it) + { + it->position = 0; + it->obj = self; Py_INCREF((PyObject *) self); + } + + return (PyObject *) it; +} + +template +static Py_ssize_t seq_length(U *self) +{ + if (self->array.this$) + return self->array.length; + + return 0; +} + +template +static PyObject *seq_get(U *self, Py_ssize_t n) +{ + return get(self, n); +} + +template +static int seq_contains(U *self, PyObject *value) +{ + return 0; +} + +template +static PyObject *seq_concat(U *self, PyObject *arg) +{ + PyObject *list = toSequence(self); + + if (list != NULL && + PyList_Type.tp_as_sequence->sq_inplace_concat(list, arg) < 0) + { + Py_DECREF(list); + return NULL; + } + + return list; +} + +template +static PyObject *seq_repeat(U *self, Py_ssize_t n) +{ + PyObject *list = toSequence(self); + + if (list != NULL && + PyList_Type.tp_as_sequence->sq_inplace_repeat(list, n) < 0) + { + Py_DECREF(list); + return NULL; + } + + return list; +} + +template +static PyObject *seq_getslice(U *self, Py_ssize_t lo, Py_ssize_t hi) +{ + return toSequence(self, lo, hi); +} + +template +static int seq_set(U *self, Py_ssize_t n, PyObject *value) +{ + return self->array.set(n, value); +} + +template +static int seq_setslice(U *self, Py_ssize_t lo, Py_ssize_t hi, PyObject *values) +{ + int length = self->array.length; + + if (values == NULL) + { + PyErr_SetString(PyExc_ValueError, "array size cannot change"); + return -1; + } + + if (lo < 0) lo = length + lo; + if (lo < 0) lo = 0; + else if (lo > length) lo = length; + if (hi < 0) hi = length + hi; + if (hi < 0) hi = 0; + else if (hi > length) hi = length; + if (lo > hi) lo = hi; + + PyObject *sequence = PySequence_Fast(values, "not a sequence"); + if (!sequence) + return -1; + + int size = PySequence_Fast_GET_SIZE(sequence); + if (size < 0) + goto error; + + if (size != hi - lo) + { + PyErr_SetString(PyExc_ValueError, "array size cannot change"); + goto error; + } + + for (int i = lo; i < hi; i++) { + PyObject *value = PySequence_Fast_GET_ITEM(sequence, i - lo); + + if (value == NULL) + goto error; + + if (self->array.set(i, value) < 0) + goto error; + } + + Py_DECREF(sequence); + return 0; + + error: + Py_DECREF(sequence); + return -1; +} + +template +static jclass initializeClass(void) +{ + return env->get_vm_env()->GetObjectClass(JArray(0).this$); +} + +template +static PyObject *cast_(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyObject *arg, *clsObj; + + if (!PyArg_ParseTuple(args, "O", &arg)) + return NULL; + + if (!PyObject_TypeCheck(arg, &PY_TYPE(Object))) + { + PyErr_SetObject(PyExc_TypeError, arg); + return NULL; + } + + Class argCls = ((t_Object *) arg)->object.getClass(); + + if (!argCls.isArray()) + { + PyErr_SetObject(PyExc_TypeError, arg); + return NULL; + } + + clsObj = PyObject_GetAttrString((PyObject *) type, "class_"); + if (!clsObj) + return NULL; + + Class arrayCls = ((t_Class *) clsObj)->object; + + if (!arrayCls.isAssignableFrom(argCls)) + { + PyErr_SetObject(PyExc_TypeError, arg); + return NULL; + } + + return JArray(((t_JObject *) arg)->object.this$).wrap(); +} + +template +static PyObject *instance_(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyObject *arg, *clsObj; + + if (!PyArg_ParseTuple(args, "O", &arg)) + return NULL; + + if (!PyObject_TypeCheck(arg, &PY_TYPE(Object))) + Py_RETURN_FALSE; + + Class argCls = ((t_Object *) 
arg)->object.getClass(); + + if (!argCls.isArray()) + Py_RETURN_FALSE; + + clsObj = PyObject_GetAttrString((PyObject *) type, "class_"); + if (!clsObj) + return NULL; + + Class arrayCls = ((t_Class *) clsObj)->object; + + if (!arrayCls.isAssignableFrom(argCls)) + Py_RETURN_FALSE; + + Py_RETURN_TRUE; +} + +template +static PyObject *assignable_(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + return instance_(type, args, kwds); +} + +template< typename T, typename U = _t_JArray > class jarray_type { +public: + PySequenceMethods seq_methods; + PyTypeObject type_object; + + class iterator_type { + public: + PyTypeObject type_object; + + void install(char *name, PyObject *module) + { + type_object.tp_name = name; + + if (PyType_Ready(&type_object) == 0) + { + Py_INCREF((PyObject *) &type_object); + PyModule_AddObject(module, name, (PyObject *) &type_object); + } + + _t_iterator::JArrayIterator = &type_object; + } + + iterator_type() + { + memset(&type_object, 0, sizeof(type_object)); + + type_object.ob_refcnt = 1; + type_object.ob_type = NULL; + type_object.tp_basicsize = sizeof(_t_iterator); + type_object.tp_dealloc = (destructor) _t_iterator::dealloc; + type_object.tp_flags = Py_TPFLAGS_DEFAULT; + type_object.tp_doc = "JArrayIterator wrapper type"; + type_object.tp_iter = (getiterfunc) PyObject_SelfIter; + type_object.tp_iternext = (iternextfunc) _t_iterator::iternext; + } + }; + + iterator_type iterator_type_object; + + void install(char *name, char *type_name, char *iterator_name, + PyObject *module) + { + type_object.tp_name = name; + + if (PyType_Ready(&type_object) == 0) + { + Py_INCREF((PyObject *) &type_object); + PyDict_SetItemString(type_object.tp_dict, "class_", + make_descriptor(initializeClass)); + + PyModule_AddObject(module, name, (PyObject *) &type_object); + } + + U::format = PyString_FromFormat("JArray<%s>%%s", type_name); + iterator_type_object.install(iterator_name, module); + } + + static PyObject *_new(PyTypeObject *type, PyObject *args, PyObject *kwds) + { + U *self = (U *) type->tp_alloc(type, 0); + + if (self) + self->array = JArray((jobject) NULL); + + return (PyObject *) self; + } + + jarray_type() + { + memset(&seq_methods, 0, sizeof(seq_methods)); + memset(&type_object, 0, sizeof(type_object)); + + static PyMethodDef methods[] = { + { "cast_", + (PyCFunction) (PyObject *(*)(PyTypeObject *, + PyObject *, PyObject *)) + cast_, + METH_VARARGS | METH_CLASS, NULL }, + { "instance_", + (PyCFunction) (PyObject *(*)(PyTypeObject *, + PyObject *, PyObject *)) + instance_, + METH_VARARGS | METH_CLASS, NULL }, + { "assignable_", + (PyCFunction) (PyObject *(*)(PyTypeObject *, + PyObject *, PyObject *)) + assignable_, + METH_VARARGS | METH_CLASS, NULL }, + { NULL, NULL, 0, NULL } + }; + + seq_methods.sq_length = + (lenfunc) (Py_ssize_t (*)(U *)) seq_length; + seq_methods.sq_concat = + (binaryfunc) (PyObject *(*)(U *, PyObject *)) seq_concat; + seq_methods.sq_repeat = + (ssizeargfunc) (PyObject *(*)(U *, Py_ssize_t)) seq_repeat; + seq_methods.sq_item = + (ssizeargfunc) (PyObject *(*)(U *, Py_ssize_t)) seq_get; + seq_methods.sq_slice = + (ssizessizeargfunc) (PyObject *(*)(U *, Py_ssize_t, Py_ssize_t)) + seq_getslice; + seq_methods.sq_ass_item = + (ssizeobjargproc) (int (*)(U *, Py_ssize_t, PyObject *)) seq_set; + seq_methods.sq_ass_slice = + (ssizessizeobjargproc) (int (*)(U *, Py_ssize_t, Py_ssize_t, + PyObject *)) seq_setslice; + seq_methods.sq_contains = + (objobjproc) (int (*)(U *, PyObject *)) seq_contains; + seq_methods.sq_inplace_concat = NULL; + 
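+        // These slot assignments hook CPython's sequence protocol into the
+        // JArray wrapper: sq_length reports array.length, sq_item and
+        // sq_slice read elements through get()/toSequence(lo, hi), and
+        // sq_ass_item / sq_ass_slice write back through JArray<T>::set().
+        // Illustrative Python behaviour, assuming a = JArray('int')(3):
+        //   len(a)     -> 3
+        //   a[0] = 42  -> sets the underlying Java int[] element
+        //   a[0:2]     -> a Python list copy of the slice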
seq_methods.sq_inplace_repeat = NULL; + + type_object.ob_refcnt = 1; + type_object.tp_basicsize = sizeof(U); + type_object.tp_dealloc = (destructor) (void (*)(U *)) dealloc; + type_object.tp_repr = (reprfunc) (PyObject *(*)(U *)) repr; + type_object.tp_as_sequence = &seq_methods; + type_object.tp_str = (reprfunc) (PyObject *(*)(U *)) str; + type_object.tp_flags = Py_TPFLAGS_DEFAULT; + type_object.tp_doc = "JArray wrapper type"; + type_object.tp_richcompare = + (richcmpfunc) (PyObject *(*)(U *, PyObject *, int)) richcompare; + type_object.tp_iter = (getiterfunc) (PyObject *(*)(U *)) iter; + type_object.tp_methods = methods; + type_object.tp_base = &PY_TYPE(Object); + type_object.tp_init = + (initproc) (int (*)(U *, PyObject *, PyObject *)) init; + type_object.tp_new = (newfunc) _new; + } +}; + +template class _t_jobjectarray : public _t_JArray { +public: + PyObject *(*wrapfn)(const T&); +}; + +template<> PyObject *get(_t_jobjectarray *self, int n) +{ + return self->array.get(n, self->wrapfn); +} + +template<> PyObject *toSequence(_t_jobjectarray *self) +{ + return self->array.toSequence(self->wrapfn); +} + +template<> PyObject *toSequence(_t_jobjectarray *self, int lo, int hi) +{ + return self->array.toSequence(lo, hi, self->wrapfn); +} + +template<> int init< jobject,_t_jobjectarray >(_t_jobjectarray *self, PyObject *args, PyObject *kwds) +{ + PyObject *obj, *clsObj = NULL; + PyObject *(*wrapfn)(const jobject &) = NULL; + jclass cls; + + if (!PyArg_ParseTuple(args, "O|O", &obj, &clsObj)) + return -1; + + if (clsObj == NULL) + cls = env->findClass("java/lang/Object"); + else if (PyObject_TypeCheck(clsObj, &PY_TYPE(Class))) + cls = (jclass) ((t_Class *) clsObj)->object.this$; + else if (PyType_Check(clsObj)) + { + if (PyType_IsSubtype((PyTypeObject *) clsObj, &PY_TYPE(JObject))) + { + PyObject *cobj = PyObject_GetAttrString(clsObj, "wrapfn_"); + + if (cobj == NULL) + PyErr_Clear(); + else + { + wrapfn = (PyObject *(*)(const jobject &)) + PyCObject_AsVoidPtr(cobj); + Py_DECREF(cobj); + } + + clsObj = PyObject_GetAttrString(clsObj, "class_"); + if (clsObj == NULL) + return -1; + + cls = (jclass) ((t_Class *) clsObj)->object.this$; + Py_DECREF(clsObj); + } + else + { + PyErr_SetObject(PyExc_ValueError, clsObj); + return -1; + } + } + else + { + PyErr_SetObject(PyExc_TypeError, clsObj); + return -1; + } + + if (PySequence_Check(obj)) + { + self->array = JArray(cls, obj); + if (PyErr_Occurred()) + return -1; + } + else if (PyGen_Check(obj)) + { + PyObject *tuple = + PyObject_CallFunctionObjArgs((PyObject *) &PyTuple_Type, obj, NULL); + + if (!tuple) + return -1; + + self->array = JArray(cls, tuple); + Py_DECREF(tuple); + if (PyErr_Occurred()) + return -1; + } + else if (PyInt_Check(obj)) + { + int n = PyInt_AsLong(obj); + + if (n < 0) + { + PyErr_SetObject(PyExc_ValueError, obj); + return -1; + } + + self->array = JArray(cls, n); + } + else + { + PyErr_SetObject(PyExc_TypeError, obj); + return -1; + } + + self->wrapfn = wrapfn; + + return 0; +} + +template<> jclass initializeClass(void) +{ + jclass cls = env->findClass("java/lang/Object"); + return env->get_vm_env()->GetObjectClass(JArray(cls, 0).this$); +} + +template<> PyObject *cast_(PyTypeObject *type, + PyObject *args, PyObject *kwds) +{ + PyObject *arg, *clsArg = NULL; + PyObject *(*wrapfn)(const jobject&) = NULL; + jclass elementCls; + + if (!PyArg_ParseTuple(args, "O|O", &arg, &clsArg)) + return NULL; + + if (!PyObject_TypeCheck(arg, &PY_TYPE(Object))) + { + PyErr_SetObject(PyExc_TypeError, arg); + return NULL; + } + + Class argCls = 
((t_Object *) arg)->object.getClass(); + + if (!argCls.isArray()) + { + PyErr_SetObject(PyExc_TypeError, arg); + return NULL; + } + + if (clsArg != NULL) + { + if (!PyType_Check(clsArg)) + { + PyErr_SetObject(PyExc_TypeError, clsArg); + return NULL; + } + else if (!PyType_IsSubtype((PyTypeObject *) clsArg, &PY_TYPE(JObject))) + { + PyErr_SetObject(PyExc_ValueError, clsArg); + return NULL; + } + + PyObject *cobj = PyObject_GetAttrString(clsArg, "wrapfn_"); + + if (cobj == NULL) + PyErr_Clear(); + else + { + wrapfn = (PyObject *(*)(const jobject &)) PyCObject_AsVoidPtr(cobj); + Py_DECREF(cobj); + } + + clsArg = PyObject_GetAttrString(clsArg, "class_"); + if (clsArg == NULL) + return NULL; + + elementCls = (jclass) ((t_Class *) clsArg)->object.this$; + Py_DECREF(clsArg); + } + else + elementCls = env->findClass("java/lang/Object"); + + JNIEnv *vm_env = env->get_vm_env(); + jobjectArray array = vm_env->NewObjectArray(0, elementCls, NULL); + Class arrayCls(vm_env->GetObjectClass((jobject) array)); + + if (!arrayCls.isAssignableFrom(argCls)) + { + PyErr_SetObject(PyExc_TypeError, arg); + return NULL; + } + + return JArray(((t_JObject *) arg)->object.this$).wrap(wrapfn); +} + +template<> PyObject *instance_(PyTypeObject *type, + PyObject *args, PyObject *kwds) +{ + PyObject *arg, *clsArg = NULL; + jclass elementCls; + + if (!PyArg_ParseTuple(args, "O|O", &arg, &clsArg)) + return NULL; + + if (!PyObject_TypeCheck(arg, &PY_TYPE(Object))) + Py_RETURN_FALSE; + + Class argCls = ((t_Object *) arg)->object.getClass(); + + if (!argCls.isArray()) + Py_RETURN_FALSE; + + if (clsArg != NULL) + { + if (!PyType_Check(clsArg)) + { + PyErr_SetObject(PyExc_TypeError, clsArg); + return NULL; + } + else if (!PyType_IsSubtype((PyTypeObject *) clsArg, &PY_TYPE(JObject))) + { + PyErr_SetObject(PyExc_ValueError, clsArg); + return NULL; + } + + clsArg = PyObject_GetAttrString(clsArg, "class_"); + if (clsArg == NULL) + return NULL; + + elementCls = (jclass) ((t_Class *) clsArg)->object.this$; + Py_DECREF(clsArg); + } + else + elementCls = env->findClass("java/lang/Object"); + + JNIEnv *vm_env = env->get_vm_env(); + jobjectArray array = vm_env->NewObjectArray(0, elementCls, NULL); + Class arrayCls(vm_env->GetObjectClass((jobject) array)); + + if (!arrayCls.isAssignableFrom(argCls)) + Py_RETURN_FALSE; + + Py_RETURN_TRUE; +} + +template<> PyObject *assignable_(PyTypeObject *type, + PyObject *args, PyObject *kwds) +{ + PyObject *arg, *clsArg = NULL; + jclass elementCls; + + if (!PyArg_ParseTuple(args, "O|O", &arg, &clsArg)) + return NULL; + + if (!PyObject_TypeCheck(arg, &PY_TYPE(Object))) + Py_RETURN_FALSE; + + Class argCls = ((t_Object *) arg)->object.getClass(); + + if (!argCls.isArray()) + Py_RETURN_FALSE; + + if (clsArg != NULL) + { + if (!PyType_Check(clsArg)) + { + PyErr_SetObject(PyExc_TypeError, clsArg); + return NULL; + } + else if (!PyType_IsSubtype((PyTypeObject *) clsArg, &PY_TYPE(JObject))) + { + PyErr_SetObject(PyExc_ValueError, clsArg); + return NULL; + } + + clsArg = PyObject_GetAttrString(clsArg, "class_"); + if (clsArg == NULL) + return NULL; + + elementCls = (jclass) ((t_Class *) clsArg)->object.this$; + Py_DECREF(clsArg); + } + else + elementCls = env->findClass("java/lang/Object"); + + JNIEnv *vm_env = env->get_vm_env(); + jobjectArray array = vm_env->NewObjectArray(0, elementCls, NULL); + Class arrayCls(vm_env->GetObjectClass((jobject) array)); + + if (!argCls.isAssignableFrom(arrayCls)) + Py_RETURN_FALSE; + + Py_RETURN_TRUE; +} + + +template PyTypeObject *_t_iterator::JArrayIterator; +template 
PyObject *_t_JArray::format; + +static jarray_type< jobject, _t_jobjectarray > jarray_jobject; + +static jarray_type jarray_jstring; +static jarray_type jarray_jboolean; +static jarray_type jarray_jbyte; +static jarray_type jarray_jchar; +static jarray_type jarray_jdouble; +static jarray_type jarray_jfloat; +static jarray_type jarray_jint; +static jarray_type jarray_jlong; +static jarray_type jarray_jshort; + + +PyObject *JArray::wrap(PyObject *(*wrapfn)(const jobject&)) +{ + if (this$ != NULL) + { + _t_jobjectarray *obj = + PyObject_New(_t_jobjectarray, &jarray_jobject.type_object); + + memset(&(obj->array), 0, sizeof(JArray)); + obj->array = *this; + obj->wrapfn = wrapfn; + + return (PyObject *) obj; + } + + Py_RETURN_NONE; +} + +PyObject *JArray::wrap() const +{ + if (this$ != NULL) + { + _t_JArray *obj = + PyObject_New(_t_JArray, &jarray_jstring.type_object); + + memset(&(obj->array), 0, sizeof(JArray)); + obj->array = *this; + + return (PyObject *) obj; + } + + Py_RETURN_NONE; +} + +PyObject *JArray::wrap() const +{ + if (this$ != NULL) + { + _t_JArray *obj = + PyObject_New(_t_JArray, &jarray_jboolean.type_object); + + memset(&(obj->array), 0, sizeof(JArray)); + obj->array = *this; + + return (PyObject *) obj; + } + + Py_RETURN_NONE; +} + +PyObject *JArray::wrap() const +{ + if (this$ != NULL) + { + _t_JArray *obj = + PyObject_New(_t_JArray, &jarray_jbyte.type_object); + + memset(&(obj->array), 0, sizeof(JArray)); + obj->array = *this; + + return (PyObject *) obj; + } + + Py_RETURN_NONE; +} + +PyObject *JArray::wrap() const +{ + if (this$ != NULL) + { + _t_JArray *obj = + PyObject_New(_t_JArray, &jarray_jchar.type_object); + + memset(&(obj->array), 0, sizeof(JArray)); + obj->array = *this; + + return (PyObject *) obj; + } + + Py_RETURN_NONE; +} + +PyObject *JArray::wrap() const +{ + if (this$ != NULL) + { + _t_JArray *obj = + PyObject_New(_t_JArray, &jarray_jdouble.type_object); + + memset(&(obj->array), 0, sizeof(JArray)); + obj->array = *this; + + return (PyObject *) obj; + } + + Py_RETURN_NONE; +} + +PyObject *JArray::wrap() const +{ + if (this$ != NULL) + { + _t_JArray *obj = + PyObject_New(_t_JArray, &jarray_jfloat.type_object); + + memset(&(obj->array), 0, sizeof(JArray)); + obj->array = *this; + + return (PyObject *) obj; + } + + Py_RETURN_NONE; +} + +PyObject *JArray::wrap() const +{ + if (this$ != NULL) + { + _t_JArray *obj = + PyObject_New(_t_JArray, &jarray_jint.type_object); + + memset(&(obj->array), 0, sizeof(JArray)); + obj->array = *this; + + return (PyObject *) obj; + } + + Py_RETURN_NONE; +} + +PyObject *JArray::wrap() const +{ + if (this$ != NULL) + { + _t_JArray *obj = + PyObject_New(_t_JArray, &jarray_jlong.type_object); + + memset(&(obj->array), 0, sizeof(JArray)); + obj->array = *this; + + return (PyObject *) obj; + } + + Py_RETURN_NONE; +} + +PyObject *JArray::wrap() const +{ + if (this$ != NULL) + { + _t_JArray *obj = + PyObject_New(_t_JArray, &jarray_jshort.type_object); + + memset(&(obj->array), 0, sizeof(JArray)); + obj->array = *this; + + return (PyObject *) obj; + } + + Py_RETURN_NONE; +} + +PyObject *JArray_Type(PyObject *self, PyObject *arg) +{ + PyObject *type_name = NULL, *type; + char const *name = NULL; + + if (PyType_Check(arg)) + { + type_name = PyObject_GetAttrString(arg, "__name__"); + if (!type_name) + return NULL; + } + else if (PyString_Check(arg)) + { + type_name = arg; + Py_INCREF(type_name); + } + else if (PyFloat_Check(arg)) + { + type_name = NULL; + name = "double"; + } + else + { + PyObject *arg_type = (PyObject *) arg->ob_type; + + 
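+        // JArray_Type, the Python-visible JArray entry point, resolves its
+        // argument to one of the static jarray_type singletons defined above:
+        // a type or string selects by name, a float instance is mapped to
+        // "double", and anything else falls through to the name of the
+        // argument's own type here. Illustrative usage (a hedged sketch):
+        //   JArray('int')(5)         -> a new java int[5]
+        //   JArray('string')(['a'])  -> a String[] built from the sequence
+        //   JArray('double')         -> the double[] wrapper type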
type_name = PyObject_GetAttrString(arg_type, "__name__"); + if (!type_name) + return NULL; + } + + if (type_name != NULL) + { + name = PyString_AsString(type_name); + Py_DECREF(type_name); + + if (!name) + return NULL; + } + + if (!strcmp(name, "object")) + type = (PyObject *) &jarray_jobject.type_object; + else if (!strcmp(name, "string")) + type = (PyObject *) &jarray_jstring.type_object; + else if (!strcmp(name, "bool")) + type = (PyObject *) &jarray_jboolean.type_object; + else if (!strcmp(name, "byte")) + type = (PyObject *) &jarray_jbyte.type_object; + else if (!strcmp(name, "char")) + type = (PyObject *) &jarray_jchar.type_object; + else if (!strcmp(name, "double")) + type = (PyObject *) &jarray_jdouble.type_object; + else if (!strcmp(name, "float")) + type = (PyObject *) &jarray_jfloat.type_object; + else if (!strcmp(name, "int")) + type = (PyObject *) &jarray_jint.type_object; + else if (!strcmp(name, "long")) + type = (PyObject *) &jarray_jlong.type_object; + else if (!strcmp(name, "short")) + type = (PyObject *) &jarray_jshort.type_object; + else + { + PyErr_SetObject(PyExc_ValueError, arg); + return NULL; + } + + Py_INCREF(type); + return type; +} + +static PyObject *t_JArray_jbyte__get_string_(t_JArray *self, void *data) +{ + return self->array.to_string_(); +} + +static PyGetSetDef t_JArray_jbyte__fields[] = { + { "string_", (getter) t_JArray_jbyte__get_string_, NULL, "", NULL }, + { NULL, NULL, NULL, NULL, NULL } +}; + + +PyTypeObject *PY_TYPE(JArrayObject); +PyTypeObject *PY_TYPE(JArrayString); +PyTypeObject *PY_TYPE(JArrayBool); +PyTypeObject *PY_TYPE(JArrayByte); +PyTypeObject *PY_TYPE(JArrayChar); +PyTypeObject *PY_TYPE(JArrayDouble); +PyTypeObject *PY_TYPE(JArrayFloat); +PyTypeObject *PY_TYPE(JArrayInt); +PyTypeObject *PY_TYPE(JArrayLong); +PyTypeObject *PY_TYPE(JArrayShort); + + +void _install_jarray(PyObject *module) +{ + jarray_jobject.install("JArray_object", "object", + "__JArray_object_iterator", module); + PY_TYPE(JArrayObject) = &jarray_jobject.type_object; + + jarray_jstring.install("JArray_string", "string", + "__JArray_string_iterator", module); + PY_TYPE(JArrayString) = &jarray_jstring.type_object; + + jarray_jboolean.install("JArray_bool", "bool", + "__JArray_bool_iterator", module); + PY_TYPE(JArrayBool) = &jarray_jboolean.type_object; + + jarray_jbyte.type_object.tp_getset = t_JArray_jbyte__fields; + jarray_jbyte.install("JArray_byte", "byte", + "__JArray_byte_iterator", module); + PY_TYPE(JArrayByte) = &jarray_jbyte.type_object; + + jarray_jchar.install("JArray_char", "char", + "__JArray_char_iterator", module); + PY_TYPE(JArrayChar) = &jarray_jchar.type_object; + + jarray_jdouble.install("JArray_double", "double", + "__JArray_double_iterator", module); + PY_TYPE(JArrayDouble) = &jarray_jdouble.type_object; + + jarray_jfloat.install("JArray_float", "float", + "__JArray_float_iterator", module); + PY_TYPE(JArrayFloat) = &jarray_jfloat.type_object; + + jarray_jint.install("JArray_int", "int", + "__JArray_int_iterator", module); + PY_TYPE(JArrayInt) = &jarray_jint.type_object; + + jarray_jlong.install("JArray_long", "long", + "__JArray_long_iterator", module); + PY_TYPE(JArrayLong) = &jarray_jlong.type_object; + + jarray_jshort.install("JArray_short", "short", + "__JArray_short_iterator", module); + PY_TYPE(JArrayShort) = &jarray_jshort.type_object; +} + +#endif /* PYTHON */ diff --git a/jcc/jcc/sources/JArray.h b/jcc/jcc/sources/JArray.h new file mode 100644 index 0000000..bc8b2c5 --- /dev/null +++ b/jcc/jcc/sources/JArray.h @@ -0,0 +1,1598 @@ +/* + * 
Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _JArray_H +#define _JArray_H + +#ifdef PYTHON +#include +#include "macros.h" + +extern jobjectArray fromPySequence(jclass cls, PyObject *sequence); +extern PyObject *PyErr_SetJavaError(); + +extern PyTypeObject *PY_TYPE(JArrayObject); +extern PyTypeObject *PY_TYPE(JArrayString); +extern PyTypeObject *PY_TYPE(JArrayBool); +extern PyTypeObject *PY_TYPE(JArrayByte); +extern PyTypeObject *PY_TYPE(JArrayChar); +extern PyTypeObject *PY_TYPE(JArrayDouble); +extern PyTypeObject *PY_TYPE(JArrayFloat); +extern PyTypeObject *PY_TYPE(JArrayInt); +extern PyTypeObject *PY_TYPE(JArrayLong); +extern PyTypeObject *PY_TYPE(JArrayShort); + +#endif + +#include "JCCEnv.h" +#include "java/lang/Object.h" + + +template class JArray : public java::lang::Object { +public: + int length; + + explicit JArray(jobject obj) : java::lang::Object(obj) { + length = this$ ? env->getArrayLength((jobjectArray) this$) : 0; + } + JArray(const JArray& obj) : java::lang::Object(obj) { + length = obj.length; + } + +#ifdef PYTHON + JArray(PyObject *sequence) : java::lang::Object(fromPySequence(T::initializeClass(), sequence)) { + length = this$ ? env->getArrayLength((jobjectArray) this$) : 0; + } + + JArray(jclass cls, PyObject *sequence) : java::lang::Object(fromPySequence(cls, sequence)) { + length = this$ ? env->getArrayLength((jobjectArray) this$) : 0; + } + + PyObject *toSequence(PyObject *(*wrapfn)(const T&)) + { + if (this$ == NULL) + Py_RETURN_NONE; + + PyObject *list = PyList_New(length); + + for (int i = 0; i < length; i++) + PyList_SET_ITEM(list, i, (*wrapfn)((*this)[i])); + + return list; + } + + PyObject *get(int n, PyObject *(*wrapfn)(const T&)) + { + if (this$ != NULL) + { + if (n < 0) + n = length + n; + + if (n >= 0 && n < length) + return (*wrapfn)((*this)[n]); + } + + PyErr_SetString(PyExc_IndexError, "index out of range"); + return NULL; + } +#endif + + T operator[](int n) { + return T(env->getObjectArrayElement((jobjectArray) this$, n)); + } +}; + +template<> class JArray : public java::lang::Object { + public: + int length; + + JArray(jclass cls, int n) : java::lang::Object(env->get_vm_env()->NewObjectArray(n, cls, NULL)) { + length = env->getArrayLength((jobjectArray) this$); + } + + JArray(jobject obj) : java::lang::Object(obj) { + length = this$ ? env->getArrayLength((jobjectArray) this$) : 0; + } + + JArray(const JArray& obj) : java::lang::Object(obj) { + length = obj.length; + } + +#ifdef PYTHON + JArray(jclass cls, PyObject *sequence) : java::lang::Object(fromPySequence(cls, sequence)) { + length = this$ ? 
env->getArrayLength((jobjectArray) this$) : 0; + } + + PyObject *toSequence(PyObject *(*wrapfn)(const jobject&)) + { + return toSequence(0, length, wrapfn); + } + + PyObject *toSequence(int lo, int hi, PyObject *(*wrapfn)(const jobject&)) + { + if (this$ == NULL) + Py_RETURN_NONE; + + if (lo < 0) lo = length + lo; + if (lo < 0) lo = 0; + else if (lo > length) lo = length; + if (hi < 0) hi = length + hi; + if (hi < 0) hi = 0; + else if (hi > length) hi = length; + if (lo > hi) lo = hi; + + PyObject *list = PyList_New(hi - lo); + + if (!wrapfn) + wrapfn = java::lang::t_Object::wrap_jobject; + + for (int i = lo; i < hi; i++) { + jobject jobj = env->getObjectArrayElement((jobjectArray) this$, i); + PyObject *obj = (*wrapfn)(jobj); + + PyList_SET_ITEM(list, i - lo, obj); + } + + return list; + } + + PyObject *get(int n, PyObject *(*wrapfn)(const jobject&)) + { + if (this$ != NULL) + { + if (n < 0) + n = length + n; + + if (n >= 0 && n < length) + { + if (!wrapfn) + wrapfn = java::lang::t_Object::wrap_jobject; + + jobject jobj = + env->getObjectArrayElement((jobjectArray) this$, n); + + return (*wrapfn)(jobj); + } + } + + PyErr_SetString(PyExc_IndexError, "index out of range"); + return NULL; + } + + int set(int n, PyObject *obj) + { + if (this$ != NULL) + { + if (n < 0) + n = length + n; + + if (n >= 0 && n < length) + { + if (!PyObject_TypeCheck(obj, &PY_TYPE(JObject))) + { + PyErr_SetObject(PyExc_TypeError, obj); + return -1; + } + + jobject jobj = ((t_JObject *) obj)->object.this$; + + try { + env->setObjectArrayElement((jobjectArray) this$, n, jobj); + } catch (int e) { + switch (e) { + case _EXC_JAVA: + PyErr_SetJavaError(); + return -1; + default: + throw; + } + } + + return 0; + } + } + + PyErr_SetString(PyExc_IndexError, "index out of range"); + return -1; + } + + PyObject *wrap(PyObject *(*wrapfn)(const jobject&)); +#endif + + jobject operator[](int n) { + return (jobject) env->getObjectArrayElement((jobjectArray) this$, n); + } +}; + +template<> class JArray : public java::lang::Object { + public: + int length; + + JArray(jobject obj) : java::lang::Object(obj) { + length = this$ ? 
env->getArrayLength((jobjectArray) this$) : 0; + } + + JArray(const JArray& obj) : java::lang::Object(obj) { + length = obj.length; + } + + JArray(int n) : java::lang::Object(env->get_vm_env()->NewObjectArray(n, env->findClass("java/lang/String"), NULL)) { + length = env->getArrayLength((jobjectArray) this$); + } + +#ifdef PYTHON + JArray(PyObject *sequence) : java::lang::Object(env->get_vm_env()->NewObjectArray(PySequence_Length(sequence), env->findClass("java/lang/String"), NULL)) { + length = env->getArrayLength((jobjectArray) this$); + + for (int i = 0; i < length; i++) { + PyObject *obj = PySequence_GetItem(sequence, i); + + if (obj == NULL) + break; + + jstring str = env->fromPyString(obj); + + Py_DECREF(obj); + if (PyErr_Occurred()) + break; + + env->setObjectArrayElement((jobjectArray) this$, i, str); + env->get_vm_env()->DeleteLocalRef(str); + } + } + + PyObject *toSequence() + { + return toSequence(0, length); + } + + PyObject *toSequence(int lo, int hi) + { + if (this$ == NULL) + Py_RETURN_NONE; + + if (lo < 0) lo = length + lo; + if (lo < 0) lo = 0; + else if (lo > length) lo = length; + if (hi < 0) hi = length + hi; + if (hi < 0) hi = 0; + else if (hi > length) hi = length; + if (lo > hi) lo = hi; + + PyObject *list = PyList_New(hi - lo); + + for (int i = lo; i < hi; i++) { + jstring str = (jstring) + env->getObjectArrayElement((jobjectArray) this$, i); + PyObject *obj = env->fromJString(str, 1); + + PyList_SET_ITEM(list, i - lo, obj); + } + + return list; + } + + PyObject *get(int n) + { + if (this$ != NULL) + { + if (n < 0) + n = length + n; + + if (n >= 0 && n < length) + { + jstring str = (jstring) + env->getObjectArrayElement((jobjectArray) this$, n); + PyObject *obj = env->fromJString(str, 1); + + return obj; + } + } + + PyErr_SetString(PyExc_IndexError, "index out of range"); + return NULL; + } + + int set(int n, PyObject *obj) + { + if (this$ != NULL) + { + if (n < 0) + n = length + n; + + if (n >= 0 && n < length) + { + jstring str = env->fromPyString(obj); + + if (PyErr_Occurred()) + return -1; + + env->setObjectArrayElement((jobjectArray) this$, n, str); + return 0; + } + } + + PyErr_SetString(PyExc_IndexError, "index out of range"); + return -1; + } + + PyObject *wrap() const; +#endif + + jstring operator[](int n) { + return (jstring) env->getObjectArrayElement((jobjectArray) this$, n); + } +}; + +template<> class JArray : public java::lang::Object { + public: + int length; + + class arrayElements { + private: + jboolean isCopy; + jbooleanArray array; + jboolean *elts; + public: + arrayElements(jbooleanArray array) { + this->array = array; + elts = env->get_vm_env()->GetBooleanArrayElements(array, &isCopy); + } + virtual ~arrayElements() { + env->get_vm_env()->ReleaseBooleanArrayElements(array, elts, 0); + } + operator jboolean *() { + return elts; + } + }; + + arrayElements elements() { + return arrayElements((jbooleanArray) this$); + } + + JArray(jobject obj) : java::lang::Object(obj) { + length = this$ ? 
env->getArrayLength((jarray) this$) : 0; + } + + JArray(const JArray& obj) : java::lang::Object(obj) { + length = obj.length; + } + + JArray(int n) : java::lang::Object(env->get_vm_env()->NewBooleanArray(n)) { + length = env->getArrayLength((jarray) this$); + } + +#ifdef PYTHON + JArray(PyObject *sequence) : java::lang::Object(env->get_vm_env()->NewBooleanArray(PySequence_Length(sequence))) { + length = env->getArrayLength((jarray) this$); + arrayElements elts = elements(); + jboolean *buf = (jboolean *) elts; + + for (int i = 0; i < length; i++) { + PyObject *obj = PySequence_GetItem(sequence, i); + + if (!obj) + break; + + if (obj == Py_True || obj == Py_False) + { + buf[i] = (jboolean) (obj == Py_True); + Py_DECREF(obj); + } + else + { + PyErr_SetObject(PyExc_TypeError, obj); + Py_DECREF(obj); + break; + } + } + } + + PyObject *toSequence() + { + return toSequence(0, length); + } + + PyObject *toSequence(int lo, int hi) + { + if (this$ == NULL) + Py_RETURN_NONE; + + if (lo < 0) lo = length + lo; + if (lo < 0) lo = 0; + else if (lo > length) lo = length; + if (hi < 0) hi = length + hi; + if (hi < 0) hi = 0; + else if (hi > length) hi = length; + if (lo > hi) lo = hi; + + PyObject *list = PyList_New(hi - lo); + arrayElements elts = elements(); + jboolean *buf = (jboolean *) elts; + + for (int i = lo; i < hi; i++) { + jboolean value = buf[i]; + PyObject *obj = value ? Py_True : Py_False; + + Py_INCREF(obj); + PyList_SET_ITEM(list, i - lo, obj); + } + + return list; + } + + PyObject *get(int n) + { + if (this$ != NULL) + { + if (n < 0) + n = length + n; + + if (n >= 0 && n < length) + Py_RETURN_BOOL(elements()[n]); + } + + PyErr_SetString(PyExc_IndexError, "index out of range"); + return NULL; + } + + int set(int n, PyObject *obj) + { + if (this$ != NULL) + { + if (n < 0) + n = length + n; + + if (n >= 0 && n < length) + { + elements()[n] = (jboolean) PyObject_IsTrue(obj); + return 0; + } + } + + PyErr_SetString(PyExc_IndexError, "index out of range"); + return -1; + } + + PyObject *wrap() const; +#endif + + jboolean operator[](int n) { + JNIEnv *vm_env = env->get_vm_env(); + jboolean isCopy = 0; + jboolean *elts = (jboolean *) + vm_env->GetPrimitiveArrayCritical((jarray) this$, &isCopy); + jboolean value = elts[n]; + + vm_env->ReleasePrimitiveArrayCritical((jarray) this$, elts, 0); + + return value; + } +}; + +template<> class JArray : public java::lang::Object { + public: + int length; + + class arrayElements { + private: + jboolean isCopy; + jbyteArray array; + jbyte *elts; + public: + arrayElements(jbyteArray array) { + this->array = array; + elts = env->get_vm_env()->GetByteArrayElements(array, &isCopy); + } + virtual ~arrayElements() { + env->get_vm_env()->ReleaseByteArrayElements(array, elts, 0); + } + operator jbyte *() { + return elts; + } + }; + + arrayElements elements() { + return arrayElements((jbyteArray) this$); + } + + JArray(jobject obj) : java::lang::Object(obj) { + length = this$ ? 
env->getArrayLength((jarray) this$) : 0; + } + + JArray(const JArray& obj) : java::lang::Object(obj) { + length = obj.length; + } + + JArray(int n) : java::lang::Object(env->get_vm_env()->NewByteArray(n)) { + length = env->getArrayLength((jarray) this$); + } + +#ifdef PYTHON + JArray(PyObject *sequence) : java::lang::Object(env->get_vm_env()->NewByteArray(PySequence_Length(sequence))) { + length = env->getArrayLength((jarray) this$); + arrayElements elts = elements(); + jbyte *buf = (jbyte *) elts; + + if (PyString_Check(sequence)) + memcpy(buf, PyString_AS_STRING(sequence), length); + else + for (int i = 0; i < length; i++) { + PyObject *obj = PySequence_GetItem(sequence, i); + + if (!obj) + break; + + if (PyString_Check(obj) && (PyString_GET_SIZE(obj) == 1)) + { + buf[i] = (jbyte) PyString_AS_STRING(obj)[0]; + Py_DECREF(obj); + } + else if (PyInt_CheckExact(obj)) + { + buf[i] = (jbyte) PyInt_AS_LONG(obj); + Py_DECREF(obj); + } + else + { + PyErr_SetObject(PyExc_TypeError, obj); + Py_DECREF(obj); + break; + } + } + } + + char getType() + { + return 'Z'; + } + + PyObject *toSequence() + { + return toSequence(0, length); + } + + PyObject *toSequence(int lo, int hi) + { + if (this$ == NULL) + Py_RETURN_NONE; + + if (lo < 0) lo = length + lo; + if (lo < 0) lo = 0; + else if (lo > length) lo = length; + if (hi < 0) hi = length + hi; + if (hi < 0) hi = 0; + else if (hi > length) hi = length; + if (lo > hi) lo = hi; + + arrayElements elts = elements(); + jbyte *buf = (jbyte *) elts; + PyObject *tuple = PyTuple_New(hi - lo); + + for (int i = 0; i < hi - lo; i++) + PyTuple_SET_ITEM(tuple, i, PyInt_FromLong(buf[lo + i])); + + return tuple; + } + + PyObject *to_string_() + { + if (this$ == NULL) + Py_RETURN_NONE; + + arrayElements elts = elements(); + jbyte *buf = (jbyte *) elts; + + return PyString_FromStringAndSize((char *) buf, length); + } + + PyObject *get(int n) + { + if (this$ != NULL) + { + if (n < 0) + n = length + n; + + if (n >= 0 && n < length) + { + jbyte b = (*this)[n]; + return PyInt_FromLong(b); + } + } + + PyErr_SetString(PyExc_IndexError, "index out of range"); + return NULL; + } + + int set(int n, PyObject *obj) + { + if (this$ != NULL) + { + if (n < 0) + n = length + n; + + if (n >= 0 && n < length) + { + if (!PyInt_CheckExact(obj)) + { + PyErr_SetObject(PyExc_TypeError, obj); + return -1; + } + + elements()[n] = (jbyte) PyInt_AS_LONG(obj); + return 0; + } + } + + PyErr_SetString(PyExc_IndexError, "index out of range"); + return -1; + } + + PyObject *wrap() const; +#endif + + jbyte operator[](int n) { + JNIEnv *vm_env = env->get_vm_env(); + jboolean isCopy = 0; + jbyte *elts = (jbyte *) + vm_env->GetPrimitiveArrayCritical((jarray) this$, &isCopy); + jbyte value = elts[n]; + + vm_env->ReleasePrimitiveArrayCritical((jarray) this$, elts, 0); + + return value; + } +}; + +template<> class JArray : public java::lang::Object { + public: + int length; + + class arrayElements { + private: + jboolean isCopy; + jcharArray array; + jchar *elts; + public: + arrayElements(jcharArray array) { + this->array = array; + elts = env->get_vm_env()->GetCharArrayElements(array, &isCopy); + } + virtual ~arrayElements() { + env->get_vm_env()->ReleaseCharArrayElements(array, elts, 0); + } + operator jchar *() { + return elts; + } + }; + + arrayElements elements() { + return arrayElements((jcharArray) this$); + } + + JArray(jobject obj) : java::lang::Object(obj) { + length = this$ ? 
env->getArrayLength((jarray) this$) : 0; + } + + JArray(const JArray& obj) : java::lang::Object(obj) { + length = obj.length; + } + + JArray(int n) : java::lang::Object(env->get_vm_env()->NewCharArray(n)) { + length = env->getArrayLength((jarray) this$); + } + +#ifdef PYTHON + JArray(PyObject *sequence) : java::lang::Object(env->get_vm_env()->NewCharArray(PySequence_Length(sequence))) { + length = env->getArrayLength((jarray) this$); + arrayElements elts = elements(); + jchar *buf = (jchar *) elts; + + if (PyUnicode_Check(sequence)) + { + if (sizeof(Py_UNICODE) == sizeof(jchar)) + memcpy(buf, PyUnicode_AS_UNICODE(sequence), + length * sizeof(jchar)); + else + { + Py_UNICODE *pchars = PyUnicode_AS_UNICODE(sequence); + for (int i = 0; i < length; i++) + buf[i] = (jchar) pchars[i]; + } + } + else + for (int i = 0; i < length; i++) { + PyObject *obj = PySequence_GetItem(sequence, i); + + if (!obj) + break; + + if (PyUnicode_Check(obj) && (PyUnicode_GET_SIZE(obj) == 1)) + { + buf[i] = (jchar) PyUnicode_AS_UNICODE(obj)[0]; + Py_DECREF(obj); + } + else + { + PyErr_SetObject(PyExc_TypeError, obj); + Py_DECREF(obj); + break; + } + } + } + + PyObject *toSequence() + { + return toSequence(0, length); + } + + PyObject *toSequence(int lo, int hi) + { + if (this$ == NULL) + Py_RETURN_NONE; + + if (lo < 0) lo = length + lo; + if (lo < 0) lo = 0; + else if (lo > length) lo = length; + if (hi < 0) hi = length + hi; + if (hi < 0) hi = 0; + else if (hi > length) hi = length; + if (lo > hi) lo = hi; + + arrayElements elts = elements(); + jchar *buf = (jchar *) elts; + + if (sizeof(Py_UNICODE) == sizeof(jchar)) + return PyUnicode_FromUnicode((const Py_UNICODE *) buf + lo, + hi - lo); + else + { + PyObject *string = PyUnicode_FromUnicode(NULL, hi - lo); + Py_UNICODE *pchars = PyUnicode_AS_UNICODE(string); + + for (int i = lo; i < hi; i++) + pchars[i - lo] = (Py_UNICODE) buf[i]; + + return string; + } + } + + PyObject *get(int n) + { + if (this$ != NULL) + { + if (n < 0) + n = length + n; + + if (n >= 0 && n < length) + { + jchar c = (*this)[n]; + + if (sizeof(Py_UNICODE) == sizeof(jchar)) + return PyUnicode_FromUnicode((const Py_UNICODE *) &c, 1); + else + { + PyObject *string = PyUnicode_FromUnicode(NULL, 1); + Py_UNICODE *pchars = PyUnicode_AS_UNICODE(string); + + pchars[0] = (Py_UNICODE) c; + + return string; + } + } + } + + PyErr_SetString(PyExc_IndexError, "index out of range"); + return NULL; + } + + int set(int n, PyObject *obj) + { + if (this$ != NULL) + { + if (n < 0) + n = length + n; + + if (n >= 0 && n < length) + { + if (!PyUnicode_Check(obj)) + { + PyErr_SetObject(PyExc_TypeError, obj); + return -1; + } + if (PyUnicode_GET_SIZE(obj) != 1) + { + PyErr_SetObject(PyExc_ValueError, obj); + return -1; + } + + elements()[n] = (jchar) PyUnicode_AS_UNICODE(obj)[0]; + return 0; + } + } + + PyErr_SetString(PyExc_IndexError, "index out of range"); + return -1; + } + + PyObject *wrap() const; +#endif + + jchar operator[](int n) { + JNIEnv *vm_env = env->get_vm_env(); + jboolean isCopy = 0; + jchar *elts = (jchar *) + vm_env->GetPrimitiveArrayCritical((jarray) this$, &isCopy); + jchar value = elts[n]; + + vm_env->ReleasePrimitiveArrayCritical((jarray) this$, elts, 0); + + return value; + } +}; + +template<> class JArray : public java::lang::Object { + public: + int length; + + class arrayElements { + private: + jboolean isCopy; + jdoubleArray array; + jdouble *elts; + public: + arrayElements(jdoubleArray array) { + this->array = array; + elts = env->get_vm_env()->GetDoubleArrayElements(array, &isCopy); + } + 
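+        /* arrayElements is a scoped (RAII) guard around the JNI
+         * Get/ReleaseDoubleArrayElements pair: construction pins or copies
+         * the array storage, destruction releases it with mode 0 (copy back
+         * and free), so every elements() caller is leak-safe even on early
+         * return.  Each primitive specialization repeats this pattern.
+         * Typical use, as in toSequence() below:
+         *
+         *     arrayElements elts = elements();   // acquire
+         *     jdouble *buf = (jdouble *) elts;   // raw view
+         *     ...                                // released at scope exit
+         */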
virtual ~arrayElements() { + env->get_vm_env()->ReleaseDoubleArrayElements(array, elts, 0); + } + operator jdouble *() { + return elts; + } + }; + + arrayElements elements() { + return arrayElements((jdoubleArray) this$); + } + + JArray(jobject obj) : java::lang::Object(obj) { + length = this$ ? env->getArrayLength((jarray) this$) : 0; + } + + JArray(const JArray& obj) : java::lang::Object(obj) { + length = obj.length; + } + + JArray(int n) : java::lang::Object(env->get_vm_env()->NewDoubleArray(n)) { + length = env->getArrayLength((jarray) this$); + } + +#ifdef PYTHON + JArray(PyObject *sequence) : java::lang::Object(env->get_vm_env()->NewDoubleArray(PySequence_Length(sequence))) { + length = env->getArrayLength((jarray) this$); + arrayElements elts = elements(); + jdouble *buf = (jdouble *) elts; + + for (int i = 0; i < length; i++) { + PyObject *obj = PySequence_GetItem(sequence, i); + + if (!obj) + break; + + if (PyFloat_Check(obj)) + { + buf[i] = (jdouble) PyFloat_AS_DOUBLE(obj); + Py_DECREF(obj); + } + else + { + PyErr_SetObject(PyExc_TypeError, obj); + Py_DECREF(obj); + break; + } + } + } + + PyObject *toSequence() + { + return toSequence(0, length); + } + + PyObject *toSequence(int lo, int hi) + { + if (this$ == NULL) + Py_RETURN_NONE; + + if (lo < 0) lo = length + lo; + if (lo < 0) lo = 0; + else if (lo > length) lo = length; + if (hi < 0) hi = length + hi; + if (hi < 0) hi = 0; + else if (hi > length) hi = length; + if (lo > hi) lo = hi; + + PyObject *list = PyList_New(hi - lo); + arrayElements elts = elements(); + jdouble *buf = (jdouble *) elts; + + for (int i = lo; i < hi; i++) + PyList_SET_ITEM(list, i - lo, PyFloat_FromDouble((double) buf[i])); + + return list; + } + + PyObject *get(int n) + { + if (this$ != NULL) + { + if (n < 0) + n = length + n; + + if (n >= 0 && n < length) + return PyFloat_FromDouble((double) (*this)[n]); + } + + PyErr_SetString(PyExc_IndexError, "index out of range"); + return NULL; + } + + int set(int n, PyObject *obj) + { + if (this$ != NULL) + { + if (n < 0) + n = length + n; + + if (n >= 0 && n < length) + { + if (!PyFloat_Check(obj)) + { + PyErr_SetObject(PyExc_TypeError, obj); + return -1; + } + + elements()[n] = (jdouble) PyFloat_AS_DOUBLE(obj); + return 0; + } + } + + PyErr_SetString(PyExc_IndexError, "index out of range"); + return -1; + } + + PyObject *wrap() const; +#endif + + jdouble operator[](int n) { + JNIEnv *vm_env = env->get_vm_env(); + jboolean isCopy = 0; + jdouble *elts = (jdouble *) + vm_env->GetPrimitiveArrayCritical((jarray) this$, &isCopy); + jdouble value = elts[n]; + + vm_env->ReleasePrimitiveArrayCritical((jarray) this$, elts, 0); + + return value; + } +}; + +template<> class JArray : public java::lang::Object { + public: + int length; + + class arrayElements { + private: + jboolean isCopy; + jfloatArray array; + jfloat *elts; + public: + arrayElements(jfloatArray array) { + this->array = array; + elts = env->get_vm_env()->GetFloatArrayElements(array, &isCopy); + } + virtual ~arrayElements() { + env->get_vm_env()->ReleaseFloatArrayElements(array, elts, 0); + } + operator jfloat *() { + return elts; + } + }; + + arrayElements elements() { + return arrayElements((jfloatArray) this$); + } + + JArray(jobject obj) : java::lang::Object(obj) { + length = this$ ? 
env->getArrayLength((jarray) this$) : 0; + } + + JArray(const JArray& obj) : java::lang::Object(obj) { + length = obj.length; + } + + JArray(int n) : java::lang::Object(env->get_vm_env()->NewFloatArray(n)) { + length = env->getArrayLength((jarray) this$); + } + +#ifdef PYTHON + JArray(PyObject *sequence) : java::lang::Object(env->get_vm_env()->NewFloatArray(PySequence_Length(sequence))) { + length = env->getArrayLength((jarray) this$); + arrayElements elts = elements(); + jfloat *buf = (jfloat *) elts; + + for (int i = 0; i < length; i++) { + PyObject *obj = PySequence_GetItem(sequence, i); + + if (!obj) + break; + + if (PyFloat_Check(obj)) + { + buf[i] = (jfloat) PyFloat_AS_DOUBLE(obj); + Py_DECREF(obj); + } + else + { + PyErr_SetObject(PyExc_TypeError, obj); + Py_DECREF(obj); + break; + } + } + } + + PyObject *toSequence() + { + return toSequence(0, length); + } + + PyObject *toSequence(int lo, int hi) + { + if (this$ == NULL) + Py_RETURN_NONE; + + if (lo < 0) lo = length + lo; + if (lo < 0) lo = 0; + else if (lo > length) lo = length; + if (hi < 0) hi = length + hi; + if (hi < 0) hi = 0; + else if (hi > length) hi = length; + if (lo > hi) lo = hi; + + PyObject *list = PyList_New(hi - lo); + arrayElements elts = elements(); + jfloat *buf = (jfloat *) elts; + + for (int i = lo; i < hi; i++) + PyList_SET_ITEM(list, i - lo, PyFloat_FromDouble((double) buf[i])); + + return list; + } + + PyObject *get(int n) + { + if (this$ != NULL) + { + if (n < 0) + n = length + n; + + if (n >= 0 && n < length) + return PyFloat_FromDouble((double) (*this)[n]); + } + + PyErr_SetString(PyExc_IndexError, "index out of range"); + return NULL; + } + + int set(int n, PyObject *obj) + { + if (this$ != NULL) + { + if (n < 0) + n = length + n; + + if (n >= 0 && n < length) + { + if (!PyFloat_Check(obj)) + { + PyErr_SetObject(PyExc_TypeError, obj); + return -1; + } + + elements()[n] = (jfloat) PyFloat_AS_DOUBLE(obj); + return 0; + } + } + + PyErr_SetString(PyExc_IndexError, "index out of range"); + return -1; + } + + PyObject *wrap() const; +#endif + + jfloat operator[](int n) { + JNIEnv *vm_env = env->get_vm_env(); + jboolean isCopy = 0; + jfloat *elts = (jfloat *) + vm_env->GetPrimitiveArrayCritical((jarray) this$, &isCopy); + jfloat value = elts[n]; + + vm_env->ReleasePrimitiveArrayCritical((jarray) this$, elts, 0); + + return value; + } +}; + +template<> class JArray : public java::lang::Object { + public: + int length; + + class arrayElements { + private: + jboolean isCopy; + jintArray array; + jint *elts; + public: + arrayElements(jintArray array) { + this->array = array; + elts = env->get_vm_env()->GetIntArrayElements(array, &isCopy); + } + virtual ~arrayElements() { + env->get_vm_env()->ReleaseIntArrayElements(array, elts, 0); + } + operator jint *() { + return elts; + } + }; + + arrayElements elements() { + return arrayElements((jintArray) this$); + } + + JArray(jobject obj) : java::lang::Object(obj) { + length = this$ ? 
env->getArrayLength((jarray) this$) : 0; + } + + JArray(const JArray& obj) : java::lang::Object(obj) { + length = obj.length; + } + + JArray(int n) : java::lang::Object(env->get_vm_env()->NewIntArray(n)) { + length = env->getArrayLength((jarray) this$); + } + +#ifdef PYTHON + JArray(PyObject *sequence) : java::lang::Object(env->get_vm_env()->NewIntArray(PySequence_Length(sequence))) { + length = env->getArrayLength((jarray) this$); + arrayElements elts = elements(); + jint *buf = (jint *) elts; + + for (int i = 0; i < length; i++) { + PyObject *obj = PySequence_GetItem(sequence, i); + + if (!obj) + break; + + if (PyInt_Check(obj)) + { + buf[i] = (jint) PyInt_AS_LONG(obj); + Py_DECREF(obj); + } + else + { + PyErr_SetObject(PyExc_TypeError, obj); + Py_DECREF(obj); + break; + } + } + } + + PyObject *toSequence() + { + return toSequence(0, length); + } + + PyObject *toSequence(int lo, int hi) + { + if (this$ == NULL) + Py_RETURN_NONE; + + if (lo < 0) lo = length + lo; + if (lo < 0) lo = 0; + else if (lo > length) lo = length; + if (hi < 0) hi = length + hi; + if (hi < 0) hi = 0; + else if (hi > length) hi = length; + if (lo > hi) lo = hi; + + PyObject *list = PyList_New(hi - lo); + arrayElements elts = elements(); + jint *buf = (jint *) elts; + + for (int i = lo; i < hi; i++) + PyList_SET_ITEM(list, i - lo, PyInt_FromLong(buf[i])); + + return list; + } + + PyObject *get(int n) + { + if (this$ != NULL) + { + if (n < 0) + n = length + n; + + if (n >= 0 && n < length) + return PyInt_FromLong((*this)[n]); + } + + PyErr_SetString(PyExc_IndexError, "index out of range"); + return NULL; + } + + int set(int n, PyObject *obj) + { + if (this$ != NULL) + { + if (n < 0) + n = length + n; + + if (n >= 0 && n < length) + { + if (!PyInt_Check(obj)) + { + PyErr_SetObject(PyExc_TypeError, obj); + return -1; + } + + elements()[n] = (jint) PyInt_AS_LONG(obj); + return 0; + } + } + + PyErr_SetString(PyExc_IndexError, "index out of range"); + return -1; + } + + PyObject *wrap() const; +#endif + + jint operator[](int n) { + JNIEnv *vm_env = env->get_vm_env(); + jboolean isCopy = 0; + jint *elts = (jint *) + vm_env->GetPrimitiveArrayCritical((jarray) this$, &isCopy); + jint value = elts[n]; + + vm_env->ReleasePrimitiveArrayCritical((jarray) this$, elts, 0); + + return value; + } +}; + +template<> class JArray : public java::lang::Object { + public: + int length; + + class arrayElements { + private: + jboolean isCopy; + jlongArray array; + jlong *elts; + public: + arrayElements(jlongArray array) { + this->array = array; + elts = env->get_vm_env()->GetLongArrayElements(array, &isCopy); + } + virtual ~arrayElements() { + env->get_vm_env()->ReleaseLongArrayElements(array, elts, 0); + } + operator jlong *() { + return elts; + } + }; + + arrayElements elements() { + return arrayElements((jlongArray) this$); + } + + JArray(jobject obj) : java::lang::Object(obj) { + length = this$ ? 
env->getArrayLength((jarray) this$) : 0; + } + + JArray(const JArray& obj) : java::lang::Object(obj) { + length = obj.length; + } + + JArray(int n) : java::lang::Object(env->get_vm_env()->NewLongArray(n)) { + length = env->getArrayLength((jarray) this$); + } + +#ifdef PYTHON + JArray(PyObject *sequence) : java::lang::Object(env->get_vm_env()->NewLongArray(PySequence_Length(sequence))) { + length = env->getArrayLength((jarray) this$); + arrayElements elts = elements(); + jlong *buf = (jlong *) elts; + + for (int i = 0; i < length; i++) { + PyObject *obj = PySequence_GetItem(sequence, i); + + if (!obj) + break; + + if (PyLong_Check(obj)) + { + buf[i] = (jlong) PyLong_AsLongLong(obj); + Py_DECREF(obj); + } + else + { + PyErr_SetObject(PyExc_TypeError, obj); + Py_DECREF(obj); + break; + } + } + } + + PyObject *toSequence() + { + return toSequence(0, length); + } + + PyObject *toSequence(int lo, int hi) + { + if (this$ == NULL) + Py_RETURN_NONE; + + if (lo < 0) lo = length + lo; + if (lo < 0) lo = 0; + else if (lo > length) lo = length; + if (hi < 0) hi = length + hi; + if (hi < 0) hi = 0; + else if (hi > length) hi = length; + if (lo > hi) lo = hi; + + PyObject *list = PyList_New(hi - lo); + arrayElements elts = elements(); + jlong *buf = (jlong *) elts; + + for (int i = lo; i < hi; i++) + PyList_SET_ITEM(list, i - lo, PyLong_FromLongLong((long long) buf[i])); + + return list; + } + + PyObject *get(int n) + { + if (this$ != NULL) + { + if (n < 0) + n = length + n; + + if (n >= 0 && n < length) + return PyLong_FromLongLong((long long) (*this)[n]); + } + + PyErr_SetString(PyExc_IndexError, "index out of range"); + return NULL; + } + + int set(int n, PyObject *obj) + { + if (this$ != NULL) + { + if (n < 0) + n = length + n; + + if (n >= 0 && n < length) + { + if (!PyLong_Check(obj)) + { + PyErr_SetObject(PyExc_TypeError, obj); + return -1; + } + + elements()[n] = (jlong) PyLong_AsLongLong(obj); + return 0; + } + } + + PyErr_SetString(PyExc_IndexError, "index out of range"); + return -1; + } + + PyObject *wrap() const; +#endif + + jlong operator[](long n) { + JNIEnv *vm_env = env->get_vm_env(); + jboolean isCopy = 0; + jlong *elts = (jlong *) + vm_env->GetPrimitiveArrayCritical((jarray) this$, &isCopy); + jlong value = elts[n]; + + vm_env->ReleasePrimitiveArrayCritical((jarray) this$, elts, 0); + + return value; + } +}; + +template<> class JArray : public java::lang::Object { + public: + int length; + + class arrayElements { + private: + jboolean isCopy; + jshortArray array; + jshort *elts; + public: + arrayElements(jshortArray array) { + this->array = array; + elts = env->get_vm_env()->GetShortArrayElements(array, &isCopy); + } + virtual ~arrayElements() { + env->get_vm_env()->ReleaseShortArrayElements(array, elts, 0); + } + operator jshort *() { + return elts; + } + }; + + arrayElements elements() { + return arrayElements((jshortArray) this$); + } + + JArray(jobject obj) : java::lang::Object(obj) { + length = this$ ? 
env->getArrayLength((jarray) this$) : 0;
+    }
+
+    JArray(const JArray& obj) : java::lang::Object(obj) {
+        length = obj.length;
+    }
+
+    JArray(int n) : java::lang::Object(env->get_vm_env()->NewShortArray(n)) {
+        length = env->getArrayLength((jarray) this$);
+    }
+
+#ifdef PYTHON
+    JArray(PyObject *sequence) : java::lang::Object(env->get_vm_env()->NewShortArray(PySequence_Length(sequence))) {
+        length = env->getArrayLength((jarray) this$);
+        arrayElements elts = elements();
+        jshort *buf = (jshort *) elts;
+
+        for (int i = 0; i < length; i++) {
+            PyObject *obj = PySequence_GetItem(sequence, i);
+
+            if (!obj)
+                break;
+
+            if (PyInt_Check(obj))
+            {
+                buf[i] = (jshort) PyInt_AS_LONG(obj);
+                Py_DECREF(obj);
+            }
+            else
+            {
+                PyErr_SetObject(PyExc_TypeError, obj);
+                Py_DECREF(obj);
+                break;
+            }
+        }
+    }
+
+    PyObject *toSequence()
+    {
+        return toSequence(0, length);
+    }
+
+    PyObject *toSequence(int lo, int hi)
+    {
+        if (this$ == NULL)
+            Py_RETURN_NONE;
+
+        if (lo < 0) lo = length + lo;
+        if (lo < 0) lo = 0;
+        else if (lo > length) lo = length;
+        if (hi < 0) hi = length + hi;
+        if (hi < 0) hi = 0;
+        else if (hi > length) hi = length;
+        if (lo > hi) lo = hi;
+
+        PyObject *list = PyList_New(hi - lo);
+        arrayElements elts = elements();
+        jshort *buf = (jshort *) elts;
+
+        for (int i = lo; i < hi; i++)
+            PyList_SET_ITEM(list, i - lo, PyInt_FromLong(buf[i]));
+
+        return list;
+    }
+
+    PyObject *get(int n)
+    {
+        if (this$ != NULL)
+        {
+            if (n < 0)
+                n = length + n;
+
+            if (n >= 0 && n < length)
+                return PyInt_FromLong((long) (*this)[n]);
+        }
+
+        PyErr_SetString(PyExc_IndexError, "index out of range");
+        return NULL;
+    }
+
+    int set(int n, PyObject *obj)
+    {
+        if (this$ != NULL)
+        {
+            if (n < 0)
+                n = length + n;
+
+            if (n >= 0 && n < length)
+            {
+                if (!PyInt_Check(obj))
+                {
+                    PyErr_SetObject(PyExc_TypeError, obj);
+                    return -1;
+                }
+
+                elements()[n] = (jshort) PyInt_AS_LONG(obj);
+                return 0;
+            }
+        }
+
+        PyErr_SetString(PyExc_IndexError, "index out of range");
+        return -1;
+    }
+
+    PyObject *wrap() const;
+#endif
+
+    jshort operator[](int n) {
+        JNIEnv *vm_env = env->get_vm_env();
+        jboolean isCopy = 0;
+        jshort *elts = (jshort *)
+            vm_env->GetPrimitiveArrayCritical((jarray) this$, &isCopy);
+        jshort value = elts[n];
+
+        vm_env->ReleasePrimitiveArrayCritical((jarray) this$, elts, 0);
+
+        return value;
+    }
+};
+
+#ifdef PYTHON
+
+template<typename T> class t_JArray {
+public:
+    PyObject_HEAD
+    JArray<T> array;
+
+    static PyObject *wrap_Object(const JArray<T>& array)
+    {
+        if (!!array)
+            return array.wrap();
+
+        Py_RETURN_NONE;
+    }
+};
+
+#endif
+
+#endif /* _JArray_H */
diff --git a/jcc/jcc/sources/JCCEnv.cpp b/jcc/jcc/sources/JCCEnv.cpp
new file mode 100644
index 0000000..c60cb1b
--- /dev/null
+++ b/jcc/jcc/sources/JCCEnv.cpp
@@ -0,0 +1,967 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include +#include +#include + +#include "JCCEnv.h" + +#if defined(_MSC_VER) || defined(__WIN32) +_DLL_EXPORT DWORD VM_ENV = 0; +#else +pthread_key_t JCCEnv::VM_ENV = (pthread_key_t) NULL; +#endif + +#if defined(_MSC_VER) || defined(__WIN32) + +static CRITICAL_SECTION *mutex = NULL; + +class lock { +public: + lock() { + EnterCriticalSection(mutex); + } + virtual ~lock() { + LeaveCriticalSection(mutex); + } +}; + +#else + +static pthread_mutex_t *mutex = NULL; + +class lock { +public: + lock() { + pthread_mutex_lock(mutex); + } + virtual ~lock() { + pthread_mutex_unlock(mutex); + } +}; + +#endif + +JCCEnv::JCCEnv(JavaVM *vm, JNIEnv *vm_env) +{ +#if defined(_MSC_VER) || defined(__WIN32) + if (!mutex) + { + mutex = new CRITICAL_SECTION(); + InitializeCriticalSection(mutex); + } +#else + if (!mutex) + { + mutex = new pthread_mutex_t(); + pthread_mutex_init(mutex, NULL); + } +#endif + + if (vm) + set_vm(vm, vm_env); + else + this->vm = NULL; +} + +void JCCEnv::set_vm(JavaVM *vm, JNIEnv *vm_env) +{ + this->vm = vm; + set_vm_env(vm_env); + + _sys = (jclass) vm_env->NewGlobalRef(vm_env->FindClass("java/lang/System")); + _obj = (jclass) vm_env->NewGlobalRef(vm_env->FindClass("java/lang/Object")); +#ifdef _jcc_lib + _thr = (jclass) vm_env->NewGlobalRef(vm_env->FindClass("org/apache/jcc/PythonException")); +#else + _thr = (jclass) vm_env->NewGlobalRef(vm_env->FindClass("java/lang/RuntimeException")); +#endif + + _mids = new jmethodID[max_mid]; + + _mids[mid_sys_identityHashCode] = + vm_env->GetStaticMethodID(_sys, "identityHashCode", + "(Ljava/lang/Object;)I"); + _mids[mid_sys_setProperty] = + vm_env->GetStaticMethodID(_sys, "setProperty", + "(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;"); + _mids[mid_sys_getProperty] = + vm_env->GetStaticMethodID(_sys, "getProperty", + "(Ljava/lang/String;)Ljava/lang/String;"); + _mids[mid_obj_toString] = + vm_env->GetMethodID(_obj, "toString", + "()Ljava/lang/String;"); + _mids[mid_obj_hashCode] = + vm_env->GetMethodID(_obj, "hashCode", + "()I"); + _mids[mid_obj_getClass] = + vm_env->GetMethodID(_obj, "getClass", + "()Ljava/lang/Class;"); + + + jclass iterable = vm_env->FindClass("java/lang/Iterable"); + + if (iterable == NULL) /* JDK < 1.5 */ + { + vm_env->ExceptionClear(); + _mids[mid_iterator] = NULL; + _mids[mid_iterator_next] = NULL; + } + else + { + _mids[mid_iterator] = + vm_env->GetMethodID(iterable, + "iterator", "()Ljava/util/Iterator;"); + _mids[mid_iterator_next] = + vm_env->GetMethodID(vm_env->FindClass("java/util/Iterator"), + "next", "()Ljava/lang/Object;"); + } + + + _mids[mid_enumeration_nextElement] = + vm_env->GetMethodID(vm_env->FindClass("java/util/Enumeration"), + "nextElement", "()Ljava/lang/Object;"); + + _mids[mid_Boolean_booleanValue] = + vm_env->GetMethodID(vm_env->FindClass("java/lang/Boolean"), + "booleanValue", "()Z"); + _mids[mid_Byte_byteValue] = + vm_env->GetMethodID(vm_env->FindClass("java/lang/Byte"), + "byteValue", "()B"); + _mids[mid_Character_charValue] = + vm_env->GetMethodID(vm_env->FindClass("java/lang/Character"), + "charValue", "()C"); + _mids[mid_Double_doubleValue] = + vm_env->GetMethodID(vm_env->FindClass("java/lang/Double"), + "doubleValue", "()D"); + _mids[mid_Float_floatValue] = + vm_env->GetMethodID(vm_env->FindClass("java/lang/Float"), + "floatValue", "()F"); + _mids[mid_Integer_intValue] = + vm_env->GetMethodID(vm_env->FindClass("java/lang/Integer"), + "intValue", "()I"); + _mids[mid_Long_longValue] = + vm_env->GetMethodID(vm_env->FindClass("java/lang/Long"), + "longValue", "()J"); + 
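+    /* These method IDs are resolved once per VM in set_vm() and cached in
+     * _mids; unboxing helpers such as longValue() and shortValue() defined
+     * later in this file then reduce to a single Call<Type>Method with no
+     * per-call lookup.  A hypothetical unboxing sketch using this cache:
+     *
+     *     jobject boxed = ...;               // a java.lang.Long instance
+     *     jlong v = env->longValue(boxed);   // uses mid_Long_longValue
+     */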
_mids[mid_Short_shortValue] = + vm_env->GetMethodID(vm_env->FindClass("java/lang/Short"), + "shortValue", "()S"); +} + +int JCCEnv::attachCurrentThread(char *name, int asDaemon) +{ + JNIEnv *jenv = NULL; + JavaVMAttachArgs attach = { + JNI_VERSION_1_4, name, NULL + }; + int result; + + if (asDaemon) + result = vm->AttachCurrentThreadAsDaemon((void **) &jenv, &attach); + else + result = vm->AttachCurrentThread((void **) &jenv, &attach); + + set_vm_env(jenv); + + return result; +} + +#if defined(_MSC_VER) || defined(__WIN32) + +void JCCEnv::set_vm_env(JNIEnv *vm_env) +{ + if (!VM_ENV) + VM_ENV = TlsAlloc(); + TlsSetValue(VM_ENV, (LPVOID) vm_env); +} + +#else + +void JCCEnv::set_vm_env(JNIEnv *vm_env) +{ + if (!VM_ENV) + pthread_key_create(&VM_ENV, NULL); + pthread_setspecific(VM_ENV, (void *) vm_env); +} + +#endif + +jint JCCEnv::getJNIVersion() const +{ + return get_vm_env()->GetVersion(); +} + +jstring JCCEnv::getJavaVersion() const +{ + return (jstring) + callStaticObjectMethod(_sys, _mids[mid_sys_getProperty], + get_vm_env()->NewStringUTF("java.version")); +} + +jobject JCCEnv::iterator(jobject obj) const +{ + return callObjectMethod(obj, _mids[mid_iterator]); +} + +jobject JCCEnv::iteratorNext(jobject obj) const +{ + return callObjectMethod(obj, _mids[mid_iterator_next]); +} + +jobject JCCEnv::enumerationNext(jobject obj) const +{ + return callObjectMethod(obj, _mids[mid_enumeration_nextElement]); +} + +jboolean JCCEnv::isInstanceOf(jobject obj, getclassfn initializeClass) const +{ + return get_vm_env()->IsInstanceOf(obj, (*initializeClass)()); +} + +jclass JCCEnv::findClass(const char *className) const +{ + jclass cls = NULL; + + if (vm) + { + JNIEnv *vm_env = get_vm_env(); + + if (vm_env) + cls = vm_env->FindClass(className); +#ifdef PYTHON + else + { + PythonGIL gil; + + PyErr_SetString(PyExc_RuntimeError, "attachCurrentThread() must be called first"); + throw _EXC_PYTHON; + } +#else + else + throw _EXC_JAVA; +#endif + } +#ifdef PYTHON + else + { + PythonGIL gil; + + PyErr_SetString(PyExc_RuntimeError, "initVM() must be called first"); + throw _EXC_PYTHON; + } +#else + else + throw _EXC_JAVA; +#endif + + reportException(); + + return cls; +} + +void JCCEnv::registerNatives(jclass cls, JNINativeMethod *methods, int n) const +{ + get_vm_env()->RegisterNatives(cls, methods, n); +} + +jobject JCCEnv::newGlobalRef(jobject obj, int id) +{ + if (obj) + { + if (id) /* zero when weak global ref is desired */ + { + lock locked; + + for (std::multimap::iterator iter = refs.find(id); + iter != refs.end(); + iter++) { + if (iter->first != id) + break; + if (isSame(obj, iter->second.global)) + { + /* If it's in the table but not the same reference, + * it must be a local reference and must be deleted. 
+ */ + if (obj != iter->second.global) + get_vm_env()->DeleteLocalRef(obj); + + iter->second.count += 1; + return iter->second.global; + } + } + + JNIEnv *vm_env = get_vm_env(); + countedRef ref; + + ref.global = vm_env->NewGlobalRef(obj); + ref.count = 1; + refs.insert(std::pair(id, ref)); + vm_env->DeleteLocalRef(obj); + + return ref.global; + } + else + return (jobject) get_vm_env()->NewWeakGlobalRef(obj); + } + + return NULL; +} + +jobject JCCEnv::deleteGlobalRef(jobject obj, int id) +{ + if (obj) + { + if (id) /* zero when obj is weak global ref */ + { + lock locked; + + for (std::multimap::iterator iter = refs.find(id); + iter != refs.end(); + iter++) { + if (iter->first != id) + break; + if (isSame(obj, iter->second.global)) + { + if (iter->second.count == 1) + { + JNIEnv *vm_env = get_vm_env(); + + if (!vm_env) + { + /* Python's cyclic garbage collector may remove + * an object inside a thread that is not attached + * to the JVM. This makes sure the JVM doesn't + * segfault. + */ + attachCurrentThread(NULL, 0); + vm_env = get_vm_env(); + } + + vm_env->DeleteGlobalRef(iter->second.global); + refs.erase(iter); + } + else + iter->second.count -= 1; + + return NULL; + } + } + + printf("deleting non-existent ref: 0x%x\n", id); + } + else + get_vm_env()->DeleteWeakGlobalRef((jweak) obj); + } + + return NULL; +} + +jobject JCCEnv::newObject(getclassfn initializeClass, jmethodID **mids, + int m, ...) +{ + jclass cls = (*initializeClass)(); + JNIEnv *vm_env = get_vm_env(); + jobject obj; + + if (vm_env) + { + va_list ap; + + va_start(ap, m); + obj = vm_env->NewObjectV(cls, (*mids)[m], ap); + va_end(ap); + } +#ifdef PYTHON + else + { + PythonGIL gil; + + PyErr_SetString(PyExc_RuntimeError, "attachCurrentThread() must be called first"); + throw _EXC_PYTHON; + } +#else + else + throw _EXC_JAVA; +#endif + + reportException(); + + return obj; +} + +jobjectArray JCCEnv::newObjectArray(jclass cls, int size) +{ + jobjectArray array = get_vm_env()->NewObjectArray(size, cls, NULL); + + reportException(); + return array; +} + +void JCCEnv::setObjectArrayElement(jobjectArray array, int n, + jobject obj) const +{ + get_vm_env()->SetObjectArrayElement(array, n, obj); + reportException(); +} + +jobject JCCEnv::getObjectArrayElement(jobjectArray array, int n) const +{ + jobject obj = get_vm_env()->GetObjectArrayElement(array, n); + + reportException(); + return obj; +} + +int JCCEnv::getArrayLength(jarray array) const +{ + int len = get_vm_env()->GetArrayLength(array); + + reportException(); + return len; +} + +#ifdef PYTHON +jclass JCCEnv::getPythonExceptionClass() const +{ + return _thr; +} +#endif + +void JCCEnv::reportException() const +{ + JNIEnv *vm_env = get_vm_env(); + jthrowable throwable = vm_env->ExceptionOccurred(); + + if (throwable) + { + if (!env->handlers) + vm_env->ExceptionDescribe(); + +#ifdef PYTHON + PythonGIL gil; + + if (PyErr_Occurred()) + { + /* _thr is PythonException ifdef _jcc_lib (shared mode) + * if not shared mode, _thr is RuntimeException + */ + jobject cls = (jobject) vm_env->GetObjectClass(throwable); + + if (vm_env->IsSameObject(cls, _thr)) + { +#ifndef _jcc_lib + /* PythonException class is not available without shared mode. + * Python exception information thus gets lost and exception + * is reported via plain Java RuntimeException. + */ + PyErr_Clear(); + throw _EXC_JAVA; +#else + throw _EXC_PYTHON; +#endif + } + } +#endif + + throw _EXC_JAVA; + } +} + + +#define DEFINE_CALL(jtype, Type) \ + jtype JCCEnv::call##Type##Method(jobject obj, \ + jmethodID mid, ...) 
const \ + { \ + va_list ap; \ + jtype result; \ + \ + va_start(ap, mid); \ + result = get_vm_env()->Call##Type##MethodV(obj, mid, ap); \ + va_end(ap); \ + \ + reportException(); \ + \ + return result; \ + } + +#define DEFINE_NONVIRTUAL_CALL(jtype, Type) \ + jtype JCCEnv::callNonvirtual##Type##Method(jobject obj, jclass cls, \ + jmethodID mid, ...) const \ + { \ + va_list ap; \ + jtype result; \ + \ + va_start(ap, mid); \ + result = get_vm_env()->CallNonvirtual##Type##MethodV(obj, cls, \ + mid, ap); \ + va_end(ap); \ + \ + reportException(); \ + \ + return result; \ + } + +#define DEFINE_STATIC_CALL(jtype, Type) \ + jtype JCCEnv::callStatic##Type##Method(jclass cls, \ + jmethodID mid, ...) const \ + { \ + va_list ap; \ + jtype result; \ + \ + va_start(ap, mid); \ + result = get_vm_env()->CallStatic##Type##MethodV(cls, mid, ap); \ + va_end(ap); \ + \ + reportException(); \ + \ + return result; \ + } + +DEFINE_CALL(jobject, Object) +DEFINE_CALL(jboolean, Boolean) +DEFINE_CALL(jbyte, Byte) +DEFINE_CALL(jchar, Char) +DEFINE_CALL(jdouble, Double) +DEFINE_CALL(jfloat, Float) +DEFINE_CALL(jint, Int) +DEFINE_CALL(jlong, Long) +DEFINE_CALL(jshort, Short) + +DEFINE_NONVIRTUAL_CALL(jobject, Object) +DEFINE_NONVIRTUAL_CALL(jboolean, Boolean) +DEFINE_NONVIRTUAL_CALL(jbyte, Byte) +DEFINE_NONVIRTUAL_CALL(jchar, Char) +DEFINE_NONVIRTUAL_CALL(jdouble, Double) +DEFINE_NONVIRTUAL_CALL(jfloat, Float) +DEFINE_NONVIRTUAL_CALL(jint, Int) +DEFINE_NONVIRTUAL_CALL(jlong, Long) +DEFINE_NONVIRTUAL_CALL(jshort, Short) + +DEFINE_STATIC_CALL(jobject, Object) +DEFINE_STATIC_CALL(jboolean, Boolean) +DEFINE_STATIC_CALL(jbyte, Byte) +DEFINE_STATIC_CALL(jchar, Char) +DEFINE_STATIC_CALL(jdouble, Double) +DEFINE_STATIC_CALL(jfloat, Float) +DEFINE_STATIC_CALL(jint, Int) +DEFINE_STATIC_CALL(jlong, Long) +DEFINE_STATIC_CALL(jshort, Short) + +void JCCEnv::callVoidMethod(jobject obj, jmethodID mid, ...) const +{ + va_list ap; + + va_start(ap, mid); + get_vm_env()->CallVoidMethodV(obj, mid, ap); + va_end(ap); + + reportException(); +} + +void JCCEnv::callNonvirtualVoidMethod(jobject obj, jclass cls, + jmethodID mid, ...) const +{ + va_list ap; + + va_start(ap, mid); + get_vm_env()->CallNonvirtualVoidMethodV(obj, cls, mid, ap); + va_end(ap); + + reportException(); +} + +void JCCEnv::callStaticVoidMethod(jclass cls, jmethodID mid, ...) 
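+/* Like the DEFINE_*_CALL expansions above, the hand-written Void variants
+ * forward to the matching JNI Call...MethodV entry point and then run
+ * reportException(), so a pending Java exception resurfaces at the call
+ * site as a C++ int exception (_EXC_JAVA) rather than being dropped. */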
const +{ + va_list ap; + + va_start(ap, mid); + get_vm_env()->CallStaticVoidMethodV(cls, mid, ap); + va_end(ap); + + reportException(); +} + + +jboolean JCCEnv::booleanValue(jobject obj) const +{ + return get_vm_env()->CallBooleanMethod(obj, _mids[mid_Boolean_booleanValue]); +} + +jbyte JCCEnv::byteValue(jobject obj) const +{ + return get_vm_env()->CallByteMethod(obj, _mids[mid_Byte_byteValue]); +} + +jchar JCCEnv::charValue(jobject obj) const +{ + return get_vm_env()->CallCharMethod(obj, _mids[mid_Character_charValue]); +} + +jdouble JCCEnv::doubleValue(jobject obj) const +{ + return get_vm_env()->CallDoubleMethod(obj, _mids[mid_Double_doubleValue]); +} + +jfloat JCCEnv::floatValue(jobject obj) const +{ + return get_vm_env()->CallFloatMethod(obj, _mids[mid_Float_floatValue]); +} + +jint JCCEnv::intValue(jobject obj) const +{ + return get_vm_env()->CallIntMethod(obj, _mids[mid_Integer_intValue]); +} + +jlong JCCEnv::longValue(jobject obj) const +{ + return get_vm_env()->CallLongMethod(obj, _mids[mid_Long_longValue]); +} + +jshort JCCEnv::shortValue(jobject obj) const +{ + return get_vm_env()->CallShortMethod(obj, _mids[mid_Short_shortValue]); +} + + +jmethodID JCCEnv::getMethodID(jclass cls, const char *name, + const char *signature) const +{ + jmethodID id = get_vm_env()->GetMethodID(cls, name, signature); + + reportException(); + + return id; +} + +jfieldID JCCEnv::getFieldID(jclass cls, const char *name, + const char *signature) const +{ + jfieldID id = get_vm_env()->GetFieldID(cls, name, signature); + + reportException(); + + return id; +} + + +jmethodID JCCEnv::getStaticMethodID(jclass cls, const char *name, + const char *signature) const +{ + jmethodID id = get_vm_env()->GetStaticMethodID(cls, name, signature); + + reportException(); + + return id; +} + +jobject JCCEnv::getStaticObjectField(jclass cls, const char *name, + const char *signature) const +{ + JNIEnv *vm_env = get_vm_env(); + jfieldID id = vm_env->GetStaticFieldID(cls, name, signature); + + reportException(); + + return vm_env->GetStaticObjectField(cls, id); +} + +#define DEFINE_GET_STATIC_FIELD(jtype, Type, signature) \ + jtype JCCEnv::getStatic##Type##Field(jclass cls, \ + const char *name) const \ + { \ + JNIEnv *vm_env = get_vm_env(); \ + jfieldID id = vm_env->GetStaticFieldID(cls, name, #signature); \ + reportException(); \ + return vm_env->GetStatic##Type##Field(cls, id); \ + } + +DEFINE_GET_STATIC_FIELD(jboolean, Boolean, Z) +DEFINE_GET_STATIC_FIELD(jbyte, Byte, B) +DEFINE_GET_STATIC_FIELD(jchar, Char, C) +DEFINE_GET_STATIC_FIELD(jdouble, Double, D) +DEFINE_GET_STATIC_FIELD(jfloat, Float, F) +DEFINE_GET_STATIC_FIELD(jint, Int, I) +DEFINE_GET_STATIC_FIELD(jlong, Long, J) +DEFINE_GET_STATIC_FIELD(jshort, Short, S) + +#define DEFINE_GET_FIELD(jtype, Type) \ + jtype JCCEnv::get##Type##Field(jobject obj, jfieldID id) const \ + { \ + jtype value = get_vm_env()->Get##Type##Field(obj, id); \ + reportException(); \ + return value; \ + } + +DEFINE_GET_FIELD(jobject, Object) +DEFINE_GET_FIELD(jboolean, Boolean) +DEFINE_GET_FIELD(jbyte, Byte) +DEFINE_GET_FIELD(jchar, Char) +DEFINE_GET_FIELD(jdouble, Double) +DEFINE_GET_FIELD(jfloat, Float) +DEFINE_GET_FIELD(jint, Int) +DEFINE_GET_FIELD(jlong, Long) +DEFINE_GET_FIELD(jshort, Short) + +#define DEFINE_SET_FIELD(jtype, Type) \ + void JCCEnv::set##Type##Field(jobject obj, jfieldID id, \ + jtype value) const \ + { \ + get_vm_env()->Set##Type##Field(obj, id, value); \ + reportException(); \ + } + +DEFINE_SET_FIELD(jobject, Object) +DEFINE_SET_FIELD(jboolean, Boolean) 
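+/* Each DEFINE_SET_FIELD line expands to one typed setter; for example the
+ * jboolean invocation above becomes (expansion shown for illustration):
+ *
+ *     void JCCEnv::setBooleanField(jobject obj, jfieldID id,
+ *                                  jboolean value) const
+ *     {
+ *         get_vm_env()->SetBooleanField(obj, id, value);
+ *         reportException();
+ *     }
+ */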
+DEFINE_SET_FIELD(jbyte, Byte) +DEFINE_SET_FIELD(jchar, Char) +DEFINE_SET_FIELD(jdouble, Double) +DEFINE_SET_FIELD(jfloat, Float) +DEFINE_SET_FIELD(jint, Int) +DEFINE_SET_FIELD(jlong, Long) +DEFINE_SET_FIELD(jshort, Short) + +void JCCEnv::setClassPath(const char *classPath) +{ + JNIEnv *vm_env = get_vm_env(); + jclass _ucl = (jclass) vm_env->FindClass("java/net/URLClassLoader"); + jclass _fil = (jclass) vm_env->FindClass("java/io/File"); + jmethodID mid = vm_env->GetStaticMethodID(_ucl, "getSystemClassLoader", + "()Ljava/lang/ClassLoader;"); + jobject classLoader = vm_env->CallStaticObjectMethod(_ucl, mid); + jmethodID mf = vm_env->GetMethodID(_fil, "", "(Ljava/lang/String;)V"); + jmethodID mu = vm_env->GetMethodID(_fil, "toURL", "()Ljava/net/URL;"); + jmethodID ma = vm_env->GetMethodID(_ucl, "addURL", "(Ljava/net/URL;)V"); +#if defined(_MSC_VER) || defined(__WIN32) + char *pathsep = ";"; + char *path = _strdup(classPath); +#else + char *pathsep = ":"; + char *path = strdup(classPath); +#endif + + for (char *cp = strtok(path, pathsep); + cp != NULL; + cp = strtok(NULL, pathsep)) { + jstring string = vm_env->NewStringUTF(cp); + jobject file = vm_env->NewObject(_fil, mf, string); + jobject url = vm_env->CallObjectMethod(file, mu); + + vm_env->CallVoidMethod(classLoader, ma, url); + } + free(path); +} + +char *JCCEnv::getClassPath() +{ + JNIEnv *vm_env = get_vm_env(); + jclass _ucl = (jclass) vm_env->FindClass("java/net/URLClassLoader"); + jclass _url = (jclass) vm_env->FindClass("java/net/URL"); + jmethodID mid = vm_env->GetStaticMethodID(_ucl, "getSystemClassLoader", + "()Ljava/lang/ClassLoader;"); + jobject classLoader = vm_env->CallStaticObjectMethod(_ucl, mid); + jmethodID gu = vm_env->GetMethodID(_ucl, "getURLs", "()[Ljava/net/URL;"); + jmethodID gp = vm_env->GetMethodID(_url, "getPath", "()Ljava/lang/String;"); +#if defined(_MSC_VER) || defined(__WIN32) + char *pathsep = ";"; +#else + char *pathsep = ":"; +#endif + jobjectArray array = (jobjectArray) + vm_env->CallObjectMethod(classLoader, gu); + int count = array ? vm_env->GetArrayLength(array) : 0; + int first = 1, total = 0; + char *classpath = NULL; + + for (int i = 0; i < count; i++) { + jobject url = vm_env->GetObjectArrayElement(array, i); + jstring path = (jstring) vm_env->CallObjectMethod(url, gp); + const char *chars = vm_env->GetStringUTFChars(path, NULL); + int size = vm_env->GetStringUTFLength(path); + + total += size + 1; + if (classpath == NULL) + classpath = (char *) calloc(total, 1); + else + classpath = (char *) realloc(classpath, total); + if (classpath == NULL) + return NULL; + + if (first) + first = 0; + else + strcat(classpath, pathsep); + + strcat(classpath, chars); + } + + return classpath; +} + +jstring JCCEnv::fromUTF(const char *bytes) const +{ + jstring str = get_vm_env()->NewStringUTF(bytes); + + reportException(); + + return str; +} + +char *JCCEnv::toUTF(jstring str) const +{ + JNIEnv *vm_env = get_vm_env(); + int len = vm_env->GetStringUTFLength(str); + char *bytes = new char[len + 1]; + jboolean isCopy = 0; + const char *utf = vm_env->GetStringUTFChars(str, &isCopy); + + if (!bytes) + return NULL; + + memcpy(bytes, utf, len); + bytes[len] = '\0'; + + vm_env->ReleaseStringUTFChars(str, utf); + + return bytes; +} + +char *JCCEnv::toString(jobject obj) const +{ + try { + return obj + ? 
toUTF((jstring) callObjectMethod(obj, _mids[mid_obj_toString])) + : NULL; + } catch (int e) { + switch (e) { + case _EXC_PYTHON: + return NULL; + case _EXC_JAVA: { + JNIEnv *vm_env = get_vm_env(); + + vm_env->ExceptionDescribe(); + vm_env->ExceptionClear(); + + return NULL; + } + default: + throw; + } + } +} + +char *JCCEnv::getClassName(jobject obj) const +{ + return obj + ? toString(callObjectMethod(obj, _mids[mid_obj_getClass])) + : NULL; +} + +#ifdef PYTHON + +jstring JCCEnv::fromPyString(PyObject *object) const +{ + if (object == Py_None) + return NULL; + + if (PyUnicode_Check(object)) + { + if (sizeof(Py_UNICODE) == sizeof(jchar)) + { + jchar *buf = (jchar *) PyUnicode_AS_UNICODE(object); + jsize len = (jsize) PyUnicode_GET_SIZE(object); + + return get_vm_env()->NewString(buf, len); + } + else + { + jsize len = PyUnicode_GET_SIZE(object); + Py_UNICODE *pchars = PyUnicode_AS_UNICODE(object); + jchar *jchars = new jchar[len]; + jstring str; + + for (int i = 0; i < len; i++) + jchars[i] = (jchar) pchars[i]; + + str = get_vm_env()->NewString(jchars, len); + delete jchars; + + return str; + } + } + else if (PyString_Check(object)) + return fromUTF(PyString_AS_STRING(object)); + else + { + PyObject *tuple = Py_BuildValue("(sO)", "expected a string", object); + + PyErr_SetObject(PyExc_TypeError, tuple); + Py_DECREF(tuple); + + return NULL; + } +} + +PyObject *JCCEnv::fromJString(jstring js, int delete_local_ref) const +{ + if (!js) + Py_RETURN_NONE; + + JNIEnv *vm_env = get_vm_env(); + PyObject *string; + + if (sizeof(Py_UNICODE) == sizeof(jchar)) + { + jboolean isCopy; + const jchar *buf = vm_env->GetStringChars(js, &isCopy); + jsize len = vm_env->GetStringLength(js); + + string = PyUnicode_FromUnicode((const Py_UNICODE *) buf, len); + vm_env->ReleaseStringChars(js, buf); + } + else + { + jsize len = vm_env->GetStringLength(js); + + string = PyUnicode_FromUnicode(NULL, len); + if (string) + { + jboolean isCopy; + const jchar *jchars = vm_env->GetStringChars(js, &isCopy); + Py_UNICODE *pchars = PyUnicode_AS_UNICODE(string); + + for (int i = 0; i < len; i++) + pchars[i] = (Py_UNICODE) jchars[i]; + + vm_env->ReleaseStringChars(js, jchars); + } + } + + if (delete_local_ref) + vm_env->DeleteLocalRef((jobject) js); + + return string; +} + + +/* may be called from finalizer thread which has no vm_env thread local */ +void JCCEnv::finalizeObject(JNIEnv *jenv, PyObject *obj) +{ + PythonGIL gil; + + set_vm_env(jenv); + Py_DECREF(obj); +} + +#endif /* PYTHON */ diff --git a/jcc/jcc/sources/JCCEnv.h b/jcc/jcc/sources/JCCEnv.h new file mode 100644 index 0000000..de892b8 --- /dev/null +++ b/jcc/jcc/sources/JCCEnv.h @@ -0,0 +1,321 @@ +/* + * Copyright (c) 2007-2008 Open Source Applications Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#ifndef _JCCEnv_H
+#define _JCCEnv_H
+
+#include <jni.h>
+
+#if defined(_MSC_VER) || defined(__WIN32)
+#define _DLL_IMPORT __declspec(dllimport)
+#define _DLL_EXPORT __declspec(dllexport)
+#include <windows.h>
+#undef MAX_PRIORITY
+#undef MIN_PRIORITY
+#else
+#include <pthread.h>
+#define _DLL_IMPORT
+#define _DLL_EXPORT
+#endif
+
+#ifdef __SUNPRO_CC
+#undef DEFAULT_TYPE
+#endif
+
+#ifdef TRUE
+#undef TRUE
+#endif
+#ifdef FALSE
+#undef FALSE
+#endif
+
+#include <map>
+
+#ifdef PYTHON
+#include <Python.h>
+#endif
+
+#undef EOF
+
+class JCCEnv;
+
+#if defined(_MSC_VER) || defined(__WIN32)
+
+#ifdef _jcc_shared
+_DLL_IMPORT extern JCCEnv *env;
+_DLL_IMPORT extern DWORD VM_ENV;
+#else
+_DLL_EXPORT extern JCCEnv *env;
+_DLL_EXPORT extern DWORD VM_ENV;
+#endif
+
+#else
+
+extern JCCEnv *env;
+
+#endif
+
+#define _EXC_PYTHON ((int) 0)
+#define _EXC_JAVA ((int) 1)
+
+typedef jclass (*getclassfn)(void);
+
+class countedRef {
+public:
+    jobject global;
+    int count;
+};
+
+class _DLL_EXPORT JCCEnv {
+protected:
+    jclass _sys, _obj, _thr;
+    jmethodID *_mids;
+
+    enum {
+        mid_sys_identityHashCode,
+        mid_sys_setProperty,
+        mid_sys_getProperty,
+        mid_obj_toString,
+        mid_obj_hashCode,
+        mid_obj_getClass,
+        mid_iterator,
+        mid_iterator_next,
+        mid_enumeration_nextElement,
+        mid_Boolean_booleanValue,
+        mid_Byte_byteValue,
+        mid_Character_charValue,
+        mid_Double_doubleValue,
+        mid_Float_floatValue,
+        mid_Integer_intValue,
+        mid_Long_longValue,
+        mid_Short_shortValue,
+        max_mid
+    };
+
+public:
+    JavaVM *vm;
+    std::multimap<int, countedRef> refs;
+    int handlers;
+
+    explicit JCCEnv(JavaVM *vm, JNIEnv *env);
+
+#if defined(_MSC_VER) || defined(__WIN32)
+    inline JNIEnv *get_vm_env() const
+    {
+        return (JNIEnv *) TlsGetValue(VM_ENV);
+    }
+#else
+    static pthread_key_t VM_ENV;
+
+    inline JNIEnv *get_vm_env() const
+    {
+        return (JNIEnv *) pthread_getspecific(VM_ENV);
+    }
+#endif
+    void set_vm(JavaVM *vm, JNIEnv *vm_env);
+    void set_vm_env(JNIEnv *vm_env);
+    int attachCurrentThread(char *name, int asDaemon);
+
+    jint getJNIVersion() const;
+    jstring getJavaVersion() const;
+
+    jclass findClass(const char *className) const;
+    jboolean isInstanceOf(jobject obj, getclassfn initializeClass) const;
+
+    void registerNatives(jclass cls, JNINativeMethod *methods, int n) const;
+
+    jobject iterator(jobject obj) const;
+    jobject iteratorNext(jobject obj) const;
+    jobject enumerationNext(jobject obj) const;
+
+    jobject newGlobalRef(jobject obj, int id);
+    jobject deleteGlobalRef(jobject obj, int id);
+
+    jobject newObject(getclassfn initializeClass, jmethodID **mids, int m, ...);
+
+    jobjectArray newObjectArray(jclass cls, int size);
+    void setObjectArrayElement(jobjectArray a, int n,
+                               jobject obj) const;
+    jobject getObjectArrayElement(jobjectArray a, int n) const;
+    int getArrayLength(jarray a) const;
+
+    void reportException() const;
+
+    jobject callObjectMethod(jobject obj, jmethodID mid, ...) const;
+    jboolean callBooleanMethod(jobject obj, jmethodID mid, ...) const;
+    jbyte callByteMethod(jobject obj, jmethodID mid, ...) const;
+    jchar callCharMethod(jobject obj, jmethodID mid, ...) const;
+    jdouble callDoubleMethod(jobject obj, jmethodID mid, ...) const;
+    jfloat callFloatMethod(jobject obj, jmethodID mid, ...) const;
+    jint callIntMethod(jobject obj, jmethodID mid, ...) const;
+    jlong callLongMethod(jobject obj, jmethodID mid, ...) const;
+    jshort callShortMethod(jobject obj, jmethodID mid, ...) const;
+    void callVoidMethod(jobject obj, jmethodID mid, ...) const;
+
+    jobject callNonvirtualObjectMethod(jobject obj, jclass cls,
+                                       jmethodID mid, ...) 
const; + jboolean callNonvirtualBooleanMethod(jobject obj, jclass cls, + jmethodID mid, ...) const; + jbyte callNonvirtualByteMethod(jobject obj, jclass cls, + jmethodID mid, ...) const; + jchar callNonvirtualCharMethod(jobject obj, jclass cls, + jmethodID mid, ...) const; + jdouble callNonvirtualDoubleMethod(jobject obj, jclass cls, + jmethodID mid, ...) const; + jfloat callNonvirtualFloatMethod(jobject obj, jclass cls, + jmethodID mid, ...) const; + jint callNonvirtualIntMethod(jobject obj, jclass cls, + jmethodID mid, ...) const; + jlong callNonvirtualLongMethod(jobject obj, jclass cls, + jmethodID mid, ...) const; + jshort callNonvirtualShortMethod(jobject obj, jclass cls, + jmethodID mid, ...) const; + void callNonvirtualVoidMethod(jobject obj, jclass cls, + jmethodID mid, ...) const; + + jobject callStaticObjectMethod(jclass cls, jmethodID mid, ...) const; + jboolean callStaticBooleanMethod(jclass cls, jmethodID mid, ...) const; + jbyte callStaticByteMethod(jclass cls, jmethodID mid, ...) const; + jchar callStaticCharMethod(jclass cls, jmethodID mid, ...) const; + jdouble callStaticDoubleMethod(jclass cls, jmethodID mid, ...) const; + jfloat callStaticFloatMethod(jclass cls, jmethodID mid, ...) const; + jint callStaticIntMethod(jclass cls, jmethodID mid, ...) const; + jlong callStaticLongMethod(jclass cls, jmethodID mid, ...) const; + jshort callStaticShortMethod(jclass cls, jmethodID mid, ...) const; + void callStaticVoidMethod(jclass cls, jmethodID mid, ...) const; + + jboolean booleanValue(jobject obj) const; + jbyte byteValue(jobject obj) const; + jchar charValue(jobject obj) const; + jdouble doubleValue(jobject obj) const; + jfloat floatValue(jobject obj) const; + jint intValue(jobject obj) const; + jlong longValue(jobject obj) const; + jshort shortValue(jobject obj) const; + + jmethodID getMethodID(jclass cls, const char *name, + const char *signature) const; + jfieldID getFieldID(jclass cls, const char *name, + const char *signature) const; + jmethodID getStaticMethodID(jclass cls, const char *name, + const char *signature) const; + + jobject getStaticObjectField(jclass cls, const char *name, + const char *signature) const; + jboolean getStaticBooleanField(jclass cls, const char *name) const; + jbyte getStaticByteField(jclass cls, const char *name) const; + jchar getStaticCharField(jclass cls, const char *name) const; + jdouble getStaticDoubleField(jclass cls, const char *name) const; + jfloat getStaticFloatField(jclass cls, const char *name) const; + jint getStaticIntField(jclass cls, const char *name) const; + jlong getStaticLongField(jclass cls, const char *name) const; + jshort getStaticShortField(jclass cls, const char *name) const; + + jobject getObjectField(jobject obj, jfieldID id) const; + jboolean getBooleanField(jobject obj, jfieldID id) const; + jbyte getByteField(jobject obj, jfieldID id) const; + jchar getCharField(jobject obj, jfieldID id) const; + jdouble getDoubleField(jobject obj, jfieldID id) const; + jfloat getFloatField(jobject obj, jfieldID id) const; + jint getIntField(jobject obj, jfieldID id) const; + jlong getLongField(jobject obj, jfieldID id) const; + jshort getShortField(jobject obj, jfieldID id) const; + + void setObjectField(jobject obj, jfieldID id, jobject value) const; + void setBooleanField(jobject obj, jfieldID id, jboolean value) const; + void setByteField(jobject obj, jfieldID id, jbyte value) const; + void setCharField(jobject obj, jfieldID id, jchar value) const; + void setDoubleField(jobject obj, jfieldID id, jdouble value) const; + 
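+    /* One get/set pair per JNI field type; these declarations mirror the
+     * DEFINE_GET_FIELD/DEFINE_SET_FIELD expansions in JCCEnv.cpp and report
+     * any pending Java exception after each JNI access. */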
void setFloatField(jobject obj, jfieldID id, jfloat value) const; + void setIntField(jobject obj, jfieldID id, jint value) const; + void setLongField(jobject obj, jfieldID id, jlong value) const; + void setShortField(jobject obj, jfieldID id, jshort value) const; + + int id(jobject obj) const { + return obj + ? get_vm_env()->CallStaticIntMethod(_sys, + _mids[mid_sys_identityHashCode], + obj) + : 0; + } + + int hash(jobject obj) const { + return obj + ? get_vm_env()->CallIntMethod(obj, _mids[mid_obj_hashCode]) + : 0; + } + + void setClassPath(const char *classPath); + char *getClassPath(); + + jstring fromUTF(const char *bytes) const; + char *toUTF(jstring str) const; + char *toString(jobject obj) const; + char *getClassName(jobject obj) const; +#ifdef PYTHON + jclass getPythonExceptionClass() const; + jstring fromPyString(PyObject *object) const; + PyObject *fromJString(jstring js, int delete_local_ref) const; + void finalizeObject(JNIEnv *jenv, PyObject *obj); +#endif + + inline int isSame(jobject o1, jobject o2) const + { + return o1 == o2 || get_vm_env()->IsSameObject(o1, o2); + } +}; + +#ifdef PYTHON + +class PythonGIL { + private: + PyGILState_STATE state; + public: + PythonGIL() + { + state = PyGILState_Ensure(); + } + PythonGIL(JNIEnv *vm_env) + { + state = PyGILState_Ensure(); + env->set_vm_env(vm_env); + } + ~PythonGIL() + { + PyGILState_Release(state); + } +}; + +class PythonThreadState { + private: + PyThreadState *state; + int handler; + public: + PythonThreadState(int handler=0) + { + state = PyEval_SaveThread(); + this->handler = handler; + env->handlers += handler; + } + ~PythonThreadState() + { + PyEval_RestoreThread(state); + env->handlers -= handler; + } +}; + +#endif + +#endif /* _JCCEnv_H */ diff --git a/jcc/jcc/sources/JObject.cpp b/jcc/jcc/sources/JObject.cpp new file mode 100644 index 0000000..bc3415f --- /dev/null +++ b/jcc/jcc/sources/JObject.cpp @@ -0,0 +1,194 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include <jni.h>
+#include "JCCEnv.h"
+
+#ifdef PYTHON
+
+#include <Python.h>
+#include "structmember.h"
+
+#include "JObject.h"
+#include "macros.h"
+
+
+/* JObject */
+
+static void t_JObject_dealloc(t_JObject *self);
+static PyObject *t_JObject_new(PyTypeObject *type,
+                               PyObject *args, PyObject *kwds);
+
+static PyObject *t_JObject_richcmp(t_JObject *, PyObject *o2, int op);
+static PyObject *t_JObject_str(t_JObject *self);
+static PyObject *t_JObject_repr(t_JObject *self);
+static int t_JObject_hash(t_JObject *self);
+static PyObject *t_JObject__getJObject(t_JObject *self, void *data);
+
+static PyMemberDef t_JObject_members[] = {
+    { NULL, 0, 0, 0, NULL }
+};
+
+static PyMethodDef t_JObject_methods[] = {
+    { NULL, NULL, 0, NULL }
+};
+
+static PyGetSetDef t_JObject_properties[] = {
+    { "_jobject", (getter) t_JObject__getJObject, NULL, NULL, NULL },
+    { NULL, NULL, NULL, NULL, NULL }
+};
+
+PyTypeObject PY_TYPE(JObject) = {
+    PyObject_HEAD_INIT(NULL)
+    0,                                   /* ob_size */
+    "jcc.JObject",                       /* tp_name */
+    sizeof(t_JObject),                   /* tp_basicsize */
+    0,                                   /* tp_itemsize */
+    (destructor)t_JObject_dealloc,       /* tp_dealloc */
+    0,                                   /* tp_print */
+    0,                                   /* tp_getattr */
+    0,                                   /* tp_setattr */
+    0,                                   /* tp_compare */
+    (reprfunc)t_JObject_repr,            /* tp_repr */
+    0,                                   /* tp_as_number */
+    0,                                   /* tp_as_sequence */
+    0,                                   /* tp_as_mapping */
+    (hashfunc)t_JObject_hash,            /* tp_hash */
+    0,                                   /* tp_call */
+    (reprfunc)t_JObject_str,             /* tp_str */
+    0,                                   /* tp_getattro */
+    0,                                   /* tp_setattro */
+    0,                                   /* tp_as_buffer */
+    (Py_TPFLAGS_DEFAULT |
+     Py_TPFLAGS_BASETYPE),               /* tp_flags */
+    "t_JObject objects",                 /* tp_doc */
+    0,                                   /* tp_traverse */
+    0,                                   /* tp_clear */
+    (richcmpfunc)t_JObject_richcmp,      /* tp_richcompare */
+    0,                                   /* tp_weaklistoffset */
+    0,                                   /* tp_iter */
+    0,                                   /* tp_iternext */
+    t_JObject_methods,                   /* tp_methods */
+    t_JObject_members,                   /* tp_members */
+    t_JObject_properties,                /* tp_getset */
+    0,                                   /* tp_base */
+    0,                                   /* tp_dict */
+    0,                                   /* tp_descr_get */
+    0,                                   /* tp_descr_set */
+    0,                                   /* tp_dictoffset */
+    0,                                   /* tp_init */
+    0,                                   /* tp_alloc */
+    (newfunc)t_JObject_new,              /* tp_new */
+};
+
+
+static void t_JObject_dealloc(t_JObject *self)
+{
+    self->object = JObject(NULL);
+    self->ob_type->tp_free((PyObject *) self);
+}
+
+static PyObject *t_JObject_new(PyTypeObject *type,
+                               PyObject *args, PyObject *kwds)
+{
+    t_JObject *self = (t_JObject *) type->tp_alloc(type, 0);
+
+    self->object = JObject(NULL);
+
+    return (PyObject *) self;
+}
+
+static PyObject *t_JObject_richcmp(t_JObject *self, PyObject *arg, int op)
+{
+    int b = 0;
+
+    switch (op) {
+      case Py_EQ:
+      case Py_NE:
+        if (PyObject_TypeCheck(arg, &PY_TYPE(JObject)))
+            b = self->object == ((t_JObject *) arg)->object;
+        if (op == Py_EQ)
+            Py_RETURN_BOOL(b);
+        Py_RETURN_BOOL(!b);
+      case Py_LT:
+        PyErr_SetString(PyExc_NotImplementedError, "<");
+        return NULL;
+      case Py_LE:
+        PyErr_SetString(PyExc_NotImplementedError, "<=");
+        return NULL;
+      case Py_GT:
+        PyErr_SetString(PyExc_NotImplementedError, ">");
+        return NULL;
+      case Py_GE:
+        PyErr_SetString(PyExc_NotImplementedError, ">=");
+        return NULL;
+    }
+
+    return NULL;
+}
+
+static PyObject *t_JObject_str(t_JObject *self)
+{
+    if (self->object.this$)
+    {
+        char *utf = env->toString(self->object.this$);
+
+        if (utf == NULL)
+            utf = env->getClassName(self->object.this$);
+
+        if (utf != NULL)
+        {
+            PyObject *unicode =
+                PyUnicode_DecodeUTF8(utf, strlen(utf), "strict");
+
+            delete utf;
+            return unicode;
+        }
+    }
+
+    return PyString_FromString("<null>");
+}
+
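+/* From the Python side (illustrative; `doc` is a hypothetical wrapped
+ * instance): str(doc) yields the object's Java toString() text, falling
+ * back to its class name, and repr(doc), built below, formats it as
+ *
+ *   >>> repr(doc)
+ *   '<Document: Document stored,indexed ...>'
+ */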
+static PyObject *t_JObject_repr(t_JObject *self)
+{
+    PyObject *name = PyObject_GetAttrString((PyObject *) self->ob_type,
+                                            "__name__");
+    PyObject *str = self->ob_type->tp_str((PyObject *) self);
+#if PY_VERSION_HEX < 0x02040000
+    PyObject *args = Py_BuildValue("(OO)", name, str);
+#else
+    PyObject *args = PyTuple_Pack(2, name, str);
+#endif
+    PyObject *format = PyString_FromString("<%s: %s>");
+    PyObject *repr = PyString_Format(format, args);
+
+    Py_DECREF(name);
+    Py_DECREF(str);
+    Py_DECREF(args);
+    Py_DECREF(format);
+
+    return repr;
+}
+
+static int t_JObject_hash(t_JObject *self)
+{
+    return env->hash(self->object.this$);
+}
+
+static PyObject *t_JObject__getJObject(t_JObject *self, void *data)
+{
+    return PyCObject_FromVoidPtr((void *) self->object.this$, NULL);
+}
+
+#endif /* PYTHON */
diff --git a/jcc/jcc/sources/JObject.h b/jcc/jcc/sources/JObject.h
new file mode 100644
index 0000000..2a0638d
--- /dev/null
+++ b/jcc/jcc/sources/JObject.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2007-2008 Open Source Applications Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _JObject_H
+#define _JObject_H
+
+#include <jni.h>
+#include "JCCEnv.h"
+
+class _DLL_EXPORT JObject {
+public:
+    jobject this$;
+    int id;  /* zero when this$ is a weak ref */
+
+    inline explicit JObject(jobject obj)
+    {
+        if (obj)
+        {
+            id = env->id(obj);
+            this$ = env->newGlobalRef(obj, id);
+        }
+        else
+        {
+            id = 0;
+            this$ = NULL;
+        }
+    }
+
+    inline JObject(const JObject& obj)
+    {
+        id = obj.id ? obj.id : env->id(obj.this$);
+        this$ = env->newGlobalRef(obj.this$, id);
+    }
+
+    virtual ~JObject()
+    {
+        this$ = env->deleteGlobalRef(this$, id);
+    }
+
+    JObject& weaken$()
+    {
+        if (id)
+        {
+            jobject ref = env->newGlobalRef(this$, 0);
+
+            env->deleteGlobalRef(this$, id);
+            id = 0;
+            this$ = ref;
+        }
+
+        return *this;
+    }
+
+    inline int operator!() const
+    {
+        return env->isSame(this$, NULL);
+    }
+
+    inline int operator==(const JObject& obj) const
+    {
+        return env->isSame(this$, obj.this$);
+    }
+
+    JObject& operator=(const JObject& obj)
+    {
+        jobject prev = this$;
+        int objid = obj.id ? obj.id : env->id(obj.this$);
+
+        this$ = env->newGlobalRef(obj.this$, objid);
+        env->deleteGlobalRef(prev, id);
+        id = objid;
+
+        return *this;
+    }
+};
+
+
+#ifdef PYTHON
+
+#include <Python.h>
+#include "macros.h"
+
+class t_JObject {
+public:
+    PyObject_HEAD
+    JObject object;
+};
+
+extern PyTypeObject PY_TYPE(JObject);
+
+#endif /* PYTHON */
+
+
+#endif /* _JObject_H */
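+/* Usage sketch (illustrative; `localRef` is a hypothetical jobject returned
+ * by a JNI call): JObject is an RAII handle over a counted JNI global ref,
+ * keyed by identityHashCode in JCCEnv::refs:
+ *
+ *   JObject a(localRef);  // registers a counted global ref under a.id
+ *   JObject b(a);         // shares the ref, bumping its count
+ *   a.weaken$();          // re-registers this$ under id 0, uncounted
+ *
+ * Both destructors then release through JCCEnv::deleteGlobalRef.
+ */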
diff --git a/jcc/jcc/sources/functions.cpp b/jcc/jcc/sources/functions.cpp
new file mode 100644
index 0000000..6b207eb
--- /dev/null
+++ b/jcc/jcc/sources/functions.cpp
@@ -0,0 +1,1977 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <jni.h>
+#include <stdarg.h>
+
+#include "java/lang/Object.h"
+#include "java/lang/Class.h"
+#include "java/lang/String.h"
+#include "java/lang/Throwable.h"
+#include "java/lang/Boolean.h"
+#include "java/lang/Byte.h"
+#include "java/lang/Character.h"
+#include "java/lang/Double.h"
+#include "java/lang/Float.h"
+#include "java/lang/Integer.h"
+#include "java/lang/Long.h"
+#include "java/lang/Short.h"
+#include "java/util/Iterator.h"
+#include "JArray.h"
+#include "functions.h"
+#include "macros.h"
+
+using namespace java::lang;
+using namespace java::util;
+
+PyObject *PyExc_JavaError = PyExc_ValueError;
+PyObject *PyExc_InvalidArgsError = PyExc_ValueError;
+
+PyObject *_set_exception_types(PyObject *self, PyObject *args)
+{
+    if (!PyArg_ParseTuple(args, "OO",
+                          &PyExc_JavaError, &PyExc_InvalidArgsError))
+        return NULL;
+
+    Py_RETURN_NONE;
+}
+
+PyObject *_set_function_self(PyObject *self, PyObject *args)
+{
+    PyObject *object, *module;
+
+    if (!PyArg_ParseTuple(args, "OO", &object, &module))
+        return NULL;
+
+    if (!PyCFunction_Check(object))
+    {
+        PyErr_SetObject(PyExc_TypeError, object);
+        return NULL;
+    }
+
+    PyCFunctionObject *cfn = (PyCFunctionObject *) object;
+
+    Py_INCREF(module);
+    Py_XDECREF(cfn->m_self);
+    cfn->m_self = module;
+
+    Py_RETURN_NONE;
+}
+
+PyObject *findClass(PyObject *self, PyObject *args)
+{
+    char *className;
+
+    if (!PyArg_ParseTuple(args, "s", &className))
+        return NULL;
+
+    try {
+        jclass cls = env->findClass(className);
+
+        if (cls)
+            return t_Class::wrap_Object(Class(cls));
+    } catch (int e) {
+        switch (e) {
+          case _EXC_PYTHON:
+            return NULL;
+          case _EXC_JAVA:
+            return PyErr_SetJavaError();
+          default:
+            throw;
+        }
+    }
+
+    Py_RETURN_NONE;
+}
+
+static boxfn get_boxfn(PyTypeObject *type)
+{
+    static PyObject *boxfn_ = PyString_FromString("boxfn_");
+    PyObject *cobj = PyObject_GetAttr((PyObject *) type, boxfn_);
+    boxfn fn;
+
+    if (cobj == NULL)
+        return NULL;
+
+    fn = (boxfn) PyCObject_AsVoidPtr(cobj);
+    Py_DECREF(cobj);
+
+    return fn;
+}
+
+static int is_instance_of(PyObject *arg, PyTypeObject *type)
+{
+    static PyObject *class_ = PyString_FromString("class_");
+    PyObject *clsObj = PyObject_GetAttr((PyObject *) type, class_);
+    int result;
+
+    if (clsObj == NULL)
+        return -1;
+
+    result = env->get_vm_env()->
+        IsInstanceOf(((t_Object *) arg)->object.this$,
+                     (jclass) ((t_Object *) clsObj)->object.this$);
+    Py_DECREF(clsObj);
+
+    return result;
+}
+
+
+#if defined(_MSC_VER) || defined(__SUNPRO_CC)
+int __parseArgs(PyObject *args, char *types, ...)
+{
+    int count = ((PyTupleObject *)(args))->ob_size;
+    va_list list, check;
+
+    va_start(list, types);
+    va_start(check, types);
+
+    return _parseArgs(((PyTupleObject *)(args))->ob_item, count, types,
+                      list, check);
+}
+
+int __parseArg(PyObject *arg, char *types, ...)
+{ + va_list list, check; + + va_start(list, types); + va_start(check, types); + + return _parseArgs(&arg, 1, types, list, check); +} + +int _parseArgs(PyObject **args, unsigned int count, char *types, + va_list list, va_list check) +{ + unsigned int typeCount = strlen(types); + + if (count > typeCount) + return -1; +#else + +int _parseArgs(PyObject **args, unsigned int count, char *types, ...) +{ + unsigned int typeCount = strlen(types); + va_list list, check; + + if (count > typeCount) + return -1; + + va_start(list, types); + va_start(check, types); +#endif + + if (!env->vm) + { + PyErr_SetString(PyExc_RuntimeError, "initVM() must be called first"); + return -1; + } + + JNIEnv *vm_env = env->get_vm_env(); + + if (!vm_env) + { + PyErr_SetString(PyExc_RuntimeError, "attachCurrentThread() must be called first"); + return -1; + } + + unsigned int pos = 0; + int array = 0; + + for (unsigned int a = 0; a < count; a++, pos++) { + PyObject *arg = args[a]; + + switch (types[pos]) { + case '[': + { + if (++array > 1) + return -1; + + a -= 1; + break; + } + + case 'j': /* Java object, with class$ */ + case 'k': /* Java object, with initializeClass */ + case 'K': /* Java object, with initializeClass and params */ + { + jclass cls = NULL; + + switch (types[pos]) { + case 'j': + cls = (jclass) va_arg(list, Class *)->this$; + break; + case 'k': + case 'K': + try { + getclassfn initializeClass = va_arg(list, getclassfn); + cls = (*initializeClass)(); + } catch (int e) { + switch (e) { + case _EXC_PYTHON: + return -1; + case _EXC_JAVA: + PyErr_SetJavaError(); + return -1; + default: + throw; + } + } + break; + } + + if (arg == Py_None) + break; + + /* ensure that class Class is initialized (which may not be the + * case because of earlier recursion avoidance (JObject(cls)). 
+ */ + if (!Class::class$) + Class::initializeClass(); + + if (array) + { + if (PyObject_TypeCheck(arg, PY_TYPE(JArrayObject))) + break; + + if (PySequence_Check(arg) && + !PyString_Check(arg) && !PyUnicode_Check(arg)) + { + if (PySequence_Length(arg) > 0) + { + PyObject *obj = PySequence_GetItem(arg, 0); + int ok = 0; + + if (obj == Py_None) + ok = 1; + else if (PyObject_TypeCheck(obj, &PY_TYPE(Object)) && + vm_env->IsInstanceOf(((t_Object *) obj)->object.this$, cls)) + ok = 1; + else if (PyObject_TypeCheck(obj, &PY_TYPE(FinalizerProxy))) + { + PyObject *o = ((t_fp *) obj)->object; + + if (PyObject_TypeCheck(o, &PY_TYPE(Object)) && + vm_env->IsInstanceOf(((t_Object *) o)->object.this$, cls)) + ok = 1; + } + + Py_DECREF(obj); + if (ok) + break; + } + else + break; + } + } + else if (PyObject_TypeCheck(arg, &PY_TYPE(Object)) && + vm_env->IsInstanceOf(((t_Object *) arg)->object.this$, cls)) + break; + else if (PyObject_TypeCheck(arg, &PY_TYPE(FinalizerProxy))) + { + arg = ((t_fp *) arg)->object; + if (PyObject_TypeCheck(arg, &PY_TYPE(Object)) && + vm_env->IsInstanceOf(((t_Object *) arg)->object.this$, cls)) + break; + } + + return -1; + } + + case 'Z': /* boolean, strict */ + { + if (array) + { + if (arg == Py_None) + break; + + if (PyObject_TypeCheck(arg, PY_TYPE(JArrayBool))) + break; + + if (PySequence_Check(arg)) + { + if (PySequence_Length(arg) > 0) + { + PyObject *obj = PySequence_GetItem(arg, 0); + int ok = obj == Py_True || obj == Py_False; + + Py_DECREF(obj); + if (ok) + break; + } + else + break; + } + } + else if (arg == Py_True || arg == Py_False) + break; + + return -1; + } + + case 'B': /* byte */ + { + if (array) + { + if (arg == Py_None) + break; + if (PyObject_TypeCheck(arg, PY_TYPE(JArrayByte))) + break; + } + else if (PyString_Check(arg) && (PyString_Size(arg) == 1)) + break; + else if (PyInt_CheckExact(arg)) + break; + + return -1; + } + + case 'C': /* char */ + { + if (array) + { + if (arg == Py_None) + break; + if (PyObject_TypeCheck(arg, PY_TYPE(JArrayChar))) + break; + } + else if (PyUnicode_Check(arg) && PyUnicode_GET_SIZE(arg) == 1) + break; + return -1; + } + + case 'I': /* int */ + { + if (array) + { + if (arg == Py_None) + break; + + if (PyObject_TypeCheck(arg, PY_TYPE(JArrayInt))) + break; + + if (PySequence_Check(arg)) + { + if (PySequence_Length(arg) > 0) + { + PyObject *obj = PySequence_GetItem(arg, 0); + int ok = PyInt_CheckExact(obj); + + Py_DECREF(obj); + if (ok) + break; + } + else + break; + } + } + else if (PyInt_CheckExact(arg)) + break; + + return -1; + } + + case 'S': /* short */ + { + if (array) + { + if (arg == Py_None) + break; + + if (PyObject_TypeCheck(arg, PY_TYPE(JArrayShort))) + break; + + if (PySequence_Check(arg)) + { + if (PySequence_Length(arg) > 0) + { + PyObject *obj = PySequence_GetItem(arg, 0); + int ok = PyInt_CheckExact(obj); + + Py_DECREF(obj); + if (ok) + break; + } + else + break; + } + } + else if (PyInt_CheckExact(arg)) + break; + + return -1; + } + + case 'D': /* double */ + { + if (array) + { + if (arg == Py_None) + break; + + if (PyObject_TypeCheck(arg, PY_TYPE(JArrayDouble))) + break; + + if (PySequence_Check(arg)) + { + if (PySequence_Length(arg) > 0) + { + PyObject *obj = PySequence_GetItem(arg, 0); + int ok = PyFloat_CheckExact(obj); + + Py_DECREF(obj); + if (ok) + break; + } + else + break; + } + } + else if (PyFloat_CheckExact(arg)) + break; + + return -1; + } + + case 'F': /* float */ + { + if (array) + { + if (arg == Py_None) + break; + + if (PyObject_TypeCheck(arg, PY_TYPE(JArrayFloat))) + break; + + if 
+            (PySequence_Check(arg))
+            {
+                if (PySequence_Length(arg) > 0)
+                {
+                    PyObject *obj = PySequence_GetItem(arg, 0);
+                    int ok = PyFloat_CheckExact(obj);
+
+                    Py_DECREF(obj);
+                    if (ok)
+                        break;
+                }
+                else
+                    break;
+            }
+        }
+        else if (PyFloat_CheckExact(arg))
+            break;
+
+        return -1;
+      }
+
+      case 'J':           /* long long */
+      {
+        if (array)
+        {
+            if (arg == Py_None)
+                break;
+
+            if (PyObject_TypeCheck(arg, PY_TYPE(JArrayLong)))
+                break;
+
+            if (PySequence_Check(arg))
+            {
+                if (PySequence_Length(arg) > 0)
+                {
+                    PyObject *obj = PySequence_GetItem(arg, 0);
+                    int ok = PyLong_CheckExact(obj);
+
+                    Py_DECREF(obj);
+                    if (ok)
+                        break;
+                }
+                else
+                    break;
+            }
+        }
+        else if (PyLong_CheckExact(arg))
+            break;
+
+        return -1;
+      }
+
+      case 's':           /* string */
+      {
+        if (array)
+        {
+            if (arg == Py_None)
+                break;
+
+            if (PyObject_TypeCheck(arg, PY_TYPE(JArrayString)))
+                break;
+
+            if (PySequence_Check(arg) &&
+                !PyString_Check(arg) && !PyUnicode_Check(arg))
+            {
+                if (PySequence_Length(arg) > 0)
+                {
+                    PyObject *obj = PySequence_GetItem(arg, 0);
+                    int ok =
+                        (obj == Py_None ||
+                         PyString_Check(obj) || PyUnicode_Check(obj));
+
+                    Py_DECREF(obj);
+                    if (ok)
+                        break;
+                }
+                else
+                    break;
+            }
+        }
+        else if (arg == Py_None ||
+                 PyString_Check(arg) || PyUnicode_Check(arg))
+            break;
+
+        return -1;
+      }
+
+      case 'o':           /* java.lang.Object */
+        break;
+
+      case 'O':           /* java.lang.Object with type param */
+      {
+        PyTypeObject *type = va_arg(list, PyTypeObject *);
+
+        if (type != NULL)
+        {
+            boxfn fn = get_boxfn(type);
+
+            if (fn == NULL || fn(type, arg, NULL) < 0)
+                return -1;
+        }
+        break;
+      }
+
+      case 'T':           /* tuple of python types with wrapfn_ */
+      {
+        static PyObject *wrapfn_ = PyString_FromString("wrapfn_");
+        int len = va_arg(list, int);
+
+        if (PyTuple_Check(arg))
+        {
+            if (PyTuple_GET_SIZE(arg) != len)
+                return -1;
+
+            for (int i = 0; i < len; i++) {
+                PyObject *type = PyTuple_GET_ITEM(arg, i);
+
+                if (!(type == Py_None ||
+                      (PyType_Check(type) &&
+                       PyObject_HasAttr(type, wrapfn_))))
+                    return -1;
+            }
+            break;
+        }
+        return -1;
+      }
+
+      default:
+        return -1;
+    }
+
+    if (types[pos] != '[')
+        array = 0;
+  }
+
+  if (array)
+      return -1;
+
+  pos = 0;
+
+  for (unsigned int a = 0; a < count; a++, pos++) {
+    PyObject *arg = args[a];
+
+    switch (types[pos]) {
+      case '[':
+      {
+        if (++array > 1)
+            return -1;
+
+        a -= 1;
+        break;
+      }
+
+      case 'j':           /* Java object except String and Object */
+      case 'k':           /* Java object, with initializeClass */
+      case 'K':           /* Java object, with initializeClass and params */
+      {
+        jclass cls = NULL;
+
+        switch (types[pos]) {
+          case 'j':
+            cls = (jclass) va_arg(check, Class *)->this$;
+            break;
+          case 'k':
+          case 'K':
+            getclassfn initializeClass = va_arg(check, getclassfn);
+            cls = (*initializeClass)();
+            break;
+        }
+
+        if (array)
+        {
+            JArray<jobject> *array = va_arg(list, JArray<jobject> *);
+
+#ifdef _java_generics
+            if (types[pos] == 'K')
+            {
+                PyTypeObject ***tp = va_arg(list, PyTypeObject ***);
+
+                va_arg(list, getparametersfn);
+                *tp = NULL;
+            }
+#endif
+            if (arg == Py_None)
+                *array = JArray<jobject>((jobject) NULL);
+            else if (PyObject_TypeCheck(arg, PY_TYPE(JArrayObject)))
+                *array = ((t_JArray<jobject> *) arg)->array;
+            else
+                *array = JArray<jobject>(cls, arg);
+
+            if (PyErr_Occurred())
+                return -1;
+        }
+        else
+        {
+            Object *obj = va_arg(list, Object *);
+
+            if (PyObject_TypeCheck(arg, &PY_TYPE(FinalizerProxy)))
+                arg = ((t_fp *) arg)->object;
+
+#ifdef _java_generics
+            if (types[pos] == 'K')
+            {
+                PyTypeObject ***tp = va_arg(list, PyTypeObject ***);
+                PyTypeObject **(*parameters_)(void *) =
+                    va_arg(list, getparametersfn);
+
+                if (arg == Py_None)
+                    *tp = NULL;
+                else
+                    *tp = (*parameters_)(arg);
+            }
+#endif
+
+            *obj = arg == Py_None
+                ? Object(NULL)
+                : ((t_Object *) arg)->object;
+        }
+        break;
+      }
+
+      case 'Z':           /* boolean, strict */
+      {
+        if (array)
+        {
+            JArray<jboolean> *array = va_arg(list, JArray<jboolean> *);
+
+            if (arg == Py_None)
+                *array = JArray<jboolean>((jobject) NULL);
+            else if (PyObject_TypeCheck(arg, PY_TYPE(JArrayBool)))
+                *array = ((t_JArray<jboolean> *) arg)->array;
+            else
+                *array = JArray<jboolean>(arg);
+
+            if (PyErr_Occurred())
+                return -1;
+        }
+        else
+        {
+            jboolean *b = va_arg(list, jboolean *);
+            *b = arg == Py_True;
+        }
+        break;
+      }
+
+      case 'B':           /* byte */
+      {
+        if (array)
+        {
+            JArray<jbyte> *array = va_arg(list, JArray<jbyte> *);
+
+            if (arg == Py_None)
+                *array = JArray<jbyte>((jobject) NULL);
+            else if (PyObject_TypeCheck(arg, PY_TYPE(JArrayByte)))
+                *array = ((t_JArray<jbyte> *) arg)->array;
+            else
+                *array = JArray<jbyte>(arg);
+
+            if (PyErr_Occurred())
+                return -1;
+        }
+        else if (PyString_Check(arg))
+        {
+            jbyte *a = va_arg(list, jbyte *);
+            *a = (jbyte) PyString_AS_STRING(arg)[0];
+        }
+        else
+        {
+            jbyte *a = va_arg(list, jbyte *);
+            *a = (jbyte) PyInt_AsLong(arg);
+        }
+        break;
+      }
+
+      case 'C':           /* char */
+      {
+        if (array)
+        {
+            JArray<jchar> *array = va_arg(list, JArray<jchar> *);
+
+            if (arg == Py_None)
+                *array = JArray<jchar>((jobject) NULL);
+            else if (PyObject_TypeCheck(arg, PY_TYPE(JArrayChar)))
+                *array = ((t_JArray<jchar> *) arg)->array;
+            else
+                *array = JArray<jchar>(arg);
+
+            if (PyErr_Occurred())
+                return -1;
+        }
+        else
+        {
+            jchar *c = va_arg(list, jchar *);
+            *c = (jchar) PyUnicode_AS_UNICODE(arg)[0];
+        }
+        break;
+      }
+
+      case 'I':           /* int */
+      {
+        if (array)
+        {
+            JArray<jint> *array = va_arg(list, JArray<jint> *);
+
+            if (arg == Py_None)
+                *array = JArray<jint>((jobject) NULL);
+            else if (PyObject_TypeCheck(arg, PY_TYPE(JArrayInt)))
+                *array = ((t_JArray<jint> *) arg)->array;
+            else
+                *array = JArray<jint>(arg);
+
+            if (PyErr_Occurred())
+                return -1;
+        }
+        else
+        {
+            jint *n = va_arg(list, jint *);
+            *n = (jint) PyInt_AsLong(arg);
+        }
+        break;
+      }
+
+      case 'S':           /* short */
+      {
+        if (array)
+        {
+            JArray<jshort> *array = va_arg(list, JArray<jshort> *);
+
+            if (arg == Py_None)
+                *array = JArray<jshort>((jobject) NULL);
+            else if (PyObject_TypeCheck(arg, PY_TYPE(JArrayShort)))
+                *array = ((t_JArray<jshort> *) arg)->array;
+            else
+                *array = JArray<jshort>(arg);
+
+            if (PyErr_Occurred())
+                return -1;
+        }
+        else
+        {
+            jshort *n = va_arg(list, jshort *);
+            *n = (jshort) PyInt_AsLong(arg);
+        }
+        break;
+      }
+
+      case 'D':           /* double */
+      {
+        if (array)
+        {
+            JArray<jdouble> *array = va_arg(list, JArray<jdouble> *);
+
+            if (arg == Py_None)
+                *array = JArray<jdouble>((jobject) NULL);
+            else if (PyObject_TypeCheck(arg, PY_TYPE(JArrayDouble)))
+                *array = ((t_JArray<jdouble> *) arg)->array;
+            else
+                *array = JArray<jdouble>(arg);
+
+            if (PyErr_Occurred())
+                return -1;
+        }
+        else
+        {
+            jdouble *d = va_arg(list, jdouble *);
+            *d = (jdouble) PyFloat_AsDouble(arg);
+        }
+        break;
+      }
+
+      case 'F':           /* float */
+      {
+        if (array)
+        {
+            JArray<jfloat> *array = va_arg(list, JArray<jfloat> *);
+
+            if (arg == Py_None)
+                *array = JArray<jfloat>((jobject) NULL);
+            else if (PyObject_TypeCheck(arg, PY_TYPE(JArrayFloat)))
+                *array = ((t_JArray<jfloat> *) arg)->array;
+            else
+                *array = JArray<jfloat>(arg);
+
+            if (PyErr_Occurred())
+                return -1;
+        }
+        else
+        {
+            jfloat *d = va_arg(list, jfloat *);
+            *d = (jfloat) (float) PyFloat_AsDouble(arg);
+        }
+        break;
+      }
+
+      case 'J':           /* long long */
+      {
+        if (array)
+        {
+            JArray<jlong> *array = va_arg(list, JArray<jlong> *);
+
+            if (arg == Py_None)
+                *array = JArray<jlong>((jobject) NULL);
+            else if (PyObject_TypeCheck(arg, PY_TYPE(JArrayLong)))
+                *array = ((t_JArray<jlong> *) arg)->array;
+            else
+                *array = JArray<jlong>(arg);
+
+            if (PyErr_Occurred())
+                return -1;
+        }
+        else
+        {
+            jlong *l = va_arg(list, jlong *);
+            *l = (jlong) PyLong_AsLongLong(arg);
+        }
+        break;
+      }
+
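+      /* Summary of the type codes handled by the two passes above and below
+       * (the first pass validates, the second extracts into va_arg slots):
+       *   Z B C I S D F J  primitive scalars, or JArray wrappers after '['
+       *   s                Python str/unicode mapped to java.lang.String
+       *   j k K            Java object wrappers (class, initializeClass, generics)
+       *   o O              java.lang.Object, optionally boxed via a type's boxfn_
+       *   T                tuple of wrapper types (generics support)
+       * Sketch of a call as emitted by generated code (names hypothetical):
+       *
+       *   String name((jobject) NULL);
+       *   jint n;
+       *   if (!parseArgs(args, "sI", &name, &n))
+       *       { use name and n; parseArgs returns 0 on success }
+       */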
+      case 's':           /* string */
+      {
+        if (array)
+        {
+            JArray<jstring> *array = va_arg(list, JArray<jstring> *);
+
+            if (arg == Py_None)
+                *array = JArray<jstring>((jobject) NULL);
+            else if (PyObject_TypeCheck(arg, PY_TYPE(JArrayString)))
+                *array = ((t_JArray<jstring> *) arg)->array;
+            else
+                *array = JArray<jstring>(arg);
+
+            if (PyErr_Occurred())
+                return -1;
+        }
+        else
+        {
+            String *str = va_arg(list, String *);
+
+            if (arg == Py_None)
+                *str = String(NULL);
+            else
+            {
+                *str = p2j(arg);
+                if (PyErr_Occurred())
+                    return -1;
+            }
+        }
+        break;
+      }
+
+      case 'o':           /* java.lang.Object */
+      case 'O':           /* java.lang.Object with type param */
+      {
+        if (array)
+        {
+            JArray<jobject> *array = va_arg(list, JArray<jobject> *);
+
+            if (arg == Py_None)
+                *array = JArray<jobject>((jobject) NULL);
+            else
+                *array = JArray<jobject>(arg);
+
+            if (PyErr_Occurred())
+                return -1;
+        }
+        else
+        {
+            Object *obj = va_arg(list, Object *);
+
+            if (types[pos] == 'O')
+            {
+                PyTypeObject *type = va_arg(check, PyTypeObject *);
+
+                if (type != NULL)
+                {
+                    boxfn fn = get_boxfn(type);
+
+                    if (fn == NULL || fn(type, arg, obj) < 0)
+                        return -1;
+
+                    break;
+                }
+            }
+
+            if (boxObject(NULL, arg, obj) < 0)
+                return -1;
+        }
+        break;
+      }
+
+      case 'T':           /* tuple of python types with wrapfn_ */
+      {
+        int len = va_arg(check, int);
+        PyTypeObject **types = va_arg(list, PyTypeObject **);
+
+        for (int i = 0; i < len; i++) {
+            PyObject *type = PyTuple_GET_ITEM(arg, i);
+
+            if (type == Py_None)
+                types[i] = NULL;
+            else
+                types[i] = (PyTypeObject *) type;
+        }
+        break;
+      }
+
+      default:
+        return -1;
+    }
+
+    if (types[pos] != '[')
+        array = 0;
+  }
+
+  if (pos == typeCount)
+      return 0;
+
+  return -1;
+}
+
+
+String p2j(PyObject *object)
+{
+    return String(env->fromPyString(object));
+}
+
+PyObject *j2p(const String& js)
+{
+    return env->fromJString((jstring) js.this$, 0);
+}
+
+PyObject *PyErr_SetArgsError(char *name, PyObject *args)
+{
+    if (!PyErr_Occurred())
+    {
+        PyObject *err = Py_BuildValue("(sO)", name, args);
+
+        PyErr_SetObject(PyExc_InvalidArgsError, err);
+        Py_DECREF(err);
+    }
+
+    return NULL;
+}
+
+PyObject *PyErr_SetArgsError(PyObject *self, char *name, PyObject *args)
+{
+    if (!PyErr_Occurred())
+    {
+        PyObject *type = (PyObject *) self->ob_type;
+        PyObject *err = Py_BuildValue("(OsO)", type, name, args);
+
+        PyErr_SetObject(PyExc_InvalidArgsError, err);
+        Py_DECREF(err);
+    }
+
+    return NULL;
+}
+
+PyObject *PyErr_SetArgsError(PyTypeObject *type, char *name, PyObject *args)
+{
+    if (!PyErr_Occurred())
+    {
+        PyObject *err = Py_BuildValue("(OsO)", type, name, args);
+
+        PyErr_SetObject(PyExc_InvalidArgsError, err);
+        Py_DECREF(err);
+    }
+
+    return NULL;
+}
+
+PyObject *PyErr_SetJavaError()
+{
+    JNIEnv *vm_env = env->get_vm_env();
+    jthrowable throwable = vm_env->ExceptionOccurred();
+    PyObject *err;
+
+    vm_env->ExceptionClear();
+    err = t_Throwable::wrap_Object(Throwable(throwable));
+
+    PyErr_SetObject(PyExc_JavaError, err);
+    Py_DECREF(err);
+
+    return NULL;
+}
+
+void throwPythonError(void)
+{
+    PyObject *exc = PyErr_Occurred();
+
+    if (exc && PyErr_GivenExceptionMatches(exc, PyExc_JavaError))
+    {
+        PyObject *value, *traceback;
+
+        PyErr_Fetch(&exc, &value, &traceback);
+        if (value)
+        {
+            PyObject *je = PyObject_CallMethod(value, "getJavaException", "");
+
+            if (!je)
+                PyErr_Restore(exc, value, traceback);
+            else
+            {
+                Py_DECREF(exc);
+                Py_DECREF(value);
+                Py_XDECREF(traceback);
+                exc = je;
+
+                if (exc && PyObject_TypeCheck(exc, &PY_TYPE(Throwable)))
+                {
+                    jobject jobj = ((t_Throwable *) exc)->object.this$;
+
env->get_vm_env()->Throw((jthrowable) jobj); + Py_DECREF(exc); + + return; + } + } + } + else + { + Py_DECREF(exc); + Py_XDECREF(traceback); + } + } + else if (exc && PyErr_GivenExceptionMatches(exc, PyExc_StopIteration)) + { + PyErr_Clear(); + return; + } + + if (exc) + { + PyObject *name = PyObject_GetAttrString(exc, "__name__"); + + env->get_vm_env()->ThrowNew(env->getPythonExceptionClass(), + PyString_AS_STRING(name)); + Py_DECREF(name); + } + else + env->get_vm_env()->ThrowNew(env->getPythonExceptionClass(), + "python error"); +} + +void throwTypeError(const char *name, PyObject *object) +{ + PyObject *tuple = Py_BuildValue("(ssO)", "while calling", name, object); + + PyErr_SetObject(PyExc_TypeError, tuple); + Py_DECREF(tuple); + + env->get_vm_env()->ThrowNew(env->getPythonExceptionClass(), "type error"); +} + +int abstract_init(PyObject *self, PyObject *args, PyObject *kwds) +{ + PyObject *err = + Py_BuildValue("(sO)", "instantiating java class", self->ob_type); + + PyErr_SetObject(PyExc_NotImplementedError, err); + Py_DECREF(err); + + return -1; +} + +PyObject *callSuper(PyTypeObject *type, const char *name, PyObject *args, + int cardinality) +{ + PyObject *super = (PyObject *) type->tp_base; + PyObject *method = + PyObject_GetAttrString(super, (char *) name); // python 2.4 cast + PyObject *value; + + if (!method) + return NULL; + + if (cardinality > 1) + value = PyObject_Call(method, args, NULL); + else + { +#if PY_VERSION_HEX < 0x02040000 + PyObject *tuple = Py_BuildValue("(O)", args); +#else + PyObject *tuple = PyTuple_Pack(1, args); +#endif + value = PyObject_Call(method, tuple, NULL); + Py_DECREF(tuple); + } + + Py_DECREF(method); + + return value; +} + +PyObject *callSuper(PyTypeObject *type, PyObject *self, + const char *name, PyObject *args, int cardinality) +{ +#if PY_VERSION_HEX < 0x02040000 + PyObject *tuple = Py_BuildValue("(OO)", type, self); +#else + PyObject *tuple = PyTuple_Pack(2, type, self); +#endif + PyObject *super = PyObject_Call((PyObject *) &PySuper_Type, tuple, NULL); + PyObject *method, *value; + + Py_DECREF(tuple); + if (!super) + return NULL; + + method = PyObject_GetAttrString(super, (char *) name); // python 2.4 cast + Py_DECREF(super); + if (!method) + return NULL; + + if (cardinality > 1) + value = PyObject_Call(method, args, NULL); + else + { +#if PY_VERSION_HEX < 0x02040000 + tuple = Py_BuildValue("(O)", args); +#else + tuple = PyTuple_Pack(1, args); +#endif + value = PyObject_Call(method, tuple, NULL); + Py_DECREF(tuple); + } + + Py_DECREF(method); + + return value; +} + +PyObject *castCheck(PyObject *obj, getclassfn initializeClass, + int reportError) +{ + if (PyObject_TypeCheck(obj, &PY_TYPE(FinalizerProxy))) + obj = ((t_fp *) obj)->object; + + if (!PyObject_TypeCheck(obj, &PY_TYPE(Object))) + { + if (reportError) + PyErr_SetObject(PyExc_TypeError, obj); + return NULL; + } + + jobject jobj = ((t_Object *) obj)->object.this$; + + if (jobj && !env->isInstanceOf(jobj, initializeClass)) + { + if (reportError) + PyErr_SetObject(PyExc_TypeError, obj); + + return NULL; + } + + return obj; +} + +PyObject *get_extension_iterator(PyObject *self) +{ + return PyObject_CallMethod(self, "iterator", ""); +} + +PyObject *get_extension_next(PyObject *self) +{ + return PyObject_CallMethod(self, "next", ""); +} + +PyObject *get_extension_nextElement(PyObject *self) +{ + return PyObject_CallMethod(self, "nextElement", ""); +} + +jobjectArray fromPySequence(jclass cls, PyObject *sequence) +{ + if (sequence == Py_None) + return NULL; + + if 
(!PySequence_Check(sequence)) + { + PyErr_SetObject(PyExc_TypeError, sequence); + return NULL; + } + + int length = PySequence_Length(sequence); + jobjectArray array; + + try { + array = env->newObjectArray(cls, length); + } catch (int e) { + switch (e) { + case _EXC_PYTHON: + return NULL; + case _EXC_JAVA: + PyErr_SetJavaError(); + return NULL; + default: + throw; + } + } + + JNIEnv *vm_env = env->get_vm_env(); + + for (int i = 0; i < length; i++) { + PyObject *obj = PySequence_GetItem(sequence, i); + int fromString = 0; + jobject jobj; + + if (!obj) + break; + else if (obj == Py_None) + jobj = NULL; + else if (PyString_Check(obj) || PyUnicode_Check(obj)) + { + jobj = env->fromPyString(obj); + fromString = 1; + } + else if (PyObject_TypeCheck(obj, &PY_TYPE(JObject))) + jobj = ((t_JObject *) obj)->object.this$; + else if (PyObject_TypeCheck(obj, &PY_TYPE(FinalizerProxy))) + jobj = ((t_JObject *) ((t_fp *) obj)->object)->object.this$; + else /* todo: add auto-boxing of primitive types */ + { + PyErr_SetObject(PyExc_TypeError, obj); + Py_DECREF(obj); + return NULL; + } + + Py_DECREF(obj); + + try { + env->setObjectArrayElement(array, i, jobj); + if (fromString) + vm_env->DeleteLocalRef(jobj); + } catch (int e) { + switch (e) { + case _EXC_JAVA: + PyErr_SetJavaError(); + return NULL; + default: + throw; + } + } + } + + return array; +} + +void installType(PyTypeObject *type, PyObject *module, char *name, + int isExtension) +{ + if (PyType_Ready(type) == 0) + { + Py_INCREF(type); + if (isExtension) + { + type->ob_type = &PY_TYPE(FinalizerClass); + Py_INCREF(&PY_TYPE(FinalizerClass)); + } + PyModule_AddObject(module, name, (PyObject *) type); + } +} + +PyObject *wrapType(PyTypeObject *type, const jobject& obj) +{ + static PyObject *wrapfn_ = PyString_FromString("wrapfn_"); + PyObject *cobj = PyObject_GetAttr((PyObject *) type, wrapfn_); + PyObject *(*wrapfn)(const jobject&); + + if (cobj == NULL) + return NULL; + + wrapfn = (PyObject *(*)(const jobject &)) PyCObject_AsVoidPtr(cobj); + Py_DECREF(cobj); + + return wrapfn(obj); +} + +PyObject *unboxBoolean(const jobject& obj) +{ + if (obj != NULL) + { + if (!env->isInstanceOf(obj, java::lang::Boolean::initializeClass)) + { + PyErr_SetObject(PyExc_TypeError, + (PyObject *) &java::lang::PY_TYPE(Boolean)); + return NULL; + } + + if (env->booleanValue(obj)) + Py_RETURN_TRUE; + + Py_RETURN_FALSE; + } + + Py_RETURN_NONE; +} + +PyObject *unboxByte(const jobject &obj) +{ + if (obj != NULL) + { + if (!env->isInstanceOf(obj, java::lang::Byte::initializeClass)) + { + PyErr_SetObject(PyExc_TypeError, + (PyObject *) &java::lang::PY_TYPE(Byte)); + return NULL; + } + + return PyInt_FromLong((long) env->byteValue(obj)); + } + + Py_RETURN_NONE; +} + +PyObject *unboxCharacter(const jobject &obj) +{ + if (obj != NULL) + { + if (!env->isInstanceOf(obj, java::lang::Character::initializeClass)) + { + PyErr_SetObject(PyExc_TypeError, + (PyObject *) &java::lang::PY_TYPE(Character)); + return NULL; + } + + jchar c = env->charValue(obj); + return PyUnicode_FromUnicode((Py_UNICODE *) &c, 1); + } + + Py_RETURN_NONE; +} + +PyObject *unboxDouble(const jobject &obj) +{ + if (obj != NULL) + { + if (!env->isInstanceOf(obj, java::lang::Double::initializeClass)) + { + PyErr_SetObject(PyExc_TypeError, + (PyObject *) &java::lang::PY_TYPE(Double)); + return NULL; + } + + return PyFloat_FromDouble((double) env->doubleValue(obj)); + } + + Py_RETURN_NONE; +} + +PyObject *unboxFloat(const jobject &obj) +{ + if (obj != NULL) + { + if (!env->isInstanceOf(obj, 
java::lang::Float::initializeClass)) + { + PyErr_SetObject(PyExc_TypeError, + (PyObject *) &java::lang::PY_TYPE(Float)); + return NULL; + } + + return PyFloat_FromDouble((double) env->floatValue(obj)); + } + + Py_RETURN_NONE; +} + +PyObject *unboxInteger(const jobject &obj) +{ + if (obj != NULL) + { + if (!env->isInstanceOf(obj, java::lang::Integer::initializeClass)) + { + PyErr_SetObject(PyExc_TypeError, + (PyObject *) &java::lang::PY_TYPE(Integer)); + return NULL; + } + + return PyInt_FromLong((long) env->intValue(obj)); + } + + Py_RETURN_NONE; +} + +PyObject *unboxLong(const jobject &obj) +{ + if (obj != NULL) + { + if (!env->isInstanceOf(obj, java::lang::Long::initializeClass)) + { + PyErr_SetObject(PyExc_TypeError, + (PyObject *) &java::lang::PY_TYPE(Long)); + return NULL; + } + + return PyLong_FromLongLong((PY_LONG_LONG) env->longValue(obj)); + } + + Py_RETURN_NONE; +} + +PyObject *unboxShort(const jobject &obj) +{ + if (obj != NULL) + { + if (!env->isInstanceOf(obj, java::lang::Short::initializeClass)) + { + PyErr_SetObject(PyExc_TypeError, + (PyObject *) &java::lang::PY_TYPE(Short)); + return NULL; + } + + return PyInt_FromLong((long) env->shortValue(obj)); + } + + Py_RETURN_NONE; +} + +PyObject *unboxString(const jobject &obj) +{ + if (obj != NULL) + { + if (!env->isInstanceOf(obj, java::lang::String::initializeClass)) + { + PyErr_SetObject(PyExc_TypeError, + (PyObject *) &java::lang::PY_TYPE(String)); + return NULL; + } + + return env->fromJString((jstring) obj, 0); + } + + Py_RETURN_NONE; +} + +static int boxJObject(PyTypeObject *type, PyObject *arg, + java::lang::Object *obj) +{ + if (arg == Py_None) + { + if (obj != NULL) + *obj = Object(NULL); + } + else if (PyObject_TypeCheck(arg, &PY_TYPE(Object))) + { + if (type != NULL && !is_instance_of(arg, type)) + return -1; + + if (obj != NULL) + *obj = ((t_Object *) arg)->object; + } + else if (PyObject_TypeCheck(arg, &PY_TYPE(FinalizerProxy))) + { + arg = ((t_fp *) arg)->object; + if (PyObject_TypeCheck(arg, &PY_TYPE(Object))) + { + if (type != NULL && !is_instance_of(arg, type)) + return -1; + + if (obj != NULL) + *obj = ((t_Object *) arg)->object; + } + else + return -1; + } + else + return 1; + + return 0; +} + +int boxBoolean(PyTypeObject *type, PyObject *arg, java::lang::Object *obj) +{ + int result = boxJObject(type, arg, obj); + + if (result <= 0) + return result; + + if (arg == Py_True) + { + if (obj != NULL) + *obj = *Boolean::TRUE; + } + else if (arg == Py_False) + { + if (obj != NULL) + *obj = *Boolean::FALSE; + } + else + return -1; + + return 0; +} + +int boxByte(PyTypeObject *type, PyObject *arg, java::lang::Object *obj) +{ + int result = boxJObject(type, arg, obj); + + if (result <= 0) + return result; + + if (PyInt_Check(arg)) + { + int n = PyInt_AS_LONG(arg); + jbyte b = (jbyte) n; + + if (b == n) + { + if (obj != NULL) + *obj = Byte(b); + } + else + return -1; + } + else if (PyLong_Check(arg)) + { + PY_LONG_LONG ln = PyLong_AsLongLong(arg); + jbyte b = (jbyte) ln; + + if (b == ln) + { + if (obj != NULL) + *obj = Byte(b); + } + else + return -1; + } + else if (PyFloat_Check(arg)) + { + double d = PyFloat_AS_DOUBLE(arg); + jbyte b = (jbyte) d; + + if (b == d) + { + if (obj != NULL) + *obj = Byte(b); + } + else + return -1; + } + else + return -1; + + return 0; +} + +int boxCharacter(PyTypeObject *type, PyObject *arg, java::lang::Object *obj) +{ + int result = boxJObject(type, arg, obj); + + if (result <= 0) + return result; + + if (PyString_Check(arg)) + { + char *c; + Py_ssize_t len; + + if 
(PyString_AsStringAndSize(arg, &c, &len) < 0 || len != 1) + return -1; + + if (obj != NULL) + *obj = Character((jchar) c[0]); + } + else if (PyUnicode_Check(arg)) + { + Py_ssize_t len = PyUnicode_GetSize(arg); + + if (len != 1) + return -1; + + if (obj != NULL) + *obj = Character((jchar) PyUnicode_AsUnicode(arg)[0]); + } + else + return -1; + + return 0; +} + +int boxCharSequence(PyTypeObject *type, PyObject *arg, java::lang::Object *obj) +{ + int result = boxJObject(type, arg, obj); + + if (result <= 0) + return result; + + if (PyString_Check(arg) || PyUnicode_Check(arg)) + { + if (obj != NULL) + { + *obj = p2j(arg); + if (PyErr_Occurred()) + return -1; + } + } + else + return -1; + + return 0; +} + +int boxDouble(PyTypeObject *type, PyObject *arg, java::lang::Object *obj) +{ + int result = boxJObject(type, arg, obj); + + if (result <= 0) + return result; + + if (PyInt_Check(arg)) + { + if (obj != NULL) + *obj = Double((jdouble) PyInt_AS_LONG(arg)); + } + else if (PyLong_Check(arg)) + { + if (obj != NULL) + *obj = Double((jdouble) PyLong_AsLongLong(arg)); + } + else if (PyFloat_Check(arg)) + { + if (obj != NULL) + *obj = Double(PyFloat_AS_DOUBLE(arg)); + } + else + return -1; + + return 0; +} + +int boxFloat(PyTypeObject *type, PyObject *arg, java::lang::Object *obj) +{ + int result = boxJObject(type, arg, obj); + + if (result <= 0) + return result; + + if (PyInt_Check(arg)) + { + if (obj != NULL) + *obj = Float((jfloat) PyInt_AS_LONG(arg)); + } + else if (PyLong_Check(arg)) + { + PY_LONG_LONG ln = PyLong_AsLongLong(arg); + float f = (float) ln; + + if ((PY_LONG_LONG) f == ln) + { + if (obj != NULL) + *obj = Float(f); + } + else + return -1; + } + else if (PyFloat_Check(arg)) + { + double d = PyFloat_AS_DOUBLE(arg); + float f = (float) d; + + if ((double) f == d) + { + if (obj != NULL) + *obj = Float(f); + } + else + return -1; + } + else + return -1; + + return 0; +} + +int boxInteger(PyTypeObject *type, PyObject *arg, java::lang::Object *obj) +{ + int result = boxJObject(type, arg, obj); + + if (result <= 0) + return result; + + if (PyInt_Check(arg)) + { + if (obj != NULL) + *obj = Integer((jint) PyInt_AS_LONG(arg)); + } + else if (PyLong_Check(arg)) + { + PY_LONG_LONG ln = PyLong_AsLongLong(arg); + int n = (int) ln; + + if (n == ln) + { + if (obj != NULL) + *obj = Integer(n); + } + else + return -1; + } + else if (PyFloat_Check(arg)) + { + double d = PyFloat_AS_DOUBLE(arg); + int n = (int) d; + + if ((double) n == d) + { + if (obj != NULL) + *obj = Integer(n); + } + else + return -1; + } + else + return -1; + + return 0; +} + +int boxLong(PyTypeObject *type, PyObject *arg, java::lang::Object *obj) +{ + int result = boxJObject(type, arg, obj); + + if (result <= 0) + return result; + + if (PyInt_Check(arg)) + { + if (obj != NULL) + *obj = Long((jlong) PyInt_AS_LONG(arg)); + } + else if (PyLong_Check(arg)) + { + if (obj != NULL) + *obj = Long((jlong) PyLong_AsLongLong(arg)); + } + else if (PyFloat_Check(arg)) + { + double d = PyFloat_AS_DOUBLE(arg); + PY_LONG_LONG n = (PY_LONG_LONG) d; + + if ((double) n == d) + { + if (obj != NULL) + *obj = Long((jlong) n); + } + else + return -1; + } + else + return -1; + + return 0; +} + +int boxNumber(PyTypeObject *type, PyObject *arg, java::lang::Object *obj) +{ + int result = boxJObject(type, arg, obj); + + if (result <= 0) + return result; + + if (PyInt_Check(arg)) + { + if (obj != NULL) + *obj = Integer((jint) PyInt_AS_LONG(arg)); + } + else if (PyLong_Check(arg)) + { + if (obj != NULL) + *obj = Long((jlong) PyLong_AsLongLong(arg)); + } + else if 
(PyFloat_Check(arg)) + { + if (obj != NULL) + *obj = Double((jdouble) PyFloat_AS_DOUBLE(arg)); + } + else + return -1; + + return 0; +} + +int boxShort(PyTypeObject *type, PyObject *arg, java::lang::Object *obj) +{ + int result = boxJObject(type, arg, obj); + + if (result <= 0) + return result; + + if (PyInt_Check(arg)) + { + int n = (int) PyInt_AS_LONG(arg); + short sn = (short) n; + + if (sn == n) + { + if (obj != NULL) + *obj = Short((jshort) sn); + } + else + return -1; + } + else if (PyLong_Check(arg)) + { + PY_LONG_LONG ln = PyLong_AsLongLong(arg); + short sn = (short) ln; + + if (sn == ln) + { + if (obj != NULL) + *obj = Short((jshort) sn); + } + else + return -1; + } + else if (PyFloat_Check(arg)) + { + double d = PyFloat_AS_DOUBLE(arg); + short sn = (short) (int) d; + + if ((double) sn == d) + { + if (obj != NULL) + *obj = Short((jshort) sn); + } + else + return -1; + } + else + return -1; + + return 0; +} + +int boxString(PyTypeObject *type, PyObject *arg, java::lang::Object *obj) +{ + int result = boxJObject(type, arg, obj); + + if (result <= 0) + return result; + + if (PyString_Check(arg) || PyUnicode_Check(arg)) + { + if (obj != NULL) + { + *obj = p2j(arg); + if (PyErr_Occurred()) + return -1; + } + } + else + return -1; + + return 0; +} + +int boxObject(PyTypeObject *type, PyObject *arg, java::lang::Object *obj) +{ + int result = boxJObject(type, arg, obj); + + if (result <= 0) + return result; + + if (obj != NULL) + { + if (PyString_Check(arg) || PyUnicode_Check(arg)) + { + *obj = p2j(arg); + if (PyErr_Occurred()) + return -1; + } + else if (arg == Py_True) + *obj = *Boolean::TRUE; + else if (arg == Py_False) + *obj = *Boolean::FALSE; + else if (PyInt_Check(arg)) + { + long ln = PyInt_AS_LONG(arg); + int n = (int) ln; + + if (ln != (long) n) + *obj = Long((jlong) ln); + else + *obj = Integer((jint) n); + } + else if (PyLong_Check(arg)) + *obj = Long((jlong) PyLong_AsLongLong(arg)); + else if (PyFloat_Check(arg)) + *obj = Double((jdouble) PyFloat_AS_DOUBLE(arg)); + else + return -1; + } + else if (!(PyString_Check(arg) || PyUnicode_Check(arg) || + arg == Py_True || arg == Py_False || + PyInt_Check(arg) || PyLong_Check(arg) || + PyFloat_Check(arg))) + return -1; + + return 0; +} + + +#ifdef _java_generics +PyObject *typeParameters(PyTypeObject *types[], size_t size) +{ + size_t count = size / sizeof(PyTypeObject *); + PyObject *tuple = PyTuple_New(count); + + for (size_t i = 0; i < count; i++) { + PyObject *type = (PyObject *) types[i]; + + if (type == NULL) + type = Py_None; + + PyTuple_SET_ITEM(tuple, i, type); + Py_INCREF(type); + } + + return tuple; +} +#endif diff --git a/jcc/jcc/sources/functions.h b/jcc/jcc/sources/functions.h new file mode 100644 index 0000000..243efa3 --- /dev/null +++ b/jcc/jcc/sources/functions.h @@ -0,0 +1,296 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef _functions_h +#define _functions_h + +#include "java/util/Iterator.h" +#include "java/util/Enumeration.h" +#include "java/lang/String.h" +#include "java/lang/Object.h" +#include "macros.h" + +#if PY_VERSION_HEX < 0x02050000 +typedef int Py_ssize_t; +typedef inquiry lenfunc; +typedef intargfunc ssizeargfunc; +typedef intintargfunc ssizessizeargfunc; +typedef intobjargproc ssizeobjargproc; +typedef intintobjargproc ssizessizeobjargproc; +#endif + +typedef PyTypeObject **(*getparametersfn)(void *); +typedef int (*boxfn)(PyTypeObject *, PyObject *, java::lang::Object *); + +PyObject *PyErr_SetArgsError(char *name, PyObject *args); +PyObject *PyErr_SetArgsError(PyObject *self, char *name, PyObject *args); +PyObject *PyErr_SetArgsError(PyTypeObject *type, char *name, PyObject *args); +PyObject *PyErr_SetJavaError(); + +extern PyObject *PyExc_JavaError; +extern PyObject *PyExc_InvalidArgsError; + + +void throwPythonError(void); +void throwTypeError(const char *name, PyObject *object); + +#if defined(_MSC_VER) || defined(__SUNPRO_CC) + +#define parseArgs __parseArgs +#define parseArg __parseArg + +int __parseArgs(PyObject *args, char *types, ...); +int __parseArg(PyObject *arg, char *types, ...); + +int _parseArgs(PyObject **args, unsigned int count, char *types, + va_list list, va_list check); + +#else + +#define parseArgs(args, types, rest...) \ + _parseArgs(((PyTupleObject *)(args))->ob_item, \ + ((PyTupleObject *)(args))->ob_size, types, ##rest) + +#define parseArg(arg, types, rest...) \ + _parseArgs(&(arg), 1, types, ##rest) + +int _parseArgs(PyObject **args, unsigned int count, char *types, ...); + +#endif + +int abstract_init(PyObject *self, PyObject *args, PyObject *kwds); +PyObject *wrapType(PyTypeObject *type, const jobject& obj); + +PyObject *unboxBoolean(const jobject& obj); +PyObject *unboxByte(const jobject& obj); +PyObject *unboxCharacter(const jobject& obj); +PyObject *unboxDouble(const jobject& obj); +PyObject *unboxFloat(const jobject& obj); +PyObject *unboxInteger(const jobject& obj); +PyObject *unboxLong(const jobject& obj); +PyObject *unboxShort(const jobject& obj); +PyObject *unboxString(const jobject& obj); + +int boxBoolean(PyTypeObject *type, PyObject *arg, java::lang::Object *obj); +int boxByte(PyTypeObject *type, PyObject *arg, java::lang::Object *obj); +int boxCharacter(PyTypeObject *type, PyObject *arg, java::lang::Object *obj); +int boxCharSequence(PyTypeObject *type, PyObject *arg, java::lang::Object *obj); +int boxDouble(PyTypeObject *type, PyObject *arg, java::lang::Object *obj); +int boxFloat(PyTypeObject *type, PyObject *arg, java::lang::Object *obj); +int boxInteger(PyTypeObject *type, PyObject *arg, java::lang::Object *obj); +int boxLong(PyTypeObject *type, PyObject *arg, java::lang::Object *obj); +int boxNumber(PyTypeObject *type, PyObject *arg, java::lang::Object *obj); +int boxShort(PyTypeObject *type, PyObject *arg, java::lang::Object *obj); +int boxString(PyTypeObject *type, PyObject *arg, java::lang::Object *obj); +int boxObject(PyTypeObject *type, PyObject *arg, java::lang::Object *obj); + +PyObject *j2p(const java::lang::String& js); +java::lang::String p2j(PyObject *object); + +PyObject *make_descriptor(PyTypeObject *value); +PyObject *make_descriptor(getclassfn initializeClass); +PyObject *make_descriptor(getclassfn initializeClass, int generics); +PyObject *make_descriptor(PyObject *value); +PyObject *make_descriptor(PyObject *(*wrapfn)(const jobject &)); +PyObject *make_descriptor(boxfn fn); +PyObject *make_descriptor(jboolean 
+value);
+PyObject *make_descriptor(jbyte value);
+PyObject *make_descriptor(jchar value);
+PyObject *make_descriptor(jdouble value);
+PyObject *make_descriptor(jfloat value);
+PyObject *make_descriptor(jint value);
+PyObject *make_descriptor(jlong value);
+PyObject *make_descriptor(jshort value);
+
+jobjectArray make_array(jclass cls, PyObject *sequence);
+
+PyObject *callSuper(PyTypeObject *type,
+                    const char *name, PyObject *args, int cardinality);
+PyObject *callSuper(PyTypeObject *type, PyObject *self,
+                    const char *name, PyObject *args, int cardinality);
+
+template<class T> PyObject *get_iterator(T *self)
+{
+    jobject iterator;
+
+    OBJ_CALL(iterator = env->iterator(self->object.this$));
+    return java::util::t_Iterator::wrap_jobject(iterator);
+}
+
+#ifdef _java_generics
+template<class T> PyObject *get_generic_iterator(T *self)
+{
+    PyTypeObject *param = self->parameters ? self->parameters[0] : NULL;
+    jobject iterator;
+
+    OBJ_CALL(iterator = env->iterator(self->object.this$));
+    return java::util::t_Iterator::wrap_jobject(iterator, param);
+}
+#endif
+
+template<class T, class U> PyObject *get_iterator_next(T *self)
+{
+    jboolean hasNext;
+    OBJ_CALL(hasNext = self->object.hasNext());
+    if (!hasNext)
+    {
+        PyErr_SetNone(PyExc_StopIteration);
+        return NULL;
+    }
+
+    jobject next;
+    OBJ_CALL(next = env->iteratorNext(self->object.this$));
+
+    jclass cls = java::lang::String::initializeClass();
+    if (env->get_vm_env()->IsInstanceOf(next, cls))
+        return env->fromJString((jstring) next, 1);
+
+    return U::wrap_jobject(next);
+}
+
+#ifdef _java_generics
+template<class T, class U> PyObject *get_generic_iterator_next(T *self)
+{
+    jboolean hasNext;
+    OBJ_CALL(hasNext = self->object.hasNext());
+    if (!hasNext)
+    {
+        PyErr_SetNone(PyExc_StopIteration);
+        return NULL;
+    }
+
+    jobject next;
+    OBJ_CALL(next = env->iteratorNext(self->object.this$));
+
+    jclass cls = java::lang::String::initializeClass();
+    if (env->get_vm_env()->IsInstanceOf(next, cls))
+        return env->fromJString((jstring) next, 1);
+
+    PyTypeObject *param = self->parameters ? self->parameters[0] : NULL;
+    if (param != NULL)
+        return wrapType(param, next);
+
+    return U::wrap_jobject(next);
+}
+#endif
+
+template<class T, class U> PyObject *get_enumeration_next(T *self)
+{
+    jboolean hasMoreElements;
+    OBJ_CALL(hasMoreElements = self->object.hasMoreElements());
+    if (!hasMoreElements)
+    {
+        PyErr_SetNone(PyExc_StopIteration);
+        return NULL;
+    }
+
+    jobject next;
+    OBJ_CALL(next = env->enumerationNext(self->object.this$));
+
+    jclass cls = java::lang::String::initializeClass();
+    if (env->get_vm_env()->IsInstanceOf(next, cls))
+        return env->fromJString((jstring) next, 1);
+
+    return U::wrap_jobject(next);
+}
+
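+/* Python-side sketch (hypothetical wrapped type): these templates back the
+ * iterator protocol of generated wrappers, so code like
+ *
+ *   for elem in wrapped_collection:   # wraps a java.util.Collection
+ *       ...
+ *
+ * works: get_iterator maps iterator(), get_iterator_next maps
+ * hasNext()/next(), an exhausted iterator raises StopIteration, and
+ * java.lang.String elements come back as Python strings via fromJString.
+ */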
+#ifdef _java_generics
+template<class T, class U> PyObject *get_generic_enumeration_next(T *self)
+{
+    jboolean hasMoreElements;
+    OBJ_CALL(hasMoreElements = self->object.hasMoreElements());
+    if (!hasMoreElements)
+    {
+        PyErr_SetNone(PyExc_StopIteration);
+        return NULL;
+    }
+
+    jobject next;
+    OBJ_CALL(next = env->enumerationNext(self->object.this$));
+
+    jclass cls = java::lang::String::initializeClass();
+    if (env->get_vm_env()->IsInstanceOf(next, cls))
+        return env->fromJString((jstring) next, 1);
+
+    PyTypeObject *param = self->parameters ? self->parameters[0] : NULL;
+    if (param != NULL)
+        return wrapType(param, next);
+
+    return U::wrap_jobject(next);
+}
+#endif
+
+template<class T, class U, class V> PyObject *get_next(T *self)
+{
+    V next((jobject) NULL);
+    OBJ_CALL(next = self->object.next());
+    if (!next)
+    {
+        PyErr_SetNone(PyExc_StopIteration);
+        return NULL;
+    }
+
+    jclass cls = java::lang::String::initializeClass();
+    if (env->get_vm_env()->IsInstanceOf(next.this$, cls))
+        return env->fromJString((jstring) next.this$, 0);
+
+    return U::wrap_Object(next);
+}
+
+#ifdef _java_generics
+template<class T, class U, class V> PyObject *get_generic_next(T *self)
+{
+    V next((jobject) NULL);
+    OBJ_CALL(next = self->object.next());
+    if (!next)
+    {
+        PyErr_SetNone(PyExc_StopIteration);
+        return NULL;
+    }
+
+    jclass cls = java::lang::String::initializeClass();
+    if (env->get_vm_env()->IsInstanceOf(next.this$, cls))
+        return env->fromJString((jstring) next.this$, 0);
+
+    PyTypeObject *param = self->parameters ? self->parameters[0] : NULL;
+    if (param != NULL)
+        return wrapType(param, next.this$);
+
+    return U::wrap_Object(next);
+}
+#endif
+
+PyObject *get_extension_iterator(PyObject *self);
+PyObject *get_extension_next(PyObject *self);
+PyObject *get_extension_nextElement(PyObject *self);
+
+jobjectArray fromPySequence(jclass cls, PyObject *sequence);
+PyObject *castCheck(PyObject *obj, getclassfn initializeClass,
+                    int reportError);
+void installType(PyTypeObject *type, PyObject *module, char *name,
+                 int isExtension);
+
+#ifdef _java_generics
+PyObject *typeParameters(PyTypeObject *types[], size_t size);
+#endif
+
+extern PyTypeObject PY_TYPE(FinalizerClass);
+extern PyTypeObject PY_TYPE(FinalizerProxy);
+
+typedef struct {
+    PyObject_HEAD
+    PyObject *object;
+} t_fp;
+
+#endif /* _functions_h */
diff --git a/jcc/jcc/sources/jcc.cpp b/jcc/jcc/sources/jcc.cpp
new file mode 100644
index 0000000..a32345a
--- /dev/null
+++ b/jcc/jcc/sources/jcc.cpp
@@ -0,0 +1,761 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <jni.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef linux
+#include <dlfcn.h>
+#endif
+
+#include <Python.h>
+#include "structmember.h"
+
+#include "JObject.h"
+#include "JCCEnv.h"
+#include "macros.h"
+
+_DLL_EXPORT JCCEnv *env;
+
+
+/* JCCEnv */
+
+class t_jccenv {
+public:
+    PyObject_HEAD
+    JCCEnv *env;
+};
+
+static void t_jccenv_dealloc(t_jccenv *self);
+static PyObject *t_jccenv_attachCurrentThread(PyObject *self, PyObject *args);
+static PyObject *t_jccenv_detachCurrentThread(PyObject *self);
+static PyObject *t_jccenv_isCurrentThreadAttached(PyObject *self);
+static PyObject *t_jccenv_strhash(PyObject *self, PyObject *arg);
+static PyObject *t_jccenv__dumpRefs(PyObject *self,
+                                    PyObject *args, PyObject *kwds);
+static PyObject *t_jccenv__addClassPath(PyObject *self, PyObject *args);
+
+static PyObject *t_jccenv__get_jni_version(PyObject *self, void *data);
+static PyObject *t_jccenv__get_java_version(PyObject *self, void *data);
+static PyObject *t_jccenv__get_classpath(PyObject *self, void *data);
+
+static PyGetSetDef t_jccenv_properties[] = {
+    { "jni_version", (getter) t_jccenv__get_jni_version, NULL, NULL, NULL },
+    { "java_version", (getter) t_jccenv__get_java_version, NULL, NULL, NULL },
+    { "classpath", (getter) t_jccenv__get_classpath, NULL, NULL, NULL },
+    { NULL, NULL, NULL, NULL, NULL }
+};
+
+static PyMemberDef t_jccenv_members[] = {
+    { NULL, 0, 0, 0, NULL }
+};
+
+static PyMethodDef t_jccenv_methods[] = {
+    { "attachCurrentThread", (PyCFunction) t_jccenv_attachCurrentThread,
+      METH_VARARGS, NULL },
+    { "detachCurrentThread", (PyCFunction) t_jccenv_detachCurrentThread,
+      METH_NOARGS, NULL },
+    { "isCurrentThreadAttached", (PyCFunction) t_jccenv_isCurrentThreadAttached,
+      METH_NOARGS, NULL },
+    { "strhash", (PyCFunction) t_jccenv_strhash,
+      METH_O, NULL },
+    { "_dumpRefs", (PyCFunction) t_jccenv__dumpRefs,
+      METH_VARARGS | METH_KEYWORDS, NULL },
+    { "_addClassPath", (PyCFunction) t_jccenv__addClassPath,
+      METH_VARARGS, NULL },
+    { NULL, NULL, 0, NULL }
+};
+
+PyTypeObject PY_TYPE(JCCEnv) = {
+    PyObject_HEAD_INIT(NULL)
+    0,                                   /* ob_size */
+    "jcc.JCCEnv",                        /* tp_name */
+    sizeof(t_jccenv),                    /* tp_basicsize */
+    0,                                   /* tp_itemsize */
+    (destructor)t_jccenv_dealloc,        /* tp_dealloc */
+    0,                                   /* tp_print */
+    0,                                   /* tp_getattr */
+    0,                                   /* tp_setattr */
+    0,                                   /* tp_compare */
+    0,                                   /* tp_repr */
+    0,                                   /* tp_as_number */
+    0,                                   /* tp_as_sequence */
+    0,                                   /* tp_as_mapping */
+    0,                                   /* tp_hash */
+    0,                                   /* tp_call */
+    0,                                   /* tp_str */
+    0,                                   /* tp_getattro */
+    0,                                   /* tp_setattro */
+    0,                                   /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT,                  /* tp_flags */
+    "JCCEnv",                            /* tp_doc */
+    0,                                   /* tp_traverse */
+    0,                                   /* tp_clear */
+    0,                                   /* tp_richcompare */
+    0,                                   /* tp_weaklistoffset */
+    0,                                   /* tp_iter */
+    0,                                   /* tp_iternext */
+    t_jccenv_methods,                    /* tp_methods */
+    t_jccenv_members,                    /* tp_members */
+    t_jccenv_properties,                 /* tp_getset */
+    0,                                   /* tp_base */
+    0,                                   /* tp_dict */
+    0,                                   /* tp_descr_get */
+    0,                                   /* tp_descr_set */
+    0,                                   /* tp_dictoffset */
+    0,                                   /* tp_init */
+    0,                                   /* tp_alloc */
+    0,                                   /* tp_new */
+};
+
+static void t_jccenv_dealloc(t_jccenv *self)
+{
+    self->ob_type->tp_free((PyObject *) self);
+}
+
+static void add_option(char *name, char *value, JavaVMOption *option)
+{
+    char *buf = new char[strlen(name) + strlen(value) + 1];
+
+    sprintf(buf, "%s%s", name, value);
+    option->optionString = buf;
+}
+
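+/* Illustrative sketch of add_option's effect (names hypothetical):
+ *
+ *   JavaVMOption option;
+ *   add_option("-Xmx", "512m", &option);  // option.optionString == "-Xmx512m"
+ *
+ * The concatenated buffer is handed to JNI_CreateJavaVM via JavaVMInitArgs;
+ * initVM below only reclaims such buffers on its error path.
+ */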
+#ifdef _jcc_lib
+static void add_paths(char *name, char *p0, char *p1, JavaVMOption *option)
+{
+#if defined(_MSC_VER) || defined(__WIN32)
+    char pathsep = ';';
+#else
+    char pathsep = ':';
+#endif
+    char *buf = new char[strlen(name) + strlen(p0) + strlen(p1) + 4];
+
+    sprintf(buf, "%s%s%c%s", name, p0, pathsep, p1);
+    option->optionString = buf;
+}
+#endif
+
+
+static PyObject *t_jccenv_attachCurrentThread(PyObject *self, PyObject *args)
+{
+    char *name = NULL;
+    int asDaemon = 0, result;
+
+    if (!PyArg_ParseTuple(args, "|si", &name, &asDaemon))
+        return NULL;
+
+    result = env->attachCurrentThread(name, asDaemon);
+
+    return PyInt_FromLong(result);
+}
+
+static PyObject *t_jccenv_detachCurrentThread(PyObject *self)
+{
+    int result = env->vm->DetachCurrentThread();
+
+    env->set_vm_env(NULL);
+
+    return PyInt_FromLong(result);
+}
+
+static PyObject *t_jccenv_isCurrentThreadAttached(PyObject *self)
+{
+    if (env->get_vm_env() != NULL)
+        Py_RETURN_TRUE;
+
+    Py_RETURN_FALSE;
+}
+
+static PyObject *t_jccenv_strhash(PyObject *self, PyObject *arg)
+{
+    int hash = PyObject_Hash(arg);
+    char buffer[10];
+
+    sprintf(buffer, "%08x", (unsigned int) hash);
+    return PyString_FromStringAndSize(buffer, 8);
+}
+
+static PyObject *t_jccenv__dumpRefs(PyObject *self,
+                                    PyObject *args, PyObject *kwds)
+{
+    static char *kwnames[] = {
+        "classes", "values", NULL
+    };
+    int classes = 0, values = 0;
+    PyObject *result;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|ii", kwnames,
+                                     &classes, &values))
+        return NULL;
+
+    if (classes)
+        result = PyDict_New();
+    else
+        result = PyList_New(env->refs.size());
+
+    int count = 0;
+
+    for (std::multimap<int, countedRef>::iterator iter = env->refs.begin();
+         iter != env->refs.end();
+         iter++) {
+        if (classes)  // return dict of { class name: instance count }
+        {
+            char *name = env->getClassName(iter->second.global);
+            PyObject *key = PyString_FromString(name);
+            PyObject *value = PyDict_GetItem(result, key);
+
+            if (value == NULL)
+                value = PyInt_FromLong(1);
+            else
+                value = PyInt_FromLong(PyInt_AS_LONG(value) + 1);
+
+            PyDict_SetItem(result, key, value);
+            Py_DECREF(key);
+            Py_DECREF(value);
+
+            delete name;
+        }
+        else if (values)  // return list of (value string, ref count)
+        {
+            char *str = env->toString(iter->second.global);
+            PyObject *key = PyString_FromString(str);
+            PyObject *value = PyInt_FromLong(iter->second.count);
+
+#if PY_VERSION_HEX < 0x02040000
+            PyList_SET_ITEM(result, count++, Py_BuildValue("(OO)", key, value));
+#else
+            PyList_SET_ITEM(result, count++, PyTuple_Pack(2, key, value));
+#endif
+            Py_DECREF(key);
+            Py_DECREF(value);
+
+            delete str;
+        }
+        else  // return list of (id hash code, ref count)
+        {
+            PyObject *key = PyInt_FromLong(iter->first);
+            PyObject *value = PyInt_FromLong(iter->second.count);
+
+#if PY_VERSION_HEX < 0x02040000
+            PyList_SET_ITEM(result, count++, Py_BuildValue("(OO)", key, value));
+#else
+            PyList_SET_ITEM(result, count++, PyTuple_Pack(2, key, value));
+#endif
+            Py_DECREF(key);
+            Py_DECREF(value);
+        }
+    }
+
+    return result;
+}
+
+static PyObject *t_jccenv__addClassPath(PyObject *self, PyObject *args)
+{
+    const char *classpath;
+
+    if (!PyArg_ParseTuple(args, "s", &classpath))
+        return NULL;
+
+    env->setClassPath(classpath);
+
+    Py_RETURN_NONE;
+}
+
+static PyObject *t_jccenv__get_jni_version(PyObject *self, void *data)
+{
+    return PyInt_FromLong(env->getJNIVersion());
+}
+
+static PyObject *t_jccenv__get_java_version(PyObject *self, void *data)
+{
+    return env->fromJString(env->getJavaVersion(), 1);
+}
+
+static PyObject *t_jccenv__get_classpath(PyObject *self, void *data)
+{
+    char *classpath = env->getClassPath();
+
+    if (classpath)
+    {
+        PyObject *result = PyString_FromString(classpath);
+
+        free(classpath);
+        return result;
+    }
+
+    Py_RETURN_NONE;
+}
+
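+/* From Python, threads that were not created by the JVM must attach
+ * themselves before making Java calls, using the methods defined above
+ * (a usage sketch, assuming a JCC-built module named lucene):
+ *
+ *   import lucene, threading
+ *
+ *   def worker():
+ *       lucene.getVMEnv().attachCurrentThread()
+ *       # ... wrapped Java APIs may be called safely here ...
+ *
+ *   threading.Thread(target=worker).start()
+ */
+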
+_DLL_EXPORT PyObject *getVMEnv(PyObject *self)
+{
+    if (env->vm != NULL)
+    {
+        t_jccenv *jccenv = (t_jccenv *) PY_TYPE(JCCEnv).tp_alloc(&PY_TYPE(JCCEnv), 0);
+        jccenv->env = env;
+
+        return (PyObject *) jccenv;
+    }
+
+    Py_RETURN_NONE;
+}
+
+#ifdef _jcc_lib
+static void registerNatives(JNIEnv *vm_env);
+#endif
+
+_DLL_EXPORT PyObject *initJCC(PyObject *module)
+{
+    static int _once_only = 1;
+#if defined(_MSC_VER) || defined(__WIN32)
+#define verstring(n) #n
+    PyObject *ver = PyString_FromString(verstring(JCC_VER));
+#else
+    PyObject *ver = PyString_FromString(JCC_VER);
+#endif
+    PyObject_SetAttrString(module, "JCC_VERSION", ver); Py_DECREF(ver);
+
+    if (_once_only)
+    {
+        PyEval_InitThreads();
+        INSTALL_TYPE(JCCEnv, module);
+
+        if (env == NULL)
+            env = new JCCEnv(NULL, NULL);
+
+        _once_only = 0;
+        Py_RETURN_TRUE;
+    }
+
+    Py_RETURN_FALSE;
+}
+
+_DLL_EXPORT PyObject *initVM(PyObject *self, PyObject *args, PyObject *kwds)
+{
+    static char *kwnames[] = {
+        "classpath", "initialheap", "maxheap", "maxstack",
+        "vmargs", NULL
+    };
+    char *classpath = NULL;
+    char *initialheap = NULL, *maxheap = NULL, *maxstack = NULL;
+    char *vmargs = NULL;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|zzzzz", kwnames,
+                                     &classpath,
+                                     &initialheap, &maxheap, &maxstack,
+                                     &vmargs))
+        return NULL;
+
+    if (env->vm)
+    {
+        PyObject *module_cp = NULL;
+
+        if (initialheap || maxheap || maxstack || vmargs)
+        {
+            PyErr_SetString(PyExc_ValueError,
+                            "JVM is already running, options are ineffective");
+            return NULL;
+        }
+
+        if (classpath == NULL && self != NULL)
+        {
+            module_cp = PyObject_GetAttrString(self, "CLASSPATH");
+            if (module_cp != NULL)
+                classpath = PyString_AsString(module_cp);
+        }
+
+        if (classpath && classpath[0])
+            env->setClassPath(classpath);
+
+        Py_XDECREF(module_cp);
+
+        return getVMEnv(self);
+    }
+    else
+    {
+        JavaVMInitArgs vm_args;
+        JavaVMOption vm_options[32];
+        JNIEnv *vm_env;
+        JavaVM *vm;
+        unsigned int nOptions = 0;
+        PyObject *module_cp = NULL;
+
+        vm_args.version = JNI_VERSION_1_4;
+        JNI_GetDefaultJavaVMInitArgs(&vm_args);
+
+        if (classpath == NULL && self != NULL)
+        {
+            module_cp = PyObject_GetAttrString(self, "CLASSPATH");
+            if (module_cp != NULL)
+                classpath = PyString_AsString(module_cp);
+        }
+
+#ifdef _jcc_lib
+        PyObject *jcc = PyImport_ImportModule("jcc");
+        PyObject *cp = PyObject_GetAttrString(jcc, "CLASSPATH");
+
+        if (classpath)
+            add_paths("-Djava.class.path=", PyString_AsString(cp), classpath,
+                      &vm_options[nOptions++]);
+        else
+            add_option("-Djava.class.path=", PyString_AsString(cp),
+                       &vm_options[nOptions++]);
+
+        Py_DECREF(cp);
+        Py_DECREF(jcc);
+#else
+        if (classpath)
+            add_option("-Djava.class.path=", classpath,
+                       &vm_options[nOptions++]);
+#endif
+
+        Py_XDECREF(module_cp);
+
+        if (initialheap)
+            add_option("-Xms", initialheap, &vm_options[nOptions++]);
+        if (maxheap)
+            add_option("-Xmx", maxheap, &vm_options[nOptions++]);
+        if (maxstack)
+            add_option("-Xss", maxstack, &vm_options[nOptions++]);
+
+        if (vmargs)
+        {
+#ifdef _MSC_VER
+            char *buf = _strdup(vmargs);
+#else
+            char *buf = strdup(vmargs);
+#endif
+            char *sep = ",";
+            char *option;
+
+            for (option = strtok(buf, sep); option; option = strtok(NULL, sep))
+            {
+                if (nOptions < sizeof(vm_options) / sizeof(JavaVMOption))
+                    add_option("", option, &vm_options[nOptions++]);
+                else
+                {
+                    free(buf);
+                    for (unsigned int i = 0; i < nOptions; i++)
+                        delete[] vm_options[i].optionString;
+                    PyErr_Format(PyExc_ValueError, "Too many options (> %d)",
+                                 nOptions);
+                    return NULL;
+                }
+            }
+            free(buf);
+        }
+
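+        /* vmargs is a single comma-separated string, split above with
+         * strtok(), each piece becoming one extra JavaVMOption, e.g.
+         * (a sketch): initVM(vmargs='-verbose:gc,-Xcheck:jni').
+         */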
"-verbose:gc"; + //vm_options[nOptions++].optionString = "-Xcheck:jni"; + + vm_args.nOptions = nOptions; + vm_args.ignoreUnrecognized = JNI_FALSE; + vm_args.options = vm_options; + + if (JNI_CreateJavaVM(&vm, (void **) &vm_env, &vm_args) < 0) + { + for (unsigned int i = 0; i < nOptions; i++) + delete vm_options[i].optionString; + + PyErr_Format(PyExc_ValueError, + "An error occurred while creating Java VM"); + return NULL; + } + + env->set_vm(vm, vm_env); + + for (unsigned int i = 0; i < nOptions; i++) + delete vm_options[i].optionString; + + t_jccenv *jccenv = (t_jccenv *) PY_TYPE(JCCEnv).tp_alloc(&PY_TYPE(JCCEnv), 0); + jccenv->env = env; + +#ifdef _jcc_lib + registerNatives(vm_env); +#endif + + return (PyObject *) jccenv; + } +} + +#ifdef _jcc_lib + +static void raise_error(JNIEnv *vm_env, const char *message) +{ + jclass cls = vm_env->FindClass("org/apache/jcc/PythonException"); + vm_env->ThrowNew(cls, message); +} + +static void _PythonVM_init(JNIEnv *vm_env, jobject self, + jstring programName, jobjectArray args) +{ + const char *str = vm_env->GetStringUTFChars(programName, JNI_FALSE); +#ifdef linux + char buf[32]; + + // load python runtime for other .so modules to link (such as _time.so) + sprintf(buf, "libpython%d.%d.so", PY_MAJOR_VERSION, PY_MINOR_VERSION); + dlopen(buf, RTLD_NOW | RTLD_GLOBAL); +#endif + + Py_SetProgramName((char *) str); + + PyEval_InitThreads(); + Py_Initialize(); + + if (args) + { + int argc = vm_env->GetArrayLength(args); + char **argv = (char **) calloc(argc + 1, sizeof(char *)); + + argv[0] = (char *) str; + for (int i = 0; i < argc; i++) { + jstring arg = (jstring) vm_env->GetObjectArrayElement(args, i); + argv[i + 1] = (char *) vm_env->GetStringUTFChars(arg, JNI_FALSE); + } + + PySys_SetArgv(argc + 1, argv); + + for (int i = 0; i < argc; i++) { + jstring arg = (jstring) vm_env->GetObjectArrayElement(args, i); + vm_env->ReleaseStringUTFChars(arg, argv[i + 1]); + } + free(argv); + } + else + PySys_SetArgv(1, (char **) &str); + + vm_env->ReleaseStringUTFChars(programName, str); + PyEval_ReleaseLock(); +} + +static jobject _PythonVM_instantiate(JNIEnv *vm_env, jobject self, + jstring moduleName, jstring className) +{ + PythonGIL gil(vm_env); + + const char *modStr = vm_env->GetStringUTFChars(moduleName, JNI_FALSE); + PyObject *module = + PyImport_ImportModule((char *) modStr); // python 2.4 cast + + vm_env->ReleaseStringUTFChars(moduleName, modStr); + + if (!module) + { + raise_error(vm_env, "import failed"); + return NULL; + } + + const char *clsStr = vm_env->GetStringUTFChars(className, JNI_FALSE); + PyObject *cls = + PyObject_GetAttrString(module, (char *) clsStr); // python 2.4 cast + PyObject *obj; + jobject jobj; + + vm_env->ReleaseStringUTFChars(className, clsStr); + Py_DECREF(module); + + if (!cls) + { + raise_error(vm_env, "class not found"); + return NULL; + } + + obj = PyObject_CallFunctionObjArgs(cls, NULL); + Py_DECREF(cls); + + if (!obj) + { + raise_error(vm_env, "instantiation failed"); + return NULL; + } + + PyObject *cObj = PyObject_GetAttrString(obj, "_jobject"); + + if (!cObj) + { + raise_error(vm_env, "instance does not proxy a java object"); + Py_DECREF(obj); + + return NULL; + } + + jobj = (jobject) PyCObject_AsVoidPtr(cObj); + Py_DECREF(cObj); + + jobj = vm_env->NewLocalRef(jobj); + Py_DECREF(obj); + + return jobj; +} + +extern "C" { + + JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM *vm, void *reserved) + { + JNIEnv *vm_env; + + if (!vm->GetEnv((void **) &vm_env, JNI_VERSION_1_4)) + env = new JCCEnv(vm, vm_env); + + registerNatives(vm_env); 
+ + return JNI_VERSION_1_4; + } + + JNIEXPORT void JNICALL Java_org_apache_jcc_PythonVM_init(JNIEnv *vm_env, jobject self, jstring programName, jobjectArray args) + { + return _PythonVM_init(vm_env, self, programName, args); + } + + JNIEXPORT jobject JNICALL Java_org_apache_jcc_PythonVM_instantiate(JNIEnv *vm_env, jobject self, jstring moduleName, jstring className) + { + return _PythonVM_instantiate(vm_env, self, moduleName, className); + } + + JNIEXPORT jint JNICALL Java_org_apache_jcc_PythonVM_acquireThreadState(JNIEnv *vm_env) + { + PyGILState_STATE state = PyGILState_Ensure(); + PyThreadState *tstate = PyGILState_GetThisThreadState(); + int result = -1; + + if (tstate != NULL && tstate->gilstate_counter >= 1) + result = ++tstate->gilstate_counter; + + PyGILState_Release(state); + return result; + } + + JNIEXPORT jint JNICALL Java_org_apache_jcc_PythonVM_releaseThreadState(JNIEnv *vm_env) + { + PyGILState_STATE state = PyGILState_Ensure(); + PyThreadState *tstate = PyGILState_GetThisThreadState(); + int result = -1; + + if (tstate != NULL && tstate->gilstate_counter >= 1) + result = --tstate->gilstate_counter; + + PyGILState_Release(state); + return result; + } +}; + +static void JNICALL _PythonException_getErrorInfo(JNIEnv *vm_env, jobject self) +{ + PythonGIL gil(vm_env); + + if (!PyErr_Occurred()) + return; + + PyObject *type, *value, *tb, *errorName; + jclass jcls = vm_env->GetObjectClass(self); + + PyErr_Fetch(&type, &value, &tb); + + errorName = PyObject_GetAttrString(type, "__name__"); + if (errorName != NULL) + { + jfieldID fid = + vm_env->GetFieldID(jcls, "errorName", "Ljava/lang/String;"); + jstring str = env->fromPyString(errorName); + + vm_env->SetObjectField(self, fid, str); + vm_env->DeleteLocalRef(str); + Py_DECREF(errorName); + } + + if (value != NULL) + { + PyObject *message = PyObject_Str(value); + + if (message != NULL) + { + jfieldID fid = + vm_env->GetFieldID(jcls, "message", "Ljava/lang/String;"); + jstring str = env->fromPyString(message); + + vm_env->SetObjectField(self, fid, str); + vm_env->DeleteLocalRef(str); + Py_DECREF(message); + } + } + + PyObject *module = NULL, *cls = NULL, *stringIO = NULL, *result = NULL; + PyObject *_stderr = PySys_GetObject("stderr"); + if (!_stderr) + goto err; + + module = PyImport_ImportModule("cStringIO"); + if (!module) + goto err; + + cls = PyObject_GetAttrString(module, "StringIO"); + Py_DECREF(module); + if (!cls) + goto err; + + stringIO = PyObject_CallObject(cls, NULL); + Py_DECREF(cls); + if (!stringIO) + goto err; + + Py_INCREF(_stderr); + PySys_SetObject("stderr", stringIO); + + PyErr_Restore(type, value, tb); + PyErr_Print(); + + result = PyObject_CallMethod(stringIO, "getvalue", NULL); + Py_DECREF(stringIO); + + if (result != NULL) + { + jfieldID fid = + vm_env->GetFieldID(jcls, "traceback", "Ljava/lang/String;"); + jstring str = env->fromPyString(result); + + vm_env->SetObjectField(self, fid, str); + vm_env->DeleteLocalRef(str); + Py_DECREF(result); + } + + PySys_SetObject("stderr", _stderr); + Py_DECREF(_stderr); + + return; + + err: + PyErr_Restore(type, value, tb); +} + +static void JNICALL _PythonException_clear(JNIEnv *vm_env, jobject self) +{ + PythonGIL gil(vm_env); + PyErr_Clear(); +} + +static void registerNatives(JNIEnv *vm_env) +{ + jclass cls = vm_env->FindClass("org/apache/jcc/PythonException"); + JNINativeMethod methods[] = { + { "getErrorInfo", "()V", (void *) _PythonException_getErrorInfo }, + { "clear", "()V", (void *) _PythonException_clear }, + }; + + vm_env->RegisterNatives(cls, methods, 2); +} + 
+#endif /* _jcc_lib */ diff --git a/jcc/jcc/sources/jccfuncs.h b/jcc/jcc/sources/jccfuncs.h new file mode 100644 index 0000000..5ffdc09 --- /dev/null +++ b/jcc/jcc/sources/jccfuncs.h @@ -0,0 +1,46 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _jccfuncs_H +#define _jccfuncs_H + +#ifdef PYTHON + +PyObject *__initialize__(PyObject *module, PyObject *args, PyObject *kwds); +PyObject *initVM(PyObject *self, PyObject *args, PyObject *kwds); +PyObject *getVMEnv(PyObject *self); +PyObject *_set_exception_types(PyObject *self, PyObject *args); +PyObject *_set_function_self(PyObject *self, PyObject *args); +PyObject *findClass(PyObject *self, PyObject *args); +PyObject *JArray_Type(PyObject *self, PyObject *arg); + +PyMethodDef jcc_funcs[] = { + { "initVM", (PyCFunction) __initialize__, + METH_VARARGS | METH_KEYWORDS, NULL }, + { "getVMEnv", (PyCFunction) getVMEnv, + METH_NOARGS, NULL }, + { "findClass", (PyCFunction) findClass, + METH_VARARGS, NULL }, + { "_set_exception_types", (PyCFunction) _set_exception_types, + METH_VARARGS, NULL }, + { "_set_function_self", (PyCFunction) _set_function_self, + METH_VARARGS, NULL }, + { "JArray", (PyCFunction) JArray_Type, + METH_O, NULL }, + { NULL, NULL, 0, NULL } +}; + +#endif + +#endif /* _jccfuncs_H */ diff --git a/jcc/jcc/sources/macros.h b/jcc/jcc/sources/macros.h new file mode 100644 index 0000000..3ce4fcc --- /dev/null +++ b/jcc/jcc/sources/macros.h @@ -0,0 +1,193 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef _macros_H +#define _macros_H + +#define OBJ_CALL(action) \ + { \ + try { \ + PythonThreadState state(1); \ + action; \ + } catch (int e) { \ + switch (e) { \ + case _EXC_PYTHON: \ + return NULL; \ + case _EXC_JAVA: \ + return PyErr_SetJavaError(); \ + default: \ + throw; \ + } \ + } \ + } + +#define INT_CALL(action) \ + { \ + try { \ + PythonThreadState state(1); \ + action; \ + } catch (int e) { \ + switch (e) { \ + case _EXC_PYTHON: \ + return -1; \ + case _EXC_JAVA: \ + PyErr_SetJavaError(); \ + return -1; \ + default: \ + throw; \ + } \ + } \ + } + + +#define DECLARE_METHOD(type, name, flags) \ + { #name, (PyCFunction) type##_##name, flags, "" } + +#define DECLARE_GET_FIELD(type, name) \ + { #name, (getter) type##_get__##name, NULL, "", NULL } + +#define DECLARE_SET_FIELD(type, name) \ + { #name, NULL, (setter) type##_set__##name, "", NULL } + +#define DECLARE_GETSET_FIELD(type, name) \ + { #name, (getter) type##_get__##name, (setter) type##_set__##name, "", NULL } + +#define PY_TYPE(name) name##$$Type + +#define DECLARE_TYPE(name, t_name, base, javaClass, \ + init, iter, iternext, getset, mapping, sequence) \ +PyTypeObject PY_TYPE(name) = { \ + PyObject_HEAD_INIT(NULL) \ + /* ob_size */ 0, \ + /* tp_name */ #name, \ + /* tp_basicsize */ sizeof(t_name), \ + /* tp_itemsize */ 0, \ + /* tp_dealloc */ 0, \ + /* tp_print */ 0, \ + /* tp_getattr */ 0, \ + /* tp_setattr */ 0, \ + /* tp_compare */ 0, \ + /* tp_repr */ 0, \ + /* tp_as_number */ 0, \ + /* tp_as_sequence */ sequence, \ + /* tp_as_mapping */ mapping, \ + /* tp_hash */ 0, \ + /* tp_call */ 0, \ + /* tp_str */ 0, \ + /* tp_getattro */ 0, \ + /* tp_setattro */ 0, \ + /* tp_as_buffer */ 0, \ + /* tp_flags */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, \ + /* tp_doc */ #t_name" objects", \ + /* tp_traverse */ 0, \ + /* tp_clear */ 0, \ + /* tp_richcompare */ 0, \ + /* tp_weaklistoffset */ 0, \ + /* tp_iter */ (getiterfunc) iter, \ + /* tp_iternext */ (iternextfunc) iternext, \ + /* tp_methods */ t_name##__methods_, \ + /* tp_members */ 0, \ + /* tp_getset */ getset, \ + /* tp_base */ &PY_TYPE(base), \ + /* tp_dict */ 0, \ + /* tp_descr_get */ 0, \ + /* tp_descr_set */ 0, \ + /* tp_dictoffset */ 0, \ + /* tp_init */ (initproc)init, \ + /* tp_alloc */ 0, \ + /* tp_new */ 0, \ +}; \ +PyObject *t_name::wrap_Object(const javaClass& object) \ +{ \ + if (!!object) \ + { \ + t_name *self = \ + (t_name *) PY_TYPE(name).tp_alloc(&PY_TYPE(name), 0); \ + if (self) \ + self->object = object; \ + return (PyObject *) self; \ + } \ + Py_RETURN_NONE; \ +} \ +PyObject *t_name::wrap_jobject(const jobject& object) \ +{ \ + if (!!object) \ + { \ + if (!env->isInstanceOf(object, javaClass::initializeClass)) \ + { \ + PyErr_SetObject(PyExc_TypeError, \ + (PyObject *) &PY_TYPE(name)); \ + return NULL; \ + } \ + t_name *self = (t_name *) \ + PY_TYPE(name).tp_alloc(&PY_TYPE(name), 0); \ + if (self) \ + self->object = javaClass(object); \ + return (PyObject *) self; \ + } \ + Py_RETURN_NONE; \ +} \ + + +#define INSTALL_TYPE(name, module) \ + if (PyType_Ready(&PY_TYPE(name)) == 0) \ + { \ + Py_INCREF(&PY_TYPE(name)); \ + PyModule_AddObject(module, #name, (PyObject *) &PY_TYPE(name)); \ + } + + +#define Py_RETURN_BOOL(b) \ + { \ + if (b) \ + Py_RETURN_TRUE; \ + else \ + Py_RETURN_FALSE; \ + } + +#define Py_RETURN_SELF \ + { \ + Py_INCREF(self); \ + return (PyObject *) self; \ + } + + +#if PY_VERSION_HEX < 0x02040000 + +#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None +#define Py_RETURN_TRUE return Py_INCREF(Py_True), Py_True +#define 
Py_RETURN_FALSE return Py_INCREF(Py_False), Py_False
+
+#define Py_CLEAR(op) \
+    do { \
+        if (op) { \
+            PyObject *tmp = (PyObject *)(op); \
+            (op) = NULL; \
+            Py_DECREF(tmp); \
+        } \
+    } while (0)
+
+#define Py_VISIT(op) \
+    do { \
+        if (op) { \
+            int vret = visit((PyObject *)(op), arg); \
+            if (vret) \
+                return vret; \
+        } \
+    } while (0)
+
+#endif /* Python 2.3.5 */
+
+
+#endif /* _macros_H */
diff --git a/jcc/jcc/sources/types.cpp b/jcc/jcc/sources/types.cpp
new file mode 100644
index 0000000..74bde68
--- /dev/null
+++ b/jcc/jcc/sources/types.cpp
@@ -0,0 +1,563 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <jni.h>
+#include <Python.h>
+#include "structmember.h"
+
+#include "java/lang/Object.h"
+#include "java/lang/Class.h"
+#include "functions.h"
+
+using namespace java::lang;
+
+
+/* FinalizerProxy */
+
+static PyObject *t_fc_call(PyObject *self, PyObject *args, PyObject *kwds);
+
+static void t_fp_dealloc(t_fp *self);
+static PyObject *t_fp_getattro(t_fp *self, PyObject *name);
+static int t_fp_setattro(t_fp *self, PyObject *name, PyObject *value);
+static int t_fp_traverse(t_fp *self, visitproc visit, void *arg);
+static int t_fp_clear(t_fp *self);
+static PyObject *t_fp_repr(t_fp *self);
+static PyObject *t_fp_iter(t_fp *self);
+
+static Py_ssize_t t_fp_map_length(t_fp *self);
+static PyObject *t_fp_map_get(t_fp *self, PyObject *key);
+static int t_fp_map_set(t_fp *self, PyObject *key, PyObject *value);
+
+static Py_ssize_t t_fp_seq_length(t_fp *self);
+static PyObject *t_fp_seq_get(t_fp *self, Py_ssize_t n);
+static int t_fp_seq_contains(t_fp *self, PyObject *value);
+static PyObject *t_fp_seq_concat(t_fp *self, PyObject *arg);
+static PyObject *t_fp_seq_repeat(t_fp *self, Py_ssize_t n);
+static PyObject *t_fp_seq_getslice(t_fp *self, Py_ssize_t low, Py_ssize_t high);
+static int t_fp_seq_set(t_fp *self, Py_ssize_t i, PyObject *value);
+static int t_fp_seq_setslice(t_fp *self, Py_ssize_t low,
+                             Py_ssize_t high, PyObject *arg);
+static PyObject *t_fp_seq_inplace_concat(t_fp *self, PyObject *arg);
+static PyObject *t_fp_seq_inplace_repeat(t_fp *self, Py_ssize_t n);
+
+
+PyTypeObject PY_TYPE(FinalizerClass) = {
+    PyObject_HEAD_INIT(NULL)
+    0,                                   /* ob_size */
+    "jcc.FinalizerClass",                /* tp_name */
+    PyType_Type.tp_basicsize,            /* tp_basicsize */
+    0,                                   /* tp_itemsize */
+    0,                                   /* tp_dealloc */
+    0,                                   /* tp_print */
+    0,                                   /* tp_getattr */
+    0,                                   /* tp_setattr */
+    0,                                   /* tp_compare */
+    0,                                   /* tp_repr */
+    0,                                   /* tp_as_number */
+    0,                                   /* tp_as_sequence */
+    0,                                   /* tp_as_mapping */
+    0,                                   /* tp_hash */
+    (ternaryfunc) t_fc_call,             /* tp_call */
+    0,                                   /* tp_str */
+    0,                                   /* tp_getattro */
+    0,                                   /* tp_setattro */
+    0,                                   /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT,                  /* tp_flags */
+    "FinalizerClass",                    /* tp_doc */
+    0,                                   /* tp_traverse */
+    0,                                   /* tp_clear */
+    0,                                   /* tp_richcompare */
+    0,                                   /* tp_weaklistoffset */
+    0,                                   /* tp_iter */
+    0,                                   /* tp_iternext */
+    0,                                   /* tp_methods */
+    0,                                   /* tp_members */
+    0,                                   /* tp_getset */
+    &PyType_Type,                        /* tp_base */
+    0,                                   /* tp_dict */
+    0,                                   /* tp_descr_get */
+    0,                                   /* tp_descr_set */
0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + 0, /* tp_new */ +}; + +static PyMappingMethods t_fp_as_mapping = { + (lenfunc)t_fp_map_length, /* mp_length */ + (binaryfunc)t_fp_map_get, /* mp_subscript */ + (objobjargproc)t_fp_map_set, /* mp_ass_subscript */ +}; + +static PySequenceMethods t_fp_as_sequence = { + (lenfunc)t_fp_seq_length, /* sq_length */ + (binaryfunc)t_fp_seq_concat, /* sq_concat */ + (ssizeargfunc)t_fp_seq_repeat, /* sq_repeat */ + (ssizeargfunc)t_fp_seq_get, /* sq_item */ + (ssizessizeargfunc)t_fp_seq_getslice, /* sq_slice */ + (ssizeobjargproc)t_fp_seq_set, /* sq_ass_item */ + (ssizessizeobjargproc)t_fp_seq_setslice, /* sq_ass_slice */ + (objobjproc)t_fp_seq_contains, /* sq_contains */ + (binaryfunc)t_fp_seq_inplace_concat, /* sq_inplace_concat */ + (ssizeargfunc)t_fp_seq_inplace_repeat, /* sq_inplace_repeat */ +}; + +PyTypeObject PY_TYPE(FinalizerProxy) = { + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ + "jcc.FinalizerProxy", /* tp_name */ + sizeof(t_fp), /* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor)t_fp_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + (reprfunc)t_fp_repr, /* tp_repr */ + 0, /* tp_as_number */ + &t_fp_as_sequence, /* tp_as_sequence */ + &t_fp_as_mapping, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + (getattrofunc)t_fp_getattro, /* tp_getattro */ + (setattrofunc)t_fp_setattro, /* tp_setattro */ + 0, /* tp_as_buffer */ + (Py_TPFLAGS_DEFAULT | + Py_TPFLAGS_HAVE_GC), /* tp_flags */ + "FinalizerProxy", /* tp_doc */ + (traverseproc)t_fp_traverse, /* tp_traverse */ + (inquiry)t_fp_clear, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + (getiterfunc)t_fp_iter, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + 0, /* tp_new */ +}; + +static PyObject *t_fc_call(PyObject *self, PyObject *args, PyObject *kwds) +{ + PyObject *obj = PyType_Type.tp_call(self, args, kwds); + + if (obj) + { + t_fp *fp = (t_fp *) PY_TYPE(FinalizerProxy).tp_alloc(&PY_TYPE(FinalizerProxy), 0); + + fp->object = obj; /* released by t_fp_clear() */ + obj = (PyObject *) fp; + } + + return obj; +} + +static void t_fp_dealloc(t_fp *self) +{ + if (self->object) + ((t_JObject *) self->object)->object.weaken$(); + + t_fp_clear(self); + self->ob_type->tp_free((PyObject *) self); +} + +static int t_fp_traverse(t_fp *self, visitproc visit, void *arg) +{ + Py_VISIT(self->object); + return 0; +} + +static int t_fp_clear(t_fp *self) +{ + Py_CLEAR(self->object); + return 0; +} + +static PyObject *t_fp_repr(t_fp *self) +{ + return PyObject_Repr(self->object); +} + +static PyObject *t_fp_iter(t_fp *self) +{ + return PyObject_GetIter(self->object); +} + +static PyObject *t_fp_getattro(t_fp *self, PyObject *name) +{ + return PyObject_GetAttr(self->object, name); +} + +static int t_fp_setattro(t_fp *self, PyObject *name, PyObject *value) +{ + return PyObject_SetAttr(self->object, name, value); +} + +static Py_ssize_t t_fp_map_length(t_fp *self) +{ + return PyMapping_Size(self->object); +} + +static PyObject *t_fp_map_get(t_fp *self, PyObject *key) +{ + return PyObject_GetItem(self->object, key); +} + +static int t_fp_map_set(t_fp *self, PyObject *key, PyObject *value) +{ + if (value == NULL) + return PyObject_DelItem(self->object, key); + + return 
PyObject_SetItem(self->object, key, value); +} + +static Py_ssize_t t_fp_seq_length(t_fp *self) +{ + return PySequence_Length(self->object); +} + +static PyObject *t_fp_seq_get(t_fp *self, Py_ssize_t n) +{ + return PySequence_GetItem(self->object, n); +} + +static int t_fp_seq_contains(t_fp *self, PyObject *value) +{ + return PySequence_Contains(self->object, value); +} + +static PyObject *t_fp_seq_concat(t_fp *self, PyObject *arg) +{ + return PySequence_Concat(self->object, arg); +} + +static PyObject *t_fp_seq_repeat(t_fp *self, Py_ssize_t n) +{ + return PySequence_Repeat(self->object, n); +} + +static PyObject *t_fp_seq_getslice(t_fp *self, Py_ssize_t low, Py_ssize_t high) +{ + return PySequence_GetSlice(self->object, low, high); +} + +static int t_fp_seq_set(t_fp *self, Py_ssize_t i, PyObject *value) +{ + return PySequence_SetItem(self->object, i, value); +} + +static int t_fp_seq_setslice(t_fp *self, Py_ssize_t low, + Py_ssize_t high, PyObject *arg) +{ + return PySequence_SetSlice(self->object, low, high, arg); +} + +static PyObject *t_fp_seq_inplace_concat(t_fp *self, PyObject *arg) +{ + return PySequence_InPlaceConcat(self->object, arg); +} + +static PyObject *t_fp_seq_inplace_repeat(t_fp *self, Py_ssize_t n) +{ + return PySequence_InPlaceRepeat(self->object, n); +} + + +/* const variable descriptor */ + +class t_descriptor { +public: + PyObject_HEAD + int flags; + union { + PyObject *value; + getclassfn initializeClass; + } access; +}; + +#define DESCRIPTOR_VALUE 0x0001 +#define DESCRIPTOR_CLASS 0x0002 +#define DESCRIPTOR_GETFN 0x0004 +#define DESCRIPTOR_GENERIC 0x0008 + +static void t_descriptor_dealloc(t_descriptor *self); +static PyObject *t_descriptor___get__(t_descriptor *self, + PyObject *obj, PyObject *type); + +static PyMethodDef t_descriptor_methods[] = { + { NULL, NULL, 0, NULL } +}; + + +PyTypeObject PY_TYPE(ConstVariableDescriptor) = { + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ + "jcc.ConstVariableDescriptor", /* tp_name */ + sizeof(t_descriptor), /* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor)t_descriptor_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + "const variable descriptor", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + t_descriptor_methods, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + (descrgetfunc)t_descriptor___get__, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + 0, /* tp_new */ +}; + +static void t_descriptor_dealloc(t_descriptor *self) +{ + if (self->flags & DESCRIPTOR_VALUE) + { + Py_DECREF(self->access.value); + } + self->ob_type->tp_free((PyObject *) self); +} + +PyObject *make_descriptor(PyTypeObject *value) +{ + t_descriptor *self = (t_descriptor *) + PY_TYPE(ConstVariableDescriptor).tp_alloc(&PY_TYPE(ConstVariableDescriptor), 0); + + if (self) + { + Py_INCREF(value); + self->access.value = (PyObject *) value; + self->flags = DESCRIPTOR_VALUE; + } + + return (PyObject *) self; +} + +PyObject *make_descriptor(getclassfn initializeClass) +{ + t_descriptor *self = (t_descriptor *) + 
PY_TYPE(ConstVariableDescriptor).tp_alloc(&PY_TYPE(ConstVariableDescriptor), 0); + + if (self) + { + self->access.initializeClass = initializeClass; + self->flags = DESCRIPTOR_CLASS; + } + + return (PyObject *) self; +} + +PyObject *make_descriptor(getclassfn initializeClass, int generics) +{ + t_descriptor *self = (t_descriptor *) make_descriptor(initializeClass); + + if (self && generics) + self->flags |= DESCRIPTOR_GENERIC; + + return (PyObject *) self; +} + +PyObject *make_descriptor(PyObject *value) +{ + t_descriptor *self = (t_descriptor *) + PY_TYPE(ConstVariableDescriptor).tp_alloc(&PY_TYPE(ConstVariableDescriptor), 0); + + if (self) + { + self->access.value = value; + self->flags = DESCRIPTOR_VALUE; + } + else + Py_DECREF(value); + + return (PyObject *) self; +} + +PyObject *make_descriptor(PyObject *(*wrapfn)(const jobject &)) +{ + return make_descriptor(PyCObject_FromVoidPtr((void *) wrapfn, NULL)); +} + +PyObject *make_descriptor(boxfn fn) +{ + return make_descriptor(PyCObject_FromVoidPtr((void *) fn, NULL)); +} + +PyObject *make_descriptor(jboolean b) +{ + t_descriptor *self = (t_descriptor *) + PY_TYPE(ConstVariableDescriptor).tp_alloc(&PY_TYPE(ConstVariableDescriptor), 0); + + if (self) + { + PyObject *value = b ? Py_True : Py_False; + self->access.value = (PyObject *) value; Py_INCREF(value); + self->flags = DESCRIPTOR_VALUE; + } + + return (PyObject *) self; +} + +PyObject *make_descriptor(jbyte value) +{ + t_descriptor *self = (t_descriptor *) + PY_TYPE(ConstVariableDescriptor).tp_alloc(&PY_TYPE(ConstVariableDescriptor), 0); + + if (self) + { + self->access.value = PyInt_FromLong(value); + self->flags = DESCRIPTOR_VALUE; + } + + return (PyObject *) self; +} + +PyObject *make_descriptor(jchar value) +{ + t_descriptor *self = (t_descriptor *) + PY_TYPE(ConstVariableDescriptor).tp_alloc(&PY_TYPE(ConstVariableDescriptor), 0); + + if (self) + { + Py_UNICODE pchar = (Py_UNICODE) value; + + self->access.value = PyUnicode_FromUnicode(&pchar, 1); + self->flags = DESCRIPTOR_VALUE; + } + + return (PyObject *) self; +} + +PyObject *make_descriptor(jdouble value) +{ + t_descriptor *self = (t_descriptor *) + PY_TYPE(ConstVariableDescriptor).tp_alloc(&PY_TYPE(ConstVariableDescriptor), 0); + + if (self) + { + self->access.value = PyFloat_FromDouble(value); + self->flags = DESCRIPTOR_VALUE; + } + + return (PyObject *) self; +} + +PyObject *make_descriptor(jfloat value) +{ + t_descriptor *self = (t_descriptor *) + PY_TYPE(ConstVariableDescriptor).tp_alloc(&PY_TYPE(ConstVariableDescriptor), 0); + + if (self) + { + self->access.value = PyFloat_FromDouble((double) value); + self->flags = DESCRIPTOR_VALUE; + } + + return (PyObject *) self; +} + +PyObject *make_descriptor(jint value) +{ + t_descriptor *self = (t_descriptor *) + PY_TYPE(ConstVariableDescriptor).tp_alloc(&PY_TYPE(ConstVariableDescriptor), 0); + + if (self) + { + self->access.value = PyInt_FromLong(value); + self->flags = DESCRIPTOR_VALUE; + } + + return (PyObject *) self; +} + +PyObject *make_descriptor(jlong value) +{ + t_descriptor *self = (t_descriptor *) + PY_TYPE(ConstVariableDescriptor).tp_alloc(&PY_TYPE(ConstVariableDescriptor), 0); + + if (self) + { + self->access.value = PyLong_FromLongLong((long long) value); + self->flags = DESCRIPTOR_VALUE; + } + + return (PyObject *) self; +} + +PyObject *make_descriptor(jshort value) +{ + t_descriptor *self = (t_descriptor *) + PY_TYPE(ConstVariableDescriptor).tp_alloc(&PY_TYPE(ConstVariableDescriptor), 0); + + if (self) + { + self->access.value = PyInt_FromLong((short) value); + 
self->flags = DESCRIPTOR_VALUE; + } + + return (PyObject *) self; +} + +static PyObject *t_descriptor___get__(t_descriptor *self, + PyObject *obj, PyObject *type) +{ + if (self->flags & DESCRIPTOR_VALUE) + { + Py_INCREF(self->access.value); + return self->access.value; + } + + if (self->flags & DESCRIPTOR_CLASS) + { +#ifdef _java_generics + if (self->flags & DESCRIPTOR_GENERIC) + return t_Class::wrap_Object(Class((*self->access.initializeClass)()), (PyTypeObject *) type); + else +#endif + return t_Class::wrap_Object(Class((*self->access.initializeClass)())); + } + + Py_RETURN_NONE; +} + diff --git a/jcc/jcc/windows.py b/jcc/jcc/windows.py new file mode 100644 index 0000000..f847f25 --- /dev/null +++ b/jcc/jcc/windows.py @@ -0,0 +1,85 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os, _winreg + + +class WindowsRegistry(object): + + def __init__(self): + self.handle = _winreg.ConnectRegistry(None, _winreg.HKEY_LOCAL_MACHINE) + + def get(self, key, name): + + handle = None + try: + handle = _winreg.OpenKey(self.handle, key) + return _winreg.QueryValueEx(handle, name)[0] + finally: + if handle is not None: + handle.Close() + + def close(self): + self.handle.Close() + + +def get_jvm_dll_directory(client_or_server="client"): + + jre_key = r"SOFTWARE\JavaSoft\Java Runtime Environment" + jdk_key = r"SOFTWARE\JavaSoft\Java Development Kit" + current_key = r"%s\%s" + + registry = None + try: + registry = WindowsRegistry() + + try: # try JRE + version = registry.get(jre_key, "CurrentVersion") + path = registry.get(current_key %(jre_key, version), "JavaHome") + if not os.path.exists(path): + path = None + except: + path = None + + if not path: + try: # try JDK + version = registry.get(jdk_key, "CurrentVersion") + path = registry.get(current_key %(jdk_key, version), "JavaHome") + if os.path.exists(path): + path = os.path.abspath(os.path.join(path, "jre")) + else: + path = None + except: + path = None + + finally: + if registry is not None: + registry.close() + + if path: + path = os.path.abspath(os.path.join(path, "bin", client_or_server)) + if os.path.exists(os.path.join(path, "jvm.dll")): + return path + + return None + + +def add_jvm_dll_directory_to_path(): + + path = os.environ['Path'].split(os.pathsep) + dll_path = get_jvm_dll_directory() + if dll_path is not None: + path.append(dll_path) + os.environ['Path'] = os.pathsep.join(path) + return True + + raise ValueError, "jvm.dll could not be found" diff --git a/jcc/setup.py b/jcc/setup.py new file mode 100644 index 0000000..f8af865 --- /dev/null +++ b/jcc/setup.py @@ -0,0 +1,382 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import os, sys, platform, subprocess + +jcc_ver = '2.11' +machine = platform.machine() + +if machine.startswith("iPod") or machine.startswith("iPhone"): + platform = 'ipod' +elif sys.platform == "win32" and "--compiler=mingw32" in sys.argv: + platform = 'mingw32' +else: + platform = sys.platform + +# Add or edit the entry corresponding to your system in the JDK, INCLUDES, +# CFLAGS, DEBUG_CFLAGS, LFLAGS and JAVAC dictionaries below. +# These entries are used to build JCC _and_ by JCC to drive compiling and +# linking via distutils or setuptools the extensions it generated code for. +# +# The key for your system is determined by the platform variable defined +# above. +# +# Instead of editing the entries below, you may also override these +# dictionaries with JCC_JDK, JCC_INCLUDES, JCC_CFLAGS, JCC_DEBUG_CFLAGS, +# JCC_LFLAGS and JCC_JAVAC environment variables using os.pathsep as value +# separator. + +if platform in ("win32", "mingw32"): + try: + from helpers.windows import JAVAHOME + except ImportError: + JAVAHOME = None +elif platform in ("darwin",): + try: + from helpers.darwin import JAVAHOME + except ImportError: + JAVAHOME = None +else: + JAVAHOME = None + +JDK = { + 'darwin': JAVAHOME, + 'ipod': '/usr/include/gcc', + 'linux2': '/usr/lib/jvm/java-6-openjdk', + 'sunos5': '/usr/jdk/instances/jdk1.6.0', + 'win32': JAVAHOME, + 'mingw32': JAVAHOME, + 'freebsd7': '/usr/local/diablo-jdk1.6.0' +} +if 'JCC_JDK' in os.environ: + JDK[platform] = os.environ['JCC_JDK'] + + +if not JDK[platform]: + raise RuntimeError(''' + +Can't determine where the Java JDK has been installed on this machine. + +Please set the environment variable JCC_JDK to that location before +running setup.py. +''') + +elif not os.path.isdir(JDK[platform]): + raise RuntimeError(''' + +Java JDK directory '%s' does not exist. + +Please set the environment variable JCC_JDK to the correct location before +running setup.py. 
+''' %(JDK[platform])) + + +INCLUDES = { + 'darwin': ['%(darwin)s/Headers' %(JDK)], + 'ipod': ['%(ipod)s/darwin/default' %(JDK)], + 'linux2': ['%(linux2)s/include' %(JDK), + '%(linux2)s/include/linux' %(JDK)], + 'sunos5': ['%(sunos5)s/include' %(JDK), + '%(sunos5)s/include/solaris' %(JDK)], + 'win32': ['%(win32)s/include' %(JDK), + '%(win32)s/include/win32' %(JDK)], + 'mingw32': ['%(mingw32)s/include' %(JDK), + '%(mingw32)s/include/win32' %(JDK)], + 'freebsd7': ['%(freebsd7)s/include' %(JDK), + '%(freebsd7)s/include/freebsd' %(JDK)], +} + +CFLAGS = { + 'darwin': ['-fno-strict-aliasing', '-Wno-write-strings'], + 'ipod': ['-Wno-write-strings'], + 'linux2': ['-fno-strict-aliasing', '-Wno-write-strings'], + 'sunos5': ['-features=iddollar', + '-erroff=badargtypel2w,wbadinitl,wvarhidemem'], + 'win32': ["/EHsc", "/D_CRT_SECURE_NO_WARNINGS"], # MSVC 9 (2008) + 'mingw32': ['-fno-strict-aliasing', '-Wno-write-strings'], + 'freebsd7': ['-fno-strict-aliasing', '-Wno-write-strings'], +} + +# added to CFLAGS when JCC is invoked with --debug +DEBUG_CFLAGS = { + 'darwin': ['-O0', '-g', '-DDEBUG'], + 'ipod': ['-O0', '-g', '-DDEBUG'], + 'linux2': ['-O0', '-g', '-DDEBUG'], + 'sunos5': ['-DDEBUG'], + 'win32': ['/Od', '/DDEBUG'], + 'mingw32': ['-O0', '-g', '-DDEBUG'], + 'freebsd7': ['-O0', '-g', '-DDEBUG'], +} + +LFLAGS = { + 'darwin': ['-framework', 'JavaVM'], + 'ipod': ['-ljvm', '-lpython%s.%s' %(sys.version_info[0:2]), + '-L/usr/lib/gcc/arm-apple-darwin9/4.0.1'], + 'linux2/i386': ['-L%(linux2)s/jre/lib/i386' %(JDK), '-ljava', + '-L%(linux2)s/jre/lib/i386/client' %(JDK), '-ljvm', + '-Wl,-rpath=%(linux2)s/jre/lib/i386:%(linux2)s/jre/lib/i386/client' %(JDK)], + 'linux2/i686': ['-L%(linux2)s/jre/lib/i386' %(JDK), '-ljava', + '-L%(linux2)s/jre/lib/i386/client' %(JDK), '-ljvm', + '-Wl,-rpath=%(linux2)s/jre/lib/i386:%(linux2)s/jre/lib/i386/client' %(JDK)], + 'linux2/x86_64': ['-L%(linux2)s/jre/lib/amd64' %(JDK), '-ljava', + '-L%(linux2)s/jre/lib/amd64/server' %(JDK), '-ljvm', + '-Wl,-rpath=%(linux2)s/jre/lib/amd64:%(linux2)s/jre/lib/amd64/server' %(JDK)], + 'sunos5': ['-L%(sunos5)s/jre/lib/i386' %(JDK), '-ljava', + '-L%(sunos5)s/jre/lib/i386/client' %(JDK), '-ljvm', + '-R%(sunos5)s/jre/lib/i386:%(sunos5)s/jre/lib/i386/client' %(JDK)], + 'win32': ['/LIBPATH:%(win32)s/lib' %(JDK), 'jvm.lib'], + 'mingw32': ['-L%(mingw32)s/lib' %(JDK), '-ljvm'], + 'freebsd7': ['-L%(freebsd7)s/jre/lib/i386' %(JDK), '-ljava', '-lverify', + '-L%(freebsd7)s/jre/lib/i386/client' %(JDK), '-ljvm', + '-Wl,-rpath=%(freebsd7)s/jre/lib/i386:%(freebsd7)s/jre/lib/i386/client' %(JDK)], +} + +IMPLIB_LFLAGS = { + 'win32': ["/IMPLIB:%s"], + 'mingw32': ["-Wl,--out-implib,%s"] +} + +if platform == 'linux2': + LFLAGS['linux2'] = LFLAGS['linux2/%s' %(machine)] + +JAVAC = { + 'darwin': ['javac', '-target', '1.5'], + 'ipod': ['jikes', '-cp', '/usr/share/classpath/glibj.zip'], + 'linux2': ['javac'], + 'sunos5': ['javac'], + 'win32': ['%(win32)s/bin/javac.exe' %(JDK)], + 'mingw32': ['%(mingw32)s/bin/javac.exe' %(JDK)], + 'freebsd7': ['javac'], +} + +JAVADOC = { + 'darwin': ['javadoc'], + 'ipod': [], + 'linux2': ['javadoc'], + 'sunos5': ['javadoc'], + 'win32': ['%(win32)s/bin/javadoc.exe' %(JDK)], + 'mingw32': ['%(mingw32)s/bin/javadoc.exe' %(JDK)], + 'freebsd7': ['javadoc'], +} + +try: + if 'USE_DISTUTILS' in os.environ: + raise ImportError + from setuptools import setup, Extension + from pkg_resources import require + with_setuptools = require('setuptools')[0].parsed_version + + enable_shared = False + with_setuptools_c7 = ('00000000', '00000006', '*c', 
'00000007', '*final') + + if with_setuptools >= with_setuptools_c7 and 'NO_SHARED' not in os.environ: + if platform in ('darwin', 'ipod', 'win32'): + enable_shared = True + elif platform == 'linux2': + from helpers.linux import patch_setuptools + enable_shared = patch_setuptools(with_setuptools) + elif platform == 'mingw32': + enable_shared = True + # need to monkeypatch the CygwinCCompiler class to generate + # jcc.lib in the correct place + from helpers.mingw32 import JCCMinGW32CCompiler + import distutils.cygwinccompiler + distutils.cygwinccompiler.Mingw32CCompiler = JCCMinGW32CCompiler + +except ImportError: + if sys.version_info < (2, 4): + raise ImportError, 'setuptools is required when using Python 2.3' + else: + from distutils.core import setup, Extension + with_setuptools = None + enable_shared = False + + +def main(debug): + + _jcc_argsep = os.environ.get('JCC_ARGSEP', os.pathsep) + + if 'JCC_INCLUDES' in os.environ: + _includes = os.environ['JCC_INCLUDES'].split(_jcc_argsep) + else: + _includes = INCLUDES[platform] + + if 'JCC_CFLAGS' in os.environ: + _cflags = os.environ['JCC_CFLAGS'].split(_jcc_argsep) + else: + _cflags = CFLAGS[platform] + + if 'JCC_DEBUG_CFLAGS' in os.environ: + _debug_cflags = os.environ['JCC_DEBUG_CFLAGS'].split(_jcc_argsep) + else: + _debug_cflags = DEBUG_CFLAGS[platform] + + if 'JCC_LFLAGS' in os.environ: + _lflags = os.environ['JCC_LFLAGS'].split(_jcc_argsep) + else: + _lflags = LFLAGS[platform] + + if 'JCC_IMPLIB_LFLAGS' in os.environ: + _implib_lflags = os.environ['JCC_IMPLIB_LFLAGS'].split(_jcc_argsep) + else: + _implib_lflags = IMPLIB_LFLAGS.get(platform, []) + + if 'JCC_JAVAC' in os.environ: + _javac = os.environ['JCC_JAVAC'].split(_jcc_argsep) + else: + _javac = JAVAC[platform] + + if 'JCC_JAVADOC' in os.environ: + _javadoc = os.environ['JCC_JAVADOC'].split(_jcc_argsep) + else: + _javadoc = JAVADOC[platform] + + from helpers.build import jcc_build_py + + jcc_build_py.config_file = \ + os.path.join(os.path.dirname(os.path.abspath(__file__)), + 'jcc', 'config.py') + jcc_build_py.config_text = \ + '\n'.join(['', + 'INCLUDES=%s' %(_includes), + 'CFLAGS=%s' %(_cflags), + 'DEBUG_CFLAGS=%s' %(_debug_cflags), + 'LFLAGS=%s' %(_lflags), + 'IMPLIB_LFLAGS=%s' %(_implib_lflags), + 'SHARED=%s' %(enable_shared), + 'VERSION="%s"' %(jcc_ver), + '']) + + extensions = [] + + boot = '_jcc' + + cflags = ['-DPYTHON'] + _cflags + if debug: + cflags += _debug_cflags + includes = _includes + [boot, 'jcc/sources'] + lflags = _lflags + if not debug: + if platform == 'win32': + pass + elif platform == 'sunos5': + lflags += ['-Wl,-s'] + else: + lflags += ['-Wl,-S'] + + sources = ['jcc/sources/jcc.cpp', + 'jcc/sources/JCCEnv.cpp', + 'jcc/sources/JObject.cpp', + 'jcc/sources/JArray.cpp', + 'jcc/sources/functions.cpp', + 'jcc/sources/types.cpp'] + for path, dirs, names in os.walk(boot): + for name in names: + if name.endswith('.cpp'): + sources.append(os.path.join(path, name)) + package_data = ['sources/*.cpp', 'sources/*.h', 'patches/patch.*'] + + if with_setuptools and enable_shared: + from subprocess import Popen, PIPE + from setuptools import Library + + kwds = { "extra_compile_args": cflags, + "include_dirs": includes, + "define_macros": [('_jcc_lib', None), + ('JCC_VER', '"%s"' %(jcc_ver))], + "sources": sources[0:2] } + + if platform in ('darwin', 'ipod'): + kwds["extra_link_args"] = \ + lflags + ['-install_name', '@rpath/libjcc.dylib', + '-current_version', jcc_ver, + '-compatibility_version', jcc_ver] + elif platform == 'linux2': + kwds["extra_link_args"] = \ + lflags + 
['-lpython%s.%s' %(sys.version_info[0:2])] + kwds["force_shared"] = True # requires jcc/patches/patch.43 + elif platform in IMPLIB_LFLAGS: + jcclib = 'jcc%s.lib' %(debug and '_d' or '') + implib_flags = ' '.join(IMPLIB_LFLAGS[platform]) + kwds["extra_link_args"] = \ + lflags + [implib_flags %(os.path.join('jcc', jcclib))] + package_data.append(jcclib) + else: + kwds["extra_link_args"] = lflags + + extensions.append(Library('jcc', **kwds)) + + args = _javac[:] + args.extend(('-d', 'jcc/classes')) + args.append('java/org/apache/jcc/PythonVM.java') + args.append('java/org/apache/jcc/PythonException.java') + if not os.path.exists('jcc/classes'): + os.makedirs('jcc/classes') + try: + process = Popen(args, stderr=PIPE) + except Exception, e: + raise type(e), "%s: %s" %(e, args) + process.wait() + if process.returncode != 0: + raise OSError, process.stderr.read() + package_data.append('classes/org/apache/jcc/PythonVM.class') + package_data.append('classes/org/apache/jcc/PythonException.class') + + args = _javadoc[:] + args.extend(('-d', 'javadoc', '-sourcepath', 'java', 'org.apache.jcc')) + try: + process = Popen(args, stderr=PIPE) + except Exception, e: + raise type(e), "%s: %s" %(e, args) + process.wait() + if process.returncode != 0: + raise OSError, process.stderr.read() + + extensions.append(Extension('jcc._jcc', + extra_compile_args=cflags, + extra_link_args=lflags, + include_dirs=includes, + define_macros=[('_java_generics', None), + ('JCC_VER', '"%s"' %(jcc_ver))], + sources=sources)) + + args = { + 'name': 'JCC', + 'version': jcc_ver, + 'description': 'a C++ code generator for calling Java from C++/Python', + 'long_description': open('DESCRIPTION').read(), + 'author': 'Andi Vajda', + 'author_email': 'vajda@apache.org', + 'classifiers': ['Development Status :: 5 - Production/Stable', + 'Environment :: Console', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: Apache Software License', + 'Operating System :: OS Independent', + 'Programming Language :: C++', + 'Programming Language :: Java', + 'Programming Language :: Python', + 'Topic :: Software Development :: Code Generators', + 'Topic :: Software Development :: Libraries :: Java Libraries'], + 'packages': ['jcc'], + 'package_dir': {'jcc': 'jcc'}, + 'package_data': {'jcc': package_data}, + 'ext_modules': extensions, + "cmdclass": {"build_py": jcc_build_py}, + } + if with_setuptools: + args['zip_safe'] = False + + setup(**args) + + +if __name__ == "__main__": + main('--debug' in sys.argv) diff --git a/lucene-java-3.4.0/lucene/BUILD.txt b/lucene-java-3.4.0/lucene/BUILD.txt new file mode 100644 index 0000000..3b11915 --- /dev/null +++ b/lucene-java-3.4.0/lucene/BUILD.txt @@ -0,0 +1,105 @@ +Lucene Build Instructions + +Basic steps: + 0) Install JDK 1.5 (or greater), Ant 1.7.x (not 1.6.x, not 1.8.x) + 1) Download Lucene from Apache and unpack it + 2) Connect to the top-level of your Lucene installation + 3) Install JavaCC (optional) + 4) Run ant + +Step 0) Set up your development environment (JDK 1.5 or greater, +Ant 1.7.x) + +We'll assume that you know how to get and set up the JDK - if you +don't, then we suggest starting at http://java.sun.com and learning +more about Java, before returning to this README. Lucene runs with +JDK 1.5 and later. + +Like many Open Source java projects, Lucene uses Apache Ant for build +control. Specifically, you MUST use Ant version 1.7.x + +Ant is "kind of like make without make's wrinkles". Ant is +implemented in java and uses XML-based configuration files. 
You can
+get it at:
+
+  http://ant.apache.org
+
+You'll need to download the Ant binary distribution.  Install it
+according to the instructions at:
+
+  http://ant.apache.org/manual
+
+Step 1) Download Lucene from Apache
+
+We'll assume you already did this, or you wouldn't be reading this
+file.  However, you might have received this file by some alternate
+route, or you might have an incomplete copy of Lucene, so: Lucene
+releases are available for download at:
+
+  http://www.apache.org/dyn/closer.cgi/lucene/java/
+
+Download either a zip or a tarred/gzipped version of the archive, and
+uncompress it into a directory of your choice.
+
+Step 2) From the command line, change (cd) into the top-level directory of your Lucene installation
+
+Lucene's top-level directory contains the build.xml file. By default,
+you do not need to change any of the settings in this file, but you do
+need to run ant from this location so it knows where to find build.xml.
+
+If you would like to change settings you can do so by creating one
+or more of the following files and placing your own property settings
+in there:
+
+    ~/lucene.build.properties
+    ~/build.properties
+    lucene-x.y/build.properties
+
+The first property which is found in the order with which the files are
+loaded becomes the property setting which is used by the Ant build
+system.
+
+NOTE: the ~ character represents your user account home directory.
+
+Step 3) Install JavaCC
+
+Building the Lucene distribution from the source does not require the JavaCC
+parser generator, but if you wish to regenerate any of the pre-generated
+parser pieces, you will need to install JavaCC.  Version 4.1 is tested to
+work correctly.
+
+  http://javacc.dev.java.net
+
+Follow the download links and download the zip file to a temporary
+location on your file system.
+
+After JavaCC is installed, create a build.properties file
+(as in step 2), and add the line
+
+  javacc.home=/javacc
+
+where this points to the root directory of your javacc installation
+(the directory that contains bin/lib/javacc.jar).
+
+Step 4) Run ant
+
+Assuming you have ant in your PATH and have set ANT_HOME to the
+location of your ant installation, typing "ant" at the shell or
+command prompt should run ant.  Ant will by default look for the
+"build.xml" file in your current directory, and compile Lucene.
+
+To rebuild any of the JavaCC-based parsers, run "ant javacc".
+
+For further information on Lucene, go to:
+  http://lucene.apache.org/
+
+Please join the Lucene-User mailing list by visiting this site:
+  http://lucene.apache.org/java/docs/mailinglists.html
+
+Please post suggestions, questions, corrections or additions to this
+document to the lucene-user mailing list.
+
+This file was originally written by Steven J. Owens.
+This file was modified by Jon S. Stevens.
+
+Copyright (c) 2001-2005 The Apache Software Foundation.  All rights reserved.
diff --git a/lucene-java-3.4.0/lucene/CHANGES.txt b/lucene-java-3.4.0/lucene/CHANGES.txt
new file mode 100644
index 0000000..7f81b9d
--- /dev/null
+++ b/lucene-java-3.4.0/lucene/CHANGES.txt
@@ -0,0 +1,5565 @@
+Lucene Change Log
+
+For more information on past and future Lucene versions, please see:
+http://s.apache.org/luceneversions
+
+======================= Lucene 3.4.0 =======================
+
+Bug fixes
+
+* LUCENE-3251: Directory#copy failed to close target output if opening the
+  source stream failed. (Simon Willnauer)
+
+* LUCENE-3255: If segments_N file is all zeros (due to file
+  corruption), don't read that to mean the index is empty. (Gregory
+  Tarr, Mark Harwood, Simon Willnauer, Mike McCandless)
+
+* LUCENE-3254: Fixed minor bug in how deletes were written to disk,
+  causing the file to sometimes be larger than it needed to be. (Mike
+  McCandless)
+
+* LUCENE-3224: Fixed a bug where CheckIndex would incorrectly report a
+  corrupt index if a term with docfreq >= 16 was indexed more than once
+  at the same position. (Robert Muir)
+
+* LUCENE-3334: If Java7 is detected, IOUtils.closeSafely() will log
+  suppressed exceptions in the original exception, so the stack trace
+  will contain them. (Uwe Schindler)
+
+* LUCENE-3339: Fixed deadlock case when multiple threads use the new
+  block-add (IndexWriter.add/updateDocuments) methods. (Robert Muir,
+  Mike McCandless)
+
+* LUCENE-3340: Fixed case where IndexWriter was not flushing at
+  exactly maxBufferedDeleteTerms (Mike McCandless)
+
+* LUCENE-3358, LUCENE-3361: StandardTokenizer and UAX29URLEmailTokenizer
+  wrongly discarded combining marks attached to Han or Hiragana characters;
+  this is fixed if you supply Version >= 3.4. If you supply a previous
+  Lucene version, you get the old buggy behavior for backwards compatibility.
+  (Trejkaz, Robert Muir)
+
+* LUCENE-3368: IndexWriter commits segments without applying their buffered
+  deletes when flushing concurrently. (Simon Willnauer, Mike McCandless)
+
+* LUCENE-3365: Create or Append mode determined before obtaining the write
+  lock can cause IndexWriter to override an existing index.
+  (Geoff Cooney via Simon Willnauer)
+
+* LUCENE-3380: Fixed a bug where FileSwitchDirectory's listAll() would wrongly
+  throw NoSuchDirectoryException when all files written so far have been
+  written to one directory, but the other still has not yet been created on the
+  filesystem. (Robert Muir)
+
+* LUCENE-3402: term vectors disappeared from the index if optimize() was called
+  following addIndexes(). (Shai Erera)
+
+* LUCENE-3409: IndexWriter.deleteAll was failing to close pooled NRT
+  SegmentReaders, leading to unused files accumulating in the
+  Directory. (tal steier via Mike McCandless)
+
+* LUCENE-3390: Added SortField.setMissingValue(v) to enable well defined
+  sorting behavior for documents that do not include the given field.
+  (Gilad Barkai via Doron Cohen)
+
+* LUCENE-3418: Lucene was failing to fsync index files on commit,
+  meaning an operating system or hardware crash, or power loss, could
+  easily corrupt the index. (Mark Miller, Robert Muir, Mike
+  McCandless)
+
+New Features
+
+* LUCENE-3290: Added FieldInvertState.numUniqueTerms
+  (Mike McCandless, Robert Muir)
+
+* LUCENE-3280: Add FixedBitSet, like OpenBitSet but not elastic
+  (grow on demand if you set/get/clear too-large indices). (Mike
+  McCandless)
+
+* LUCENE-2048: Added the ability to omit positions but still index
+  term frequencies; you can now control what is indexed into
+  the postings via AbstractField.setIndexOptions:
+    DOCS_ONLY: only documents are indexed: term frequencies and positions are omitted
+    DOCS_AND_FREQS: only documents and term frequencies are indexed: positions are omitted
+    DOCS_AND_FREQS_AND_POSITIONS: full postings: documents, frequencies, and positions
+  AbstractField.setOmitTermFrequenciesAndPositions is deprecated,
+  you should use DOCS_ONLY instead. (Robert Muir)
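+
+  For example, from PyLucene (a sketch; it assumes the flat namespace of
+  the JCC-generated lucene module and a field named "body"):
+
+    import lucene
+    lucene.initVM()
+    field = lucene.Field("body", "some text", lucene.Field.Store.NO,
+                         lucene.Field.Index.ANALYZED)
+    field.setIndexOptions(lucene.FieldInfo.IndexOptions.DOCS_AND_FREQS)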
This can be useful in situations when one wants to compute grouping + based facets / statistics on the complete query result. (Martijn van Groningen) + +Optimizations + +* LUCENE-3289: When building an FST you can now tune how aggressively + the FST should try to share common suffixes. Typically you can + greatly reduce RAM required during building, and CPU consumed, at + the cost of a somewhat larger FST. (Mike McCandless) + +Test Cases + +* LUCENE-3327: Fix AIOOBE when TestFSTs is run with + -Dtests.verbose=true (James Dyer via Mike McCandless) + +Build + +* LUCENE-3406: Add ant target 'package-local-src-tgz' to Lucene and Solr + to package sources from the local working copy. + (Seung-Yeoul Yang via Steve Rowe) + + +======================= Lucene 3.3.0 ======================= + +Changes in backwards compatibility policy + +* LUCENE-3140: IndexOutput.copyBytes now takes a DataInput (superclass + of IndexInput) as its first argument. (Robert Muir, Dawid Weiss, + Mike McCandless) + +* LUCENE-3191: FieldComparator.value now returns an Object not + Comparable; FieldDoc.fields also changed from Comparable[] to + Object[] (Uwe Schindler, Mike McCandless) + +* LUCENE-3208: Made deprecated methods Query.weight(Searcher) and + Searcher.createWeight() final to prevent override. If you have + overridden one of these methods, cut over to the non-deprecated + implementation. (Uwe Schindler, Robert Muir, Yonik Seeley) + +* LUCENE-3238: Made MultiTermQuery.rewrite() final, to prevent + problems (such as not properly setting rewrite methods, or + not working correctly with things like SpanMultiTermQueryWrapper). + To rewrite to a simpler form, instead return a simpler enum + from getEnum(IndexReader). For example, to rewrite to a single term, + return a SingleTermEnum. (ludovic Boutros, Uwe Schindler, Robert Muir) + +Changes in runtime behavior + +* LUCENE-2834: the hash used to compute the lock file name when the + lock file is not stored in the index has changed. This means you + will see a different lucene-XXX-write.lock in your lock directory. + (Robert Muir, Uwe Schindler, Mike McCandless) + +* LUCENE-3146: IndexReader.setNorm throws IllegalStateException if the field + does not store norms. (Shai Erera, Mike McCandless) + +* LUCENE-3198: On Linux, if the JRE is 64 bit and supports unmapping, + FSDirectory.open now defaults to MMapDirectory instead of + NIOFSDirectory since MMapDirectory gives better performance. (Mike + McCandless) + +* LUCENE-3200: MMapDirectory now uses chunk sizes that are powers of 2. + When setting the chunk size, it is rounded down to the next possible + value. The new default value for 64 bit platforms is 2^30 (1 GiB), + for 32 bit platforms it stays unchanged at 2^28 (256 MiB). + Internally, MMapDirectory now only uses one dedicated final IndexInput + implementation supporting multiple chunks, which makes Hotspot's life + easier. (Uwe Schindler, Robert Muir, Mike McCandless) + +Bug fixes + +* LUCENE-3147,LUCENE-3152: Fixed open file handles leaks in many places in the + code. Now MockDirectoryWrapper (in test-framework) tracks all open files, + including locks, and fails if the test fails to release all of them. 
+  (Mike McCandless, Robert Muir, Shai Erera, Simon Willnauer)
+
+* LUCENE-3102: CachingCollector.replay was failing to call setScorer
+  per-segment (Martijn van Groningen via Mike McCandless)
+
+* LUCENE-3183: Fix a rare corner case where seeking to the empty term
+  (field="", term="") with terms index interval 1 could hit
+  ArrayIndexOutOfBoundsException (selckin, Robert Muir, Mike
+  McCandless)
+
+* LUCENE-3208: IndexSearcher had its own private similarity field
+  and corresponding get/setter overriding Searcher's implementation. If you
+  set a different Similarity instance on IndexSearcher, methods implemented
+  in the superclass Searcher did not use it, leading to strange bugs.
+  (Uwe Schindler, Robert Muir)
+
+* LUCENE-3197: Fix core merge policies to not over-merge during
+  background optimize when documents are still being deleted
+  concurrently with the optimize (Mike McCandless)
+
+* LUCENE-3222: The RAM accounting for buffered delete terms was
+  failing to measure the space required to hold the term's field and
+  text character data.  (Mike McCandless)
+
+* LUCENE-3238: Fixed a bug where using WildcardQuery("prefix*") inside
+  of a SpanMultiTermQueryWrapper rewrote incorrectly and returned
+  an error instead.  (ludovic Boutros, Uwe Schindler, Robert Muir)
+
+API Changes
+
+* LUCENE-3208: Renamed protected IndexSearcher.createWeight() to the expert
+  public method IndexSearcher.createNormalizedWeight(), as this better
+  describes what this method does. The old method is still there for backwards
+  compatibility. Query.weight() was deprecated and simply delegates to
+  IndexSearcher. Both deprecated methods will be removed in Lucene 4.0.
+  (Uwe Schindler, Robert Muir, Yonik Seeley)
+
+* LUCENE-3197: MergePolicy.findMergesForOptimize now takes
+  Map<SegmentInfo,Boolean> instead of Set<SegmentInfo> as the second
+  argument, so the merge policy knows which segments were originally
+  present vs produced by an optimizing merge (Mike McCandless)
+
+Optimizations
+
+* LUCENE-1736: DateTools.java general improvements.
+  (David Smiley via Steve Rowe)
+
+New Features
+
+* LUCENE-3140: Added experimental FST implementation to Lucene.
+  (Robert Muir, Dawid Weiss, Mike McCandless)
+
+* LUCENE-3193: A new TwoPhaseCommitTool allows running a 2-phase commit
+  algorithm over objects that implement the new TwoPhaseCommit interface (such
+  as IndexWriter).  (Shai Erera)
+
+* LUCENE-3191: Added TopDocs.merge, to facilitate merging results from
+  different shards (Uwe Schindler, Mike McCandless)
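+
+  A rough sketch of how TopDocs.merge might be used (hedged: the
+  per-shard result variables are placeholders, and passing a null Sort
+  is assumed to mean relevance order):
+
+      // hits gathered from two shards for the same query
+      TopDocs[] shardHits = new TopDocs[] { hitsFromShard1, hitsFromShard2 };
+      // merge the per-shard results into one top-10 list
+      TopDocs merged = TopDocs.merge(null, 10, shardHits);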
+
+* LUCENE-3179: Added OpenBitSet.prevSetBit (Paul Elschot via Mike McCandless)
+
+* LUCENE-3210: Made TieredMergePolicy more aggressive in reclaiming
+  segments with deletions; added new methods
+  set/getReclaimDeletesWeight to control this.  (Mike McCandless)
+
+Build
+
+* LUCENE-1344: Create OSGi bundle using dev-tools/maven.
+  (Nicolas Lalevée, Luca Stancapiano via ryan)
+
+* LUCENE-3204: The maven-ant-tasks jar is now included in the source tree;
+  users of the generate-maven-artifacts target no longer have to manually
+  place this jar in the Ant classpath. NOTE: when Ant looks for the
+  maven-ant-tasks jar, it looks first in its pre-existing classpath, so
+  any copies it finds will be used instead of the copy included in the
+  Lucene/Solr source tree. For this reason, it is recommended to remove
+  any copies of the maven-ant-tasks jar in the Ant classpath, e.g. under
+  ~/.ant/lib/ or under the Ant installation's lib/ directory.  (Steve Rowe)
+
+
+======================= Lucene 3.2.0 =======================
+
+Changes in backwards compatibility policy
+
+* LUCENE-2953: PriorityQueue's internal heap was made private, as subclassing
+  with generics can lead to ClassCastException. For advanced use (e.g. in Solr)
+  a method getHeapArray() was added to retrieve the internal heap array as a
+  non-generic Object[].  (Uwe Schindler, Yonik Seeley)
+
+* LUCENE-1076: IndexWriter.setInfoStream now throws IOException
+  (Mike McCandless, Shai Erera)
+
+* LUCENE-3084: MergePolicy.OneMerge.segments was changed from
+  SegmentInfos to a List<SegmentInfo>. SegmentInfos itself was changed
+  to no longer extend Vector<SegmentInfo> (to update code that is using the
+  Vector API, use the new asList() and asSet() methods returning unmodifiable
+  collections; modifying SegmentInfos is now only possible through
+  the explicitly declared methods). IndexWriter.segString() now takes
+  Iterable<SegmentInfo> instead of List<SegmentInfo>. A simple recompile
+  should fix this. MergePolicy and SegmentInfos are internal/experimental
+  APIs not covered by the strict backwards compatibility policy.
+  (Uwe Schindler, Mike McCandless)
+
+Changes in runtime behavior
+
+* LUCENE-3065: When a NumericField is retrieved from a Document loaded
+  from IndexReader (or IndexSearcher), it will now come back as a
+  NumericField, not as a Field with a string-ified version of the
+  numeric value you had indexed. Note that this only applies to
+  newly-indexed Documents; older indices will still return a Field
+  with the string-ified numeric value. If you call Document.get(),
+  the value still comes back as a String, but Document.getFieldable()
+  returns NumericField instances.  (Uwe Schindler, Ryan McKinley,
+  Mike McCandless)
+
+* LUCENE-1076: Changed the default merge policy from
+  LogByteSizeMergePolicy to TieredMergePolicy, as of Version.LUCENE_32
+  (passed to IndexWriterConfig), which is able to merge non-contiguous
+  segments. This means docIDs no longer necessarily stay "in order"
+  during indexing. If this is a problem then you can use either of
+  the LogMergePolicy impls.  (Mike McCandless)
+
+New features
+
+* LUCENE-3082: Added the index upgrade tool oal.index.IndexUpgrader,
+  which upgrades all segments to the latest supported index format
+  without fully optimizing.  (Uwe Schindler, Mike McCandless)
+
+* LUCENE-1076: Added TieredMergePolicy, which is able to merge non-contiguous
+  segments, which means docIDs no longer necessarily stay "in order".
+  (Mike McCandless, Shai Erera)
+
+* LUCENE-3071: Added ReversePathHierarchyTokenizer, and added a skip
+  parameter to PathHierarchyTokenizer (Olivier Favre via ryan)
+
+* LUCENE-1421, LUCENE-3102: Added CachingCollector, which allows you to cache
+  document IDs and scores encountered during the search, and "replay" them to
+  another Collector.  (Mike McCandless, Shai Erera)
+
+* LUCENE-3112: Added experimental IndexWriter.add/updateDocuments,
+  enabling a block of documents to be indexed, atomically, with
+  guaranteed sequential docIDs.  (Mike McCandless)
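+
+  A minimal sketch of the block-indexing call described above (hedged:
+  the writer and the two Document instances are placeholders):
+
+      List<Document> block = new ArrayList<Document>();
+      block.add(parentDoc);
+      block.add(childDoc);
+      // the block is added atomically and receives adjacent docIDs
+      writer.addDocuments(block);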
+
+API Changes
+
+* LUCENE-3061: IndexWriter's getNextMerge() and merge(OneMerge) are now public
+  (though @lucene.experimental), allowing for custom MergeScheduler
+  implementations.  (Shai Erera)
+
+* LUCENE-3065: Document.getField() was deprecated, as it throws
+  ClassCastException when loading lazy fields or NumericFields.
+  (Uwe Schindler, Ryan McKinley, Mike McCandless)
+
+* LUCENE-2027: Directory.touchFile is deprecated and will be removed
+  in 4.0.  (Mike McCandless)
+
+Optimizations
+
+* LUCENE-2990: ArrayUtil/CollectionUtil.*Sort() methods now exit early
+  on empty or one-element lists/arrays.  (Uwe Schindler)
+
+* LUCENE-2897: Apply deleted terms while flushing a segment. We still
+  buffer deleted terms to later apply to past segments.  (Mike McCandless)
+
+* LUCENE-3126: IndexWriter.addIndexes copies incoming segments into CFS if they
+  aren't already in compound format and the MergePolicy allows it.  (Shai Erera)
+
+Bug fixes
+
+* LUCENE-2996: addIndexes(IndexReader) did not flush before adding the new
+  indexes, causing existing deletions to be applied on the incoming indexes as
+  well.  (Shai Erera, Mike McCandless)
+
+* LUCENE-3024: An index with more than 2.1B terms was hitting AIOOBE when
+  seeking TermEnum (e.g. used by Solr's faceting) (Tom Burton-West, Mike
+  McCandless)
+
+* LUCENE-3042: When a filter or consumer added Attributes to a TokenStream
+  chain after it was already (partly) consumed [or clearAttributes(),
+  captureState(), cloneAttributes(),... was called by the Tokenizer],
+  the Tokenizer calling clearAttributes() or capturing state after addition
+  may not do this on the newly added Attribute. This bug affected only
+  very special use cases of the TokenStream API; most users would not
+  have noticed it.  (Uwe Schindler, Robert Muir)
+
+* LUCENE-3054: PhraseQuery can in some cases hit a stack overflow in
+  SorterTemplate.quickSort(). This fix also adds an optimization to
+  PhraseQuery, as a term with lower doc freq will also have fewer positions.
+  (Uwe Schindler, Robert Muir, Otis Gospodnetic)
+
+* LUCENE-3068: A sloppy phrase query failed to match valid documents when
+  multiple query terms had the same position in the query.  (Doron Cohen)
+
+* LUCENE-3012: Lucene now writes the header for separate norm files (*.sNNN).
+  (Robert Muir)
+
+Build
+
+* LUCENE-3006: Building javadocs will fail on warnings by default.
+  Override with -Dfailonjavadocwarning=false (sarowe, gsingers)
+
+* LUCENE-3128: "ant eclipse" creates a .project file for easier Eclipse
+  integration (unless one already exists).  (Daniel Serodio via Shai Erera)
+
+Test Cases
+
+* LUCENE-3002: Added 'tests.iter.min' to control 'tests.iter' by allowing
+  iteration to stop if at least 'tests.iter.min' iterations ran and a failure
+  occurred.  (Shai Erera, Chris Hostetter)
+
+
+======================= Lucene 3.1.0 =======================
+
+Changes in backwards compatibility policy
+
+* LUCENE-2719: Changed the API of the internal utility class
+  org.apache.lucene.util.SorterTemplate to support faster quickSort using
+  pivot values, and also merge sort and insertion sort. If you have used
+  this class, you have to implement two more methods for handling pivots.
+  (Uwe Schindler, Robert Muir, Mike McCandless)
+
+* LUCENE-1923: Renamed the SegmentInfo & SegmentInfos segString method to
+  toString. These are advanced APIs and subject to change suddenly.
+  (Tim Smith via Mike McCandless)
+
+* LUCENE-2190: Removed deprecated customScore() and customExplain()
+  methods from experimental CustomScoreQuery.  (Uwe Schindler)
+
+* LUCENE-2286: Enabled DefaultSimilarity.setDiscountOverlaps by default.
+  This means that terms with a position increment gap of zero do not
+  affect the norms calculation by default.  (Robert Muir)
+
+* LUCENE-2320: MergePolicy.writer is now of type SetOnce, which allows setting
+  the IndexWriter for a MergePolicy exactly once. You can change references to
+  'writer' from writer.doXYZ() to writer.get().doXYZ()
+  (it is also advisable to add an assert writer != null; before you
+  access the wrapped IndexWriter.)
+
+  In addition, MergePolicy only exposes a default constructor, and the one that
+  took IndexWriter as argument has been removed from all MergePolicy extensions.
+  (Shai Erera via Mike McCandless)
+
+* LUCENE-2328: SimpleFSDirectory.SimpleFSIndexInput is moved to
+  FSDirectory.FSIndexInput. Anyone extending this class will have to
+  fix their code on upgrading.  (Earwin Burrfoot via Mike McCandless)
+
+* LUCENE-2302: The new interface for term attributes, CharTermAttribute,
+  now implements CharSequence. This requires the toString() methods of
+  CharTermAttribute, deprecated TermAttribute, and Token to return only
+  the term text and no other attribute contents. LUCENE-2374 implements
+  an attribute reflection API to no longer rely on toString() for attribute
+  inspection.  (Uwe Schindler, Robert Muir)
+
+* LUCENE-2372, LUCENE-2389: StandardAnalyzer, KeywordAnalyzer,
+  PerFieldAnalyzerWrapper, WhitespaceTokenizer are now final. Also removed
+  the now obsolete and deprecated Analyzer.setOverridesTokenStreamMethod().
+  Analyzer and TokenStream base classes now have an assertion in their ctor
+  which checks that subclasses are final or at least have final implementations
+  of incrementToken(), tokenStream(), and reusableTokenStream().
+  (Uwe Schindler, Robert Muir)
+
+* LUCENE-2316: The Directory.fileLength contract was clarified - it returns the
+  actual file's length if the file exists, and throws FileNotFoundException
+  otherwise. Returning length=0 for a non-existent file is no longer allowed. If
+  you relied on that, make sure to catch the exception.  (Shai Erera)
+
+* LUCENE-2386: IndexWriter no longer performs an empty commit upon new index
+  creation. Previously, if you passed an empty Directory and set OpenMode to
+  CREATE*, IndexWriter would make a first empty commit. If you need that
+  behavior you can call writer.commit()/close() immediately after you create it.
+  (Shai Erera, Mike McCandless)
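+
+  Under the LUCENE-2386 change above, an application that relied on the
+  implicit first commit has to make it explicit. A minimal sketch (the
+  Directory and Analyzer instances are placeholders):
+
+      IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_34, analyzer)
+          .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
+      IndexWriter writer = new IndexWriter(dir, conf);
+      writer.commit();  // explicitly create the first (empty) commit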
+
+* LUCENE-2733: Removed public constructors of utility classes with only static
+  methods to prevent instantiation.  (Uwe Schindler)
+
+* LUCENE-2602: The default (LogByteSizeMergePolicy) merge policy now
+  takes deletions into account by default. You can disable this by
+  calling setCalibrateSizeByDeletes(false) on the merge policy.  (Mike
+  McCandless)
+
+* LUCENE-2529, LUCENE-2668: The position increment gap and offset gap of
+  empty values in a multi-valued field have been changed for some cases in
+  the index. If you index empty fields and use position/offset information
+  on those fields, reindexing is recommended.  (David Smiley, Koji Sekiguchi)
+
+* LUCENE-2804: Directory.setLockFactory now declares that it throws an
+  IOException.  (Shai Erera, Robert Muir)
+
+* LUCENE-2837: Added deprecations noting that in 4.0, Searcher and
+  Searchable are collapsed into IndexSearcher; contrib/remote and
+  MultiSearcher have been removed.  (Mike McCandless)
+
+* LUCENE-2854: Deprecated SimilarityDelegator and
+  Similarity.lengthNorm; the latter is now final, forcing any custom
+  Similarity impls to cut over to the more general computeNorm (Robert
+  Muir, Mike McCandless)
+
+* LUCENE-2869: Deprecated Query.getSimilarity: instead of using
+  "runtime" subclassing/delegation, subclass the Weight instead.
+  (Robert Muir)
+
+* LUCENE-2674: A new idfExplain method was added to Similarity, which
+  accepts an incoming docFreq.
If you subclass Similarity, make sure + you also override this method on upgrade. (Robert Muir, Mike + McCandless) + +Changes in runtime behavior + +* LUCENE-1923: Made IndexReader.toString() produce something + meaningful (Tim Smith via Mike McCandless) + +* LUCENE-2179: CharArraySet.clear() is now functional. + (Robert Muir, Uwe Schindler) + +* LUCENE-2455: IndexWriter.addIndexes no longer optimizes the target index + before it adds the new ones. Also, the existing segments are not merged and so + the index will not end up with a single segment (unless it was empty before). + In addition, addIndexesNoOptimize was renamed to addIndexes and no longer + invokes a merge on the incoming and target segments, but instead copies the + segments to the target index. You can call maybeMerge or optimize after this + method completes, if you need to. + + In addition, Directory.copyTo* were removed in favor of copy which takes the + target Directory, source and target files as arguments, and copies the source + file to the target Directory under the target file name. (Shai Erera) + +* LUCENE-2663: IndexWriter no longer forcefully clears any existing + locks when create=true. This was a holdover from when + SimpleFSLockFactory was the default locking implementation, and, + even then it was dangerous since it could mask bugs in IndexWriter's + usage, allowing applications to accidentally open two writers on the + same directory. (Mike McCandless) + +* LUCENE-2701: maxMergeMBForOptimize and maxMergeDocs constraints set on + LogMergePolicy now affect optimize() as well (as opposed to only regular + merges). This means that you can run optimize() and too large segments won't + be merged. (Shai Erera) + +* LUCENE-2753: IndexReader and DirectoryReader .listCommits() now return a List, + guaranteeing the commits are sorted from oldest to latest. (Shai Erera) + +* LUCENE-2785: TopScoreDocCollector, TopFieldCollector and + the IndexSearcher search methods that take an int nDocs will now + throw IllegalArgumentException if nDocs is 0. Instead, you should + use the newly added TotalHitCountCollector. (Mike McCandless) + +* LUCENE-2790: LogMergePolicy.useCompoundFile's logic now factors in noCFSRatio + to determine whether the passed in segment should be compound. + (Shai Erera, Earwin Burrfoot) + +* LUCENE-2805: IndexWriter now increments the index version on every change to + the index instead of for every commit. Committing or closing the IndexWriter + without any changes to the index will not cause any index version increment. + (Simon Willnauer, Mike McCandless) + +* LUCENE-2650, LUCENE-2825: The behavior of FSDirectory.open has changed. On 64-bit + Windows and Solaris systems that support unmapping, FSDirectory.open returns + MMapDirectory. Additionally the behavior of MMapDirectory has been + changed to enable unmapping by default if supported by the JRE. + (Mike McCandless, Uwe Schindler, Robert Muir) + +* LUCENE-2829: Improve the performance of "primary key" lookup use + case (running a TermQuery that matches one document) on a + multi-segment index. (Robert Muir, Mike McCandless) + +* LUCENE-2010: Segments with 100% deleted documents are now removed on + IndexReader or IndexWriter commit. (Uwe Schindler, Mike McCandless) + +* LUCENE-2960: Allow some changes to IndexWriterConfig to take effect + "live" (after an IW is instantiated), via + IndexWriter.getConfig().setXXX(...) (Shay Banon, Mike McCandless) + +API Changes + +* LUCENE-2076: Rename FSDirectory.getFile -> getDirectory. 
(George + Aroush via Mike McCandless) + +* LUCENE-1260: Change norm encode (float->byte) and decode + (byte->float) to be instance methods not static methods. This way a + custom Similarity can alter how norms are encoded, though they must + still be encoded as a single byte (Johan Kindgren via Mike + McCandless) + +* LUCENE-2103: NoLockFactory should have a private constructor; + until Lucene 4.0 the default one will be deprecated. + (Shai Erera via Uwe Schindler) + +* LUCENE-2177: Deprecate the Field ctors that take byte[] and Store. + Since the removal of compressed fields, Store can only be YES, so + it's not necessary to specify. (Erik Hatcher via Mike McCandless) + +* LUCENE-2200: Several final classes had non-overriding protected + members. These were converted to private and unused protected + constructors removed. (Steven Rowe via Robert Muir) + +* LUCENE-2240: SimpleAnalyzer and WhitespaceAnalyzer now have + Version ctors. (Simon Willnauer via Uwe Schindler) + +* LUCENE-2259: Add IndexWriter.deleteUnusedFiles, to attempt removing + unused files. This is only useful on Windows, which prevents + deletion of open files. IndexWriter will eventually remove these + files itself; this method just lets you do so when you know the + files are no longer open by IndexReaders. (luocanrao via Mike + McCandless) + +* LUCENE-2282: IndexFileNames is exposed as a public class allowing for easier + use by external code. In addition it offers a matchExtension method which + callers can use to query whether a certain file matches a certain extension. + (Shai Erera via Mike McCandless) + +* LUCENE-124: Add a TopTermsBoostOnlyBooleanQueryRewrite to MultiTermQuery. + This rewrite method is similar to TopTermsScoringBooleanQueryRewrite, but + only scores terms by their boost values. For example, this can be used + with FuzzyQuery to ensure that exact matches are always scored higher, + because only the boost will be used in scoring. (Robert Muir) + +* LUCENE-2015: Add a static method foldToASCII to ASCIIFoldingFilter to + expose its folding logic. (Cédrik Lime via Robert Muir) + +* LUCENE-2294: IndexWriter constructors have been deprecated in favor of a + single ctor which accepts IndexWriterConfig and a Directory. You can set all + the parameters related to IndexWriter on IndexWriterConfig. The different + setter/getter methods were deprecated as well. One should call + writer.getConfig().getXYZ() to query for a parameter XYZ. + Additionally, the setter/getter related to MergePolicy were deprecated as + well. One should interact with the MergePolicy directly. + (Shai Erera via Mike McCandless) + +* LUCENE-2320: IndexWriter's MergePolicy configuration was moved to + IndexWriterConfig and the respective methods on IndexWriter were deprecated. + (Shai Erera via Mike McCandless) + +* LUCENE-2328: Directory now keeps track itself of the files that are written + but not yet fsynced. The old Directory.sync(String file) method is deprecated + and replaced with Directory.sync(Collection files). Take a look at + FSDirectory to see a sample of how such tracking might look like, if needed + in your custom Directories. (Earwin Burrfoot via Mike McCandless) + +* LUCENE-2302: Deprecated TermAttribute and replaced by a new + CharTermAttribute. The change is backwards compatible, so + mixed new/old TokenStreams all work on the same char[] buffer + independent of which interface they use. CharTermAttribute + has shorter method names and implements CharSequence and + Appendable. 
This allows usage like Java's StringBuilder in + addition to direct char[] access. Also terms can directly be + used in places where CharSequence is allowed (e.g. regular + expressions). + (Uwe Schindler, Robert Muir) + +* LUCENE-2402: IndexWriter.deleteUnusedFiles now deletes unreferenced commit + points too. If you use an IndexDeletionPolicy which holds onto index commits + (such as SnapshotDeletionPolicy), you can call this method to remove those + commit points when they are not needed anymore (instead of waiting for the + next commit). (Shai Erera) + +* LUCENE-2481: SnapshotDeletionPolicy.snapshot() and release() were replaced + with equivalent ones that take a String (id) as argument. You can pass + whatever ID you want, as long as you use the same one when calling both. + (Shai Erera) + +* LUCENE-2356: Add IndexWriterConfig.set/getReaderTermIndexDivisor, to + set what IndexWriter passes for termsIndexDivisor to the readers it + opens internally when apply deletions or creating a near-real-time + reader. (Earwin Burrfoot via Mike McCandless) + +* LUCENE-2167,LUCENE-2699,LUCENE-2763,LUCENE-2847: StandardTokenizer/Analyzer + in common/standard/ now implement the Word Break rules from the Unicode 6.0.0 + Text Segmentation algorithm (UAX#29), covering the full range of Unicode code + points, including values from U+FFFF to U+10FFFF + + ClassicTokenizer/Analyzer retains the old (pre-Lucene 3.1) StandardTokenizer/ + Analyzer implementation and behavior. Only the Unicode Basic Multilingual + Plane (code points from U+0000 to U+FFFF) is covered. + + UAX29URLEmailTokenizer tokenizes URLs and E-mail addresses according to the + relevant RFCs, in addition to implementing the UAX#29 Word Break rules. + (Steven Rowe, Robert Muir, Uwe Schindler) + +* LUCENE-2778: RAMDirectory now exposes newRAMFile() which allows to override + and return a different RAMFile implementation. (Shai Erera) + +* LUCENE-2785: Added TotalHitCountCollector whose sole purpose is to + count the number of hits matching the query. (Mike McCandless) + +* LUCENE-2846: Deprecated IndexReader.setNorm(int, String, float). This method + is only syntactic sugar for setNorm(int, String, byte), but using the global + Similarity.getDefault().encodeNormValue(). Use the byte-based method instead + to ensure that the norm is encoded with your Similarity. + (Robert Muir, Mike McCandless) + +* LUCENE-2374: Added Attribute reflection API: It's now possible to inspect the + contents of AttributeImpl and AttributeSource using a well-defined API. + This is e.g. used by Solr's AnalysisRequestHandlers to display all attributes + in a structured way. + There are also some backwards incompatible changes in toString() output, + as LUCENE-2302 introduced the CharSequence interface to CharTermAttribute + leading to changed toString() return values. The new API allows to get a + string representation in a well-defined way using a new method + reflectAsString(). For backwards compatibility reasons, when toString() + was implemented by implementation subclasses, the default implementation of + AttributeImpl.reflectWith() uses toString()s output instead to report the + Attribute's properties. Otherwise, reflectWith() uses Java's reflection + (like toString() did before) to get the attribute properties. + In addition, the mandatory equals() and hashCode() are no longer required + for AttributeImpls, but can still be provided (if needed). 
+  (Uwe Schindler)
+
+* LUCENE-2691: Deprecated IndexWriter.getReader in favor of
+  IndexReader.open(IndexWriter) (Grant Ingersoll, Mike McCandless)
+
+* LUCENE-2876: Deprecated Scorer.getSimilarity(). If your Scorer uses a
+  Similarity, it should keep it itself. Fixed Scorers to pass their parent
+  Weight, so that Scorer.visitSubScorers (LUCENE-2590) will work correctly.
+  (Robert Muir, Doron Cohen)
+
+* LUCENE-2900: When opening a near-real-time (NRT) reader
+  (IndexReader.re/open(IndexWriter)) you can now specify whether
+  deletes should be applied. Applying deletes can be costly, and some
+  expert use cases can handle seeing deleted documents returned. The
+  deletes remain buffered so that the next time you open an NRT reader
+  and pass true, all deletes will be applied.  (Mike McCandless)
+
+* LUCENE-1253: LengthFilter (and Solr's KeepWordTokenFilter) now
+  require up-front specification of enablePositionIncrement. Together with
+  StopFilter they have a common base class (FilteringTokenFilter) that handles
+  the position increments automatically. Implementors only need to override an
+  accept() method that filters tokens.  (Uwe Schindler, Robert Muir)
+
+Bug fixes
+
+* LUCENE-2249: ParallelMultiSearcher should shut down its thread pool on
+  close.  (Martin Traverso via Uwe Schindler)
+
+* LUCENE-2273: FieldCacheImpl.getCacheEntries() used WeakHashMap
+  incorrectly and led to ConcurrentModificationException.
+  (Uwe Schindler, Robert Muir)
+
+* LUCENE-2328: Index file fsync tracking moved from
+  IndexWriter/IndexReader to Directory, and it no longer leaks memory.
+  (Earwin Burrfoot via Mike McCandless)
+
+* LUCENE-2074: Reduce the buffer size of the lexer back to default on reset.
+  (Ruben Laguna, Shai Erera via Uwe Schindler)
+
+* LUCENE-2496: Don't throw NPE if IndexWriter is opened with CREATE on
+  a prior (corrupt) index missing its segments_N file.  (Mike
+  McCandless)
+
+* LUCENE-2458: QueryParser no longer automatically forms phrase queries,
+  assuming whitespace tokenization. Previously all CJK queries, for example,
+  would be turned into phrase queries. The old behavior is preserved with
+  the matchVersion parameter for previous versions. Additionally, you can
+  explicitly enable the old behavior with setAutoGeneratePhraseQueries(true)
+  (Robert Muir)
+
+* LUCENE-2537: The FSDirectory.copy() implementation was unsafe and could
+  result in OOM if a large file was copied.  (Shai Erera)
+
+* LUCENE-2580: MultiPhraseQuery throws AIOOBE if the number of positions
+  exceeds the number of terms at one position (Jayendra Patil via Mike
+  McCandless)
+
+* LUCENE-2617: Optional clauses of a BooleanQuery were not factored
+  into coord if the scorer for that segment returned null. This
+  could cause the same document to score differently depending on
+  which segment it resides in.  (yonik)
+
+* LUCENE-2272: Fix explain in PayloadNearQuery and also fix a scoring issue
+  (Peter Keegan via Grant Ingersoll)
+
+* LUCENE-2732: Fix charset problems in XML loading in
+  HyphenationCompoundWordTokenFilter.  (Uwe Schindler)
+
+* LUCENE-2802: An NRT DirectoryReader returned incorrect values from
+  getVersion, isOptimized, getCommitUserData, getIndexCommit and isCurrent due
+  to a mutable reference to the IndexWriter's SegmentInfos.
+ (Simon Willnauer, Earwin Burrfoot) + +* LUCENE-2852: Fixed corner case in RAMInputStream that would hit a + false EOF after seeking to EOF then seeking back to same block you + were just in and then calling readBytes (Robert Muir, Mike McCandless) + +* LUCENE-2860: Fixed SegmentInfo.sizeInBytes to factor includeDocStores when it + decides whether to return the cached computed size or not. (Shai Erera) + +* LUCENE-2584: SegmentInfo.files() could hit ConcurrentModificationException if + called by multiple threads. (Alexander Kanarsky via Shai Erera) + +* LUCENE-2809: Fixed IndexWriter.numDocs to take into account + applied but not yet flushed deletes. (Mike McCandless) + +* LUCENE-2879: MultiPhraseQuery previously calculated its phrase IDF by summing + internally, it now calls Similarity.idfExplain(Collection, IndexSearcher). + (Robert Muir) + +* LUCENE-2693: RAM used by IndexWriter was slightly incorrectly computed. + (Jason Rutherglen via Shai Erera) + +* LUCENE-1846: DateTools now uses the US locale everywhere, so DateTools.round() + is safe also in strange locales. (Uwe Schindler) + +* LUCENE-2891: IndexWriterConfig did not accept -1 in setReaderTermIndexDivisor, + which can be used to prevent loading the terms index into memory. (Shai Erera) + +* LUCENE-2937: Encoding a float into a byte (e.g. encoding field norms during + indexing) had an underflow detection bug that caused floatToByte(f)==0 where + f was greater than 0, but slightly less than byteToFloat(1). This meant that + certain very small field norms (index_boost * length_norm) could have + been rounded down to 0 instead of being rounded up to the smallest + positive number. (yonik) + +* LUCENE-2936: PhraseQuery score explanations were not correctly + identifying matches vs non-matches. (hossman) + +* LUCENE-2975: A hotspot bug corrupts IndexInput#readVInt()/readVLong() if + the underlying readByte() is inlined (which happens e.g. in MMapDirectory). + The loop was unwinded which makes the hotspot bug disappear. + (Uwe Schindler, Robert Muir, Mike McCandless) + +New features + +* LUCENE-2128: Parallelized fetching document frequencies during weight + creation. (Israel Tsadok, Simon Willnauer via Uwe Schindler) + +* LUCENE-2069: Added Unicode 4 support to CharArraySet. Due to the switch + to Java 5, supplementary characters are now lowercased correctly if the + set is created as case insensitive. + CharArraySet now requires a Version argument to preserve + backwards compatibility. If Version < 3.1 is passed to the constructor, + CharArraySet yields the old behavior. (Simon Willnauer) + +* LUCENE-2069: Added Unicode 4 support to LowerCaseFilter. Due to the switch + to Java 5, supplementary characters are now lowercased correctly. + LowerCaseFilter now requires a Version argument to preserve + backwards compatibility. If Version < 3.1 is passed to the constructor, + LowerCaseFilter yields the old behavior. (Simon Willnauer, Robert Muir) + +* LUCENE-2034: Added ReusableAnalyzerBase, an abstract subclass of Analyzer + that makes it easier to reuse TokenStreams correctly. This issue also added + StopwordAnalyzerBase, which improves consistency of all Analyzers that use + stopwords, and implement many analyzers in contrib with it. + (Simon Willnauer via Robert Muir) + +* LUCENE-2198, LUCENE-2901: Support protected words in stemming TokenFilters using a + new KeywordAttribute. (Simon Willnauer, Drew Farris via Uwe Schindler) + +* LUCENE-2183, LUCENE-2240, LUCENE-2241: Added Unicode 4 support + to CharTokenizer and its subclasses. 
CharTokenizer now has a new int-based API
+  which is conditionally preferred to the old char-based API depending
+  on the provided Version. Version < 3.1 will use the char API.
+  (Simon Willnauer via Uwe Schindler)
+
+* LUCENE-2247: Added a CharArrayMap for performance improvements
+  in some stemmers and synonym filters.  (Uwe Schindler)
+
+* LUCENE-2320: Added SetOnce, which wraps an object and allows it to be set
+  exactly once.  (Shai Erera via Mike McCandless)
+
+* LUCENE-2314: Added AttributeSource.copyTo(AttributeSource), which
+  allows using cloneAttributes() and this method as a replacement
+  for captureState()/restoreState(), if the state itself
+  needs to be inspected/modified.  (Uwe Schindler)
+
+* LUCENE-2293: Expose control over the max number of threads that
+  IndexWriter will allow to run concurrently while indexing
+  documents (previously this was hardwired to 5), using
+  IndexWriterConfig.setMaxThreadStates.  (Mike McCandless)
+
+* LUCENE-2297: Enable turning on reader pooling inside IndexWriter
+  even when getReader (near-real-time reader) is not in use, through
+  IndexWriterConfig.enable/disableReaderPooling.  (Mike McCandless)
+
+* LUCENE-2331: Added NoMergePolicy, which never returns any merges to
+  execute. In addition, added NoMergeScheduler, which never executes any
+  merges. These two are convenient classes in case you want to disable
+  segment merges by IndexWriter without tweaking a particular MergePolicy's
+  parameters, such as mergeFactor. MergeScheduler's methods are now public.
+  (Shai Erera via Mike McCandless)
+
+* LUCENE-2339: Deprecated the static method Directory.copy in favor of
+  Directory.copyTo, and use NIO's FileChannel.transferTo when copying
+  files between FSDirectory instances.  (Earwin Burrfoot via Mike
+  McCandless)
+
+* LUCENE-2074: Make StandardTokenizer fit for Unicode 4.0, if the
+  matchVersion parameter is Version.LUCENE_31.  (Uwe Schindler)
+
+* LUCENE-2385: Moved NoDeletionPolicy from benchmark to core. NoDeletionPolicy
+  can be used to prevent commits from ever getting deleted from the index.
+  (Shai Erera)
+
+* LUCENE-1585: IndexWriter now accepts a PayloadProcessorProvider, which can
+  return a DirPayloadProcessor for a given Directory, which in turn returns a
+  PayloadProcessor for a given Term. The PayloadProcessor will be used to
+  process the payloads of the segments as they are merged (e.g. if one wants to
+  rewrite payloads of external indexes as they are added, or of local ones).
+  (Shai Erera, Michael Busch, Mike McCandless)
+
+* LUCENE-2440: Added support for a custom ExecutorService in
+  ParallelMultiSearcher (Edward Drapkin via Mike McCandless)
+
+* LUCENE-2295: Added a LimitTokenCountAnalyzer / LimitTokenCountFilter
+  to wrap any other Analyzer and provide the same functionality as
+  MaxFieldLength provided on IndexWriter. This patch also fixes a bug
+  in the offset calculation in CharTokenizer.  (Uwe Schindler, Shai Erera)
+
+* LUCENE-2526: Don't throw NPE from MultiPhraseQuery.toString when
+  it's empty.  (Ross Woolf via Mike McCandless)
+
+* LUCENE-2559: Added SegmentReader.reopen methods (John Wang via Mike
+  McCandless)
+
+* LUCENE-2590: Added Scorer.visitSubScorers, and Scorer.freq. Along
+  with a custom Collector these experimental methods make it possible
+  to gather the hit-count per sub-clause and per document while a
+  search is running.  (Simon Willnauer, Mike McCandless)
+
+* LUCENE-2636: Added MultiCollector, which allows running a search with
+  several Collectors.  (Shai Erera)
+
+* LUCENE-2754, LUCENE-2757: Added a wrapper around MultiTermQueries
+  to add span support: SpanMultiTermQueryWrapper.
+  Using this wrapper it's easy to add fuzzy/wildcard support to e.g. a
+  SpanNearQuery.  (Robert Muir, Uwe Schindler)
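+
+  A rough sketch of the wrapper in use (hedged: the field name and terms
+  are made up, and imports from org.apache.lucene.search.spans are assumed):
+
+      SpanQuery fuzzy = new SpanMultiTermQueryWrapper<FuzzyQuery>(
+          new FuzzyQuery(new Term("body", "lucene")));
+      SpanQuery exact = new SpanTermQuery(new Term("body", "search"));
+      // fuzzy and exact clauses within 5 positions of each other, in order
+      SpanQuery near = new SpanNearQuery(
+          new SpanQuery[] { fuzzy, exact }, 5, true);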
+
+* LUCENE-2838: ConstantScoreQuery now directly supports wrapping a Query
+  instance for stripping off scores. The use of a QueryWrapperFilter
+  is no longer needed and is discouraged for that use case. Directly wrapping
+  a Query improves performance, as out-of-order collection is now supported.
+  (Uwe Schindler)
+
+* LUCENE-2864: Added getMaxTermFrequency (maximum within-document TF) to
+  FieldInvertState so that it can be used in Similarity.computeNorm.
+  (Robert Muir)
+
+* LUCENE-2720: Segments now record the code version which created them.
+  (Shai Erera, Mike McCandless, Uwe Schindler)
+
+* LUCENE-2474: Added the expert ReaderFinishedListener API to
+  IndexReader, to allow apps that maintain external per-segment caches
+  to evict entries when a segment is finished.  (Shay Banon, Yonik
+  Seeley, Mike McCandless)
+
+* LUCENE-2911: The new StandardTokenizer, UAX29URLEmailTokenizer, and
+  the ICUTokenizer in contrib now all tag types with a consistent set
+  of token types (defined in StandardTokenizer). Tokens in the major
+  CJK types are explicitly marked to allow for custom downstream handling:
+  <IDEOGRAPHIC>, <HIRAGANA>, <KATAKANA>, and <HANGUL>.
+  (Robert Muir, Steven Rowe)
+
+* LUCENE-2913: Added missing getters to the Numeric* classes.  (Uwe Schindler)
+
+* LUCENE-1810: Added FieldSelectorResult.LATENT to not cache lazily loaded
+  fields (Tim Smith, Grant Ingersoll)
+
+* LUCENE-2692: Added several new SpanQuery classes for positional checking
+  (match is in a range, payload is a specific value) (Grant Ingersoll)
+
+Optimizations
+
+* LUCENE-2494: Use CompletionService in ParallelMultiSearcher instead of
+  simple polling for results.  (Edward Drapkin, Simon Willnauer)
+
+* LUCENE-2075: The terms dict cache is now shared across threads instead
+  of being stored separately in thread-local storage. Also fixed the
+  terms dict so that the cache is used when seeking the thread-local
+  term enum, which will be important for MultiTermQuery impls that do
+  lots of seeking (Mike McCandless, Uwe Schindler, Robert Muir, Yonik
+  Seeley)
+
+* LUCENE-2136: If the multi reader (DirectoryReader or MultiReader)
+  only has a single sub-reader, delegate all enum requests to it.
+  This avoids the overhead of using a PQ unnecessarily.  (Mike
+  McCandless)
+
+* LUCENE-2137: Switch to AtomicInteger for some ref counting (Earwin
+  Burrfoot via Mike McCandless)
+
+* LUCENE-2123, LUCENE-2261: Moved the FuzzyQuery rewrite to a separate
+  RewriteMode in MultiTermQuery. The number of fuzzy expansions can be
+  specified with the maxExpansions parameter to FuzzyQuery.
+  (Uwe Schindler, Robert Muir, Mike McCandless)
+
+* LUCENE-2164: ConcurrentMergeScheduler has more control over merge
+  threads. First, it gives smaller merges higher thread priority than
+  larger ones. Second, a new set/getMaxMergeCount setting will pause
+  the larger merges to allow smaller ones to finish. The defaults for
+  these settings are now dynamic, depending on the number of CPU cores
+  as reported by Runtime.getRuntime().availableProcessors() (Mike
+  McCandless)
+
+* LUCENE-2169: Improved CharArraySet.copy() when the source set is
+  also a CharArraySet.
(Simon Willnauer via Uwe Schindler) + +* LUCENE-2084: Change IndexableBinaryStringTools to work on byte[] and char[] + directly, instead of Byte/CharBuffers, and modify CollationKeyFilter to + take advantage of this for faster performance. + (Steven Rowe, Uwe Schindler, Robert Muir) + +* LUCENE-2188: Add a utility class for tracking deprecated overridden + methods in non-final subclasses. + (Uwe Schindler, Robert Muir) + +* LUCENE-2195: Speedup CharArraySet if set is empty. + (Simon Willnauer via Robert Muir) + +* LUCENE-2285: Code cleanup. (Shai Erera via Uwe Schindler) + +* LUCENE-2303: Remove code duplication in Token class by subclassing + TermAttributeImpl, move DEFAULT_TYPE constant to TypeInterface, improve + null-handling for TypeAttribute. (Uwe Schindler) + +* LUCENE-2329: Switch TermsHash* from using a PostingList object per unique + term to parallel arrays, indexed by termID. This reduces garbage collection + overhead significantly, which results in great indexing performance wins + when the available JVM heap space is low. This will become even more + important when the DocumentsWriter RAM buffer is searchable in the future, + because then it will make sense to make the RAM buffers as large as + possible. (Mike McCandless, Michael Busch) + +* LUCENE-2380: The terms field cache methods (getTerms, + getTermsIndex), which replace the older String equivalents + (getStrings, getStringIndex), consume quite a bit less RAM in most + cases. (Mike McCandless) + +* LUCENE-2410: ~20% speedup on exact (slop=0) PhraseQuery matching. + (Mike McCandless) + +* LUCENE-2531: Fix issue when sorting by a String field that was + causing too many fallbacks to compare-by-value (instead of by-ord). + (Mike McCandless) + +* LUCENE-2574: IndexInput exposes copyBytes(IndexOutput, long) to allow for + efficient copying by sub-classes. Optimized copy is implemented for RAM and FS + streams. (Shai Erera) + +* LUCENE-2719: Improved TermsHashPerField's sorting to use a better + quick sort algorithm that dereferences the pivot element not on + every compare call. Also replaced lots of sorting code in Lucene + by the improved SorterTemplate class. + (Uwe Schindler, Robert Muir, Mike McCandless) + +* LUCENE-2760: Optimize SpanFirstQuery and SpanPositionRangeQuery. + (Robert Muir) + +* LUCENE-2770: Make SegmentMerger always work on atomic subreaders, + even when IndexWriter.addIndexes(IndexReader...) is used with + DirectoryReaders or other MultiReaders. This saves lots of memory + during merge of norms. (Uwe Schindler, Mike McCandless) + +* LUCENE-2824: Optimize BufferedIndexInput to do less bounds checks. + (Robert Muir) + +* LUCENE-2010: Segments with 100% deleted documents are now removed on + IndexReader or IndexWriter commit. (Uwe Schindler, Mike McCandless) + +* LUCENE-1472: Removed synchronization from static DateTools methods + by using a ThreadLocal. Also converted DateTools.Resolution to a + Java 5 enum (this should not break backwards). (Uwe Schindler) + +Build + +* LUCENE-2124: Moved the JDK-based collation support from contrib/collation + into core, and moved the ICU-based collation support into contrib/icu. + (Robert Muir) + +* LUCENE-2326: Removed SVN checkouts for backwards tests. The backwards + branch is now included in the svn repository using "svn copy" + after release. (Uwe Schindler) + +* LUCENE-2074: Regenerating StandardTokenizerImpl files now needs + JFlex 1.5 (currently only available on SVN). (Uwe Schindler) + +* LUCENE-1709: Tests are now parallelized by default (except for benchmark). 
You + can force them to run sequentially by passing -Drunsequential=1 on the command + line. The number of threads that are spawned per CPU defaults to '1'. If you + wish to change that, you can run the tests with -DthreadsPerProcessor=[num]. + (Robert Muir, Shai Erera, Peter Kofler) + +* LUCENE-2516: Backwards tests are now compiled against released lucene-core.jar + from tarball of previous version. Backwards tests are now packaged together + with src distribution. (Uwe Schindler) + +* LUCENE-2611: Added Ant target to install IntelliJ IDEA configuration: + "ant idea". See http://wiki.apache.org/lucene-java/HowtoConfigureIntelliJ + (Steven Rowe) + +* LUCENE-2657: Switch from using Maven POM templates to full POMs when + generating Maven artifacts (Steven Rowe) + +* LUCENE-2609: Added jar-test-framework Ant target which packages Lucene's + tests' framework classes. (Drew Farris, Grant Ingersoll, Shai Erera, + Steven Rowe) + +Test Cases + +* LUCENE-2037 Allow Junit4 tests in our environment (Erick Erickson + via Mike McCandless) + +* LUCENE-1844: Speed up the unit tests (Mark Miller, Erick Erickson, + Mike McCandless) + +* LUCENE-2065: Use Java 5 generics throughout our unit tests. (Kay + Kay via Mike McCandless) + +* LUCENE-2155: Fix time and zone dependent localization test failures + in queryparser tests. (Uwe Schindler, Chris Male, Robert Muir) + +* LUCENE-2170: Fix thread starvation problems. (Uwe Schindler) + +* LUCENE-2248, LUCENE-2251, LUCENE-2285: Refactor tests to not use + Version.LUCENE_CURRENT, but instead use a global static value + from LuceneTestCase(J4), that contains the release version. + (Uwe Schindler, Simon Willnauer, Shai Erera) + +* LUCENE-2313, LUCENE-2322: Add VERBOSE to LuceneTestCase(J4) to control + verbosity of tests. If VERBOSE==false (default) tests should not print + anything other than errors to System.(out|err). The setting can be + changed with -Dtests.verbose=true on test invocation. + (Shai Erera, Paul Elschot, Uwe Schindler) + +* LUCENE-2318: Remove inconsistent system property code for retrieving + temp and data directories inside test cases. It is now centralized in + LuceneTestCase(J4). Also changed lots of tests to use + getClass().getResourceAsStream() to retrieve test data. Tests needing + access to "real" files from the test folder itself, can use + LuceneTestCase(J4).getDataFile(). (Uwe Schindler) + +* LUCENE-2398, LUCENE-2611: Improve tests to work better from IDEs such + as Eclipse and IntelliJ. + (Paolo Castagna, Steven Rowe via Robert Muir) + +* LUCENE-2804: add newFSDirectory to LuceneTestCase to create a FSDirectory at + random. (Shai Erera, Robert Muir) + +Documentation + +* LUCENE-2579: Fix oal.search's package.html description of abstract + methods. (Santiago M. Mola via Mike McCandless) + +* LUCENE-2625: Add a note to IndexReader.termDocs() with additional verbiage + that the TermEnum must be seeked since it is unpositioned. + (Adriano Crestani via Robert Muir) + +* LUCENE-2894: Use google-code-prettify for syntax highlighting in javadoc. + (Shinichiro Abe, Koji Sekiguchi) + +================== Release 2.9.4 / 3.0.3 ==================== + +Changes in runtime behavior + +* LUCENE-2689: NativeFSLockFactory no longer attempts to acquire a + test lock just before the real lock is acquired. (Surinder Pal + Singh Bindra via Mike McCandless) + +* LUCENE-2762: Fixed bug in IndexWriter causing it to hold open file + handles against deleted files when compound-file was enabled (the + default) and readers are pooled. 
As a result of this the peak + worst-case free disk space required during optimize is now 3X the + index size, when compound file is enabled (else 2X). (Mike + McCandless) + +* LUCENE-2773: LogMergePolicy accepts a double noCFSRatio (default = + 0.1), which means any time a merged segment is greater than 10% of + the index size, it will be left in non-compound format even if + compound format is on. This change was made to reduce peak + transient disk usage during optimize which increased due to + LUCENE-2762. (Mike McCandless) + +Bug fixes + +* LUCENE-2142 (correct fix): FieldCacheImpl.getStringIndex no longer + throws an exception when term count exceeds doc count. + (Mike McCandless, Uwe Schindler) + +* LUCENE-2513: when opening writable IndexReader on a not-current + commit, do not overwrite "future" commits. (Mike McCandless) + +* LUCENE-2536: IndexWriter.rollback was failing to properly rollback + buffered deletions against segments that were flushed (Mark Harwood + via Mike McCandless) + +* LUCENE-2541: Fixed NumericRangeQuery that returned incorrect results + with endpoints near Long.MIN_VALUE and Long.MAX_VALUE: + NumericUtils.splitRange() overflowed, if + - the range contained a LOWER bound + that was greater than (Long.MAX_VALUE - (1L << precisionStep)) + - the range contained an UPPER bound + that was less than (Long.MIN_VALUE + (1L << precisionStep)) + With standard precision steps around 4, this had no effect on + most queries, only those that met the above conditions. + Queries with large precision steps failed more easy. Queries with + precision step >=64 were not affected. Also 32 bit data types int + and float were not affected. + (Yonik Seeley, Uwe Schindler) + +* LUCENE-2593: Fixed certain rare cases where a disk full could lead + to a corrupted index (Robert Muir, Mike McCandless) + +* LUCENE-2620: Fixed a bug in WildcardQuery where too many asterisks + would result in unbearably slow performance. (Nick Barkas via Robert Muir) + +* LUCENE-2627: Fixed bug in MMapDirectory chunking when a file is an + exact multiple of the chunk size. (Robert Muir) + +* LUCENE-2634: isCurrent on an NRT reader was failing to return false + if the writer had just committed (Nikolay Zamosenchuk via Mike McCandless) + +* LUCENE-2650: Added extra safety to MMapIndexInput clones to prevent accessing + an unmapped buffer if the input is closed (Mike McCandless, Uwe Schindler, Robert Muir) + +* LUCENE-2384: Reset zzBuffer in StandardTokenizerImpl when lexer is reset. + (Ruben Laguna via Uwe Schindler, sub-issue of LUCENE-2074) + +* LUCENE-2658: Exceptions while processing term vectors enabled for multiple + fields could lead to invalid ArrayIndexOutOfBoundsExceptions. + (Robert Muir, Mike McCandless) + +* LUCENE-2235: Implement missing PerFieldAnalyzerWrapper.getOffsetGap(). + (Javier Godoy via Uwe Schindler) + +* LUCENE-2328: Fixed memory leak in how IndexWriter/Reader tracked + already sync'd files. (Earwin Burrfoot via Mike McCandless) + +* LUCENE-2549: Fix TimeLimitingCollector#TimeExceededException to record + the absolute docid. (Uwe Schindler) + +* LUCENE-2533: fix FileSwitchDirectory.listAll to not return dups when + primary & secondary dirs share the same underlying directory. + (Michael McCandless) + +* LUCENE-2365: IndexWriter.newestSegment (used normally for testing) + is fixed to return null if there are no segments. 
(Karthick + Sankarachary via Mike McCandless) + +* LUCENE-2730: Fix two rare deadlock cases in IndexWriter (Mike McCandless) + +* LUCENE-2744: CheckIndex was stating total number of fields, + not the number that have norms enabled, on the "test: field + norms..." output. (Mark Kristensson via Mike McCandless) + +* LUCENE-2759: Fixed two near-real-time cases where doc store files + may be opened for read even though they are still open for write. + (Mike McCandless) + +* LUCENE-2618: Fix rare thread safety issue whereby + IndexWriter.optimize could sometimes return even though the index + wasn't fully optimized (Mike McCandless) + +* LUCENE-2767: Fix thread safety issue in addIndexes(IndexReader[]) + that could potentially result in index corruption. (Mike + McCandless) + +* LUCENE-2762: Fixed bug in IndexWriter causing it to hold open file + handles against deleted files when compound-file was enabled (the + default) and readers are pooled. As a result of this the peak + worst-case free disk space required during optimize is now 3X the + index size, when compound file is enabled (else 2X). (Mike + McCandless) + +* LUCENE-2216: OpenBitSet.hashCode returned different hash codes for + sets that only differed by trailing zeros. (Dawid Weiss, yonik) + +* LUCENE-2782: Fix rare potential thread hazard with + IndexWriter.commit (Mike McCandless) + +API Changes + +* LUCENE-2773: LogMergePolicy accepts a double noCFSRatio (default = + 0.1), which means any time a merged segment is greater than 10% of + the index size, it will be left in non-compound format even if + compound format is on. This change was made to reduce peak + transient disk usage during optimize which increased due to + LUCENE-2762. (Mike McCandless) + +Optimizations + +* LUCENE-2556: Improve memory usage after cloning TermAttribute. + (Adriano Crestani via Uwe Schindler) + +* LUCENE-2098: Improve the performance of BaseCharFilter, especially for + large documents. (Robin Wojciki, Koji Sekiguchi, Robert Muir) + +New features + +* LUCENE-2675 (2.9.4 only): Add support for Lucene 3.0 stored field files + also in 2.9. The file format did not change, only the version number was + upgraded to mark segments that have no compression. FieldsWriter still only + writes 2.9 segments as they could contain compressed fields. This cross-version + index format compatibility is provided here solely because Lucene 2.9 and 3.0 + have the same bugfix level, features, and the same index format with this slight + compression difference. In general, Lucene does not support reading newer + indexes with older library versions. (Uwe Schindler) + +Documentation + +* LUCENE-2239: Documented limitations in NIOFSDirectory and MMapDirectory due to + Java NIO behavior when a Thread is interrupted while blocking on IO. + (Simon Willnauer, Robert Muir) + +================== Release 2.9.3 / 3.0.2 ==================== + +Changes in backwards compatibility policy + +* LUCENE-2135: Added FieldCache.purge(IndexReader) method to the + interface. Anyone implementing FieldCache externally will need to + fix their code to implement this, on upgrading. (Mike McCandless) + +Changes in runtime behavior + +* LUCENE-2421: NativeFSLockFactory does not throw LockReleaseFailedException if + it cannot delete the lock file, since obtaining the lock does not fail if the + file is there. 
+  (Shai Erera)
+
+* LUCENE-2060 (2.9.3 only): Changed ConcurrentMergeScheduler's default for
+  maxNumThreads from 3 to 1, because in practice we get the most gains
+  from running a single merge in the background. More than one
+  concurrent merge causes a lot of thrashing (though it's possible on
+  SSD storage that there would be net gains).  (Jason Rutherglen, Mike
+  McCandless)
+
+Bug fixes
+
+* LUCENE-2046 (2.9.3 only): IndexReader should not see the index as changed
+  after IndexWriter.prepareCommit has been called but before
+  IndexWriter.commit is called.  (Peter Keegan via Mike McCandless)
+
+* LUCENE-2119: Don't throw NegativeArraySizeException if you pass
+  Integer.MAX_VALUE as nDocs to IndexSearcher search methods.  (Paul
+  Taylor via Mike McCandless)
+
+* LUCENE-2142: FieldCacheImpl.getStringIndex no longer throws an
+  exception when the term count exceeds the doc count.  (Mike McCandless)
+
+* LUCENE-2104: NativeFSLock.release() would silently fail if the lock is held
+  by another thread/process.  (Shai Erera via Uwe Schindler)
+
+* LUCENE-2283: Use a shared memory pool for term vector and stored
+  fields buffers. This memory will be reclaimed if needed according to
+  the configured RAM Buffer Size for the IndexWriter. This also fixes
+  potentially excessive memory usage when many threads are indexing a
+  mix of small and large documents.  (Tim Smith via Mike McCandless)
+
+* LUCENE-2300: If IndexWriter is pooling readers (because an NRT reader
+  has been obtained), and addIndexes* is run, do not pool the
+  readers from the external directory. This is harmless (the NRT reader is
+  correct), but a waste of resources.  (Mike McCandless)
+
+* LUCENE-2422: Don't reuse byte[] in IndexInput/Output -- it gains
+  little performance, and ties up possibly large amounts of memory
+  for apps that index large docs.  (Ross Woolf via Mike McCandless)
+
+* LUCENE-2387: Don't hang onto Fieldables from the last doc indexed
+  in IndexWriter, nor the Reader in Tokenizer after close is
+  called.  (Ruben Laguna, Uwe Schindler, Mike McCandless)
+
+* LUCENE-2417: IndexCommit did not implement hashCode() and equals()
+  consistently. Now they both take Directory and version into consideration.
+  In addition, all of IndexCommit's methods which threw
+  UnsupportedOperationException are now abstract.  (Shai Erera)
+
+* LUCENE-2467: Fixed memory leaks in IndexWriter when large documents
+  are indexed.  (Mike McCandless)
+
+* LUCENE-2473: Clicking on the "More Results" link in the luceneweb.war
+  demo resulted in ArrayIndexOutOfBoundsException.
+  (Sami Siren via Robert Muir)
+
+* LUCENE-2476: If any exception is hit init'ing IW, release the write
+  lock (previously we only released on IOException).  (Tamas Cservenak
+  via Mike McCandless)
+
+* LUCENE-2478: Fix CachingWrapperFilter to not throw NPE when
+  Filter.getDocIdSet() returns null.  (Uwe Schindler, Daniel Noll)
+
+* LUCENE-2468: Allow specifying how new deletions should be handled in
+  CachingWrapperFilter and CachingSpanFilter. By default, new
+  deletions are ignored in CachingWrapperFilter, since typically this
+  filter is AND'd with a query that correctly takes new deletions into
+  account. This should be a performance gain (higher cache hit rate)
+  in apps that reopen readers, or use a near-real-time reader
+  (IndexWriter.getReader()), but may introduce invalid search results
+  (allowing deleted docs to be returned) for certain cases, so a new
+  expert ctor was added to CachingWrapperFilter to enforce deletions
+  at a performance cost.  CachingSpanFilter by default recaches if
+  there are new deletions (Shay Banon via Mike McCandless)
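+
+  A sketch of what the expert ctor described above might look like in use,
+  assuming it takes a DeletesMode enum as its second argument (the wrapped
+  query is a placeholder):
+
+      Filter slow = new QueryWrapperFilter(someQuery);
+      // RECACHE rebuilds the cached doc set when new deletions appear
+      Filter cached = new CachingWrapperFilter(slow,
+          CachingWrapperFilter.DeletesMode.RECACHE);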
+  CachingSpanFilter by default recaches if there are new deletions.
+  (Shay Banon via Mike McCandless)
+
+* LUCENE-2299: If you open an NRT reader while addIndexes* is running,
+  it may miss some segments (Earwin Burrfoot via Mike McCandless)
+
+* LUCENE-2397: Don't throw NPE from SnapshotDeletionPolicy.snapshot if
+  there are no commits yet (Shai Erera)
+
+* LUCENE-2424: Fix FieldDoc.toString to actually return its fields
+  (Stephen Green via Mike McCandless)
+
+* LUCENE-2311: Always pass a "fully loaded" (terms index & doc stores)
+  SegmentsReader to IndexWriter's mergedSegmentWarmer (if set), so
+  that warming is free to do whatever it needs to. (Earwin Burrfoot
+  via Mike McCandless)
+
+* LUCENE-3029: Fix corner case when MultiPhraseQuery is used with zero
+  position-increment tokens that would sometimes assign different
+  scores to identical docs. (Mike McCandless)
+
+* LUCENE-2486: Fixed intermittent FileNotFoundException on doc store
+  files when a mergedSegmentWarmer is set on IndexWriter. (Mike
+  McCandless)
+
+* LUCENE-2130: Fix performance issue when FuzzyQuery runs on a
+  multi-segment index (Michael McCandless)
+
+API Changes
+
+* LUCENE-2281: added doBeforeFlush to IndexWriter to allow extensions to perform
+  operations before flush starts. Also exposed doAfterFlush as protected instead
+  of package-private. (Shai Erera via Mike McCandless)
+
+* LUCENE-2356: Add IndexWriter.set/getReaderTermsIndexDivisor, to set
+  what IndexWriter passes for termsIndexDivisor to the readers it
+  opens internally when applying deletions or creating a
+  near-real-time reader. (Earwin Burrfoot via Mike McCandless)
+
+Optimizations
+
+* LUCENE-2494 (3.0.2 only): Use CompletionService in ParallelMultiSearcher
+  instead of simple polling for results. (Edward Drapkin, Simon Willnauer)
+
+* LUCENE-2135: On IndexReader.close, forcefully evict any entries from
+  the FieldCache rather than waiting for the WeakHashMap to release
+  the reference (Mike McCandless)
+
+* LUCENE-2161: Improve concurrency of IndexReader, especially in the
+  context of near real-time readers. (Mike McCandless)
+
+* LUCENE-2360: Small speedup to recycling of reused per-doc RAM in
+  IndexWriter (Robert Muir, Mike McCandless)
+
+Build
+
+* LUCENE-2488 (2.9.3 only): Support build with JDK 1.4 and exclude Java 1.5
+  contrib modules on request (pass '-Dforce.jdk14.build=true') when
+  compiling/testing/packaging. This marks the benchmark contrib also
+  as Java 1.5, as it depends on fast-vector-highlighter. (Uwe Schindler)
+
+================== Release 2.9.2 / 3.0.1 ====================
+
+Changes in backwards compatibility policy
+
+* LUCENE-2123 (3.0.1 only): Removed the protected inner class ScoreTerm
+  from FuzzyQuery. The change was needed because the comparator of this
+  class had to be changed in an incompatible way. The class was never
+  intended to be public. (Uwe Schindler, Mike McCandless)
+
+Bug fixes
+
+ * LUCENE-2092: BooleanQuery was ignoring disableCoord in its hashCode
+   and equals methods, causing bad things to happen when caching
+   BooleanQueries. (Chris Hostetter, Mike McCandless)
+
+ * LUCENE-2095: Fixed issue where, when two threads call IndexWriter.commit()
+   at the same time, it's possible for commit to return control back to
+   one of the threads before all changes are actually committed.
+   (Sanne Grinovero via Mike McCandless)
+
+ * LUCENE-2132 (3.0.1 only): Fix the demo result.jsp to use QueryParser
+   with a Version argument. (Brian Li via Robert Muir)
+
+ * LUCENE-2166: Don't incorrectly keep warning about the same immense
+   term when IndexWriter.infoStream is on. (Mike McCandless)
+
+ * LUCENE-2158: At high indexing rates, NRT reader could temporarily
+   lose deletions. (Mike McCandless)
+
+ * LUCENE-2182: DEFAULT_ATTRIBUTE_FACTORY was failing to load the
+   implementation class when the interface was loaded by a different
+   class loader. (Uwe Schindler, reported on java-user by Ahmed El-dawy)
+
+ * LUCENE-2257: Increase max number of unique terms in one segment to
+   termIndexInterval (default 128) * ~2.1 billion = ~274 billion.
+   (Tom Burton-West via Mike McCandless)
+
+ * LUCENE-2260: Fixed AttributeSource to not hold a strong
+   reference to the Attribute/AttributeImpl classes, which prevented
+   unloading of custom attributes loaded by other classloaders
+   (e.g. in Solr plugins). (Uwe Schindler)
+
+ * LUCENE-1941: Fix Min/MaxPayloadFunction returning 0 when
+   only one payload is present. (Erik Hatcher, Mike McCandless
+   via Uwe Schindler)
+
+ * LUCENE-2270: Queries consisting of all zero-boost clauses
+   (for example, text:foo^0) sorted incorrectly and produced
+   invalid docids. (yonik)
+
+API Changes
+
+ * LUCENE-1609 (3.0.1 only): Restore IndexReader.getTermInfosIndexDivisor
+   (it was accidentally removed in 3.0.0) (Mike McCandless)
+
+ * LUCENE-1972 (3.0.1 only): Restore SortField.getComparatorSource
+   (it was accidentally removed in 3.0.0) (John Wang via Uwe Schindler)
+
+ * LUCENE-2190: Added a new class CustomScoreProvider to the function package
+   that can be subclassed to provide custom scoring to CustomScoreQuery.
+   The methods in CustomScoreQuery that did this before were deprecated
+   and replaced by a method getCustomScoreProvider(IndexReader) that
+   returns a custom score implementation using the above class. The change
+   is necessary with per-segment searching, as CustomScoreQuery is
+   a stateless class (like all other Queries) and does not know about
+   the currently searched segment. This API works similarly to Filter's
+   getDocIdSet(IndexReader). (Paul chez Jamespot via Mike McCandless,
+   Uwe Schindler)
+
+ * LUCENE-2080: Deprecate Version.LUCENE_CURRENT, as using this constant
+   will cause backwards compatibility problems when upgrading Lucene. See
+   the Version javadocs for additional information.
+   (Robert Muir)
+
+Optimizations
+
+ * LUCENE-2086: When resolving deleted terms, do so in term sort order
+   for better performance (Bogdan Ghidireac via Mike McCandless)
+
+ * LUCENE-2123 (partly, 3.0.1 only): Fixes a slowdown / memory issue
+   added by LUCENE-504. (Uwe Schindler, Robert Muir, Mike McCandless)
+
+ * LUCENE-2258: Remove unneeded synchronization in FuzzyTermEnum.
+   (Uwe Schindler, Robert Muir)
+
+Test Cases
+
+ * LUCENE-2114: Change TestFilteredSearch to test on multi-segment
+   index as well. (Simon Willnauer via Mike McCandless)
+
+ * LUCENE-2211: Improves BaseTokenStreamTestCase to use a fake attribute
+   that checks if clearAttributes() was called correctly.
+   (Uwe Schindler, Robert Muir)
+
+ * LUCENE-2207, LUCENE-2219: Improve BaseTokenStreamTestCase to check if
+   end() is implemented correctly. (Koji Sekiguchi, Robert Muir)
+
+Documentation
+
+ * LUCENE-2114: Improve javadocs of Filter to call out that the
+   provided reader is per-segment (Simon Willnauer via Mike
+   McCandless)
+
+======================= Release 3.0.0 =======================
+
+Changes in backwards compatibility policy
+
+* LUCENE-1979: Change return type of SnapshotDeletionPolicy#snapshot()
+  from IndexCommitPoint to IndexCommit.
+  Code that uses this method
+  needs to be recompiled against Lucene 3.0 in order to work. The
+  previously deprecated IndexCommitPoint is also removed.
+  (Michael Busch)
+
+* o.a.l.Lock.isLocked() is now allowed to throw an IOException.
+  (Mike McCandless)
+
+* LUCENE-2030: CachingWrapperFilter and CachingSpanFilter now hide
+  the internal cache implementation for thread safety; before, it was
+  declared protected. (Peter Lenahan, Uwe Schindler, Simon Willnauer)
+
+* LUCENE-2053: If you call Thread.interrupt() on a thread inside
+  Lucene, Lucene will do its best to interrupt the thread. However,
+  instead of throwing InterruptedException (which is a checked
+  exception), you'll get an oal.util.ThreadInterruptedException (an
+  unchecked exception, subclassing RuntimeException). The interrupt
+  status on the thread is cleared when this exception is thrown.
+  (Mike McCandless)
+
+* LUCENE-2052: Some methods in Lucene core were changed to accept
+  Java 5 varargs. This is not a backwards compatibility problem as
+  long as you do not try to override such a method. We left common
+  overridden methods unchanged and added varargs to constructors,
+  static, or final methods (MultiSearcher,...). (Uwe Schindler)
+
+* LUCENE-1558: IndexReader.open(Directory) now opens a readOnly=true
+  reader, and new IndexSearcher(Directory) does the same. Note that
+  this is a change in the default from 2.9, when these methods were
+  previously deprecated. (Mike McCandless)
+
+* LUCENE-1753: Make previously non-final TokenStreams final to enforce
+  the decorator pattern. (Uwe Schindler)
+
+Changes in runtime behavior
+
+* LUCENE-1677: Remove the system property to set the SegmentReader class
+  implementation. (Uwe Schindler)
+
+* LUCENE-1960: As a consequence of the removal of Field.Store.COMPRESS,
+  support for this type of field was removed. Lucene 3.0 is still able
+  to read indexes with compressed fields, but as soon as merges occur
+  or the index is optimized, all compressed fields are decompressed
+  and converted to Field.Store.YES. Because of this, indexes with
+  compressed fields can suddenly get larger. Also, the first merge with
+  decompression cannot be done in raw mode and is therefore slower.
+  This change has no effect for code that uses such old indexes;
+  they behave as before (fields are automatically decompressed
+  during read). Indexes converted to Lucene 3.0 format cannot be read
+  anymore with previous versions.
+  It is recommended to optimize your indexes after upgrading to convert
+  to the new format and decompress all fields.
+  If you want compressed fields, you can use CompressionTools, which
+  creates a compressed byte[] to be added as a binary stored field. This
+  cannot be done automatically, as you also have to decompress such
+  fields when reading. You have to reindex to do that.
+  (Michael Busch, Uwe Schindler)
+
+* LUCENE-2060: Changed ConcurrentMergeScheduler's default for
+  maxNumThreads from 3 to 1, because in practice we get the most
+  gains from running a single merge in the background. More than one
+  concurrent merge causes a lot of thrashing (though it's possible on
+  SSD storage that there would be net gains). (Jason Rutherglen,
+  Mike McCandless)
+
+API Changes
+
+* LUCENE-1257, LUCENE-1984, LUCENE-1985, LUCENE-2057, LUCENE-1833, LUCENE-2012,
+  LUCENE-1998: Port to Java 1.5:
+
+  - Add generics to public and internal APIs (see below).
+  - Replace new Integer(int), new Double(double),... by static valueOf() calls.
+  - Replace for-loops with Iterator by foreach loops.
+  - Replace StringBuffer with StringBuilder.
+  - Replace o.a.l.util.Parameter by Java 5 enums (see below).
+  - Add @Override annotations.
+  (Uwe Schindler, Robert Muir, Karl Wettin, Paul Elschot, Kay Kay, Shai Erera,
+  DM Smith)
+
+* Generify Lucene API:
+
+  - TokenStream/AttributeSource: Now addAttribute()/getAttribute() return an
+    instance of the requested attribute interface and no cast is needed anymore
+    (LUCENE-1855).
+  - NumericRangeQuery, NumericRangeFilter, and FieldCacheRangeFilter
+    now have Integer, Long, Float, Double as type param (LUCENE-1857).
+  - Document.getFields() returns List.
+  - Query.extractTerms(Set)
+  - CharArraySet and stop word sets in core/contrib
+  - PriorityQueue (LUCENE-1935)
+  - TopDocCollector
+  - DisjunctionMaxQuery (LUCENE-1984)
+  - MultiTermQueryWrapperFilter
+  - CloseableThreadLocal
+  - MapOfSets
+  - o.a.l.util.cache package
+  - lots of internal APIs of IndexWriter
+  (Uwe Schindler, Michael Busch, Kay Kay, Robert Muir, Adriano Crestani)
+
+* LUCENE-1944, LUCENE-1856, LUCENE-1957, LUCENE-1960, LUCENE-1961,
+  LUCENE-1968, LUCENE-1970, LUCENE-1946, LUCENE-1971, LUCENE-1975,
+  LUCENE-1972, LUCENE-1978, LUCENE-944, LUCENE-1979, LUCENE-1973, LUCENE-2011:
+  Remove deprecated methods/constructors/classes:
+
+  - Remove all String/File directory paths in IndexReader /
+    IndexSearcher / IndexWriter.
+  - Remove FSDirectory.getDirectory()
+  - Make FSDirectory abstract.
+  - Remove Field.Store.COMPRESS (see above).
+  - Remove Filter.bits(IndexReader) method and make
+    Filter.getDocIdSet(IndexReader) abstract.
+  - Remove old DocIdSetIterator methods and make the new ones abstract.
+  - Remove some methods in PriorityQueue.
+  - Remove old TokenStream API and backwards compatibility layer.
+  - Remove RangeQuery, RangeFilter and ConstantScoreRangeQuery.
+  - Remove SpanQuery.getTerms().
+  - Remove ExtendedFieldCache, custom and auto caches, SortField.AUTO.
+  - Remove old-style custom sort.
+  - Remove legacy search setting in SortField.
+  - Remove Hits and all references from core and contrib.
+  - Remove HitCollector and its TopDocs support implementations.
+  - Remove term field and accessors in MultiTermQuery
+    (and fix Highlighter).
+  - Remove deprecated methods in BooleanQuery.
+  - Remove deprecated methods in Similarity.
+  - Remove BoostingTermQuery.
+  - Remove MultiValueSource.
+  - Remove Scorer.explain(int).
+  ...and some other minor ones (Uwe Schindler, Michael Busch, Mark Miller)
+
+* LUCENE-1925: Make IndexSearcher's subReaders and docStarts members
+  protected; add expert ctor to directly specify reader, subReaders
+  and docStarts. (John Wang, Tim Smith via Mike McCandless)
+
+* LUCENE-1945: All public classes that have a close() method now
+  also implement java.io.Closeable (IndexReader, IndexWriter, Directory,...).
+  (Uwe Schindler)
+
+* LUCENE-1998: Change all Parameter instances to Java 5 enums. This
+  is not a backwards break, only a change of the super class. Parameter
+  was deprecated and will be removed in a later version.
+  (DM Smith, Uwe Schindler)
+
+Bug fixes
+
+* LUCENE-1951: When the text provided to WildcardQuery has no wildcard
+  characters (i.e. matches a single term), don't lose the boost and
+  rewrite method settings. Also, rewrite to PrefixQuery if the
+  wildcard is of the form "foo*", for slightly faster performance. (Robert
+  Muir via Mike McCandless)
+
+* LUCENE-2013: SpanRegexQuery does not work with QueryScorer.
+  (Benjamin Keil via Mark Miller)
+
+* LUCENE-2088: addAttribute() should only accept interfaces that
+  extend Attribute. (Shai Erera, Uwe Schindler)
+
+* LUCENE-2045: Fix silly FileNotFoundException hit if you enable
+  infoStream on IndexWriter and then add an empty document and commit
+  (Shai Erera via Mike McCandless)
+
+* LUCENE-2046: IndexReader should not see the index as changed after
+  IndexWriter.prepareCommit has been called but before
+  IndexWriter.commit is called. (Peter Keegan via Mike McCandless)
+
+New features
+
+* LUCENE-1933: Provide a convenience AttributeFactory that creates a
+  Token instance for all basic attributes. (Uwe Schindler)
+
+* LUCENE-2041: Parallelize the rest of ParallelMultiSearcher. Lots of
+  code refactoring and Java 5 concurrent support in MultiSearcher.
+  (Joey Surls, Simon Willnauer via Uwe Schindler)
+
+* LUCENE-2051: Add CharArraySet.copy() as a simple method to copy
+  any Set to a CharArraySet; the copy is optimized if the Set is
+  already a CharArraySet. (Simon Willnauer)
+
+Optimizations
+
+* LUCENE-1183: Optimize Levenshtein Distance computation in
+  FuzzyQuery. (Cédrik Lime via Mike McCandless)
+
+* LUCENE-2006: Optimization of FieldDocSortedHitQueue to always
+  use the Comparable interface. (Uwe Schindler, Mark Miller)
+
+* LUCENE-2087: Remove recursion in NumericRangeTermEnum.
+  (Uwe Schindler)
+
+Build
+
+* LUCENE-486: Remove test->demo dependencies. (Michael Busch)
+
+* LUCENE-2024: Raise build requirements to Java 1.5 and ANT 1.7.0
+  (Uwe Schindler, Mike McCandless)
+
+======================= Release 2.9.1 =======================
+
+Changes in backwards compatibility policy
+
+ * LUCENE-2002: Add required Version matchVersion argument when
+   constructing QueryParser or MultiFieldQueryParser and default
+   enablePositionIncrements (as of 2.9) to true to match
+   StandardAnalyzer's 2.9 default (Uwe Schindler, Mike McCandless)
+
+Bug fixes
+
+ * LUCENE-1974: Fixed nasty bug in BooleanQuery (when it used
+   BooleanScorer for scoring), whereby some matching documents failed to
+   be collected. (Fulin Tang via Mike McCandless)
+
+ * LUCENE-1124: Make sure FuzzyQuery always matches the precise term.
+   (stefatwork@gmail.com via Mike McCandless)
+
+ * LUCENE-1976: Fix IndexReader.isCurrent() to return the right thing
+   when the reader is a near real-time reader. (Jake Mannix via Mike
+   McCandless)
+
+ * LUCENE-1986: Fix NPE when scoring PayloadNearQuery (Peter Keegan,
+   Mark Miller via Mike McCandless)
+
+ * LUCENE-1992: Fix thread hazard if a merge is committing just as an
+   exception occurs during sync (Uwe Schindler, Mike McCandless)
+
+ * LUCENE-1995: Note in javadocs that IndexWriter.setRAMBufferSizeMB
+   cannot exceed 2048 MB, and throw IllegalArgumentException if it
+   does. (Aaron McKee, Yonik Seeley, Mike McCandless)
+
+ * LUCENE-2004: Fix Constants.LUCENE_MAIN_VERSION to not be inlined
+   by client code. (Uwe Schindler)
+
+ * LUCENE-2016: Replace illegal U+FFFF character with the replacement
+   char (U+FFFD) during indexing, to prevent silent index corruption.
+   (Peter Keegan, Mike McCandless)
+
+API Changes
+
+ * Un-deprecate search(Weight weight, Filter filter, int n) from
+   Searchable interface (deprecated by accident). (Uwe Schindler)
+
+ * Un-deprecate o.a.l.util.Version constants. (Mike McCandless)
+
+ * LUCENE-1987: Un-deprecate some ctors of Token, as they will not
+   be removed in 3.0 and are still useful. Also add some missing
+   o.a.l.util.Version constants for enabling invalid acronym
+   settings in StandardAnalyzer to be compatible with the coming
+   Lucene 3.0. (Uwe Schindler)
+
+ * LUCENE-1973: Un-deprecate IndexSearcher.setDefaultFieldSortScoring,
+   to allow controlling per-IndexSearcher whether scores are computed
+   when sorting by field. (Uwe Schindler, Mike McCandless)
+
+ * LUCENE-2043: Make IndexReader.commit(Map) public.
+   (Mike McCandless)
+
+Documentation
+
+ * LUCENE-1955: Fix Hits deprecation notice to point users in the right
+   direction. (Mike McCandless, Mark Miller)
+
+ * Fix javadoc about score tracking done by search methods in Searcher
+   and IndexSearcher. (Mike McCandless)
+
+ * LUCENE-2008: Javadoc improvements for TokenStream/Tokenizer/Token
+   (Luke Nezda via Mike McCandless)
+
+======================= Release 2.9.0 =======================
+
+Changes in backwards compatibility policy
+
+ * LUCENE-1575: Searchable.search(Weight, Filter, int, Sort) no
+   longer computes a document score for each hit by default. If
+   document score tracking is still needed, you can call
+   IndexSearcher.setDefaultFieldSortScoring(true, true) to enable
+   both per-hit and maxScore tracking; however, this is deprecated
+   and will be removed in 3.0.
+
+   Alternatively, use Searchable.search(Weight, Filter, Collector)
+   and pass in a TopFieldCollector instance, using the following code
+   sample:
+
+   TopFieldCollector tfc = TopFieldCollector.create(sort, numHits, fillFields,
+                                                    true /* trackDocScores */,
+                                                    true /* trackMaxScore */,
+                                                    false /* docsInOrder */);
+   searcher.search(query, tfc);
+   TopDocs results = tfc.topDocs();
+
+   Note that your Sort object cannot use SortField.AUTO when you
+   directly instantiate TopFieldCollector.
+
+   Also, the method search(Weight, Filter, Collector) was added to
+   the Searchable interface and the Searcher abstract class to
+   replace the deprecated HitCollector versions. If you either
+   implement Searchable or extend Searcher, you should change your
+   code to implement this method. If you already extend
+   IndexSearcher, no further changes are needed to use Collector.
+
+   Finally, the values Float.NaN and Float.NEGATIVE_INFINITY are not
+   valid scores. Lucene uses these values internally in certain
+   places, so if you have hits with such scores, it will cause
+   problems. (Shai Erera via Mike McCandless)
+
+ * LUCENE-1687: All methods and parsers from the interface ExtendedFieldCache
+   have been moved into FieldCache. ExtendedFieldCache is now deprecated and
+   contains only a few declarations for binary backwards compatibility.
+   ExtendedFieldCache will be removed in version 3.0. Users of FieldCache and
+   ExtendedFieldCache will be able to plug in Lucene 2.9 without recompilation.
+   The auto cache (FieldCache.getAuto) is now deprecated. Due to the merge of
+   ExtendedFieldCache and FieldCache, FieldCache can now return
+   long[] and double[] arrays in addition to int[], float[] and StringIndex.
+
+   The interface changes are only notable for users implementing the interfaces,
+   which few users likely did, because there is no way to change
+   Lucene's FieldCache implementation. (Grant Ingersoll, Uwe Schindler)
+
+ * LUCENE-1630, LUCENE-1771: Weight, previously an interface, is now an abstract
+   class. Some of the method signatures have changed, but it should be fairly
+   easy to see what adjustments must be made to existing code to sync up
+   with the new API. You can find more detail in the API Changes section.
+
+   Going forward Searchable will be kept for convenience only and may
+   be changed between minor releases without any deprecation
+   process.
+   It is not recommended that you implement it, but rather extend
+   Searcher.
+   (Shai Erera, Chris Hostetter, Martin Ruckli, Mark Miller via Mike McCandless)
+
+ * LUCENE-1422, LUCENE-1693: The new Attribute based TokenStream API (see below)
+   has some backwards breaks in rare cases. We did our best to make the
+   transition as easy as possible and you are not likely to run into any problems.
+   If your tokenizers still implement next(Token) or next(), the calls are
+   automatically wrapped. The indexer and query parser use the new API
+   (e.g. using incrementToken() calls). All core TokenStreams are implemented using
+   the new API. You can mix old and new API style TokenFilters/TokenStreams.
+   Problems only occur when you have done the following:
+   You have overridden next(Token) or next() in one of the non-abstract core
+   TokenStreams/-Filters. These classes should normally be final, but some
+   of them are not. In this case, next(Token)/next() would never be called.
+   To fail early with a hard compile/runtime error, the next(Token)/next()
+   methods in these TokenStreams/-Filters were made final in this release.
+   (Michael Busch, Uwe Schindler)
+
+ * LUCENE-1763: MergePolicy now requires an IndexWriter instance to
+   be passed upon instantiation. As a result, IndexWriter was removed
+   as a method argument from all MergePolicy methods. (Shai Erera via
+   Mike McCandless)
+
+ * LUCENE-1748: LUCENE-1001 introduced PayloadSpans, but this was a back
+   compat break and caused custom SpanQuery implementations to fail at runtime
+   in a variety of ways. This issue attempts to remedy things by causing
+   a compile time break on custom SpanQuery implementations and removing
+   the PayloadSpans class, with its functionality now moved to Spans. To
+   help in alleviating future back compat pain, Spans has been changed from
+   an interface to an abstract class.
+   (Hugh Cayless, Mark Miller)
+
+ * LUCENE-1808: Query.createWeight has been changed from protected to
+   public. This will be a back compat break if you have overridden this
+   method - but you are likely already affected by the LUCENE-1693 (make Weight
+   abstract rather than an interface) back compat break if you have overridden
+   Query.createWeight, so we have taken the opportunity to make this change.
+   (Tim Smith, Shai Erera via Mark Miller)
+
+ * LUCENE-1708: IndexReader.document() no longer checks if the document is
+   deleted. You can call IndexReader.isDeleted(n) prior to calling document(n).
+   (Shai Erera via Mike McCandless)
+
+
+Changes in runtime behavior
+
+ * LUCENE-1424: QueryParser now by default uses constant score auto
+   rewriting when it generates a WildcardQuery and PrefixQuery (it
+   already does so for TermRangeQuery, as well). Call
+   setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE)
+   to revert to the slower BooleanQuery rewriting method. (Mark Miller via Mike
+   McCandless)
+
+ * LUCENE-1575: As of 2.9, the core collectors, as well as
+   IndexSearcher's search methods that return top N results, no
+   longer filter documents with scores <= 0.0. If you rely on this
+   functionality you can use PositiveScoresOnlyCollector like this:
+
+   TopDocsCollector tdc = new TopScoreDocCollector(10);
+   Collector c = new PositiveScoresOnlyCollector(tdc);
+   searcher.search(query, c);
+   TopDocs hits = tdc.topDocs();
+   ...
+
+ * LUCENE-1604: IndexReader.norms(String field) is now allowed to
+   return null if the field has no norms, as long as you've
+   previously called IndexReader.setDisableFakeNorms(true).
+   This setting now defaults to false (to preserve the backward-compatible
+   fake-norms behavior) but in 3.0 will be hardwired to true. (Shon
+   Vella via Mike McCandless).
+
+ * LUCENE-1624: If you open IndexWriter with create=true and
+   autoCommit=false on an existing index, IndexWriter no longer
+   writes an empty commit when it's created. (Paul Taylor via Mike
+   McCandless)
+
+ * LUCENE-1593: When you call Sort() or Sort.setSort(String field,
+   boolean reverse), the resulting SortField array no longer ends
+   with SortField.FIELD_DOC (it was unnecessary as Lucene breaks ties
+   internally by docID). (Shai Erera via Michael McCandless)
+
+ * LUCENE-1542: When the first token(s) have 0 position increment,
+   IndexWriter used to incorrectly record the position as -1, if no
+   payload is present, or Integer.MAX_VALUE if a payload is present.
+   This caused positional queries to fail to match. The bug is now
+   fixed, but if your app relies on the buggy behavior then you must
+   call IndexWriter.setAllowMinus1Position(). That API is deprecated
+   so you must fix your application, and rebuild your index, to not
+   rely on this behavior by the 3.0 release of Lucene. (Jonathan
+   Mamou, Mark Miller via Mike McCandless)
+
+
+ * LUCENE-1715: Finalizers have been removed from the 4 core classes
+   that still had them, since they will cause GC to take longer, thus
+   tying up memory for longer, and at best they mask buggy app code.
+   DirectoryReader (returned from IndexReader.open) & IndexWriter
+   previously released the write lock during finalize.
+   SimpleFSDirectory.FSIndexInput closed the descriptor in its
+   finalizer, and NativeFSLock released the lock. It's possible
+   applications will be affected by this, but only if the application
+   is failing to close reader/writers. (Brian Groose via Mike
+   McCandless)
+
+ * LUCENE-1717: Fixed IndexWriter to account for RAM usage of
+   buffered deletions. (Mike McCandless)
+
+ * LUCENE-1727: Ensure that fields are stored & retrieved in the
+   exact order in which they were added to the document. This was
+   true in all Lucene releases before 2.3, but was broken in 2.3 and
+   2.4, and is now fixed in 2.9. (Mike McCandless)
+
+ * LUCENE-1678: The addition of Analyzer.reusableTokenStream
+   accidentally broke back compatibility of external analyzers that
+   subclassed core analyzers that implemented tokenStream but not
+   reusableTokenStream. This is now fixed, such that if
+   reusableTokenStream is invoked on such a subclass, that method
+   will forcefully fall back to tokenStream. (Mike McCandless)
+
+ * LUCENE-1801: Token.clear() and Token.clearNoTermBuffer() now also clear
+   startOffset, endOffset and type. This is not likely to affect any
+   Tokenizer chains, as Tokenizers normally always set these three values.
+   This change was made so that clear() conforms to the new AttributeImpl.clear()
+   and AttributeSource.clearAttributes() and works identically for Token (the
+   one-for-all AttributeImpl) and the 6 separate AttributeImpls.
+   (Uwe Schindler, Michael Busch)
+
+ * LUCENE-1483: When searching over multiple segments, a new Scorer is now created
+   for each segment. Searching has been telescoped out a level and IndexSearcher now
+   operates much like MultiSearcher does. The Weight is created only once for the top
+   level Searcher, but each Scorer is passed a per-segment IndexReader. This will
+   result in doc ids in the Scorer being internal to the per-segment IndexReader.
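+   For example, a minimal sketch of a Collector (the Collector API is
+   described under LUCENE-1575 below; the class name here is hypothetical)
+   that rebases per-segment doc ids to top-level ids:
+
+   import java.io.IOException;
+   import org.apache.lucene.index.IndexReader;
+   import org.apache.lucene.search.Collector;
+   import org.apache.lucene.search.Scorer;
+
+   public class GlobalIdCollector extends Collector {
+     private int docBase;                          // offset of current segment
+     public void setScorer(Scorer scorer) {}       // scores not needed here
+     public void setNextReader(IndexReader reader, int docBase)
+         throws IOException {
+       this.docBase = docBase;                     // called once per sub-reader
+     }
+     public void collect(int doc) throws IOException {
+       int globalId = docBase + doc;               // doc is segment-relative
+       // ... record globalId ...
+     }
+     public boolean acceptsDocsOutOfOrder() { return true; }
+   }
+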
+   It has always been outside of the API to count on a given IndexReader to
+   contain every doc id in the index - and if you have been ignoring
+   MultiSearcher in your custom code and counting on this fact, you will find
+   your code no longer works correctly. If a custom Scorer implementation uses
+   any caches/filters that rely on being based on the top level IndexReader, it
+   will need to be updated to correctly use contextless caches/filters, e.g.
+   you can't count on the IndexReader to contain any given doc id or all of the
+   doc ids. (Mark Miller, Mike McCandless)
+
+ * LUCENE-1846: DateTools now uses the US locale to format the numbers in its
+   date/time strings instead of the default locale. For most locales there will
+   be no change in the index format, as DateFormatSymbols is using ASCII digits.
+   The usage of the US locale is important to guarantee correct ordering of
+   generated terms. (Uwe Schindler)
+
+ * LUCENE-1860: MultiTermQuery now defaults to
+   CONSTANT_SCORE_AUTO_REWRITE_DEFAULT rewrite method (previously it
+   was SCORING_BOOLEAN_QUERY_REWRITE). This means that PrefixQuery
+   and WildcardQuery will now produce constant score for all matching
+   docs, equal to the boost of the query. (Mike McCandless)
+
+API Changes
+
+ * LUCENE-1419: Add expert API to set custom indexing chain. This API is
+   package-protected for now, so we don't have to officially support it.
+   Yet, it will give us the possibility to try out different consumers
+   in the chain. (Michael Busch)
+
+ * LUCENE-1427: DocIdSet.iterator() is now allowed to throw
+   IOException. (Paul Elschot, Mike McCandless)
+
+ * LUCENE-1422, LUCENE-1693: New TokenStream API that uses a new class called
+   AttributeSource instead of the Token class, which is now a utility class that
+   holds common Token attributes. All attributes that the Token class had have
+   been moved into separate classes: TermAttribute, OffsetAttribute,
+   PositionIncrementAttribute, PayloadAttribute, TypeAttribute and FlagsAttribute.
+   The new API is much more flexible; it allows combining the Attributes
+   arbitrarily and also defining custom Attributes. The new API has the same
+   performance as the old next(Token) approach. For conformance with this new
+   API, Tee-/SinkTokenizer was deprecated and replaced by a new TeeSinkTokenFilter.
+   (Michael Busch, Uwe Schindler; additional contributions and bug fixes by
+   Daniel Shane, Doron Cohen)
+
+ * LUCENE-1467: Add nextDoc() and next(int) methods to OpenBitSetIterator.
+   These methods can be used to avoid additional calls to doc().
+   (Michael Busch)
+
+ * LUCENE-1468: Deprecate Directory.list(), which sometimes (in
+   FSDirectory) filters out files that don't look like index files, in
+   favor of new Directory.listAll(), which does no filtering. Also,
+   listAll() will never return null; instead, it throws an IOException
+   (or subclass). Specifically, FSDirectory.listAll() will throw the
+   newly added NoSuchDirectoryException if the directory does not
+   exist. (Marcel Reutegger, Mike McCandless)
+
+ * LUCENE-1546: Add IndexReader.flush(Map commitUserData), allowing
+   you to record an opaque commitUserData (maps String -> String) into
+   the commit written by IndexReader. This matches IndexWriter's
+   commit methods. (Jason Rutherglen via Mike McCandless)
+
+ * LUCENE-652: Added org.apache.lucene.document.CompressionTools, to
+   enable compressing & decompressing binary content, external to
+   Lucene's indexing. Deprecated Field.Store.COMPRESS.
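+
+   For example, a minimal sketch of the round trip (the field name and
+   surrounding variables are hypothetical; decompressString() can throw
+   java.util.zip.DataFormatException):
+
+   import org.apache.lucene.document.CompressionTools;
+   import org.apache.lucene.document.Document;
+   import org.apache.lucene.document.Field;
+
+   // at index time: store the compressed bytes as a binary field
+   Document doc = new Document();
+   doc.add(new Field("body_z", CompressionTools.compressString(bodyText),
+                     Field.Store.YES));
+
+   // at search time: the app must decompress the stored bytes itself
+   Document hit = searcher.doc(docId);
+   String body = CompressionTools.decompressString(hit.getBinaryValue("body_z"));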
+
+ * LUCENE-1561: Renamed Field.omitTf to Field.omitTermFreqAndPositions
+   (Otis Gospodnetic via Mike McCandless)
+
+ * LUCENE-1500: Added new InvalidTokenOffsetsException to Highlighter methods
+   to denote issues when offsets in TokenStream tokens exceed the length of the
+   provided text. (Mark Harwood)
+
+ * LUCENE-1575, LUCENE-1483: HitCollector is now deprecated in favor of
+   a new Collector abstract class. For easy migration, people can use
+   HitCollectorWrapper which translates (wraps) HitCollector into
+   Collector. Note that this class is also deprecated and will be
+   removed when HitCollector is removed. Also TimeLimitedCollector
+   is deprecated in favor of the new TimeLimitingCollector which
+   extends Collector. (Shai Erera, Mark Miller, Mike McCandless)
+
+ * LUCENE-1592: The method TermEnum.skipTo() was deprecated, because
+   it is used nowhere in core/contrib and there is only a very inefficient
+   default implementation available. If you want to position a TermEnum
+   to another Term, create a new one using IndexReader.terms(Term).
+   (Uwe Schindler)
+
+ * LUCENE-1621: MultiTermQuery.getTerm() has been deprecated as it does
+   not make sense for all subclasses of MultiTermQuery. Check individual
+   subclasses to see if they support getTerm(). (Mark Miller)
+
+ * LUCENE-1636: Make TokenFilter.input final so it's set only
+   once. (Wouter Heijke, Uwe Schindler via Mike McCandless).
+
+ * LUCENE-1658, LUCENE-1451: Renamed FSDirectory to SimpleFSDirectory
+   (but left an FSDirectory base class). Added an FSDirectory.open
+   static method to pick a good default FSDirectory implementation
+   given the OS. FSDirectories should now be instantiated using
+   FSDirectory.open or with public constructors rather than
+   FSDirectory.getDirectory(), which has been deprecated.
+   (Michael McCandless, Uwe Schindler, yonik)
+
+ * LUCENE-1665: Deprecate SortField.AUTO, to be removed in 3.0.
+   Instead, when sorting by field, the application should explicitly
+   state the type of the field. (Mike McCandless)
+
+ * LUCENE-1660: StopFilter, StandardAnalyzer, StopAnalyzer now
+   require up front specification of enablePositionIncrement (Mike
+   McCandless)
+
+ * LUCENE-1614: DocIdSetIterator's next() and skipTo() were deprecated in favor
+   of the new nextDoc() and advance(). The new methods return the doc Id they
+   landed on, saving an extra call to doc() in most cases.
+   For easy migration of the code, you can change the calls to next() to
+   nextDoc() != DocIdSetIterator.NO_MORE_DOCS and similarly for skipTo().
+   However, it is advised that you take advantage of the returned doc ID and not
+   call doc() following those two.
+   Also, doc() was deprecated in favor of docID(). docID() should return -1
+   if nextDoc()/advance() were not called yet, or NO_MORE_DOCS if the
+   iterator has been exhausted. Otherwise it should return the current doc ID.
+   (Shai Erera via Mike McCandless)
+
+ * LUCENE-1672: All ctors/opens and other methods using String/File to
+   specify the directory in IndexReader, IndexWriter, and IndexSearcher
+   were deprecated. You should instantiate the Directory manually beforehand
+   and pass it to these classes (LUCENE-1451, LUCENE-1658).
+   (Uwe Schindler)
+
+ * LUCENE-1407: Move RemoteSearchable, RemoteCachingWrapperFilter out
+   of Lucene's core into new contrib/remote package.
+   Searchable no
+   longer extends java.rmi.Remote (Simon Willnauer via Mike
+   McCandless)
+
+ * LUCENE-1677: The global properties
+   org.apache.lucene.SegmentReader.class and
+   ReadOnlySegmentReader.class are now deprecated, to be removed in
+   3.0. src/gcj/* has been removed. (Earwin Burrfoot via Mike
+   McCandless)
+
+ * LUCENE-1673: Deprecated NumberTools in favor of the new
+   NumericRangeQuery and its new indexing format for numeric or
+   date values. (Uwe Schindler)
+
+ * LUCENE-1630, LUCENE-1771: Weight is now an abstract class, and adds
+   a scorer(IndexReader, boolean /* scoreDocsInOrder */, boolean /*
+   topScorer */) method instead of scorer(IndexReader). IndexSearcher uses
+   this method to obtain a scorer matching the capabilities of the Collector
+   wrt ordering of docIDs. Some Scorers (like BooleanScorer) are much more
+   efficient if out-of-order document scoring is allowed by a Collector.
+   Collector must now implement acceptsDocsOutOfOrder. If you write a
+   Collector which does not care about doc ID ordering, it is recommended
+   that you return true. Weight has a scoresDocsOutOfOrder method, which by
+   default returns false. If you create a Weight which will score documents
+   out of order if requested, you should override that method to return true.
+   BooleanQuery's setAllowDocsOutOfOrder and getAllowDocsOutOfOrder have been
+   deprecated as they are not needed anymore. BooleanQuery will now score docs
+   out of order when used with a Collector that can accept docs out of order.
+   Finally, Weight#explain now takes a sub-reader and sub-docID, rather than
+   a top level reader and docID.
+   (Shai Erera, Chris Hostetter, Martin Ruckli, Mark Miller via Mike McCandless)
+
+ * LUCENE-1466, LUCENE-1906: Added CharFilter and MappingCharFilter, which allow
+   chaining & mapping of characters before tokenizers run. CharStream (subclass of
+   Reader) is the base class for custom java.io.Readers that support offset
+   correction. Tokenizers got an additional method correctOffset() that is passed
+   down to the underlying CharStream if input is a subclass of CharStream/-Filter.
+   (Koji Sekiguchi via Mike McCandless, Uwe Schindler)
+
+ * LUCENE-1703: Add IndexWriter.waitForMerges. (Tim Smith via Mike
+   McCandless)
+
+ * LUCENE-1625: CheckIndex's programmatic API now returns separate
+   classes detailing the status of each component in the index, and
+   includes more detailed status than previously. (Tim Smith via
+   Mike McCandless)
+
+ * LUCENE-1713: Deprecated RangeQuery and RangeFilter and renamed to
+   TermRangeQuery and TermRangeFilter. TermRangeQuery is in constant
+   score auto rewrite mode by default. The new classes also have new
+   ctors taking field and term ranges as Strings (see also
+   LUCENE-1424). (Uwe Schindler)
+
+ * LUCENE-1609: The termInfosIndexDivisor must now be specified
+   up-front when opening the IndexReader. Attempts to call
+   IndexReader.setTermInfosIndexDivisor will hit an
+   UnsupportedOperationException. This was done to enable removal of
+   all synchronization in TermInfosReader, which previously could
+   cause threads to pile up in certain cases. (Dan Rosher via Mike
+   McCandless)
+
+ * LUCENE-1688: Deprecate the static final String stop word array in
+   StopAnalyzer and replace it with an immutable implementation of
+   CharArraySet. (Simon Willnauer via Mark Miller)
+
+ * LUCENE-1742: SegmentInfos, SegmentInfo and SegmentReader have been
+   made public as expert, experimental APIs.
+   These APIs may suddenly
+   change from release to release (Jason Rutherglen via Mike
+   McCandless).
+
+ * LUCENE-1754: QueryWeight.scorer() can return null if no documents
+   are going to be matched by the query. Similarly,
+   Filter.getDocIdSet() can return null if no documents are going to
+   be accepted by the Filter. Note that these 'can' return null;
+   they don't have to, and may instead return a Scorer/DocIdSet which
+   matches no / rejects all documents. This is already the
+   behavior of some QueryWeight/Filter implementations, and is
+   documented here just for emphasis. (Shai Erera via Mike
+   McCandless)
+
+ * LUCENE-1705: Added IndexWriter.deleteAllDocuments. (Tim Smith via
+   Mike McCandless)
+
+ * LUCENE-1460: Changed TokenStreams/TokenFilters in contrib to
+   use the new TokenStream API. (Robert Muir, Michael Busch)
+
+ * LUCENE-1748: LUCENE-1001 introduced PayloadSpans, but this was a back
+   compat break and caused custom SpanQuery implementations to fail at runtime
+   in a variety of ways. This issue attempts to remedy things by causing
+   a compile time break on custom SpanQuery implementations and removing
+   the PayloadSpans class, with its functionality now moved to Spans. To
+   help in alleviating future back compat pain, Spans has been changed from
+   an interface to an abstract class.
+   (Hugh Cayless, Mark Miller)
+
+ * LUCENE-1808: Query.createWeight has been changed from protected to
+   public. (Tim Smith, Shai Erera via Mark Miller)
+
+ * LUCENE-1826: Add constructors that take AttributeSource and
+   AttributeFactory to all Tokenizer implementations.
+   (Michael Busch)
+
+ * LUCENE-1847: Similarity#idf for both a Term and Term Collection have
+   been deprecated. New versions that return an IDFExplanation have been
+   added. (Yasoja Seneviratne, Mike McCandless, Mark Miller)
+
+ * LUCENE-1877: Made NativeFSLockFactory the default for
+   the new FSDirectory API (open(), FSDirectory subclass ctors).
+   All FSDirectory system properties were deprecated and all lock
+   implementations use no lock prefix if the locks are stored inside
+   the index directory. Because the deprecated String/File ctors of
+   IndexWriter and IndexReader (LUCENE-1672) and FSDirectory.getDirectory()
+   still use the old SimpleFSLockFactory while the new API uses
+   NativeFSLockFactory, we strongly recommend not mixing the deprecated
+   and new APIs. (Uwe Schindler, Mike McCandless)
+
+ * LUCENE-1911: Added a new method isCacheable() to DocIdSet. This method
+   should return true if the underlying implementation does not use disk
+   I/O and is fast enough to be directly cached by CachingWrapperFilter.
+   OpenBitSet, SortedVIntList, and DocIdBitSet are such candidates.
+   The default implementation of the abstract DocIdSet class returns false.
+   In this case, CachingWrapperFilter copies the DocIdSetIterator into an
+   OpenBitSet for caching. (Uwe Schindler, Thomas Becker)
+
+Bug fixes
+
+ * LUCENE-1415: MultiPhraseQuery had incorrect hashCode() and equals()
+   implementations, leading to Solr cache misses.
+   (Todd Feak, Mark Miller via yonik)
+
+ * LUCENE-1327: Fix TermSpans#skipTo() to behave as specified in javadocs
+   of Terms#skipTo(). (Michael Busch)
+
+ * LUCENE-1573: Do not ignore InterruptedException (caused by
+   Thread.interrupt()) nor enter deadlock/spin loop. Now, an interrupt
+   will cause a RuntimeException to be thrown. In 3.0 we will change
+   public APIs to throw InterruptedException. (Jeremy Volkman via
+   Mike McCandless)
+
+ * LUCENE-1590: Fixed stored-only Field instances so they do not change the
+   value of omitNorms or omitTermFreqAndPositions in FieldInfo; when you
+   retrieve such fields they will now have omitNorms=true and
+   omitTermFreqAndPositions=false (though these values are unused).
+   (Uwe Schindler via Mike McCandless)
+
+ * LUCENE-1587: RangeQuery#equals() could consider a RangeQuery
+   without a collator equal to one with a collator.
+   (Mark Platvoet via Mark Miller)
+
+ * LUCENE-1600: Don't call String.intern unnecessarily in some cases
+   when loading documents from the index. (P Eger via Mike
+   McCandless)
+
+ * LUCENE-1611: Fix case where OutOfMemoryException in IndexWriter
+   could cause "infinite merging" to happen. (Christiaan Fluit via
+   Mike McCandless)
+
+ * LUCENE-1623: Properly handle back-compatibility of 2.3.x indexes that
+   contain field names with non-ascii characters. (Mike Streeton via
+   Mike McCandless)
+
+ * LUCENE-1593: MultiSearcher and ParallelMultiSearcher did not break ties (in
+   sort) by doc Id in a consistent manner (i.e., if Sort.FIELD_DOC was used vs.
+   when it wasn't). (Shai Erera via Michael McCandless)
+
+ * LUCENE-1647: Fix case where IndexReader.undeleteAll would cause
+   the segment's deletion count to be incorrect. (Mike McCandless)
+
+ * LUCENE-1542: When the first token(s) have 0 position increment,
+   IndexWriter used to incorrectly record the position as -1, if no
+   payload is present, or Integer.MAX_VALUE if a payload is present.
+   This caused positional queries to fail to match. The bug is now
+   fixed, but if your app relies on the buggy behavior then you must
+   call IndexWriter.setAllowMinus1Position(). That API is deprecated
+   so you must fix your application, and rebuild your index, to not
+   rely on this behavior by the 3.0 release of Lucene. (Jonathan
+   Mamou, Mark Miller via Mike McCandless)
+
+ * LUCENE-1658: Fixed MMapDirectory to correctly throw IOExceptions
+   on EOF, removed numeric overflow possibilities and added support
+   for a hack to unmap the buffers on closing IndexInput.
+   (Uwe Schindler)
+
+ * LUCENE-1681: Fix infinite loop caused by a call to DocValues methods
+   getMinValue, getMaxValue, getAverageValue. (Simon Willnauer via Mark Miller)
+
+ * LUCENE-1599: Add clone support for SpanQuerys. SpanRegexQuery counts
+   on this functionality and does not work correctly without it.
+   (Billow Gao, Mark Miller)
+
+ * LUCENE-1718: Fix termInfosIndexDivisor to carry over to reopened
+   readers (Mike McCandless)
+
+ * LUCENE-1583: SpanOrQuery skipTo() doesn't always move forwards as Spans
+   documentation indicates it should. (Moti Nisenson via Mark Miller)
+
+ * LUCENE-1566: Sun JVM Bug
+   http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6478546 causes
+   invalid OutOfMemoryError when reading too many bytes at once from
+   a file on 32bit JVMs that have a large maximum heap size. This
+   fix adds set/getReadChunkSize to FSDirectory so that large reads
+   are broken into chunks, to work around this JVM bug. On 32bit
+   JVMs the default chunk size is 100 MB; on 64bit JVMs, which don't
+   show the bug, the default is Integer.MAX_VALUE. (Simon Willnauer
+   via Mike McCandless)
+
+ * LUCENE-1448: Added TokenStream.end() to perform end-of-stream
+   operations (i.e. to return the end offset of the tokenization).
+   This is important when multiple fields with the same name are added
+   to a document, to ensure offsets recorded in term vectors for all
+   of the instances are correct.
+   (Mike McCandless, Mark Miller, Michael Busch)
+
+ * LUCENE-1805: CloseableThreadLocal did not allow a null Object in get(),
+   although it does allow it in set(Object). Fix get() to not assert the object
+   is not null. (Shai Erera via Mike McCandless)
+
+ * LUCENE-1801: Changed all Tokenizers or TokenStreams in core/contrib
+   that are the source of Tokens to always call
+   AttributeSource.clearAttributes() first. (Uwe Schindler)
+
+ * LUCENE-1819: MatchAllDocsQuery.toString(field) should produce output
+   that is parsable by the QueryParser. (John Wang, Mark Miller)
+
+ * LUCENE-1836: Fix localization bug in the new query parser and add
+   new LocalizedTestCase as base class for localization junit tests.
+   (Robert Muir, Uwe Schindler via Michael Busch)
+
+ * LUCENE-1847: PhraseQuery/TermQuery/SpanQuery used IndexReader-specific stats
+   in their Weight#explain methods - these stats should be corpus-wide.
+   (Yasoja Seneviratne, Mike McCandless, Mark Miller)
+
+ * LUCENE-1885: Fix the bug that NativeFSLock.isLocked() did not work
+   if the lock was obtained by another NativeFSLock(Factory) instance.
+   Because of this, IndexReader.isLocked() and IndexWriter.isLocked() did
+   not work correctly. (Uwe Schindler)
+
+ * LUCENE-1899: Fix O(N^2) CPU cost when setting docIDs in order in an
+   OpenBitSet, due to an inefficiency in how the underlying storage is
+   reallocated. (Nadav Har'El via Mike McCandless)
+
+ * LUCENE-1918: Fixed cases where a ParallelReader would
+   generate exceptions on being passed to
+   IndexWriter.addIndexes(IndexReader[]). First case was when the
+   ParallelReader was empty. Second case was when the ParallelReader
+   used to contain documents with TermVectors, but all such documents
+   have been deleted. (Christian Kohlschütter via Mike McCandless)
+
+New features
+
+ * LUCENE-1411: Added expert API to open an IndexWriter on a prior
+   commit, obtained from IndexReader.listCommits. This makes it
+   possible to roll back changes to an index even after you've closed
+   the IndexWriter that made the changes, assuming you are using an
+   IndexDeletionPolicy that keeps past commits around. This is useful
+   when building transactional support on top of Lucene. (Mike
+   McCandless)
+
+ * LUCENE-1382: Add an optional arbitrary Map (String -> String)
+   "commitUserData" to IndexWriter.commit(), which is stored in the
+   segments file and is then retrievable via
+   IndexReader.getCommitUserData instance and static methods.
+   (Shalin Shekhar Mangar via Mike McCandless)
+
+ * LUCENE-1420: Similarity now has a computeNorm method that allows
+   custom Similarity classes to override how norm is computed. It's
+   provided a FieldInvertState instance that contains details from
+   inverting the field. The default impl is boost *
+   lengthNorm(numTerms), to be backwards compatible. Also added
+   {set/get}DiscountOverlaps to DefaultSimilarity, to control whether
+   overlapping tokens (tokens with 0 position increment) should be
+   counted in lengthNorm. (Andrzej Bialecki via Mike McCandless)
+
+ * LUCENE-1424: Moved constant score query rewrite capability into
+   MultiTermQuery, allowing TermRangeQuery, PrefixQuery and WildcardQuery
+   to switch between constant-score rewriting or BooleanQuery
+   expansion rewriting via a new setRewriteMethod method.
+   Deprecated ConstantScoreRangeQuery (Mark Miller via Mike
+   McCandless)
+
+ * LUCENE-1461: Added FieldCacheRangeFilter, a RangeFilter for
+   single-term fields that uses FieldCache to compute the filter.
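+   For instance, one of its static factories (the field name and bounds
+   here are hypothetical):
+
+   Filter recent = FieldCacheRangeFilter.newIntRange("year", 1990, 2000,
+                                                     true, true);
+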
+   If your documents all have a single term for a given field, and you
+   need to create many RangeFilters with varying lower/upper bounds,
+   then this is likely a much faster way to create the filters than
+   RangeFilter. FieldCacheRangeFilter allows ranges on all data types
+   FieldCache supports (term ranges, byte, short, int, long, float, double).
+   However, it comes at the expense of added RAM consumption and slower
+   first-time usage due to populating the FieldCache. It also does not
+   support collation. (Tim Sturge, Matt Ericson via Mike McCandless and
+   Uwe Schindler)
+
+ * LUCENE-1296: add protected method CachingWrapperFilter.docIdSetToCache
+   to allow subclasses to choose which DocIdSet implementation to use
+   (Paul Elschot via Mike McCandless)
+
+ * LUCENE-1390: Added ASCIIFoldingFilter, a Filter that converts
+   alphabetic, numeric, and symbolic Unicode characters which are not in
+   the first 127 ASCII characters (the "Basic Latin" Unicode block) into
+   their ASCII equivalents, if one exists. ISOLatin1AccentFilter, which
+   handles a subset of this filter, has been deprecated.
+   (Andi Vajda, Steven Rowe via Mark Miller)
+
+ * LUCENE-1478: Added new SortField constructor allowing you to
+   specify a custom FieldCache parser to generate numeric values from
+   terms for a field. (Uwe Schindler via Mike McCandless)
+
+ * LUCENE-1528: Add support for Ideographic Space to the queryparser.
+   (Luis Alves via Michael Busch)
+
+ * LUCENE-1487: Added FieldCacheTermsFilter, to filter by multiple
+   terms on single-valued fields. The filter loads the FieldCache
+   for the field the first time it's called, and subsequent usage of
+   that field, even with different Terms in the filter, is fast.
+   (Tim Sturge, Shalin Shekhar Mangar via Mike McCandless).
+
+ * LUCENE-1314: Add clone(), clone(boolean readOnly) and
+   reopen(boolean readOnly) to IndexReader. Cloning an IndexReader
+   gives you a new reader which you can make changes to (deletions,
+   norms) without affecting the original reader. Now, with clone or
+   reopen you can change the readOnly setting of the original reader. (Jason
+   Rutherglen, Mike McCandless)
+
+ * LUCENE-1506: Added FilteredDocIdSet, an abstract class which you
+   subclass to implement the "match" method to accept or reject each
+   docID. Unlike ChainedFilter (under contrib/misc),
+   FilteredDocIdSet never requires you to materialize the full
+   bitset. Instead, match() is called on demand per docID. (John
+   Wang via Mike McCandless)
+
+ * LUCENE-1398: Add ReverseStringFilter to contrib/analyzers, a filter
+   to reverse the characters in each token. (Koji Sekiguchi via yonik)
+
+ * LUCENE-1551: Add expert IndexReader.reopen(IndexCommit) to allow
+   efficiently opening a new reader on a specific commit, sharing
+   resources with the original reader. (Torin Danil via Mike
+   McCandless)
+
+ * LUCENE-1434: Added org.apache.lucene.util.IndexableBinaryStringTools,
+   to encode byte[] as String values that are valid terms, and
+   maintain sort order of the original byte[] when the bytes are
+   interpreted as unsigned. (Steven Rowe via Mike McCandless)
+
+ * LUCENE-1543: Allow MatchAllDocsQuery to optionally use norms from
+   a specific field to set the score for a document. (Karl Wettin
+   via Mike McCandless)
+
+ * LUCENE-1586: Add IndexReader.getUniqueTermCount(). (Mike
+   McCandless via Derek)
+
+ * LUCENE-1516: Added "near real-time search" to IndexWriter, via a
+   new expert getReader() method.
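+   A minimal sketch of the turnaround (the directory, analyzer and doc
+   variables are hypothetical; the 2.9-era IndexWriter ctor is assumed):
+
+   IndexWriter writer = new IndexWriter(dir, analyzer,
+                                        IndexWriter.MaxFieldLength.UNLIMITED);
+   writer.addDocument(doc);
+   // no commit needed: the NRT reader sees the buffered addDocument
+   IndexReader nrtReader = writer.getReader();
+   IndexSearcher searcher = new IndexSearcher(nrtReader);
+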
+   This method returns a reader that
+   searches the full index, including any uncommitted changes in the
+   current IndexWriter session. This should result in a faster
+   turnaround than the normal approach of committing the changes and
+   then reopening a reader. (Jason Rutherglen via Mike McCandless)
+
+ * LUCENE-1603: Added new MultiTermQueryWrapperFilter, to wrap any
+   MultiTermQuery as a Filter. Also made some improvements to
+   MultiTermQuery: return DocIdSet.EMPTY_DOCIDSET if there are no
+   terms in the enum; track the total number of terms it visited
+   during rewrite (getTotalNumberOfTerms). FilteredTermEnum is also
+   more friendly to subclassing. (Uwe Schindler via Mike McCandless)
+
+ * LUCENE-1605: Added BitVector.subset(). (Jeremy Volkman via Mike
+   McCandless)
+
+ * LUCENE-1618: Added FileSwitchDirectory that enables files with
+   specified extensions to be stored in a primary directory and the
+   rest of the files to be stored in a secondary directory. For
+   example, this can be useful for the large doc-store (stored
+   fields, term vectors) files in FSDirectory and the rest of the
+   index files in a RAMDirectory. (Jason Rutherglen via Mike
+   McCandless)
+
+ * LUCENE-1494: Added FieldMaskingSpanQuery which can be used to
+   cross-correlate Spans from different fields.
+   (Paul Cowan and Chris Hostetter)
+
+ * LUCENE-1634: Add calibrateSizeByDeletes to LogMergePolicy, to take
+   deletions into account when considering merges. (Yasuhiro Matsuda
+   via Mike McCandless)
+
+ * LUCENE-1550: Added new n-gram based String distance measure for spell checking.
+   See the Javadocs for NGramDistance.java for a reference paper on why
+   this is helpful. (Tom Morton via Grant Ingersoll)
+
+ * LUCENE-1470, LUCENE-1582, LUCENE-1602, LUCENE-1673, LUCENE-1701, LUCENE-1712:
+   Added NumericRangeQuery and NumericRangeFilter, a fast alternative to
+   RangeQuery/RangeFilter for numeric searches. They depend on a specific
+   structure of terms in the index that can be created by indexing
+   using the new NumericField or NumericTokenStream classes. NumericField
+   can only be used for indexing and optionally stores the values as
+   a string representation in the doc store. Documents returned from
+   IndexReader/IndexSearcher will return only the String value using
+   the standard Fieldable interface. NumericFields can be sorted on
+   and loaded into the FieldCache. (Uwe Schindler, Yonik Seeley,
+   Mike McCandless)
+
+ * LUCENE-1405: Added support for Ant resource collections in contrib/ant
+   task. (Przemyslaw Sztoch via Erik Hatcher)
+
+ * LUCENE-1699: Allow setting a TokenStream on Field/Fieldable for indexing
+   in conjunction with any other ways to specify stored field values,
+   currently binary or string values. (yonik)
+
+ * LUCENE-1701: Made the standard FieldCache.Parsers public and added
+   parsers for fields generated using NumericField/NumericTokenStream.
+   All standard parsers now also implement Serializable and enforce
+   their singleton status. (Uwe Schindler, Mike McCandless)
+
+ * LUCENE-1741: User-configurable maximum chunk size in MMapDirectory.
+   On 32 bit platforms, the address space can be very fragmented, so
+   one big ByteBuffer for the whole file may not fit into address space.
+   (Eks Dev via Uwe Schindler)
+
+ * LUCENE-1644: Enable 4 rewrite modes for queries deriving from
+   MultiTermQuery (WildcardQuery, PrefixQuery, TermRangeQuery,
+   NumericRangeQuery): CONSTANT_SCORE_FILTER_REWRITE first creates a
+   filter and then assigns constant score (boost) to docs;
+   CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE creates a BooleanQuery but
+   uses a constant score (boost); SCORING_BOOLEAN_QUERY_REWRITE also
+   creates a BooleanQuery but keeps the BooleanQuery's scores;
+   CONSTANT_SCORE_AUTO_REWRITE tries to pick the most performant
+   constant-score rewrite method. (Mike McCandless)
+
+ * LUCENE-1448: Added TokenStream.end(), to perform end-of-stream
+   operations. This is currently used to fix offset problems when
+   multiple fields with the same name are added to a document.
+   (Mike McCandless, Mark Miller, Michael Busch)
+
+ * LUCENE-1776: Add an option to not collect payloads for an ordered
+   SpanNearQuery. Payloads were not lazily loaded in this case as
+   the javadocs implied. If you have payloads and want to use an ordered
+   SpanNearQuery that does not need to use the payloads, you can
+   disable loading them with a new constructor switch. (Mark Miller)
+
+ * LUCENE-1341: Added PayloadNearQuery to enable SpanNearQuery functionality
+   with payloads (Peter Keegan, Grant Ingersoll, Mark Miller)
+
+ * LUCENE-1790: Added PayloadTermQuery to enable scoring of payloads
+   based on the maximum payload seen for a document.
+   Slight refactoring of Similarity and other payload queries (Grant Ingersoll, Mark Miller)
+
+ * LUCENE-1749: Addition of FieldCacheSanityChecker utility, and
+   hooks to use it in all existing Lucene Tests. This class can
+   be used by any application to inspect the FieldCache and provide
+   diagnostic information about the possibility of inconsistent
+   FieldCache usage. Namely: FieldCache entries for the same field
+   with different datatypes or parsers; and FieldCache entries for
+   the same field in both a reader and one of its (descendant) sub
+   readers.
+   (Chris Hostetter, Mark Miller)
+
+ * LUCENE-1789: Added utility class
+   oal.search.function.MultiValueSource to ease the transition to
+   segment based searching for any apps that directly call
+   oal.search.function.* APIs. This class wraps any other
+   ValueSource, but takes care when composite (multi-segment) readers are
+   passed to not double RAM usage in the FieldCache. (Chris
+   Hostetter, Mark Miller, Mike McCandless)
+
+Optimizations
+
+ * LUCENE-1427: Fixed QueryWrapperFilter to not waste time computing
+   scores of the query, since they are just discarded. Also, made it
+   more efficient (single pass) by not creating & populating an
+   intermediate OpenBitSet (Paul Elschot, Mike McCandless)
+
+ * LUCENE-1443: Performance improvement for OpenBitSetDISI.inPlaceAnd()
+   (Paul Elschot via yonik)
+
+ * LUCENE-1484: Remove synchronization of IndexReader.document() by
+   using CloseableThreadLocal internally. (Jason Rutherglen via Mike
+   McCandless).
+
+ * LUCENE-1124: Short-circuit FuzzyQuery.rewrite when input token length
+   is small compared to minSimilarity. (Timo Nentwig, Mark Miller)
+
+ * LUCENE-1316: MatchAllDocsQuery now avoids the synchronized
+   IndexReader.isDeleted() call per document, by directly accessing
+   the underlying deleteDocs BitVector. This improves performance
+   with non-readOnly readers, especially in a multi-threaded
+   environment. (Todd Feak, Yonik Seeley, Jason Rutherglen via Mike
+   McCandless)
+
+ * LUCENE-1483: When searching over multiple segments we now visit
+   each sub-reader one at a time.
+
+ * LUCENE-1483: When searching over multiple segments we now visit
+   each sub-reader one at a time. This speeds up warming, since
+   FieldCache entries (if required) can be shared across reopens for
+   those segments that did not change, and also speeds up searches
+   that sort by relevance or by field values. (Mark Miller, Mike
+   McCandless)
+
+ * LUCENE-1575: The new Collector class decouples collect() from
+   score computation. Collector.setScorer is called to establish the
+   current Scorer in-use per segment. Collectors that require the
+   score should then call Scorer.score() per hit inside
+   collect(). (Shai Erera via Mike McCandless)
+
+ * LUCENE-1596: MultiTermDocs speedup when set with
+   MultiTermDocs.seek(MultiTermEnum) (yonik)
+
+ * LUCENE-1653: Avoid creating a Calendar in every call to
+   DateTools#dateToString, DateTools#timeToString and
+   DateTools#round. (Shai Erera via Mark Miller)
+
+ * LUCENE-1688: Deprecate static final String stop word array and
+   replace it with an immutable implementation of CharArraySet.
+   Removes conversions between Set and array.
+   (Simon Willnauer via Mark Miller)
+
+ * LUCENE-1754: BooleanQuery.queryWeight.scorer() will return null if
+   it won't match any documents (e.g. if there are no required and
+   optional scorers, or not enough optional scorers to satisfy
+   minShouldMatch). (Shai Erera via Mike McCandless)
+
+ * LUCENE-1607: To speed up string interning for commonly used
+   strings, the StringHelper.intern() interface was added with a
+   default implementation that uses a lockless cache.
+   (Earwin Burrfoot, yonik)
+
+ * LUCENE-1800: QueryParser should use reusable TokenStreams. (yonik)
+
+
+Documentation
+
+ * LUCENE-1908: Scoring documentation improvements in Similarity javadocs.
+   (Mark Miller, Shai Erera, Ted Dunning, Jiri Kuhn, Marvin Humphrey,
+   Doron Cohen)
+
+ * LUCENE-1872: NumericField javadoc improvements
+   (Michael McCandless, Uwe Schindler)
+
+ * LUCENE-1875: Make TokenStream.end javadoc less confusing.
+   (Uwe Schindler)
+
+ * LUCENE-1862: Rectified duplicate package level javadocs for
+   o.a.l.queryParser and o.a.l.analysis.cn.
+   (Chris Hostetter)
+
+ * LUCENE-1886: Improved hyperlinking in key Analysis javadocs
+   (Bernd Fondermann via Chris Hostetter)
+
+ * LUCENE-1884: massive javadoc and comment cleanup, primarily dealing
+   with typos.
+   (Robert Muir via Chris Hostetter)
+
+ * LUCENE-1898: Switch changes to use bullets rather than numbers and
+   update changes-to-html script to handle the new format.
+   (Steven Rowe, Mark Miller)
+
+ * LUCENE-1900: Improve Searchable Javadoc.
+   (Nadav Har'El, Doron Cohen, Marvin Humphrey, Mark Miller)
+
+ * LUCENE-1896: Improve Similarity#queryNorm javadocs.
+   (Jiri Kuhn, Mark Miller)
+
+Build
+
+ * LUCENE-1440: Add new targets to build.xml that allow downloading
+   and executing the junit testcases from an older release for
+   backwards-compatibility testing. (Michael Busch)
+
+ * LUCENE-1446: Add compatibility tag to common-build.xml and run
+   backwards-compatibility tests in the nightly build. (Michael Busch)
+
+ * LUCENE-1529: Properly test "drop-in" replacement of jar with
+   backwards-compatibility tests. (Mike McCandless, Michael Busch)
+
+ * LUCENE-1851: Change 'javacc' and 'clean-javacc' targets to build
+   and clean contrib/surround files. (Luis Alves via Michael Busch)
+
+ * LUCENE-1854: tar task should use longfile="gnu" to avoid false file
+   name length warnings. (Mark Miller)
+
+Test Cases
+
+ * LUCENE-1791: Enhancements to the QueryUtils and CheckHits utility
+   classes to wrap IndexReaders and Searchers in MultiReaders or
+   MultiSearcher when possible to help exercise more edge cases.
+   (Chris Hostetter, Mark Miller)
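+
+   A minimal sketch of the Collector protocol from the LUCENE-1575
+   entry above (the 0.5f threshold is an arbitrary example):
+
+     import java.io.IOException;
+     import org.apache.lucene.index.IndexReader;
+     import org.apache.lucene.search.Collector;
+     import org.apache.lucene.search.Scorer;
+
+     public class ThresholdCountingCollector extends Collector {
+       private Scorer scorer;
+       private int count;
+       public void setScorer(Scorer scorer) { this.scorer = scorer; }
+       public void collect(int doc) throws IOException {
+         // score computation is decoupled; ask the Scorer only if needed
+         if (scorer.score() > 0.5f) count++;
+       }
+       public void setNextReader(IndexReader reader, int docBase) { }
+       public boolean acceptsDocsOutOfOrder() { return true; }
+       public int getCount() { return count; }
+     }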
+
+ * LUCENE-1852: Fix localization test failures.
+   (Robert Muir via Michael Busch)
+
+ * LUCENE-1843: Refactored all tests that use assertAnalyzesTo() & others
+   in core and contrib to use a new BaseTokenStreamTestCase
+   base class. Also rewrote some tests to use these general analysis
+   assert functions instead of their own (e.g. TestMappingCharFilter).
+   The new base class also tests tokenization with the TokenStream.next()
+   backwards layer enabled (using Token/TokenWrapper as attribute
+   implementation) and disabled (default for Lucene 3.0).
+   (Uwe Schindler, Robert Muir)
+
+ * LUCENE-1836: Added a new LocalizedTestCase as base class for
+   localization junit tests. (Robert Muir, Uwe Schindler via Michael
+   Busch)
+
+======================= Release 2.4.1 =======================
+
+API Changes
+
+1. LUCENE-1186: Add Analyzer.close() to free internal ThreadLocal
+   resources. (Christian Kohlschütter via Mike McCandless)
+
+Bug fixes
+
+1. LUCENE-1452: Fixed silent data-loss case whereby binary fields are
+   truncated to 0 bytes during merging if the segments being merged
+   are non-congruent (same field name maps to different field
+   numbers). This bug was introduced with LUCENE-1219. (Andrzej
+   Bialecki via Mike McCandless).
+
+2. LUCENE-1429: Don't throw incorrect IllegalStateException from
+   IndexWriter.close() if you've hit an OOM when autoCommit is true.
+   (Mike McCandless)
+
+3. LUCENE-1474: If IndexReader.flush() is called twice when there were
+   pending deletions, it could lead to later false AssertionError
+   during IndexReader.open. (Mike McCandless)
+
+4. LUCENE-1430: Fix false AlreadyClosedException from IndexReader.open
+   (masking an actual IOException) that takes String or File path.
+   (Mike McCandless)
+
+5. LUCENE-1442: Multiple-valued NOT_ANALYZED fields can double-count
+   token offsets. (Mike McCandless)
+
+6. LUCENE-1453: Ensure IndexReader.reopen()/clone() does not result in
+   incorrectly closing the shared FSDirectory. This bug would only
+   happen if you use IndexReader.open() with a File or String argument.
+   The returned readers are wrapped by a FilterIndexReader that
+   correctly handles closing of directory after reopen()/clone().
+   (Mark Miller, Uwe Schindler, Mike McCandless)
+
+7. LUCENE-1457: Fix possible overflow bugs during binary
+   searches. (Mark Miller via Mike McCandless)
+
+8. LUCENE-1459: Fix CachingWrapperFilter to not throw exception if
+   both bits() and getDocIdSet() methods are called. (Matt Jones via
+   Mike McCandless)
+
+9. LUCENE-1519: Fix int overflow bug during segment merging. (Deepak
+   via Mike McCandless)
+
+10. LUCENE-1521: Fix int overflow bug when flushing segment.
+    (Shon Vella via Mike McCandless).
+
+11. LUCENE-1544: Fix deadlock in IndexWriter.addIndexes(IndexReader[]).
+    (Mike McCandless via Doug Sale)
+
+12. LUCENE-1547: Fix rare thread safety issue if two threads call
+    IndexWriter commit() at the same time. (Mike McCandless)
+
+13. LUCENE-1465: NearSpansOrdered returns payloads from first possible
+    match rather than the correct, shortest match; Payloads could be
+    returned even if the max slop was exceeded; The wrong payload could
+    be returned in certain situations. (Jonathan Mamou, Greg Shackles,
+    Mark Miller)
+
+14. LUCENE-1186: Add Analyzer.close() to free internal ThreadLocal
+    resources. (Christian Kohlschütter via Mike McCandless)
+
+15. LUCENE-1552: Fix IndexWriter.addIndexes(IndexReader[]) to properly
+    rollback IndexWriter's internal state on hitting an
+    exception. (Scott Garland via Mike McCandless)
+
+======================= Release 2.4.0 =======================
+
+Changes in backwards compatibility policy
+
+1. LUCENE-1340: In a minor change to Lucene's backward compatibility
+   policy, we are now allowing the Fieldable interface to have
+   changes, within reason, and made on a case-by-case basis. If an
+   application implements its own Fieldable, please be aware of
+   this. Otherwise, there is no need to be concerned. This is in
+   effect for all 2.X releases, starting with 2.4. Also note that, in
+   all likelihood, Fieldable will be changed in 3.0.
+
+
+Changes in runtime behavior
+
+ 1. LUCENE-1151: Fix StandardAnalyzer to not mis-identify host names
+    (eg lucene.apache.org) as an ACRONYM. To get back to the pre-2.4
+    backwards compatible, but buggy, behavior, you can either call
+    StandardAnalyzer.setDefaultReplaceInvalidAcronym(false) (static
+    method), or set the system property
+    org.apache.lucene.analysis.standard.StandardAnalyzer.replaceInvalidAcronym
+    to "false" on JVM startup. All StandardAnalyzer instances created
+    after that will then show the pre-2.4 behavior. Alternatively,
+    you can call setReplaceInvalidAcronym(false) to change the
+    behavior per instance of StandardAnalyzer. This backwards
+    compatibility will be removed in 3.0 (hardwiring the value to
+    true). (Mike McCandless)
+
+ 2. LUCENE-1044: IndexWriter with autoCommit=true now commits (such
+    that a reader can see the changes) far less often than it used to.
+    Previously, every flush was also a commit. You can always force a
+    commit by calling IndexWriter.commit(). Furthermore, in 3.0,
+    autoCommit will be hardwired to false (IndexWriter constructors
+    that take an autoCommit argument have been deprecated) (Mike
+    McCandless)
+
+ 3. LUCENE-1335: IndexWriter.addIndexes(Directory[]) and
+    addIndexesNoOptimize no longer allow the same Directory instance
+    to be passed in more than once. Internally, IndexWriter uses
+    Directory and segment name to uniquely identify segments, so
+    adding the same Directory more than once was causing duplicates
+    which led to problems (Mike McCandless)
+
+ 4. LUCENE-1396: Improve PhraseQuery.toString() so that gaps in the
+    positions are indicated with a ? and multiple terms at the same
+    position are joined with a |. (Andrzej Bialecki via Mike
+    McCandless)
+
+API Changes
+
+ 1. LUCENE-1084: Changed all IndexWriter constructors to take an
+    explicit parameter for maximum field size. Deprecated all the
+    pre-existing constructors; these will be removed in release 3.0.
+    NOTE: these new constructors set autoCommit to false. (Steven
+    Rowe via Mike McCandless)
+
+ 2. LUCENE-584: Changed Filter API to return a DocIdSet instead of a
+    java.util.BitSet. This allows using more efficient data structures
+    for Filters and makes them more flexible. This deprecates
+    Filter.bits(), so all filters that implement this outside
+    the Lucene code base will need to be adapted. See also the javadocs
+    of the Filter class. (Paul Elschot, Michael Busch)
+
+ 3. LUCENE-1044: Added IndexWriter.commit() which flushes any buffered
+    adds/deletes and then commits a new segments file so readers will
+    see the changes. Deprecate IndexWriter.flush() in favor of
+    IndexWriter.commit(). (Mike McCandless)
+
+ 4. LUCENE-325: Added IndexWriter.expungeDeletes methods, which
+    consult the MergePolicy to find merges necessary to merge away all
+    deletes from the index. This should be a somewhat lower cost
+    operation than optimize. (John Wang via Mike McCandless)
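+
+    A minimal sketch of a Filter under the DocIdSet API from the
+    LUCENE-584 entry above (the even-document criterion is a stand-in
+    for real logic; OpenBitSet extends DocIdSet, so it can be returned
+    directly):
+
+      import java.io.IOException;
+      import org.apache.lucene.index.IndexReader;
+      import org.apache.lucene.search.DocIdSet;
+      import org.apache.lucene.search.Filter;
+      import org.apache.lucene.util.OpenBitSet;
+
+      public class EvenDocsFilter extends Filter {
+        public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
+          OpenBitSet bits = new OpenBitSet(reader.maxDoc());
+          for (int doc = 0; doc < reader.maxDoc(); doc += 2) {
+            bits.set(doc);
+          }
+          return bits;
+        }
+      }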
+
+ 5. LUCENE-1233: Return empty array instead of null when no fields
+    match the specified name in these methods in Document:
+    getFieldables, getFields, getValues, getBinaryValues. (Stefan
+    Trcek via Mike McCandless)
+
+ 6. LUCENE-1234: Make BoostingSpanScorer protected. (Andi Vajda via
+    Grant Ingersoll)
+
+ 7. LUCENE-510: The index now stores strings as true UTF-8 bytes
+    (previously it was Java's modified UTF-8). If any text, either
+    stored fields or a token, has illegal UTF-16 surrogate characters,
+    these characters are now silently replaced with the Unicode
+    replacement character U+FFFD. This is a change to the index file
+    format. (Marvin Humphrey via Mike McCandless)
+
+ 8. LUCENE-852: Let the SpellChecker caller specify IndexWriter
+    mergeFactor and RAM buffer size. (Otis Gospodnetic)
+
+ 9. LUCENE-1290: Deprecate org.apache.lucene.search.Hits, Hit and
+    HitIterator and remove all references to these classes from the
+    core. Also update demos and tutorials. (Michael Busch)
+
+10. LUCENE-1288: Add getVersion() and getGeneration() to IndexCommit.
+    getVersion() returns the same value that IndexReader.getVersion()
+    returns when the reader is opened on the same commit. (Jason
+    Rutherglen via Mike McCandless)
+
+11. LUCENE-1311: Added IndexReader.listCommits(Directory) static
+    method to list all commits in a Directory, plus IndexReader.open
+    methods that accept an IndexCommit and open the index as of that
+    commit. These methods are only useful if you implement a custom
+    DeletionPolicy that keeps more than the last commit around.
+    (Jason Rutherglen via Mike McCandless)
+
+12. LUCENE-1325: Added IndexCommit.isOptimized(). (Shalin Shekhar
+    Mangar via Mike McCandless)
+
+13. LUCENE-1324: Added TokenFilter.reset(). (Shai Erera via Mike
+    McCandless)
+
+14. LUCENE-1340: Added Fieldable.omitTf() method to skip indexing term
+    frequency, positions and payloads. This saves index space, and
+    indexing/searching time. (Eks Dev via Mike McCandless)
+
+15. LUCENE-1219: Add basic reuse API to Fieldable for binary fields:
+    getBinaryValue/Offset/Length(); currently only lazy fields reuse
+    the provided byte[] result to getBinaryValue. (Eks Dev via Mike
+    McCandless)
+
+16. LUCENE-1334: Add new constructor for Term: Term(String fieldName)
+    which defaults term text to "". (DM Smith via Mike McCandless)
+
+17. LUCENE-1333: Added Token.reinit(*) APIs to re-initialize (reuse) a
+    Token. Also added term() method to return a String, with a
+    performance penalty clearly documented. Also implemented
+    hashCode() and equals() in Token, and fixed all core and contrib
+    analyzers to use the re-use APIs. (DM Smith via Mike McCandless)
+
+18. LUCENE-1329: Add optional readOnly boolean when opening an
+    IndexReader. A readOnly reader is not allowed to make changes
+    (deletions, norms) to the index; in exchange, the isDeleted
+    method, often a bottleneck when searching with many threads, is
+    not synchronized. The default for readOnly is still false, but in
+    3.0 the default will become true. (Jason Rutherglen via Mike
+    McCandless)
+
+19. LUCENE-1367: Add IndexCommit.isDeleted(). (Shalin Shekhar Mangar
+    via Mike McCandless)
+
+20. LUCENE-1061: Factored out all "new XXXQuery(...)" in
+    QueryParser.java into protected methods newXXXQuery(...) so that
+    subclasses can create their own subclasses of each Query type.
+    (John Wang via Mike McCandless)
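+
+    A minimal sketch of the readOnly flag from the LUCENE-1329 entry
+    above (assuming the two-argument open(Directory, boolean) overload;
+    the path is hypothetical):
+
+      import org.apache.lucene.index.IndexReader;
+      import org.apache.lucene.store.Directory;
+      import org.apache.lucene.store.FSDirectory;
+
+      Directory dir = FSDirectory.getDirectory("/path/to/index");
+      // readOnly = true: no deletes/norms changes, but unsynchronized
+      // isDeleted(), which helps heavily multi-threaded searching
+      IndexReader reader = IndexReader.open(dir, true);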
+
+21. LUCENE-753: Added new Directory implementation
+    org.apache.lucene.store.NIOFSDirectory, which uses java.nio's
+    FileChannel to do file reads. On most non-Windows platforms, with
+    many threads sharing a single searcher, this may yield sizable
+    improvement to query throughput when compared to FSDirectory,
+    which only allows a single thread to read from an open file at a
+    time. (Jason Rutherglen via Mike McCandless)
+
+22. LUCENE-1371: Added convenience method TopDocs
+    Searcher.search(Query query, int n). (Mike McCandless)
+
+23. LUCENE-1356: Allow easy extensions of TopDocCollector by turning
+    constructor and fields from package to protected. (Shai Erera
+    via Doron Cohen)
+
+24. LUCENE-1375: Added convenience method IndexCommit.getTimestamp,
+    which is equivalent to
+    getDirectory().fileModified(getSegmentsFileName()). (Mike McCandless)
+
+25. LUCENE-1366: Rename Field.Index options to be more accurate:
+    TOKENIZED becomes ANALYZED; UN_TOKENIZED becomes NOT_ANALYZED;
+    NO_NORMS becomes NOT_ANALYZED_NO_NORMS and a new ANALYZED_NO_NORMS
+    is added. (Mike McCandless)
+
+26. LUCENE-1131: Added numDeletedDocs method to IndexReader (Otis
+    Gospodnetic)
+
+Bug fixes
+
+ 1. LUCENE-1134: Fixed BooleanQuery.rewrite to only optimize a single
+    clause query if minNumShouldMatch<=0. (Shai Erera via Michael Busch)
+
+ 2. LUCENE-1169: Fixed bug in IndexSearcher.search(): searching with
+    a filter might miss some hits because scorer.skipTo() is called
+    without checking if the scorer is already at the right position.
+    scorer.skipTo(scorer.doc()) is not a NOOP; it behaves as
+    scorer.next(). (Eks Dev, Michael Busch)
+
+ 3. LUCENE-1182: Added scorePayload to SimilarityDelegator (Andi Vajda
+    via Grant Ingersoll)
+
+ 4. LUCENE-1213: MultiFieldQueryParser was ignoring slop in case
+    of a single field phrase. (Trejkaz via Doron Cohen)
+
+ 5. LUCENE-1228: IndexWriter.commit() was not updating the index
+    version, and as a result IndexReader.reopen() failed to sense
+    index changes. (Doron Cohen)
+
+ 6. LUCENE-1267: Added numDocs() and maxDoc() to IndexWriter;
+    deprecated docCount(). (Mike McCandless)
+
+ 7. LUCENE-1274: Added new prepareCommit() method to IndexWriter,
+    which does phase 1 of a 2-phase commit (commit() does phase 2).
+    This is needed when you want to update an index as part of a
+    transaction involving external resources (eg a database). Also
+    deprecated abort(), renaming it to rollback(). (Mike McCandless)
+
+ 8. LUCENE-1003: Stop RussianAnalyzer from removing numbers.
+    (TUSUR OpenTeam, Dmitry Lihachev via Otis Gospodnetic)
+
+ 9. LUCENE-1152: SpellChecker fix around clearIndex and indexDictionary
+    methods, plus removal of IndexReader reference.
+    (Naveen Belkale via Otis Gospodnetic)
+
+10. LUCENE-1046: Removed dead code in SpellChecker
+    (Daniel Naber via Otis Gospodnetic)
+
+11. LUCENE-1189: Fixed the QueryParser to handle escaped characters
+    within quoted terms correctly. (Tomer Gabel via Michael Busch)
+
+12. LUCENE-1299: Fixed NPE in SpellChecker when IndexReader is not null
+    and field is null. (Grant Ingersoll)
+
+13. LUCENE-1303: Fixed BoostingTermQuery's explanation to be marked as
+    a Match depending only upon the non-payload score part, regardless
+    of the effect of the payload on the score. Prior to this, the score
+    of a query containing a BTQ differed from its explanation. (Doron
+    Cohen)
+
+14. LUCENE-1310: Fixed SloppyPhraseScorer to work also for terms
+    repeating more than twice in the query. (Doron Cohen)
+
+15. LUCENE-1351: ISOLatin1AccentFilter now cleans additional ligatures
+    (Cedrik Lime via Grant Ingersoll)
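+
+    A minimal sketch of the 2-phase commit described in the LUCENE-1274
+    entry above (the external database object and its methods are
+    hypothetical):
+
+      import org.apache.lucene.index.IndexWriter;
+
+      void commitBoth(IndexWriter writer, ExternalDb db) throws Exception {
+        writer.prepareCommit();   // phase 1: write, but don't publish
+        try {
+          db.commit();            // commit the external resource
+          writer.commit();        // phase 2: make the changes visible
+        } catch (Exception e) {
+          writer.rollback();      // undo phase 1 on any failure
+          throw e;
+        }
+      }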
+
+16. LUCENE-1383: Workaround a nasty "leak" in Java's builtin
+    ThreadLocal, to prevent Lucene from causing unexpected
+    OutOfMemoryError in certain situations (notably J2EE
+    applications). (Chris Lu via Mike McCandless)
+
+New features
+
+ 1. LUCENE-1137: Added Token.set/getFlags() accessors for passing more
+    information about a Token through the analysis process. The flag is
+    not indexed/stored and is thus only used by analysis.
+
+ 2. LUCENE-1147: Add -segment option to CheckIndex tool so you can
+    check only a specific segment or segments in your index. (Mike
+    McCandless)
+
+ 3. LUCENE-1045: Reopened this issue to add support for short and bytes.
+
+ 4. LUCENE-584: Added new data structures to o.a.l.util, such as
+    OpenBitSet and SortedVIntList. These extend DocIdSet and can
+    directly be used for Filters with the new Filter API. Also changed
+    the core Filters to use OpenBitSet instead of java.util.BitSet.
+    (Paul Elschot, Michael Busch)
+
+ 5. LUCENE-494: Added QueryAutoStopWordAnalyzer to allow for the
+    automatic removal, from a query, of frequently occurring terms.
+    This Analyzer is not intended for use during indexing. (Mark
+    Harwood via Grant Ingersoll)
+
+ 6. LUCENE-1044: Change Lucene to properly "sync" files after
+    committing, to ensure that, on a machine or OS crash or power cut,
+    even with cached writes, the index remains consistent. Also added
+    explicit commit() method to IndexWriter to force a commit without
+    having to close. (Mike McCandless)
+
+ 7. LUCENE-997: Add search timeout (partial) support.
+    A TimeLimitedCollector was added to allow limiting search time.
+    It is a partial solution since the timeout is checked only when
+    collecting a hit, and therefore a search for rare words in a
+    huge index might not stop within the specified time.
+    (Sean Timm via Doron Cohen)
+
+ 8. LUCENE-1184: Allow SnapshotDeletionPolicy to be re-used across
+    close/re-open of IndexWriter while still protecting an open
+    snapshot (Tim Brennan via Mike McCandless)
+
+ 9. LUCENE-1194: Added IndexWriter.deleteDocuments(Query) to delete
+    documents matching the specified query. Also added static unlock
+    and isLocked methods (deprecating the ones in IndexReader). (Mike
+    McCandless)
+
+10. LUCENE-1201: Add IndexReader.getIndexCommit() method. (Tim Brennan
+    via Mike McCandless)
+
+11. LUCENE-550: Added InstantiatedIndex implementation. Experimental
+    Index store similar to MemoryIndex but allows for multiple documents
+    in memory. (Karl Wettin via Grant Ingersoll)
+
+12. LUCENE-400: Added word based n-gram filter (in contrib/analyzers)
+    called ShingleFilter and an Analyzer wrapper that wraps another
+    Analyzer's token stream with a ShingleFilter (Sebastian Kirsch,
+    Steve Rowe via Grant Ingersoll)
+
+13. LUCENE-1166: Decomposition tokenfilter for languages like German
+    and Swedish (Thomas Peuss via Grant Ingersoll)
+
+14. LUCENE-1187: ChainedFilter and BooleanFilter now work with new
+    Filter API and DocIdSetIterator-based filters.
+    Backwards-compatibility with old BitSet-based filters is ensured.
+    (Paul Elschot via Michael Busch)
+
+15. LUCENE-1295: Added new method to MoreLikeThis for retrieving
+    interesting terms and made retrieveTerms(int) public. (Grant
+    Ingersoll)
+
+16. LUCENE-1298: MoreLikeThis can now accept a custom Similarity (Grant
+    Ingersoll)
+
+17. LUCENE-1297: Allow other string distance measures for the
+    SpellChecker (Thomas Morton via Otis Gospodnetic)
+
+18. LUCENE-1001: Provide access to Payloads via Spans. All existing
+    Span Query implementations in Lucene implement it. (Mark Miller,
+    Grant Ingersoll)
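+
+    A minimal sketch of LUCENE-1194's deleteDocuments(Query) from the
+    entry above (field name and value are hypothetical):
+
+      import org.apache.lucene.index.IndexWriter;
+      import org.apache.lucene.index.Term;
+      import org.apache.lucene.search.TermQuery;
+
+      void deleteExpired(IndexWriter writer) throws Exception {
+        writer.deleteDocuments(new TermQuery(new Term("state", "expired")));
+        writer.commit();   // make the deletes visible to new readers
+      }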
+
+19. LUCENE-1354: Provide programmatic access to CheckIndex (Grant
+    Ingersoll, Mike McCandless)
+
+20. LUCENE-1279: Add support for Collators to RangeFilter/Query and
+    Query Parser. (Steve Rowe via Grant Ingersoll)
+
+Optimizations
+
+ 1. LUCENE-705: When building a compound file, use
+    RandomAccessFile.setLength() to tell the OS/filesystem to
+    pre-allocate space for the file. This may reduce fragmentation
+    in how the CFS file is stored, and allows us to detect an upcoming
+    disk full situation before actually filling up the disk. (Mike
+    McCandless)
+
+ 2. LUCENE-1120: Speed up merging of term vectors by bulk-copying the
+    raw bytes for each contiguous range of non-deleted documents.
+    (Mike McCandless)
+
+ 3. LUCENE-1185: Avoid checking if the TermBuffer 'scratch' in
+    SegmentTermEnum is null for every call of scanTo().
+    (Christian Kohlschuetter via Michael Busch)
+
+ 4. LUCENE-1217: Internal to Field.java, use isBinary instead of
+    runtime type checking for possible speedup of binaryValue().
+    (Eks Dev via Mike McCandless)
+
+ 5. LUCENE-1183: Optimized TRStringDistance class (in contrib/spell)
+    that uses less memory than the previous version. (Cédrik LIME via
+    Otis Gospodnetic)
+
+ 6. LUCENE-1195: Improve term lookup performance by adding an LRU cache
+    to the TermInfosReader. In performance experiments the speedup was
+    about 25% on average on mid-size indexes with ~500,000 documents
+    for queries with 3 terms and about 7% on larger indexes with ~4.3M
+    documents. (Michael Busch)
+
+Documentation
+
+ 1. LUCENE-1236: Added some clarifying remarks to EdgeNGram*.java
+    (Hiroaki Kawai via Grant Ingersoll)
+
+ 2. LUCENE-1157 and LUCENE-1256: HTML changes log, created automatically
+    from CHANGES.txt. This HTML file is currently visible only via the
+    developers page. (Steven Rowe via Doron Cohen)
+
+ 3. LUCENE-1349: Fieldable can now be changed without breaking backward
+    compatibility rules (within reason; see the note at the top of this
+    file and also on Fieldable.java). (Grant Ingersoll)
+
+ 4. LUCENE-1873: Update documentation to reflect current Contrib area
+    status. (Steven Rowe, Mark Miller)
+
+Build
+
+ 1. LUCENE-1153: Added JUnit JAR to new lib directory. Updated build to
+    rely on local JUnit instead of ANT/lib.
+
+ 2. LUCENE-1202: Small fixes to the way Clover is used to work better
+    with contribs. Of particular note: a single clover db is used
+    regardless of whether tests are run globally or in the specific
+    contrib directories.
+
+ 3. LUCENE-1353: Javacc target in contrib/miscellaneous for
+    generating the precedence query parser.
+
+Test Cases
+
+ 1. LUCENE-1238: Fixed intermittent failures of
+    TestTimeLimitedCollector.testTimeoutMultiThreaded. Within this fix,
+    a "greedy" flag was added to TimeLimitedCollector, to allow the
+    wrapped collector to also collect the last doc after the allowed
+    time has passed. (Doron Cohen)
+
+ 2. LUCENE-1348: relax TestTimeLimitedCollector to not fail due to
+    timeout exceeded (just because the test machine is very busy).
+
+======================= Release 2.3.2 =======================
+
+Bug fixes
+
+ 1. LUCENE-1191: On hitting OutOfMemoryError in any index-modifying
+    methods in IndexWriter, do not commit any further changes to the
+    index to prevent risk of possible corruption. (Mike McCandless)
+
+ 2. LUCENE-1197: Fixed issue whereby IndexWriter would flush by RAM
+    too early when TermVectors were in use. (Mike McCandless)
+
+ 3. LUCENE-1198: Don't corrupt index if an exception happens inside
+    DocumentsWriter.init (Mike McCandless)
+
+ 4. LUCENE-1199: Added defensive check for null indexReader before
+    calling close in IndexModifier.close() (Mike McCandless)
+
+ 5. LUCENE-1200: Fix rare deadlock case in addIndexes* when
+    ConcurrentMergeScheduler is in use (Mike McCandless)
+
+ 6. LUCENE-1208: Fix deadlock case on hitting an exception while
+    processing a document that had triggered a flush (Mike McCandless)
+
+ 7. LUCENE-1210: Fix deadlock case on hitting an exception while
+    starting a merge when using ConcurrentMergeScheduler (Mike McCandless)
+
+ 8. LUCENE-1222: Fix IndexWriter.doAfterFlush to always be called on
+    flush (Mark Ferguson via Mike McCandless)
+
+ 9. LUCENE-1226: Fixed IndexWriter.addIndexes(IndexReader[]) to commit
+    successfully created compound files. (Michael Busch)
+
+10. LUCENE-1150: Re-expose StandardTokenizer's constants publicly;
+    this was accidentally lost with LUCENE-966. (Nicolas Lalevée via
+    Mike McCandless)
+
+11. LUCENE-1262: Fixed bug in BufferedIndexInput.refill whereby on
+    hitting an exception in readInternal, the buffer is incorrectly
+    filled with stale bytes such that subsequent calls to readByte()
+    return incorrect results. (Trejkaz via Mike McCandless)
+
+12. LUCENE-1270: Fixed intermittent case where IndexWriter.close()
+    would hang after IndexWriter.addIndexesNoOptimize had been
+    called. (Stu Hood via Mike McCandless)
+
+Build
+
+ 1. LUCENE-1230: Include *pom.xml* in source release files. (Michael
+    Busch)
+
+
+======================= Release 2.3.1 =======================
+
+Bug fixes
+
+ 1. LUCENE-1168: Fixed corruption cases when autoCommit=false and
+    documents have mixed term vectors (Suresh Guvvala via Mike
+    McCandless).
+
+ 2. LUCENE-1171: Fixed some cases where OOM errors could cause
+    deadlock in IndexWriter (Mike McCandless).
+
+ 3. LUCENE-1173: Fixed corruption case when autoCommit=false and bulk
+    merging of stored fields is used (Yonik via Mike McCandless).
+
+ 4. LUCENE-1163: Fixed bug in CharArraySet.contains(char[] buffer, int
+    offset, int len) that was ignoring offset and thus giving the
+    wrong answer. (Thomas Peuss via Mike McCandless)
+
+ 5. LUCENE-1177: Fix rare case where IndexWriter.optimize might do too
+    many merges at the end. (Mike McCandless)
+
+ 6. LUCENE-1176: Fix corruption case when documents with no term
+    vector fields are added before documents with term vector fields.
+    (Mike McCandless)
+
+ 7. LUCENE-1179: Fixed assert statement that was incorrectly
+    preventing Fields with empty-string field name from working.
+    (Sergey Kabashnyuk via Mike McCandless)
+
+======================= Release 2.3.0 =======================
+
+Changes in runtime behavior
+
+ 1. LUCENE-994: Defaults for IndexWriter have been changed to maximize
+    out-of-the-box indexing speed. First, IndexWriter now flushes by
+    RAM usage (16 MB by default) instead of a fixed doc count (call
+    IndexWriter.setMaxBufferedDocs to get backwards compatible
+    behavior). Second, ConcurrentMergeScheduler is used to run merges
+    using background threads (call IndexWriter.setMergeScheduler(new
+    SerialMergeScheduler()) to get backwards compatible behavior).
+    Third, merges are chosen based on size in bytes of each segment
+    rather than document count of each segment (call
+    IndexWriter.setMergePolicy(new LogDocMergePolicy()) to get
+    backwards compatible behavior).
+
+    NOTE: users of ParallelReader must change back all of these
+    defaults in order to ensure the docIDs "align" across all parallel
+    indices.
+
+    (Mike McCandless)
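+
+    A minimal sketch of restoring the pre-2.3 behavior per the
+    LUCENE-994 entry above (10 docs was the old flush-by-doc-count
+    default; treat that exact value as an assumption):
+
+      import org.apache.lucene.index.IndexWriter;
+      import org.apache.lucene.index.LogDocMergePolicy;
+      import org.apache.lucene.index.SerialMergeScheduler;
+
+      void useLegacyDefaults(IndexWriter writer) {
+        writer.setMaxBufferedDocs(10);                        // flush by doc count
+        writer.setMergeScheduler(new SerialMergeScheduler()); // foreground merges
+        writer.setMergePolicy(new LogDocMergePolicy());       // merge by doc count
+      }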
+
+ 2. LUCENE-1045: SortField.AUTO didn't work with long. When detecting
+    the field type for sorting automatically, numbers used to be
+    interpreted as int, then as float, if parsing the number as an int
+    failed. Now the detection checks for int, then for long,
+    then for float. (Daniel Naber)
+
+API Changes
+
+ 1. LUCENE-843: Added IndexWriter.setRAMBufferSizeMB(...) to have
+    IndexWriter flush whenever the buffered documents are using more
+    than the specified amount of RAM. Also added new APIs to Token
+    that allow one to set a char[] plus offset and length to specify a
+    token (to avoid creating a new String() for each Token). (Mike
+    McCandless)
+
+ 2. LUCENE-963: Add setters to Field to allow for re-using a single
+    Field instance during indexing. This is a sizable performance
+    gain, especially for small documents. (Mike McCandless)
+
+ 3. LUCENE-969: Add new APIs to Token, TokenStream and Analyzer to
+    permit re-using of Token and TokenStream instances during
+    indexing. Changed Token to use a char[] as the store for the
+    termText instead of String. This gives faster tokenization
+    performance (~10-15%). (Mike McCandless)
+
+ 4. LUCENE-847: Factored MergePolicy, which determines which merges
+    should take place and when, as well as MergeScheduler, which
+    determines when the selected merges should actually run, out of
+    IndexWriter. The default merge policy is now
+    LogByteSizeMergePolicy (see LUCENE-845) and the default merge
+    scheduler is now ConcurrentMergeScheduler (see
+    LUCENE-870). (Steven Parkes via Mike McCandless)
+
+ 5. LUCENE-1052: Add IndexReader.setTermInfosIndexDivisor(int) method
+    that allows you to reduce memory usage of the termInfos by further
+    sub-sampling (over the termIndexInterval that was used during
+    indexing) which terms are loaded into memory. (Chuck Williams,
+    Doug Cutting via Mike McCandless)
+
+ 6. LUCENE-743: Add IndexReader.reopen() method that re-opens an
+    existing IndexReader (see New features -> 8.) (Michael Busch)
+
+ 7. LUCENE-1062: Add setData(byte[] data),
+    setData(byte[] data, int offset, int length), getData(), getOffset()
+    and clone() methods to o.a.l.index.Payload. Also add the field name
+    as arg to Similarity.scorePayload(). (Michael Busch)
+
+ 8. LUCENE-982: Add IndexWriter.optimize(int maxNumSegments) method to
+    "partially optimize" an index down to maxNumSegments segments.
+    (Mike McCandless)
+
+ 9. LUCENE-1080: Changed Token.DEFAULT_TYPE to be public.
+
+10. LUCENE-1064: Changed TopDocs constructor to be public.
+    (Shai Erera via Michael Busch)
+
+11. LUCENE-1079: DocValues cleanup: constructor now has no params,
+    and getInnerArray() now throws UnsupportedOperationException (Doron
+    Cohen)
+
+12. LUCENE-1089: Added PriorityQueue.insertWithOverflow, which returns
+    the Object (if any) that was bumped from the queue to allow
+    re-use. (Shai Erera via Mike McCandless)
+
+13. LUCENE-1101: Token reuse 'contract' (defined LUCENE-969)
+    modified so it is the token producer's responsibility
+    to call Token.clear(). (Doron Cohen)
+
+14. LUCENE-1118: Changed StandardAnalyzer to skip too-long (default >
+    255 characters) tokens. You can increase this limit by calling
+    StandardAnalyzer.setMaxTokenLength(...). (Michael McCandless)
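+
+    A minimal sketch of the LUCENE-843 API from the entry above (the
+    buffer size is an example value, not a recommendation):
+
+      import org.apache.lucene.analysis.standard.StandardAnalyzer;
+      import org.apache.lucene.index.IndexWriter;
+      import org.apache.lucene.store.Directory;
+
+      void openWriter(Directory dir) throws Exception {
+        IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true);
+        writer.setRAMBufferSizeMB(48.0);  // flush once buffered docs use ~48 MB
+      }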
+
+Bug fixes
+
+ 1. LUCENE-933: QueryParser fixed to not produce empty sub
+    BooleanQueries "()" even if the Analyzer produced no
+    tokens for input. (Doron Cohen)
+
+ 2. LUCENE-955: Fixed SegmentTermPositions to work correctly with the
+    first term in the dictionary. (Michael Busch)
+
+ 3. LUCENE-951: Fixed NullPointerException in MultiLevelSkipListReader
+    that was thrown after a call of TermPositions.seek().
+    (Rich Johnson via Michael Busch)
+
+ 4. LUCENE-938: Fixed cases where an unhandled exception in
+    IndexWriter's methods could cause deletes to be lost.
+    (Steven Parkes via Mike McCandless)
+
+ 5. LUCENE-962: Fixed case where an unhandled exception in
+    IndexWriter.addDocument or IndexWriter.updateDocument could cause
+    unreferenced files in the index to not be deleted
+    (Steven Parkes via Mike McCandless)
+
+ 6. LUCENE-957: RAMDirectory fixed to properly handle directories
+    larger than Integer.MAX_VALUE. (Doron Cohen)
+
+ 7. LUCENE-781: MultiReader fixed to not throw NPE if isCurrent(),
+    isOptimized() or getVersion() is called. Separated MultiReader
+    into two classes: MultiSegmentReader extends IndexReader, is
+    package-protected and is created automatically by IndexReader.open()
+    in case the index has multiple segments. The public MultiReader
+    now extends MultiSegmentReader and is intended to be used by users
+    who want to add their own subreaders. (Daniel Naber, Michael Busch)
+
+ 8. LUCENE-970: FilterIndexReader now implements isOptimized(). Before,
+    a call of isOptimized() would throw an NPE. (Michael Busch)
+
+ 9. LUCENE-832: ParallelReader fixed to not throw NPE if isCurrent(),
+    isOptimized() or getVersion() is called. (Michael Busch)
+
+10. LUCENE-948: Fix FileNotFoundException caused by stale NFS client
+    directory listing caches when writers on different machines are
+    sharing an index over NFS and using a custom deletion policy (Mike
+    McCandless)
+
+11. LUCENE-978: Ensure TermInfosReader and FieldsReader close any
+    streams they had opened if an exception is hit in the
+    constructor. (Ning Li via Mike McCandless)
+
+12. LUCENE-985: If an extremely long term is in a doc (> 16383 chars),
+    we now throw an IllegalArgumentException saying the term is too
+    long, instead of a cryptic ArrayIndexOutOfBoundsException. (Karl
+    Wettin via Mike McCandless)
+
+13. LUCENE-991: The explain() method of BoostingTermQuery had errors
+    when no payloads were present on a document. (Peter Keegan via
+    Grant Ingersoll)
+
+14. LUCENE-992: Fixed IndexWriter.updateDocument to be atomic again
+    (this was broken by LUCENE-843). (Ning Li via Mike McCandless)
+
+15. LUCENE-1008: Fixed corruption case when document with no term
+    vector fields is added after documents with term vector fields.
+    This bug was introduced with LUCENE-843. (Grant Ingersoll via
+    Mike McCandless)
+
+16. LUCENE-1006: Fixed QueryParser to accept a "" field value (zero
+    length quoted string.) (yonik)
+
+17. LUCENE-1010: Fixed corruption case when document with no term
+    vector fields is added after documents with term vector fields.
+    This case is hit during merge and would cause an EOFException.
+    This bug was introduced with LUCENE-984. (Andi Vajda via Mike
+    McCandless)
+
+18. LUCENE-1009: Fix merge slowdown with LogByteSizeMergePolicy when
+    autoCommit=false and documents are using stored fields and/or term
+    vectors. (Mark Miller via Mike McCandless)
+
+19. LUCENE-1011: Fixed corruption case when two or more machines,
+    sharing an index over NFS, can be writers in quick succession.
+    (Patrick Kimber via Mike McCandless)
+
+20. LUCENE-1028: Fixed Weight serialization for a few queries:
+    DisjunctionMaxQuery, ValueSourceQuery, CustomScoreQuery.
+    Serialization check added for all queries.
+    (Kyle Maxwell via Doron Cohen)
+
+21. LUCENE-1048: Fixed incorrect behavior in Lock.obtain(...) when the
+    timeout argument is very large (eg Long.MAX_VALUE). Also added
+    Lock.LOCK_OBTAIN_WAIT_FOREVER constant to never timeout. (Nikolay
+    Diakov via Mike McCandless)
+
+22. LUCENE-1050: Throw LockReleaseFailedException in
+    Simple/NativeFSLockFactory if we fail to delete the lock file when
+    releasing the lock. (Nikolay Diakov via Mike McCandless)
+
+23. LUCENE-1071: Fixed SegmentMerger to correctly set the payload bit
+    in the merged segment. (Michael Busch)
+
+24. LUCENE-1042: Remove throwing of IOException in
+    getTermFreqVector(int, String, TermVectorMapper) to be consistent
+    with other getTermFreqVector calls. Also removed the throwing of
+    the other IOException in that method to be consistent. (Karl Wettin
+    via Grant Ingersoll)
+
+25. LUCENE-1096: Fixed Hits behavior when hits' docs are deleted
+    along with iterating the hits. Deleting docs already retrieved
+    now works seamlessly. If docs not yet retrieved are deleted
+    (e.g. from another thread), and then, relying on the initial
+    Hits.length(), an application attempts to retrieve more hits
+    than actually exist, a ConcurrentModificationException
+    is thrown. (Doron Cohen)
+
+26. LUCENE-1068: Changed StandardTokenizer to fix an issue with it
+    marking the type of some tokens incorrectly. This is done by adding
+    a new flag named replaceInvalidAcronym which defaults to false, the
+    current, incorrect behavior. Setting this flag to true fixes the
+    problem. This flag is a temporary fix and is already marked as
+    being deprecated. 3.x will implement the correct approach. (Shai
+    Erera via Grant Ingersoll)
+    LUCENE-1140: Fixed NPE caused by 1068 (Alexei Dets via Grant
+    Ingersoll)
+
+27. LUCENE-749: ChainedFilter behavior fixed when logic of
+    first filter is ANDNOT. (Antonio Bruno via Doron Cohen)
+
+28. LUCENE-508: Make sure SegmentTermEnum.prev() is accurate (= last
+    term) after next() returns false. (Steven Tamm via Mike
+    McCandless)
+
+
+New features
+
+ 1. LUCENE-906: Elision filter for French.
+    (Mathieu Lecarme via Otis Gospodnetic)
+
+ 2. LUCENE-960: Added a SpanQueryFilter and related classes to allow for
+    not only filtering, but knowing where in a Document a Filter matches
+    (Grant Ingersoll)
+
+ 3. LUCENE-868: Added new Term Vector access features. New callback
+    mechanism allows application to define how and where to read Term
+    Vectors from disk. This implementation contains several extensions
+    of the new abstract TermVectorMapper class. The new API should be
+    back-compatible. No changes in the actual storage of Term Vectors
+    have taken place.
+    3.1 LUCENE-1038: Added setDocumentNumber() method to TermVectorMapper
+    to provide information about what document is being accessed.
+    (Karl Wettin via Grant Ingersoll)
+
+ 4. LUCENE-975: Added PositionBasedTermVectorMapper that allows for
+    position based lookup of term vector information.
+    See item #3 above (LUCENE-868).
+
+ 5. LUCENE-1011: Added simple tools (all in org.apache.lucene.store)
+    to verify that locking is working properly. LockVerifyServer runs
+    a separate server to verify locks. LockStressTest runs a simple
+    tool that rapidly obtains and releases locks.
+    VerifyingLockFactory is a LockFactory that wraps any other
+    LockFactory and consults the LockVerifyServer whenever a lock is
+    obtained or released, throwing an exception if an illegal lock
+    obtain occurred. (Patrick Kimber via Mike McCandless)
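+
+    A minimal sketch of the TermVectorMapper callback from the
+    LUCENE-868 entry above (signatures follow this release's abstract
+    class; the printing is only for illustration):
+
+      import org.apache.lucene.index.TermVectorMapper;
+      import org.apache.lucene.index.TermVectorOffsetInfo;
+
+      public class PrintingMapper extends TermVectorMapper {
+        public void setExpectations(String field, int numTerms,
+            boolean storeOffsets, boolean storePositions) {
+          System.out.println(field + ": " + numTerms + " terms");
+        }
+        public void map(String term, int frequency,
+            TermVectorOffsetInfo[] offsets, int[] positions) {
+          System.out.println("  " + term + " x" + frequency);
+        }
+      }
+
+      // usage: reader.getTermFreqVector(docId, "body", new PrintingMapper());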
+
+ 6. LUCENE-1015: Added FieldCache extension (ExtendedFieldCache) to
+    support doubles and longs. Added support into SortField for sorting
+    on doubles and longs as well. (Grant Ingersoll)
+
+ 7. LUCENE-1020: Created basic index checking & repair tool
+    (o.a.l.index.CheckIndex). When run without -fix it does a
+    detailed test of all segments in the index and reports summary
+    information and any errors it hit. With -fix it will remove
+    segments that had errors. (Mike McCandless)
+
+ 8. LUCENE-743: Add IndexReader.reopen() method that re-opens an
+    existing IndexReader by only loading those portions of an index
+    that have changed since the reader was (re)opened. reopen() can
+    be significantly faster than open(), depending on the amount of
+    index changes. SegmentReader, MultiSegmentReader, MultiReader,
+    and ParallelReader implement reopen(). (Michael Busch)
+
+ 9. LUCENE-1040: Added CharArraySet, useful for efficiently checking
+    set membership of text specified by char[]. (yonik)
+
+10. LUCENE-1073: Created SnapshotDeletionPolicy to facilitate taking a
+    live backup of an index without pausing indexing. (Mike
+    McCandless)
+
+11. LUCENE-1019: CustomScoreQuery enhanced to support multiple
+    ValueSource queries. (Kyle Maxwell via Doron Cohen)
+
+12. LUCENE-1095: Added an option to StopFilter to increase
+    positionIncrement of the token succeeding a stopped token.
+    Disabled by default. Similar option added to QueryParser
+    to consider token positions when creating PhraseQuery
+    and MultiPhraseQuery. Disabled by default (so by default
+    the query parser ignores position increments).
+    (Doron Cohen)
+
+13. LUCENE-1380: Added TokenFilter for setting position increment in
+    special cases related to the ShingleFilter (Mck SembWever, Steve
+    Rowe, Karl Wettin via Grant Ingersoll)
+
+
+Optimizations
+
+ 1. LUCENE-937: CachingTokenFilter now uses an iterator to access the
+    Tokens that are cached in the LinkedList. This increases performance
+    significantly, especially when the number of Tokens is large.
+    (Mark Miller via Michael Busch)
+
+ 2. LUCENE-843: Substantial optimizations to improve how IndexWriter
+    uses RAM for buffering documents and to speed up indexing (2X-8X
+    faster). A single shared hash table now records the in-memory
+    postings per unique term and is directly flushed into a single
+    segment. (Mike McCandless)
+
+ 3. LUCENE-892: Fixed extra "buffer to buffer copy" that sometimes
+    takes place when using compound files. (Mike McCandless)
+
+ 4. LUCENE-959: Remove synchronization in Document (yonik)
+
+ 5. LUCENE-963: Add setters to Field to allow for re-using a single
+    Field instance during indexing. This is a sizable performance
+    gain, especially for small documents. (Mike McCandless)
+
+ 6. LUCENE-939: Check explicitly for boundary conditions in FieldInfos
+    and don't rely on exceptions. (Michael Busch)
+
+ 7. LUCENE-966: Very substantial speedups (~6X faster) for
+    StandardTokenizer (StandardAnalyzer) by using JFlex instead of
+    JavaCC to generate the tokenizer.
+    (Stanislaw Osinski via Mike McCandless)
+
+ 8. LUCENE-969: Changed core tokenizers & filters to re-use Token and
+    TokenStream instances when possible to improve tokenization
+    performance (~10-15%). (Mike McCandless)
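+
+    A minimal sketch of the reopen() idiom from the LUCENE-743 entry
+    above:
+
+      import org.apache.lucene.index.IndexReader;
+
+      IndexReader maybeReopen(IndexReader reader) throws Exception {
+        IndexReader newReader = reader.reopen();
+        if (newReader != reader) {  // same instance means nothing changed
+          reader.close();           // unchanged segments are shared
+        }
+        return newReader;
+      }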
+
+ 9. LUCENE-871: Speedup ISOLatin1AccentFilter (Ian Boston via Mike
+    McCandless)
+
+10. LUCENE-986: Refactored SegmentInfos from IndexReader into the new
+    subclass DirectoryIndexReader. SegmentReader and MultiSegmentReader
+    now extend DirectoryIndexReader and are the only IndexReader
+    implementations that use SegmentInfos to access an index and
+    acquire a write lock for index modifications. (Michael Busch)
+
+11. LUCENE-1007: Allow flushing in IndexWriter to be triggered by
+    either RAM usage or document count or both (whichever comes
+    first), by adding symbolic constant DISABLE_AUTO_FLUSH to disable
+    one of the flush triggers. (Ning Li via Mike McCandless)
+
+12. LUCENE-1043: Speed up merging of stored fields by bulk-copying the
+    raw bytes for each contiguous range of non-deleted documents.
+    (Robert Engels via Mike McCandless)
+
+13. LUCENE-693: Speed up nested conjunctions (~2x) that match many
+    documents, and a slight performance increase for top level
+    conjunctions. (yonik)
+
+14. LUCENE-1098: Make inner class StandardAnalyzer.SavedStreams static
+    and final. (Nathan Beyer via Michael Busch)
+
+Documentation
+
+ 1. LUCENE-1051: Generate separate javadocs for core, demo and contrib
+    classes, as well as a unified view. Also add an appropriate menu
+    structure to the website. (Michael Busch)
+
+ 2. LUCENE-746: Fix error message in AnalyzingQueryParser.getPrefixQuery.
+    (Ronnie Kolehmainen via Michael Busch)
+
+Build
+
+ 1. LUCENE-908: Improvements and simplifications for how the MANIFEST
+    file and the META-INF dir are created. (Michael Busch)
+
+ 2. LUCENE-935: Various improvements for the maven artifacts. Now the
+    artifacts also include the sources as .jar files. (Michael Busch)
+
+ 3. Added apply-patch target to top-level build. Defaults to looking for
+    a patch in ${basedir}/../patches with name specified by -Dpatch.name.
+    Can also specify any location by -Dpatch.file property on the command
+    line. This should be helpful for easy application of patches, but it
+    is also a step towards integrating automatic patch application with
+    JIRA and Hudson, and is thus subject to change. (Grant Ingersoll)
+
+ 4. LUCENE-935: Defined property "m2.repository.url" to allow setting
+    the url to a maven remote repository to deploy to. (Michael Busch)
+
+ 5. LUCENE-1051: Include javadocs in the maven artifacts. (Michael Busch)
+
+ 6. LUCENE-1055: Remove gdata-server from build files and its sources
+    from trunk. (Michael Busch)
+
+ 7. LUCENE-935: Allow deploying maven artifacts to a remote m2 repository
+    via scp and ssh authentication. (Michael Busch)
+
+ 8. LUCENE-1123: Allow overriding the specification version for
+    MANIFEST.MF (Michael Busch)
+
+Test Cases
+
+ 1. LUCENE-766: Test adding two fields with the same name but different
+    term vector setting. (Nicolas Lalevée via Doron Cohen)
+
+======================= Release 2.2.0 =======================
+
+Changes in runtime behavior
+
+API Changes
+
+ 1. LUCENE-793: created new exceptions and added them to throws clause
+    for many methods (all subclasses of IOException for backwards
+    compatibility): index.StaleReaderException,
+    index.CorruptIndexException, store.LockObtainFailedException.
+    This was done to better call out the possible root causes of an
+    IOException from these methods. (Mike McCandless)
+
+ 2. LUCENE-811: make SegmentInfos class, plus a few methods from related
+    classes, package-private again (they were unnecessarily made public
+    as part of LUCENE-701). (Mike McCandless)
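+
+    A minimal sketch of catching the LUCENE-793 exceptions from the
+    entry above (they extend IOException, so existing callers keep
+    working):
+
+      import org.apache.lucene.analysis.standard.StandardAnalyzer;
+      import org.apache.lucene.index.CorruptIndexException;
+      import org.apache.lucene.index.IndexWriter;
+      import org.apache.lucene.store.Directory;
+      import org.apache.lucene.store.LockObtainFailedException;
+
+      void openSafely(Directory dir) throws Exception {
+        try {
+          IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), false);
+          writer.close();
+        } catch (CorruptIndexException e) {
+          // the index itself is damaged
+        } catch (LockObtainFailedException e) {
+          // another writer currently holds the write lock
+        }
+      }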
+
+ 3. LUCENE-710: added optional autoCommit boolean to IndexWriter
+    constructors. When this is false, index changes are not committed
+    until the writer is closed. This gives explicit control over when
+    a reader will see the changes. Also added optional custom
+    deletion policy to explicitly control when prior commits are
+    removed from the index. This is intended to allow applications to
+    share an index over NFS by customizing when prior commits are
+    deleted. (Mike McCandless)
+
+ 4. LUCENE-818: changed most public methods of IndexWriter,
+    IndexReader (and its subclasses), FieldsReader and RAMDirectory to
+    throw AlreadyClosedException if they are accessed after being
+    closed. (Mike McCandless)
+
+ 5. LUCENE-834: Changed some access levels for certain Span classes to
+    allow them to be overridden. They have been marked expert only and
+    not for public consumption. (Grant Ingersoll)
+
+ 6. LUCENE-796: Removed calls to super.* from various get*Query methods
+    in MultiFieldQueryParser, in order to allow sub-classes to override
+    them. (Steven Parkes via Otis Gospodnetic)
+
+ 7. LUCENE-857: Removed caching from QueryFilter and deprecated
+    QueryFilter in favour of QueryWrapperFilter or QueryWrapperFilter +
+    CachingWrapperFilter combination when caching is desired.
+    (Chris Hostetter, Otis Gospodnetic)
+
+ 8. LUCENE-869: Changed FSIndexInput and FSIndexOutput to inner classes
+    of FSDirectory to enable extensibility of these classes. (Michael
+    Busch)
+
+ 9. LUCENE-580: Added the public method reset() to TokenStream. This
+    method does nothing by default, but may be overridden by subclasses
+    to support consuming the TokenStream more than once. (Michael Busch)
+
+10. LUCENE-580: Added a new constructor to Field that takes a
+    TokenStream as argument, available as tokenStreamValue(). This is
+    useful to avoid the need of "dummy analyzers" for pre-analyzed
+    fields. (Karl Wettin, Michael Busch)
+
+11. LUCENE-730: Added the new methods to BooleanQuery
+    setAllowDocsOutOfOrder() and getAllowDocsOutOfOrder(). Deprecated
+    the methods setUseScorer14() and getUseScorer14(). The optimization
+    patch LUCENE-730 (see Optimizations->3.) improves performance for
+    certain queries but results in scoring out of docid order. This
+    patch reverses this change, so now by default hit docs are scored
+    in docid order unless setAllowDocsOutOfOrder(true) is explicitly
+    called. This patch also enables the tests in QueryUtils again that
+    check for docid order. (Paul Elschot, Doron Cohen, Michael Busch)
+
+12. LUCENE-888: Added Directory.openInput(File path, int bufferSize)
+    to optionally specify the size of the read buffer. Also added
+    BufferedIndexInput.setBufferSize(int) to change the buffer size.
+    (Mike McCandless)
+
+13. LUCENE-923: Make SegmentTermPositionVector package-private. It does
+    not need to be public because it implements the public interface
+    TermPositionVector. (Michael Busch)
+
+Bug fixes
+
+ 1. LUCENE-804: Fixed build.xml to pack a fully compilable src dist.
+    (Doron Cohen)
+
+ 2. LUCENE-813: Leading wildcard fixed to work with trailing wildcard.
+    Query parser modified to create a prefix query only for the case
+    that there is a single trailing wildcard (and no additional wildcard
+    or '?' in the query text). (Doron Cohen)
+
+ 3. LUCENE-812: Add no-argument constructors to NativeFSLockFactory
+    and SimpleFSLockFactory. This enables all 4 builtin LockFactory
+    implementations to be specified via the System property
+    org.apache.lucene.store.FSDirectoryLockFactoryClass. (Mike McCandless)
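+
+    A minimal sketch of a pre-analyzed field via the LUCENE-580
+    constructor from the entry above (field name and text are
+    hypothetical):
+
+      import java.io.StringReader;
+      import org.apache.lucene.analysis.TokenStream;
+      import org.apache.lucene.analysis.WhitespaceTokenizer;
+      import org.apache.lucene.document.Document;
+      import org.apache.lucene.document.Field;
+
+      TokenStream stream =
+          new WhitespaceTokenizer(new StringReader("already analyzed text"));
+      Document doc = new Document();
+      doc.add(new Field("body", stream));  // indexed without a "dummy analyzer"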
+
+ 4. LUCENE-821: The new single-norm-file introduced by LUCENE-756
+    failed to reduce the number of open descriptors since it was still
+    opened once per field with norms. (yonik)
+
+ 5. LUCENE-823: Make sure internal file handles are closed when
+    hitting an exception (eg disk full) while flushing deletes in
+    IndexWriter's mergeSegments, and also during
+    IndexWriter.addIndexes. (Mike McCandless)
+
+ 6. LUCENE-825: If directory is removed after
+    FSDirectory.getDirectory() but before IndexReader.open you now get
+    a FileNotFoundException like Lucene pre-2.1 (before this fix you
+    got an NPE). (Mike McCandless)
+
+ 7. LUCENE-800: Removed backslash from the TERM_CHAR list in the
+    queryparser, because the backslash is the escape character. Also
+    changed the ESCAPED_CHAR list to contain all possible characters,
+    because every character that follows a backslash should be
+    considered escaped. (Michael Busch)
+
+ 8. LUCENE-372: QueryParser.parse() now ensures that the entire input
+    string is consumed. Now a ParseException is thrown if a query
+    contains too many closing parentheses. (Andreas Neumann via Michael
+    Busch)
+
+ 9. LUCENE-814: javacc build targets now fix line-end-style of generated
+    files. Now also deleting all javacc generated files before calling
+    javacc. (Steven Parkes, Doron Cohen)
+
+10. LUCENE-829: close readers in contrib/benchmark. (Karl Wettin, Doron
+    Cohen)
+
+11. LUCENE-828: Minor fix for Term's equals().
+    (Paul Cowan via Otis Gospodnetic)
+
+12. LUCENE-846: Fixed: if IndexWriter is opened with autoCommit=false,
+    and you call addIndexes, and hit an exception (eg disk full) then
+    when IndexWriter rolls back its internal state this could corrupt
+    the instance of IndexWriter (but, not the index itself) by
+    referencing already deleted segments. This bug was only present
+    in 2.2 (trunk), ie was never released. (Mike McCandless)
+
+13. LUCENE-736: Sloppy phrase query with repeating terms matches wrong
+    docs. For example query "B C B"~2 matches the doc "A B C D E".
+    (Doron Cohen)
+
+14. LUCENE-789: Fixed: custom similarity is ignored when using
+    MultiSearcher (problem reported by Alexey Lef). Now the similarity
+    applied by MultiSearcher.setSimilarity(sim) is being used. Note
+    that, as before this fix, creating a MultiSearcher from Searchers
+    for which custom similarity was set has no effect - it is masked
+    by the similarity of the MultiSearcher. This is as designed,
+    because MultiSearcher operates on Searchables (not Searchers).
+    (Doron Cohen)
+
+15. LUCENE-880: Fixed DocumentWriter to close the TokenStreams after it
+    has written the postings. Then the resources associated with the
+    TokenStreams can safely be released. (Michael Busch)
+
+16. LUCENE-883: consecutive calls to Spellchecker.indexDictionary()
+    won't insert terms twice anymore. (Daniel Naber)
+
+17. LUCENE-881: QueryParser.escape() now also escapes the characters
+    '|' and '&' which are part of the queryparser syntax. (Michael Busch)
+
+18. LUCENE-886: Spellchecker clean up: exceptions aren't printed to
+    STDERR anymore and ignored, but re-thrown. Some javadoc
+    improvements. (Daniel Naber)
+
+19. LUCENE-698: FilteredQuery now takes the query boost into account for
+    scoring. (Michael Busch)
+
+20. LUCENE-763: Spellchecker: LuceneDictionary used to skip first word
+    in enumeration. (Christian Mallwitz via Daniel Naber)
+
+21. LUCENE-903: FilteredQuery explanation inaccuracy with boost.
+    Explanation tests now "deep" check the explanation details.
+    (Chris Hostetter, Doron Cohen)
+
+22. LUCENE-912: DisjunctionMaxScorer's first skipTo(target) call ignores
+    the skip target param and ends up at the first match.
+    (Sudaakeran B. via Chris Hostetter & Doron Cohen)
+
+23. LUCENE-913: Two consecutive score() calls return different
+    scores for Boolean Queries. (Michael Busch, Doron Cohen)
+
+24. LUCENE-1013: Fix IndexWriter.setMaxMergeDocs to work "out of the
+    box", again, by moving set/getMaxMergeDocs up from
+    LogDocMergePolicy into LogMergePolicy. This fixes the API
+    breakage (non backwards compatible change) caused by LUCENE-994.
+    (Yonik Seeley via Mike McCandless)
+
+New features
+
+ 1. LUCENE-759: Added two n-gram-producing TokenFilters.
+    (Otis Gospodnetic)
+
+ 2. LUCENE-822: Added FieldSelector capabilities to Searchable for use
+    with RemoteSearcher, and other Searchable implementations. (Mark
+    Miller, Grant Ingersoll)
+
+ 3. LUCENE-755: Added the ability to store arbitrary binary metadata in
+    the posting list. These metadata are called Payloads. For every
+    position of a Token one Payload in the form of a variable length
+    byte array can be stored in the prox file.
+    Remark: The APIs introduced with this feature are in experimental
+    state and thus contain appropriate warnings in the javadocs.
+    (Michael Busch)
+
+ 4. LUCENE-834: Added BoostingTermQuery which can boost scores based on
+    the values of a payload (see #3 above.) (Grant Ingersoll)
+
+ 5. LUCENE-834: Similarity has a new method for scoring payloads called
+    scorePayloads that can be overridden to take advantage of payload
+    storage (see #3 above).
+
+ 6. LUCENE-834: Added isPayloadAvailable() onto TermPositions interface
+    and implemented it in the appropriate places (Grant Ingersoll)
+
+ 7. LUCENE-853: Added RemoteCachingWrapperFilter to enable caching of
+    Filters on the remote side of the RMI connection.
+    (Matt Ericson via Otis Gospodnetic)
+
+ 8. LUCENE-446: Added Solr's search.function for scores based on field
+    values, plus CustomScoreQuery for simple score (post) customization.
+    (Yonik Seeley, Doron Cohen)
+
+ 9. LUCENE-1058: Added new TeeTokenFilter (like the UNIX 'tee' command)
+    and SinkTokenizer which can be used to share tokens between two or
+    more Fields such that the other Fields do not have to go through
+    the whole Analysis process over again. For instance, if you have
+    two Fields that share all the same analysis steps except one
+    lowercases tokens and the other does not, you can coordinate the
+    operations between the two using the TeeTokenFilter and the
+    SinkTokenizer. See TeeSinkTokenTest.java for examples.
+    (Grant Ingersoll, Michael Busch, Yonik Seeley)
+
+Optimizations
+
+ 1. LUCENE-761: The proxStream is now cloned lazily in
+    SegmentTermPositions when nextPosition() is called for the first
+    time. This allows using instances of SegmentTermPositions instead
+    of SegmentTermDocs without additional costs. (Michael Busch)
+
+ 2. LUCENE-431: RAMInputStream and RAMOutputStream extend IndexInput and
+    IndexOutput directly now. This avoids further buffering and thus
+    avoids unnecessary array copies. (Michael Busch)
+
+ 3. LUCENE-730: Updated BooleanScorer2 to make use of BooleanScorer in
+    some cases and possibly improve scoring performance. Documents can
+    now be delivered out-of-order as they are scored (e.g. to
+    HitCollector). N.B. A bit of code had to be disabled in QueryUtils
+    in order for the TestBoolean2 test to keep passing.
+    (Paul Elschot via Otis Gospodnetic)
+
+ 4. LUCENE-882: Spellchecker doesn't store the ngrams anymore but only
+    indexes them to keep the spell index small. (Daniel Naber)
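+
+    A minimal sketch of attaching payloads per the LUCENE-755 entry
+    above, using this release's single-token next() API (the constant
+    bytes are an arbitrary example):
+
+      import java.io.IOException;
+      import org.apache.lucene.analysis.Token;
+      import org.apache.lucene.analysis.TokenFilter;
+      import org.apache.lucene.analysis.TokenStream;
+      import org.apache.lucene.index.Payload;
+
+      public class ConstantPayloadFilter extends TokenFilter {
+        private final byte[] data;
+        public ConstantPayloadFilter(TokenStream in, byte[] data) {
+          super(in);
+          this.data = data;
+        }
+        public Token next() throws IOException {
+          Token t = input.next();
+          if (t != null) {
+            t.setPayload(new Payload(data));  // stored at this position
+          }
+          return t;
+        }
+      }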
+
+ 5. LUCENE-430: Delay allocation of the buffer after a clone of
+    BufferedIndexInput. Together with LUCENE-888 this will allow
+    adjusting the buffer size dynamically. (Paul Elschot, Michael Busch)
+
+ 6. LUCENE-888: Increase buffer sizes inside CompoundFileWriter and
+    BufferedIndexOutput. Also increase buffer size in
+    BufferedIndexInput, but only when used during merging. Together,
+    these increases yield 10-18% overall performance gain vs the
+    previous 1K defaults. (Mike McCandless)
+
+ 7. LUCENE-866: Adds multi-level skip lists to the posting lists. This
+    speeds up most queries that use skipTo(), especially on big indexes
+    with large posting lists. For average AND queries the speedup is
+    about 20%, for queries that contain very frequent and very unique
+    terms the speedup can be over 80%. (Michael Busch)
+
+Documentation
+
+ 1. LUCENE-791 and INFRA-1173: Infrastructure moved the Wiki to
+    http://wiki.apache.org/lucene-java/. Updated the links in the docs
+    and wherever else I found references. (Grant Ingersoll, Joe Schaefer)
+
+ 2. LUCENE-807: Fixed the javadoc for ScoreDocComparator.compare() to be
+    consistent with java.util.Comparator.compare(): Any integer is
+    allowed to be returned instead of only -1/0/1.
+    (Paul Cowan via Michael Busch)
+
+ 3. LUCENE-875: Solved javadoc warnings & errors under jdk1.4.
+    Solved javadoc errors under jdk5 (jars in path for gdata).
+    Made "javadocs" target depend on "build-contrib" for first
+    downloading contrib jars configured for dynamic download. (Note:
+    when running behind a firewall, a firewall prompt might pop up.)
+    (Doron Cohen)
+
+ 4. LUCENE-740: Added SNOWBALL-LICENSE.txt to the snowball package and a
+    remark about the license to NOTICE.TXT. (Steven Parkes via Michael
+    Busch)
+
+ 5. LUCENE-925: Added analysis package javadocs. (Grant Ingersoll and
+    Doron Cohen)
+
+ 6. LUCENE-926: Added document package javadocs. (Grant Ingersoll)
+
+Build
+
+ 1. LUCENE-802: Added LICENSE.TXT and NOTICE.TXT to Lucene jars.
+    (Steven Parkes via Michael Busch)
+
+ 2. LUCENE-885: "ant test" now includes all contrib tests. The new
+    "ant test-core" target can be used to run only the Core (non
+    contrib) tests.
+    (Chris Hostetter)
+
+ 3. LUCENE-900: "ant test" now enables Java assertions (in Lucene
+    packages). (Doron Cohen)
+
+ 4. LUCENE-894: Add custom build file for binary distributions that
+    includes targets to build the demos. (Chris Hostetter, Michael Busch)
+
+ 5. LUCENE-904: The "package" targets in build.xml now also generate
+    .md5 checksum files. (Chris Hostetter, Michael Busch)
+
+ 6. LUCENE-907: Include LICENSE.TXT and NOTICE.TXT in the META-INF dirs
+    of demo war, demo jar, and the contrib jars. (Michael Busch)
+
+ 7. LUCENE-909: Demo targets for running the demo. (Doron Cohen)
+
+ 8. LUCENE-908: Improves content of MANIFEST file and makes it
+    customizable for the contribs. Adds SNOWBALL-LICENSE.txt to META-INF
+    of the snowball jar and makes sure that the lucli jar contains
+    LICENSE.txt and NOTICE.txt. (Chris Hostetter, Michael Busch)
+
+ 9. LUCENE-930: Various contrib building improvements to ensure contrib
+    dependencies are met, and test compilation errors fail the build.
+    (Steven Parkes, Chris Hostetter)
+
+10. LUCENE-622: Add ant target and pom.xml files for building maven
+    artifacts of the Lucene core and the contrib modules.
+    (Sami Siren, Karl Wettin, Michael Busch)
+
+======================= Release 2.1.0 =======================
+
+Changes in runtime behavior
+
+ 1. 's' and 't' have been removed from the list of default stopwords
+ in StopAnalyzer (also used by StandardAnalyzer). Having e.g. 's'
+ as a stopword meant that 's-class' led to the same results as 'class'.
+ Note that this problem still exists for 'a', e.g. in 'a-class' as
+ 'a' continues to be a stopword.
+ (Daniel Naber)
+
+ 2. LUCENE-478: Updated the list of Unicode code point ranges for CJK
+ (now split into CJ and K) in StandardAnalyzer. (John Wang and
+ Steven Rowe via Otis Gospodnetic)
+
+ 3. Modified some CJK Unicode code point ranges in StandardTokenizer.jj,
+ and added a few more of them to increase CJK character coverage.
+ Also documented some of the ranges.
+ (Otis Gospodnetic)
+
+ 4. LUCENE-489: Add support for leading wildcard characters (*, ?) to
+ QueryParser. Default is to disallow them, as before (see the sketch
+ after this list).
+ (Steven Parkes via Otis Gospodnetic)
+
+ 5. LUCENE-703: QueryParser changed to default to use of ConstantScoreRangeQuery
+ for range queries. Added useOldRangeQuery property to QueryParser to allow
+ selection of old RangeQuery class if required.
+ (Mark Harwood)
+
+ 6. LUCENE-543: WildcardQuery now performs a TermQuery if the provided term
+ does not contain a wildcard character (? or *), when previously a
+ StringIndexOutOfBoundsException was thrown.
+ (Michael Busch via Erik Hatcher)
+
+ 7. LUCENE-726: Removed the use of deprecated doc.fields() method and
+ Enumeration.
+ (Michael Busch via Otis Gospodnetic)
+
+ 8. LUCENE-436: Removed finalize() in TermInfosReader and SegmentReader,
+ and added a call to enumerators.remove() in TermInfosReader.close().
+ The finalize() overrides were added to help with a pre-1.4.2 JVM bug
+ that has since been fixed, plus we no longer support pre-1.4.2 JVMs.
+ (Otis Gospodnetic)
+
+ 9. LUCENE-771: The default location of the write lock is now the
+ index directory, and is named simply "write.lock" (without a big
+ digest prefix). The system properties "org.apache.lucene.lockDir"
+ and "java.io.tmpdir" are no longer used as the global directory
+ for storing lock files, and the LOCK_DIR field of FSDirectory is
+ now deprecated. (Mike McCandless)
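+
+A sketch of the leading-wildcard support from item 4 (the field name and
+query text are illustrative only; the analyzer choice is arbitrary):
+
+    import org.apache.lucene.analysis.standard.StandardAnalyzer;
+    import org.apache.lucene.queryParser.QueryParser;
+    import org.apache.lucene.search.Query;
+
+    public class LeadingWildcardExample {
+      public static void main(String[] args) throws Exception {
+        QueryParser parser = new QueryParser("contents", new StandardAnalyzer());
+        // Off by default, exactly as before; without this call,
+        // parse("*ildcard") is rejected with a ParseException.
+        parser.setAllowLeadingWildcard(true);
+        Query query = parser.parse("*ildcard");
+        System.out.println(query);
+      }
+    }
+
+The useOldRangeQuery property from item 5 is likewise toggled on the parser
+instance if the old RangeQuery behavior is required.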
+
+New features
+
+ 1. LUCENE-503: New ThaiAnalyzer and ThaiWordFilter in contrib/analyzers
+ (Samphan Raruenrom via Chris Hostetter)
+
+ 2. LUCENE-545: New FieldSelector API and associated changes to
+ IndexReader and implementations. New Fieldable interface for use
+ with the lazy field loading mechanism. (Grant Ingersoll and Chuck
+ Williams via Grant Ingersoll)
+
+ 3. LUCENE-676: Move Solr's PrefixFilter to Lucene core. (Yura
+ Smolsky, Yonik Seeley)
+
+ 4. LUCENE-678: Added NativeFSLockFactory, which implements locking
+ using OS native locking (via java.nio.*). (Michael McCandless via
+ Yonik Seeley)
+
+ 5. LUCENE-544: Added the ability to specify different boosts for
+ different fields when using MultiFieldQueryParser (Matt Ericson
+ via Otis Gospodnetic)
+
+ 6. LUCENE-528: New IndexWriter.addIndexesNoOptimize() that doesn't
+ optimize the index when adding new segments, only performing
+ merges as needed. (Ning Li via Yonik Seeley)
+
+ 7. LUCENE-573: QueryParser now allows backslash escaping in
+ quoted terms and phrases. (Michael Busch via Yonik Seeley)
+
+ 8. LUCENE-716: QueryParser now allows specification of Unicode
+ characters in terms via a unicode escape of the form \uXXXX
+ (Michael Busch via Yonik Seeley)
+
+ 9. LUCENE-709: Added RAMDirectory.sizeInBytes(), IndexWriter.ramSizeInBytes()
+ and IndexWriter.flushRamSegments(), allowing applications to
+ control the amount of memory used to buffer documents.
+ (Chuck Williams via Yonik Seeley)
+
+10. LUCENE-723: QueryParser now parses *:* as MatchAllDocsQuery
+ (Yonik Seeley)
+
+11. LUCENE-741: Command-line utility for modifying or removing norms
+ on fields in an existing index. This is mostly based on LUCENE-496
+ and lives in contrib/miscellaneous.
+ (Chris Hostetter, Otis Gospodnetic)
+
+12. LUCENE-759: Added NGramTokenizer and EdgeNGramTokenizer classes and
+ their passing unit tests.
+ (Otis Gospodnetic)
+
+13. LUCENE-565: Added methods to IndexWriter to more efficiently
+ handle updating documents (the "delete then add" use case). This
+ is intended to be an eventual replacement for the existing
+ IndexModifier. Added IndexWriter.flush() (renamed from
+ flushRamSegments()) to flush all pending updates (held in RAM) to
+ the Directory. (Ning Li via Mike McCandless)
+
+14. LUCENE-762: Added in SIZE and SIZE_AND_BREAK FieldSelectorResult options
+ which allow one to retrieve the size of a field without retrieving the
+ actual field. (Chuck Williams via Grant Ingersoll)
+
+15. LUCENE-799: Properly handle lazy, compressed fields.
+ (Mike Klaas via Grant Ingersoll)
+
+API Changes
+
+ 1. LUCENE-438: Remove "final" from Token, implement Cloneable, allow
+ changing of termText via setTermText(). (Yonik Seeley)
+
+ 2. org.apache.lucene.analysis.nl.WordlistLoader has been deprecated
+ and is supposed to be replaced with the WordlistLoader class in
+ package org.apache.lucene.analysis (Daniel Naber)
+
+ 3. LUCENE-609: Revert return type of Document.getField(s) to Field
+ for backward compatibility, added new Document.getFieldable(s)
+ for access to new lazy loaded fields. (Yonik Seeley)
+
+ 4. LUCENE-608: Document.fields() has been deprecated and a new method
+ Document.getFields() has been added that returns a List instead of
+ an Enumeration (Daniel Naber)
+
+ 5. LUCENE-605: New Explanation.isMatch() method and new ComplexExplanation
+ subclass allows explain methods to produce Explanations which model
+ "matching" independent of having a positive value.
+ (Chris Hostetter)
+
+ 6. LUCENE-621: New static methods IndexWriter.setDefaultWriteLockTimeout
+ and IndexWriter.setDefaultCommitLockTimeout for overriding default
+ timeout values for all future instances of IndexWriter (as well
+ as for any other classes that may reference the static values,
+ ie: IndexReader).
+ (Michael McCandless via Chris Hostetter)
+
+ 7. LUCENE-638: FSDirectory.list() now only returns the directory's
+ Lucene-related files. Thanks to this change one can now construct
+ a RAMDirectory from a file system directory that contains files
+ not related to Lucene.
+ (Simon Willnauer via Daniel Naber)
+
+ 8. LUCENE-635: Decoupling locking implementation from Directory
+ implementation. Added set/getLockFactory to Directory and moved
+ all locking code into subclasses of abstract class LockFactory.
+ FSDirectory and RAMDirectory still default to their prior locking
+ implementations, but now you can mix & match, for example using
+ SingleInstanceLockFactory (ie, in memory locking) locking with an
+ FSDirectory. Note that now you must call setDisableLocks before
+ the instantiation of an FSDirectory if you wish to disable locking
+ for that Directory.
+ (Michael McCandless, Jeff Patterson via Yonik Seeley)
+
+ 9. LUCENE-657: Made FuzzyQuery non-final and inner ScoreTerm protected.
+ (Steven Parkes via Otis Gospodnetic) + +10. LUCENE-701: Lockless commits: a commit lock is no longer required + when a writer commits and a reader opens the index. This includes + a change to the index file format (see docs/fileformats.html for + details). It also removes all APIs associated with the commit + lock & its timeout. Readers are now truly read-only and do not + block one another on startup. This is the first step to getting + Lucene to work correctly over NFS (second step is + LUCENE-710). (Mike McCandless) + +11. LUCENE-722: DEFAULT_MIN_DOC_FREQ was misspelled DEFALT_MIN_DOC_FREQ + in Similarity's MoreLikeThis class. The misspelling has been + replaced by the correct spelling. + (Andi Vajda via Daniel Naber) + +12. LUCENE-738: Reduce the size of the file that keeps track of which + documents are deleted when the number of deleted documents is + small. This changes the index file format and cannot be + read by previous versions of Lucene. (Doron Cohen via Yonik Seeley) + +13. LUCENE-756: Maintain all norms in a single .nrm file to reduce the + number of open files and file descriptors for the non-compound index + format. This changes the index file format, but maintains the + ability to read and update older indices. The first segment merge + on an older format index will create a single .nrm file for the new + segment. (Doron Cohen via Yonik Seeley) + +14. LUCENE-732: DateTools support has been added to QueryParser, with + setters for both the default Resolution, and per-field Resolution. + For backwards compatibility, DateField is still used if no Resolutions + are specified. (Michael Busch via Chris Hostetter) + +15. Added isOptimized() method to IndexReader. + (Otis Gospodnetic) + +16. LUCENE-773: Deprecate the FSDirectory.getDirectory(*) methods that + take a boolean "create" argument. Instead you should use + IndexWriter's "create" argument to create a new index. + (Mike McCandless) + +17. LUCENE-780: Add a static Directory.copy() method to copy files + from one Directory to another. (Jiri Kuhn via Mike McCandless) + +18. LUCENE-773: Added Directory.clearLock(String name) to forcefully + remove an old lock. The default implementation is to ask the + lockFactory (if non null) to clear the lock. (Mike McCandless) + +19. LUCENE-795: Directory.renameFile() has been deprecated as it is + not used anymore inside Lucene. (Daniel Naber) + +Bug fixes + + 1. Fixed the web application demo (built with "ant war-demo") which + didn't work because it used a QueryParser method that had + been removed (Daniel Naber) + + 2. LUCENE-583: ISOLatin1AccentFilter fails to preserve positionIncrement + (Yonik Seeley) + + 3. LUCENE-575: SpellChecker min score is incorrectly changed by suggestSimilar + (Karl Wettin via Yonik Seeley) + + 4. LUCENE-587: Explanation.toHtml was producing malformed HTML + (Chris Hostetter) + + 5. Fix to allow MatchAllDocsQuery to be used with RemoteSearcher (Yonik Seeley) + + 6. LUCENE-601: RAMDirectory and RAMFile made Serializable + (Karl Wettin via Otis Gospodnetic) + + 7. LUCENE-557: Fixes to BooleanQuery and FilteredQuery so that the score + Explanations match up with the real scores. + (Chris Hostetter) + + 8. LUCENE-607: ParallelReader's TermEnum fails to advance properly to + new fields (Chuck Williams, Christian Kohlschuetter via Yonik Seeley) + + 9. LUCENE-610,LUCENE-611: Simple syntax changes to allow compilation with ecj: + disambiguate inner class scorer's use of doc() in BooleanScorer2, + other test code changes. (DM Smith via Yonik Seeley) + +10. 
LUCENE-451: All core query types now use ComplexExplanations so that
+ boosts of zero don't confuse the BooleanWeight explain method.
+ (Chris Hostetter)
+
+11. LUCENE-593: Fixed LuceneDictionary's inner Iterator
+ (Kåre Fiedler Christiansen via Otis Gospodnetic)
+
+12. LUCENE-641: fixed an off-by-one bug with IndexWriter.setMaxFieldLength()
+ (Daniel Naber)
+
+13. LUCENE-659: Make PerFieldAnalyzerWrapper delegate getPositionIncrementGap()
+ to the correct analyzer for the field. (Chuck Williams via Yonik Seeley)
+
+14. LUCENE-650: Fixed NPE in Locale specific String Sort when Document
+ has no value.
+ (Oliver Hutchison via Chris Hostetter)
+
+15. LUCENE-683: Fixed data corruption when reading lazy loaded fields.
+ (Yonik Seeley)
+
+16. LUCENE-678: Fixed bug in NativeFSLockFactory which caused the same
+ lock to be shared between different directories.
+ (Michael McCandless via Yonik Seeley)
+
+17. LUCENE-690: Fixed thread unsafe use of IndexInput by lazy loaded fields.
+ (Yonik Seeley)
+
+18. LUCENE-696: Fix bug when scorer for DisjunctionMaxQuery has skipTo()
+ called on it before next(). (Yonik Seeley)
+
+19. LUCENE-569: Fixed SpanNearQuery bug, for 'inOrder' queries it would fail
+ to recognize ordered spans if they overlapped with unordered spans.
+ (Paul Elschot via Chris Hostetter)
+
+20. LUCENE-706: Updated fileformats.xml|html concerning the docdelta value
+ in the frequency file. (Johan Stuyts, Doron Cohen via Grant Ingersoll)
+
+21. LUCENE-715: Fixed private constructor in IndexWriter.java to
+ properly release the acquired write lock if there is an
+ IOException after acquiring the write lock but before finishing
+ instantiation. (Matthew Bogosian via Mike McCandless)
+
+22. LUCENE-651: Multiple different threads requesting the same
+ FieldCache entry (often for Sorting by a field) at the same
+ time caused multiple generations of that entry, which was
+ detrimental to performance and memory use.
+ (Oliver Hutchison via Otis Gospodnetic)
+
+23. LUCENE-717: Fixed build.xml not to fail when there is no lib dir.
+ (Doron Cohen via Otis Gospodnetic)
+
+24. LUCENE-728: Removed duplicate/old MoreLikeThis and SimilarityQueries
+ classes from contrib/similarity, as their new home is under
+ contrib/queries.
+ (Otis Gospodnetic)
+
+25. LUCENE-669: Do not double-close the RandomAccessFile in
+ FSIndexInput/Output during finalize(). Besides sending an
+ IOException up to the GC, this may also be the cause of intermittent
+ "The handle is invalid" IOExceptions on Windows when trying to
+ close readers or writers. (Michael Busch via Mike McCandless)
+
+26. LUCENE-702: Fix IndexWriter.addIndexes(*) to not corrupt the index
+ on any exceptions (eg disk full). The semantics of these methods
+ is now transactional: either all indices are merged or none are.
+ Also fixed IndexWriter.mergeSegments (called outside of
+ addIndexes(*) by addDocument, optimize, flushRamSegments) and
+ IndexReader.commit() (called by close) to clean up and keep the
+ instance state consistent to what's actually in the index (Mike
+ McCandless).
+
+27. LUCENE-129: Change finalizers to do "try {...} finally
+ {super.finalize();}" to make sure we don't miss finalizers in
+ classes above us. (Esmond Pitt via Mike McCandless)
+
+28. LUCENE-754: Fix a problem introduced by LUCENE-651, causing
+ IndexReaders to hang around forever, in addition to not
+ fixing the original FieldCache performance problem.
+ (Chris Hostetter, Yonik Seeley)
+
+29.
LUCENE-140: Fix IndexReader.deleteDocument(int docNum) to
+ correctly raise ArrayIndexOutOfBoundsException when docNum is too
+ large. Previously, if docNum was only slightly too large (within
+ the same multiple of 8, ie, up to 7 ints beyond maxDoc), no
+ exception would be raised and instead the index would become
+ silently corrupted. The corruption then only appears much later,
+ in mergeSegments, when the corrupted segment is merged with
+ segment(s) after it. (Mike McCandless)
+
+30. LUCENE-768: Fix case where an Exception during deleteDocument,
+ undeleteAll or setNorm in IndexReader could leave the reader in a
+ state where close() fails to release the write lock.
+ (Mike McCandless)
+
+31. Remove "tvp" from known index file extensions because it is
+ never used. (Nicolas Lalevée via Bernhard Messer)
+
+32. LUCENE-767: Change how SegmentReader.maxDoc() is computed to not
+ rely on file length check and instead use the SegmentInfo's
+ docCount that's already stored explicitly in the index. This is a
+ defensive bug fix (ie, there is no known problem seen "in real
+ life" due to this, just a possible future problem). (Chuck
+ Williams via Mike McCandless)
+
+Optimizations
+
+ 1. LUCENE-586: TermDocs.skipTo() is now more efficient for
+ multi-segment indexes. This will improve the performance of many
+ types of queries against a non-optimized index. (Andrew Hudson
+ via Yonik Seeley)
+
+ 2. LUCENE-623: RAMDirectory.close now nulls out its reference to all
+ internal "files", allowing them to be GCed even if references to the
+ RAMDirectory itself still exist. (Nadav Har'El via Chris Hostetter)
+
+ 3. LUCENE-629: Compressed fields are no longer uncompressed and
+ recompressed during segment merges (e.g. during indexing or
+ optimizing), thus improving performance. (Michael Busch via Otis
+ Gospodnetic)
+
+ 4. LUCENE-388: Improve indexing performance when maxBufferedDocs is
+ large by keeping a count of buffered documents rather than
+ counting after each document addition. (Doron Cohen, Paul Smith,
+ Yonik Seeley)
+
+ 5. Modified TermScorer.explain to use TermDocs.skipTo() instead of
+ looping through docs. (Grant Ingersoll)
+
+ 6. LUCENE-672: New indexing segment merge policy flushes all
+ buffered docs to their own segment and delays a merge until
+ mergeFactor segments of a certain level have been accumulated.
+ This increases indexing performance in the presence of deleted
+ docs or partially full segments as well as enabling future
+ optimizations.
+
+ NOTE: this also fixes an "under-merging" bug whereby it is
+ possible to get far too many segments in your index (which will
+ drastically slow down search, risks exhausting file descriptor
+ limit, etc.). This can happen when the number of buffered docs
+ at close, plus the number of docs in the last non-ram segment is
+ greater than mergeFactor. (Ning Li, Yonik Seeley)
+
+ 7. Lazy loaded fields unnecessarily retained an extra copy of loaded
+ String data. (Yonik Seeley)
+
+ 8. LUCENE-443: ConjunctionScorer performance increase. Speed up
+ any BooleanQuery with more than one mandatory clause.
+ (Abdul Chaudhry, Paul Elschot via Yonik Seeley)
+
+ 9. LUCENE-365: DisjunctionSumScorer performance increase of
+ ~30%. Speeds up queries with optional clauses. (Paul Elschot via
+ Yonik Seeley)
+
+ 10. LUCENE-695: Optimized BufferedIndexInput.readBytes() for medium
+ size buffers, which will speed up merging and retrieving binary
+ and compressed fields. (Nadav Har'El via Yonik Seeley)
+
+ 11.
LUCENE-687: Lazy skipping on proximity file speeds up most
+ queries involving term positions, including phrase queries.
+ (Michael Busch via Yonik Seeley)
+
+ 12. LUCENE-714: Replaced 2 cases of manual for-loop array copying
+ with calls to System.arraycopy instead, in DocumentWriter.java.
+ (Nicolas Lalevee via Mike McCandless)
+
+ 13. LUCENE-729: Non-recursive skipTo and next implementation of
+ TermDocs for a MultiReader. The old implementation could
+ recurse up to the number of segments in the index. (Yonik Seeley)
+
+ 14. LUCENE-739: Improve segment merging performance by reusing
+ the norm array across different fields and doing bulk writes
+ of norms of segments with no deleted docs.
+ (Michael Busch via Yonik Seeley)
+
+ 15. LUCENE-745: Add BooleanQuery.clauses(), allowing direct access
+ to the List of clauses and replaced the internal synchronized Vector
+ with an unsynchronized List. (Yonik Seeley)
+
+ 16. LUCENE-750: Remove finalizers from FSIndexOutput and move the
+ FSIndexInput finalizer to the actual file so all clones don't
+ register a new finalizer. (Yonik Seeley)
+
+Test Cases
+
+ 1. Added TestTermScorer.java (Grant Ingersoll)
+
+ 2. Added TestWindowsMMap.java (Benson Margulies via Mike McCandless)
+
+ 3. LUCENE-744 Append the user.name property onto the temporary directory
+ that is created so it doesn't interfere with other users. (Grant Ingersoll)
+
+Documentation
+
+ 1. Added style sheet to xdocs named lucene.css and included in the
+ Anakia VSL descriptor. (Grant Ingersoll)
+
+ 2. Added scoring.xml document into xdocs. Updated Similarity.java
+ scoring formula. (Grant Ingersoll and Steve Rowe. Updates from:
+ Michael McCandless, Doron Cohen, Chris Hostetter, Doug Cutting).
+ Issue 664.
+
+ 3. Added javadocs for FieldSelectorResult.java. (Grant Ingersoll)
+
+ 4. Moved xdocs directory to src/site/src/documentation/content/xdocs per
+ Issue 707. Site now builds using Forrest, just like the other Lucene
+ siblings. See http://wiki.apache.org/jakarta-lucene/HowToUpdateTheWebsite
+ for info on updating the website. (Grant Ingersoll with help from Steve Rowe,
+ Chris Hostetter, Doug Cutting, Otis Gospodnetic, Yonik Seeley)
+
+ 5. Added in Developer and System Requirements sections under Resources (Grant Ingersoll)
+
+ 6. LUCENE-713 Updated the Term Vector section of File Formats to include
+ documentation on how Offset and Position info are stored in the TVF file.
+ (Grant Ingersoll, Samir Abdou)
+
+ 7. Added in link to Clover Test Code Coverage Reports under the Develop
+ section in Resources (Grant Ingersoll)
+
+ 8. LUCENE-748: Added details for semantics of IndexWriter.close on
+ hitting an Exception. (Jed Wesley-Smith via Mike McCandless)
+
+ 9. Added some text about what is contained in releases.
+ (Eric Haszlakiewicz via Grant Ingersoll)
+
+ 10. LUCENE-758: Fix javadoc to clarify that RAMDirectory(Directory)
+ makes a full copy of the starting Directory. (Mike McCandless)
+
+ 11. LUCENE-764: Fix javadocs to detail temporary space requirements
+ for IndexWriter's optimize(), addIndexes(*) and addDocument(...)
+ methods. (Mike McCandless)
+
+Build
+
+ 1. Added in clover test code coverage per http://issues.apache.org/jira/browse/LUCENE-721.
+ To enable clover code coverage, you must have clover.jar in the ANT
+ classpath and specify -Drun.clover=true on the command line.
+ (Michael Busch and Grant Ingersoll)
+
+ 2. Added a sysproperty in common-build.xml per LUCENE-752 to map java.io.tmpdir to
+ ${build.dir}/test just like the tempDir sysproperty.
+
+ 3.
LUCENE-757 Added new target named init-dist that does setup for + distribution of both binary and source distributions. Called by package + and package-*-src + +======================= Release 2.0.0 ======================= + +API Changes + + 1. All deprecated methods and fields have been removed, except + DateField, which will still be supported for some time + so Lucene can read its date fields from old indexes + (Yonik Seeley & Grant Ingersoll) + + 2. DisjunctionSumScorer is no longer public. + (Paul Elschot via Otis Gospodnetic) + + 3. Creating a Field with both an empty name and an empty value + now throws an IllegalArgumentException + (Daniel Naber) + + 4. LUCENE-301: Added new IndexWriter({String,File,Directory}, + Analyzer) constructors that do not take a boolean "create" + argument. These new constructors will create a new index if + necessary, else append to the existing one. (Dan Armbrust via + Mike McCandless) + +New features + + 1. LUCENE-496: Command line tool for modifying the field norms of an + existing index; added to contrib/miscellaneous. (Chris Hostetter) + + 2. LUCENE-577: SweetSpotSimilarity added to contrib/miscellaneous. + (Chris Hostetter) + +Bug fixes + + 1. LUCENE-330: Fix issue of FilteredQuery not working properly within + BooleanQuery. (Paul Elschot via Erik Hatcher) + + 2. LUCENE-515: Make ConstantScoreRangeQuery and ConstantScoreQuery work + with RemoteSearchable. (Philippe Laflamme via Yonik Seeley) + + 3. Added methods to get/set writeLockTimeout and commitLockTimeout in + IndexWriter. These could be set in Lucene 1.4 using a system property. + This feature had been removed without adding the corresponding + getter/setter methods. (Daniel Naber) + + 4. LUCENE-413: Fixed ArrayIndexOutOfBoundsException exceptions + when using SpanQueries. (Paul Elschot via Yonik Seeley) + + 5. Implemented FilterIndexReader.getVersion() and isCurrent() + (Yonik Seeley) + + 6. LUCENE-540: Fixed a bug with IndexWriter.addIndexes(Directory[]) + that sometimes caused the index order of documents to change. + (Yonik Seeley) + + 7. LUCENE-526: Fixed a bug in FieldSortedHitQueue that caused + subsequent String sorts with different locales to sort identically. + (Paul Cowan via Yonik Seeley) + + 8. LUCENE-541: Add missing extractTerms() to DisjunctionMaxQuery + (Stefan Will via Yonik Seeley) + + 9. LUCENE-514: Added getTermArrays() and extractTerms() to + MultiPhraseQuery (Eric Jain & Yonik Seeley) + +10. LUCENE-512: Fixed ClassCastException in ParallelReader.getTermFreqVectors + (frederic via Yonik) + +11. LUCENE-352: Fixed bug in SpanNotQuery that manifested as + NullPointerException when "exclude" query was not a SpanTermQuery. + (Chris Hostetter) + +12. LUCENE-572: Fixed bug in SpanNotQuery hashCode, was ignoring exclude clause + (Chris Hostetter) + +13. LUCENE-561: Fixed some ParallelReader bugs. NullPointerException if the reader + didn't know about the field yet, reader didn't keep track if it had deletions, + and deleteDocument calls could circumvent synchronization on the subreaders. + (Chuck Williams via Yonik Seeley) + +14. LUCENE-556: Added empty extractTerms() implementation to MatchAllDocsQuery and + ConstantScoreQuery in order to allow their use with a MultiSearcher. + (Yonik Seeley) + +15. LUCENE-546: Removed 2GB file size limitations for RAMDirectory. + (Peter Royal, Michael Chan, Yonik Seeley) + +16. LUCENE-485: Don't hold commit lock while removing obsolete index + files. (Luc Vanlerberghe via cutting) + + +1.9.1 + +Bug fixes + + 1. 
LUCENE-511: Fix a bug in the BufferedIndexOutput optimization + introduced in 1.9-final. (Shay Banon & Steven Tamm via cutting) + +1.9 final + +Note that this release is mostly but not 100% source compatible with +the previous release of Lucene (1.4.3). In other words, you should +make sure your application compiles with this version of Lucene before +you replace the old Lucene JAR with the new one. Many methods have +been deprecated in anticipation of release 2.0, so deprecation +warnings are to be expected when upgrading from 1.4.3 to 1.9. + +Bug fixes + + 1. The fix that made IndexWriter.setMaxBufferedDocs(1) work had negative + effects on indexing performance and has thus been reverted. The + argument for setMaxBufferedDocs(int) must now at least be 2, otherwise + an exception is thrown. (Daniel Naber) + +Optimizations + + 1. Optimized BufferedIndexOutput.writeBytes() to use + System.arraycopy() in more cases, rather than copying byte-by-byte. + (Lukas Zapletal via Cutting) + +1.9 RC1 + +Requirements + + 1. To compile and use Lucene you now need Java 1.4 or later. + +Changes in runtime behavior + + 1. FuzzyQuery can no longer throw a TooManyClauses exception. If a + FuzzyQuery expands to more than BooleanQuery.maxClauseCount + terms only the BooleanQuery.maxClauseCount most similar terms + go into the rewritten query and thus the exception is avoided. + (Christoph) + + 2. Changed system property from "org.apache.lucene.lockdir" to + "org.apache.lucene.lockDir", so that its casing follows the existing + pattern used in other Lucene system properties. (Bernhard) + + 3. The terms of RangeQueries and FuzzyQueries are now converted to + lowercase by default (as it has been the case for PrefixQueries + and WildcardQueries before). Use setLowercaseExpandedTerms(false) + to disable that behavior but note that this also affects + PrefixQueries and WildcardQueries. (Daniel Naber) + + 4. Document frequency that is computed when MultiSearcher is used is now + computed correctly and "globally" across subsearchers and indices, while + before it used to be computed locally to each index, which caused + ranking across multiple indices not to be equivalent. + (Chuck Williams, Wolf Siberski via Otis, bug #31841) + + 5. When opening an IndexWriter with create=true, Lucene now only deletes + its own files from the index directory (looking at the file name suffixes + to decide if a file belongs to Lucene). The old behavior was to delete + all files. (Daniel Naber and Bernhard Messer, bug #34695) + + 6. The version of an IndexReader, as returned by getCurrentVersion() + and getVersion() doesn't start at 0 anymore for new indexes. Instead, it + is now initialized by the system time in milliseconds. + (Bernhard Messer via Daniel Naber) + + 7. Several default values cannot be set via system properties anymore, as + this has been considered inappropriate for a library like Lucene. For + most properties there are set/get methods available in IndexWriter which + you should use instead. This affects the following properties: + See IndexWriter for getter/setter methods: + org.apache.lucene.writeLockTimeout, org.apache.lucene.commitLockTimeout, + org.apache.lucene.minMergeDocs, org.apache.lucene.maxMergeDocs, + org.apache.lucene.maxFieldLength, org.apache.lucene.termIndexInterval, + org.apache.lucene.mergeFactor, + See BooleanQuery for getter/setter methods: + org.apache.lucene.maxClauseCount + See FSDirectory for getter/setter methods: + disableLuceneLocks + (Daniel Naber) + + 8. 
Fixed FieldCacheImpl to use user-provided IntParser and FloatParser,
+ instead of using Integer and Float classes for parsing.
+ (Yonik Seeley via Otis Gospodnetic)
+
+ 9. Expert level search routines returning TopDocs and TopFieldDocs
+ no longer normalize scores. This also fixes bugs related to
+ MultiSearchers and score sorting/normalization.
+ (Luc Vanlerberghe via Yonik Seeley, LUCENE-469)
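+
+A sketch of the migration described in item 7 above, using setters that
+replace the removed system properties (the index path and values are
+illustrative only):
+
+    import org.apache.lucene.analysis.standard.StandardAnalyzer;
+    import org.apache.lucene.index.IndexWriter;
+    import org.apache.lucene.search.BooleanQuery;
+
+    public class SetterMigration {
+      public static void main(String[] args) throws Exception {
+        // was -Dorg.apache.lucene.maxClauseCount=4096
+        BooleanQuery.setMaxClauseCount(4096);
+
+        IndexWriter writer =
+            new IndexWriter("/tmp/index", new StandardAnalyzer(), true);
+        writer.setMergeFactor(20);         // was org.apache.lucene.mergeFactor
+        writer.setMaxMergeDocs(100000);    // was org.apache.lucene.maxMergeDocs
+        writer.setMaxFieldLength(10000);   // was org.apache.lucene.maxFieldLength
+        writer.setTermIndexInterval(128);  // was org.apache.lucene.termIndexInterval
+        writer.close();
+      }
+    }
+
+The lock timeouts have no instance setters yet in this release; see the
+corresponding getter/setter additions noted under Release 2.0.0 below.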
+
+New features
+
+ 1. Added support for stored compressed fields (patch #31149)
+ (Bernhard Messer via Christoph)
+
+ 2. Added support for binary stored fields (patch #29370)
+ (Drew Farris and Bernhard Messer via Christoph)
+
+ 3. Added support for position and offset information in term vectors
+ (patch #18927). (Grant Ingersoll & Christoph)
+
+ 4. A new class DateTools has been added. It allows you to format dates
+ in a readable format adequate for indexing. Unlike the existing
+ DateField class DateTools can cope with dates before 1970 and it
+ forces you to specify the desired date resolution (e.g. month, day,
+ second, ...) which can make RangeQuerys on those fields more efficient
+ (see the sketch after this list).
+ (Daniel Naber)
+
+ 5. QueryParser now correctly works with Analyzers that can return more
+ than one token per position. For example, a query "+fast +car"
+ would be parsed as "+fast +(car automobile)" if the Analyzer
+ returns "car" and "automobile" at the same position whenever it
+ finds "car" (Patch #23307).
+ (Pierrick Brihaye, Daniel Naber)
+
+ 6. Permit unbuffered Directory implementations (e.g., using mmap).
+ InputStream is replaced by the new classes IndexInput and
+ BufferedIndexInput. OutputStream is replaced by the new classes
+ IndexOutput and BufferedIndexOutput. InputStream and OutputStream
+ are now deprecated and FSDirectory is now subclassable. (cutting)
+
+ 7. Add native Directory and TermDocs implementations that work under
+ GCJ. These require GCC 3.4.0 or later and have only been tested
+ on Linux. Use 'ant gcj' to build demo applications. (cutting)
+
+ 8. Add MMapDirectory, which uses nio to mmap input files. This is
+ still somewhat slower than FSDirectory. However it uses less
+ memory per query term, since a new buffer is not allocated per
+ term, which may help applications which use, e.g., wildcard
+ queries. It may also someday be faster. (cutting & Paul Elschot)
+
+ 9. Added javadocs-internal to build.xml - bug #30360
+ (Paul Elschot via Otis)
+
+10. Added RangeFilter, a more generically useful filter than DateFilter.
+ (Chris M Hostetter via Erik)
+
+11. Added NumberTools, a utility class for indexing numeric fields.
+ (adapted from code contributed by Matt Quail; committed by Erik)
+
+12. Added public static IndexReader.main(String[] args) method.
+ IndexReader can now be used directly at command line level
+ to list and optionally extract the individual files from an existing
+ compound index file.
+ (adapted from code contributed by Garrett Rooney; committed by Bernhard)
+
+13. Add IndexWriter.setTermIndexInterval() method. See javadocs.
+ (Doug Cutting)
+
+14. Added LucenePackage, whose static get() method returns java.util.Package,
+ which lets the caller get the Lucene version information specified in
+ the Lucene Jar.
+ (Doug Cutting via Otis)
+
+15. Added Hits.iterator() method and corresponding HitIterator and Hit objects.
+ This provides standard java.util.Iterator iteration over Hits.
+ Each call to the iterator's next() method returns a Hit object.
+ (Jeremy Rayner via Erik)
+
+16. Add ParallelReader, an IndexReader that combines separate indexes
+ over different fields into a single virtual index. (Doug Cutting)
+
+17. Add IntParser and FloatParser interfaces to FieldCache, so that
+ fields in arbitrary formats can be cached as ints and floats.
+ (Doug Cutting)
+
+18. Added class org.apache.lucene.index.IndexModifier which combines
+ IndexWriter and IndexReader, so you can add and delete documents without
+ worrying about synchronization/locking issues.
+ (Daniel Naber)
+
+19. Lucene can now be used inside an unsigned applet, as Lucene's access
+ to system properties will not cause a SecurityException anymore.
+ (Jon Schuster via Daniel Naber, bug #34359)
+
+20. Added a new class MatchAllDocsQuery that matches all documents.
+ (John Wang via Daniel Naber, bug #34946)
+
+21. Added ability to omit norms on a per field basis to decrease
+ index size and memory consumption when there are many indexed fields.
+ See Field.setOmitNorms().
+ (Yonik Seeley, LUCENE-448)
+
+22. Added NullFragmenter to contrib/highlighter, which is useful for
+ highlighting entire documents or fields.
+ (Erik Hatcher)
+
+23. Added regular expression queries, RegexQuery and SpanRegexQuery.
+ Note the same term enumeration caveats apply with these queries as
+ apply to WildcardQuery and other term expanding queries.
+ These two new queries are not currently supported via QueryParser.
+ (Erik Hatcher)
+
+24. Added ConstantScoreQuery which wraps a filter and produces a score
+ equal to the query boost for every matching document.
+ (Yonik Seeley, LUCENE-383)
+
+25. Added ConstantScoreRangeQuery which produces a constant score for
+ every document in the range. One advantage over a normal RangeQuery
+ is that it doesn't expand to a BooleanQuery and thus doesn't have a maximum
+ number of terms the range can cover. Both endpoints may also be open.
+ (Yonik Seeley, LUCENE-383)
+
+26. Added ability to specify a minimum number of optional clauses that
+ must match in a BooleanQuery. See BooleanQuery.setMinimumNumberShouldMatch().
+ (Paul Elschot, Chris Hostetter via Yonik Seeley, LUCENE-395)
+
+27. Added DisjunctionMaxQuery which provides the maximum score across its clauses.
+ It's very useful for searching across multiple fields.
+ (Chuck Williams via Yonik Seeley, LUCENE-323)
+
+28. New class ISOLatin1AccentFilter that replaces accented characters in the ISO
+ Latin 1 character set by their unaccented equivalent.
+ (Sven Duzont via Erik Hatcher)
+
+29. New class KeywordAnalyzer. "Tokenizes" the entire stream as a single token.
+ This is useful for data like zip codes, ids, and some product names.
+ (Erik Hatcher)
+
+30. Copied LengthFilter from contrib area to core. Removes words that are too
+ long and too short from the stream.
+ (David Spencer via Otis and Daniel)
+
+31. Added getPositionIncrementGap(String fieldName) to Analyzer. This allows
+ custom analyzers to put gaps between Field instances with the same field
+ name, preventing phrase or span queries crossing these boundaries. The
+ default implementation issues a gap of 0, allowing the default token
+ position increment of 1 to put the next field's first token into a
+ successive position.
+ (Erik Hatcher, with advice from Yonik)
+
+32. StopFilter can now ignore case when checking for stop words.
+ (Grant Ingersoll via Yonik, LUCENE-248)
+
+33. Add TopDocCollector and TopFieldDocCollector. These simplify the
+ implementation of hit collectors that collect only the
+ top-scoring or top-sorting hits.
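+
+A sketch combining items 4 and 25: dates encoded with DateTools sort
+lexicographically, so a ConstantScoreRangeQuery over the encoded values
+selects a date range without expanding to a BooleanQuery. The field name
+and date values are illustrative only, and the ConstantScoreRangeQuery
+constructor arguments follow the javadocs (treat as approximate):
+
+    import java.util.Date;
+    import org.apache.lucene.document.DateTools;
+    import org.apache.lucene.search.ConstantScoreRangeQuery;
+    import org.apache.lucene.search.Query;
+
+    public class DateRangeExample {
+      public static void main(String[] args) {
+        // Index side: encode the date at day resolution, e.g. "20050817".
+        String day = DateTools.dateToString(new Date(), DateTools.Resolution.DAY);
+        System.out.println("indexed as: " + day);
+
+        // Search side: constant-scoring range over the encoded values;
+        // both endpoints are inclusive here, and either may be null (open).
+        Query query = new ConstantScoreRangeQuery(
+            "modified", "20050101", "20051231", true, true);
+        System.out.println(query);
+      }
+    }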
+
+API Changes
+
+ 1. Several methods and fields have been deprecated. The API documentation
+ contains information about the recommended replacements. It is planned
+ that most of the deprecated methods and fields will be removed in
+ Lucene 2.0. (Daniel Naber)
+
+ 2. The Russian and the German analyzers have been moved to contrib/analyzers.
+ Also, the WordlistLoader class has been moved one level up in the
+ hierarchy and is now org.apache.lucene.analysis.WordlistLoader
+ (Daniel Naber)
+
+ 3. The API contained methods that declared to throw an IOException
+ but that never did this. These declarations have been removed. If
+ your code tries to catch these exceptions you might need to remove
+ those catch clauses to avoid compile errors. (Daniel Naber)
+
+ 4. Add a serializable Parameter Class to standardize parameter enum
+ classes in BooleanClause and Field. (Christoph)
+
+ 5. Added rewrite methods to all SpanQuery subclasses that nest other SpanQuerys.
+ This allows custom SpanQuery subclasses that rewrite (for term expansion, for
+ example) to nest within the built-in SpanQuery classes successfully.
+
+Bug fixes
+
+ 1. The JSP demo page (src/jsp/results.jsp) now properly closes the
+ IndexSearcher it opens. (Daniel Naber)
+
+ 2. Fixed a bug in IndexWriter.addIndexes(IndexReader[] readers) that
+ prevented deletion of obsolete segments. (Christoph Goller)
+
+ 3. Fix in FieldInfos to avoid the return of an extra blank field in
+ IndexReader.getFieldNames() (Patch #19058). (Mark Harwood via Bernhard)
+
+ 4. Some combinations of BooleanQuery and MultiPhraseQuery (formerly
+ PhrasePrefixQuery) could provoke UnsupportedOperationException
+ (bug #33161). (Rhett Sutphin via Daniel Naber)
+
+ 5. Small bug in skipTo of ConjunctionScorer that caused NullPointerException
+ if skipTo() was called without prior call to next() fixed. (Christoph)
+
+ 6. Disable Similarity.coord() in the scoring of most automatically
+ generated boolean queries. The coord() score factor is
+ appropriate when clauses are independently specified by a user,
+ but is usually not appropriate when clauses are generated
+ automatically, e.g., by a fuzzy, wildcard or range query. Matches
+ on such automatically generated queries are no longer penalized
+ for not matching all terms. (Doug Cutting, Patch #33472)
+
+ 7. Getting a lock file with Lock.obtain(long) was supposed to wait for
+ a given amount of milliseconds, but this didn't work.
+ (John Wang via Daniel Naber, Bug #33799)
+
+ 8. Fix FSDirectory.createOutput() to always create new files.
+ Previously, existing files were overwritten, and an index could be
+ corrupted when the old version of a file was longer than the new.
+ Now any existing file is first removed. (Doug Cutting)
+
+ 9. Fix BooleanQuery containing nested SpanTermQuery's, which previously
+ could return an incorrect number of hits.
+ (Reece Wilton via Erik Hatcher, Bug #35157)
+
+10. Fix NullPointerException that could occur with a MultiPhraseQuery
+ inside a BooleanQuery.
+ (Hans Hjelm and Scotty Allen via Daniel Naber, Bug #35626)
+
+11. Fixed SnowballFilter to pass through the position increment from
+ the original token.
+ (Yonik Seeley via Erik Hatcher, LUCENE-437)
+
+12. Added Unicode range of Korean characters to StandardTokenizer,
+ grouping contiguous characters into a token rather than one token
+ per character. This change also changes the token type to "<CJ>"
+ for Chinese and Japanese character tokens (previously it was "<CJK>").
+ (Cheolgoo Kang via Otis and Erik, LUCENE-444 and LUCENE-461)
+
+13.
FieldsReader now looks at FieldInfo.storeOffsetWithTermVector and
+ FieldInfo.storePositionWithTermVector and creates the Field with
+ correct TermVector parameter.
+ (Frank Steinmann via Bernhard, LUCENE-455)
+
+14. Fixed WildcardQuery to prevent "cat" matching "ca??".
+ (Xiaozheng Ma via Bernhard, LUCENE-306)
+
+15. Fixed a bug where MultiSearcher and ParallelMultiSearcher could
+ change the sort order when sorting by string for documents without
+ a value for the sort field.
+ (Luc Vanlerberghe via Yonik, LUCENE-453)
+
+16. Fixed a sorting problem with MultiSearchers that can lead to
+ missing or duplicate docs due to equal docs sorting in an arbitrary order.
+ (Yonik Seeley, LUCENE-456)
+
+17. A single hit using the expert level sorted search methods
+ resulted in the score not being normalized.
+ (Yonik Seeley, LUCENE-462)
+
+18. Fixed inefficient memory usage when loading an index into RAMDirectory.
+ (Volodymyr Bychkoviak via Bernhard, LUCENE-475)
+
+19. Corrected term offsets returned by ChineseTokenizer.
+ (Ray Tsang via Erik Hatcher, LUCENE-324)
+
+20. Fixed MultiReader.undeleteAll() to correctly update numDocs.
+ (Robert Kirchgessner via Doug Cutting, LUCENE-479)
+
+21. Race condition in IndexReader.getCurrentVersion() and isCurrent()
+ fixed by acquiring the commit lock.
+ (Luc Vanlerberghe via Yonik Seeley, LUCENE-481)
+
+22. IndexWriter.setMaxBufferedDocs(1) didn't have the expected effect;
+ this has now been fixed. (Daniel Naber)
+
+23. Fixed QueryParser when called with a date in local form like
+ "[1/16/2000 TO 1/18/2000]". This query did not include the documents
+ of 1/18/2000, i.e. the last day was not included. (Daniel Naber)
+
+24. Removed sorting constraint that threw an exception if there were
+ not yet any values for the sort field (Yonik Seeley, LUCENE-374)
+
+Optimizations
+
+ 1. Disk usage (peak requirements during indexing and optimization)
+ in case of compound file format has been improved.
+ (Bernhard, Dmitry, and Christoph)
+
+ 2. Optimize the performance of certain uses of BooleanScorer,
+ TermScorer and IndexSearcher. In particular, a BooleanQuery
+ composed of TermQuery, with not all terms required, that returns a
+ TopDocs (e.g., through a Hits with no Sort specified) runs much
+ faster. (cutting)
+
+ 3. Removed synchronization from reading of term vectors with an
+ IndexReader (Patch #30736). (Bernhard Messer via Christoph)
+
+ 4. Optimize term-dictionary lookup to allocate far fewer terms when
+ scanning for the matching term. This speeds searches involving
+ low-frequency terms, where the cost of dictionary lookup can be
+ significant. (cutting)
+
+ 5. Optimize fuzzy queries so the standard fuzzy queries with a prefix
+ of 0 now run 20-50% faster (Patch #31882).
+ (Jonathan Hager via Daniel Naber)
+
+ 6. A version of BooleanScorer (BooleanScorer2) was added that delivers
+ documents in increasing order and implements skipTo. For queries
+ with required or forbidden clauses it may be faster than the old
+ BooleanScorer, for BooleanQueries consisting only of optional
+ clauses it is probably slower. The new BooleanScorer is now the
+ default. (Patch 31785 by Paul Elschot via Christoph)
+
+ 7. Use uncached access to norms when merging to reduce RAM usage.
+ (Bug #32847). (Doug Cutting)
+
+ 8. Don't read term index when random-access is not required. This
+ reduces time to open IndexReaders and they use less memory when
+ random access is not required, e.g., when merging segments. The
+ term index is now read into memory lazily at the first
+ random-access.
(Doug Cutting) + + 9. Optimize IndexWriter.addIndexes(Directory[]) when the number of + added indexes is larger than mergeFactor. Previously this could + result in quadratic performance. Now performance is n log(n). + (Doug Cutting) + +10. Speed up the creation of TermEnum for indices with multiple + segments and deleted documents, and thus speed up PrefixQuery, + RangeQuery, WildcardQuery, FuzzyQuery, RangeFilter, DateFilter, + and sorting the first time on a field. + (Yonik Seeley, LUCENE-454) + +11. Optimized and generalized 32 bit floating point to byte + (custom 8 bit floating point) conversions. Increased the speed of + Similarity.encodeNorm() anywhere from 10% to 250%, depending on the JVM. + (Yonik Seeley, LUCENE-467) + +Infrastructure + + 1. Lucene's source code repository has converted from CVS to + Subversion. The new repository is at + http://svn.apache.org/repos/asf/lucene/java/trunk + + 2. Lucene's issue tracker has migrated from Bugzilla to JIRA. + Lucene's JIRA is at http://issues.apache.org/jira/browse/LUCENE + The old issues are still available at + http://issues.apache.org/bugzilla/show_bug.cgi?id=xxxx + (use the bug number instead of xxxx) + + +1.4.3 + + 1. The JSP demo page (src/jsp/results.jsp) now properly escapes error + messages which might contain user input (e.g. error messages about + query parsing). If you used that page as a starting point for your + own code please make sure your code also properly escapes HTML + characters from user input in order to avoid so-called cross site + scripting attacks. (Daniel Naber) + + 2. QueryParser changes in 1.4.2 broke the QueryParser API. Now the old + API is supported again. (Christoph) + + +1.4.2 + + 1. Fixed bug #31241: Sorting could lead to incorrect results (documents + missing, others duplicated) if the sort keys were not unique and there + were more than 100 matches. (Daniel Naber) + + 2. Memory leak in Sort code (bug #31240) eliminated. + (Rafal Krzewski via Christoph and Daniel) + + 3. FuzzyQuery now takes an additional parameter that specifies the + minimum similarity that is required for a term to match the query. + The QueryParser syntax for this is term~x, where x is a floating + point number >= 0 and < 1 (a bigger number means that a higher + similarity is required). Furthermore, a prefix can be specified + for FuzzyQuerys so that only those terms are considered similar that + start with this prefix. This can speed up FuzzyQuery greatly. + (Daniel Naber, Christoph Goller) + + 4. PhraseQuery and PhrasePrefixQuery now allow the explicit specification + of relative positions. (Christoph Goller) + + 5. QueryParser changes: Fix for ArrayIndexOutOfBoundsExceptions + (patch #9110); some unused method parameters removed; The ability + to specify a minimum similarity for FuzzyQuery has been added. + (Christoph Goller) + + 6. IndexSearcher optimization: a new ScoreDoc is no longer allocated + for every non-zero-scoring hit. This makes 'OR' queries that + contain common terms substantially faster. (cutting) + + +1.4.1 + + 1. Fixed a performance bug in hit sorting code, where values were not + correctly cached. (Aviran via cutting) + + 2. Fixed errors in file format documentation. (Daniel Naber) + + +1.4 final + + 1. Added "an" to the list of stop words in StopAnalyzer, to complement + the existing "a" there. Fix for bug 28960 + (http://issues.apache.org/bugzilla/show_bug.cgi?id=28960). (Otis) + + 2. Added new class FieldCache to manage in-memory caches of field term + values. (Tim Jones) + + 3. 
Added overloaded getFieldQuery method to QueryParser which
+ accepts the slop factor specified for the phrase (or the default
+ phrase slop for the QueryParser instance). This allows overriding
+ methods to replace a PhraseQuery with a SpanNearQuery instead,
+ keeping the proper slop factor. (Erik Hatcher)
+
+ 4. Changed the encoding of GermanAnalyzer.java and GermanStemmer.java to
+ UTF-8 and changed the build encoding to UTF-8, to make changed files
+ compile. (Otis Gospodnetic)
+
+ 5. Removed synchronization from term lookup under IndexReader methods
+ termFreq(), termDocs() or termPositions() to improve
+ multi-threaded performance. (cutting)
+
+ 6. Fix a bug where obsolete segment files were not deleted on Win32.
+
+
+1.4 RC3
+
+ 1. Fixed several search bugs introduced by the skipTo() changes in
+ release 1.4RC1. The index file format was changed a bit, so
+ collections must be re-indexed to take advantage of the skipTo()
+ optimizations. (Christoph Goller)
+
+ 2. Added new Document methods, removeField() and removeFields().
+ (Christoph Goller)
+
+ 3. Fixed inconsistencies with index closing. Indexes and directories
+ are now only closed automatically by Lucene when Lucene opened
+ them automatically. (Christoph Goller)
+
+ 4. Added new class: FilteredQuery. (Tim Jones)
+
+ 5. Added a new SortField type for custom comparators. (Tim Jones)
+
+ 6. Lock obtain timed out message now displays the full path to the lock
+ file. (Daniel Naber via Erik)
+
+ 7. Fixed a bug in SpanNearQuery when ordered. (Paul Elschot via cutting)
+
+ 8. Fixed so that FSDirectory's locks still work when the
+ java.io.tmpdir system property is null. (cutting)
+
+ 9. Changed FilteredTermEnum's constructor to take no parameters,
+ as the parameters were ignored anyway (bug #28858).
+
+1.4 RC2
+
+ 1. GermanAnalyzer now throws an exception if the stopword file
+ cannot be found (bug #27987). It now uses LowerCaseFilter
+ (bug #18410) (Daniel Naber via Otis, Erik)
+
+ 2. Fixed a few bugs in the file format documentation. (cutting)
+
+
+1.4 RC1
+
+ 1. Changed the format of the .tis file, so that:
+
+ - it has a format version number, which makes it easier to
+ back-compatibly change file formats in the future.
+
+ - the term count is now stored as a long. This was the one aspect
+ of Lucene's file formats which limited index size.
+
+ - a few internal index parameters are now stored in the index, so
+ that they can (in theory) now be changed from index to index,
+ although there is not yet an API to do so.
+
+ These changes are back compatible. The new code can read old
+ indexes. But old code will not be able to read new indexes. (cutting)
+
+ 2. Added an optimized implementation of TermDocs.skipTo(). A skip
+ table is now stored for each term in the .frq file. This only
+ adds a percent or two to overall index size, but can substantially
+ speed up many searches. (cutting)
+
+ 3. Restructured the Scorer API and all Scorer implementations to take
+ advantage of an optimized TermDocs.skipTo() implementation. In
+ particular, PhraseQuerys and conjunctive BooleanQuerys are
+ faster when one clause has substantially fewer matches than the
+ others. (A conjunctive BooleanQuery is a BooleanQuery where all
+ clauses are required.) (cutting)
+
+ 4. Added new class ParallelMultiSearcher. Combined with
+ RemoteSearchable this makes it easy to implement distributed
+ search systems. (Jean-Francois Halleux via cutting)
+
+ 5. Added support for hit sorting. Results may now be sorted by any
+ indexed field.
For details see the javadoc for + Searcher#search(Query, Sort). (Tim Jones via Cutting) + + 6. Changed FSDirectory to auto-create a full directory tree that it + needs by using mkdirs() instead of mkdir(). (Mladen Turk via Otis) + + 7. Added a new span-based query API. This implements, among other + things, nested phrases. See javadocs for details. (Doug Cutting) + + 8. Added new method Query.getSimilarity(Searcher), and changed + scorers to use it. This permits one to subclass a Query class so + that it can specify its own Similarity implementation, perhaps + one that delegates through that of the Searcher. (Julien Nioche + via Cutting) + + 9. Added MultiReader, an IndexReader that combines multiple other + IndexReaders. (Cutting) + +10. Added support for term vectors. See Field#isTermVectorStored(). + (Grant Ingersoll, Cutting & Dmitry) + +11. Fixed the old bug with escaping of special characters in query + strings: http://issues.apache.org/bugzilla/show_bug.cgi?id=24665 + (Jean-Francois Halleux via Otis) + +12. Added support for overriding default values for the following, + using system properties: + - default commit lock timeout + - default maxFieldLength + - default maxMergeDocs + - default mergeFactor + - default minMergeDocs + - default write lock timeout + (Otis) + +13. Changed QueryParser.jj to allow '-' and '+' within tokens: + http://issues.apache.org/bugzilla/show_bug.cgi?id=27491 + (Morus Walter via Otis) + +14. Changed so that the compound index format is used by default. + This makes indexing a bit slower, but vastly reduces the chances + of file handle problems. (Cutting) + + +1.3 final + + 1. Added catch of BooleanQuery$TooManyClauses in QueryParser to + throw ParseException instead. (Erik Hatcher) + + 2. Fixed a NullPointerException in Query.explain(). (Doug Cutting) + + 3. Added a new method IndexReader.setNorm(), that permits one to + alter the boosting of fields after an index is created. + + 4. Distinguish between the final position and length when indexing a + field. The length is now defined as the total number of tokens, + instead of the final position, as it was previously. Length is + used for score normalization (Similarity.lengthNorm()) and for + controlling memory usage (IndexWriter.maxFieldLength). In both of + these cases, the total number of tokens is a better value to use + than the final token position. Position is used in phrase + searching (see PhraseQuery and Token.setPositionIncrement()). + + 5. Fix StandardTokenizer's handling of CJK characters (Chinese, + Japanese and Korean ideograms). Previously contiguous sequences + were combined in a single token, which is not very useful. Now + each ideogram generates a separate token, which is more useful. + + +1.3 RC3 + + 1. Added minMergeDocs in IndexWriter. This can be raised to speed + indexing without altering the number of files, but only using more + memory. (Julien Nioche via Otis) + + 2. Fix bug #24786, in query rewriting. (bschneeman via Cutting) + + 3. Fix bug #16952, in demo HTML parser, skip comments in + javascript. (Christoph Goller) + + 4. Fix bug #19253, in demo HTML parser, add whitespace as needed to + output (Daniel Naber via Christoph Goller) + + 5. Fix bug #24301, in demo HTML parser, long titles no longer + hang things. (Christoph Goller) + + 6. Fix bug #23534, Replace use of file timestamp of segments file + with an index version number stored in the segments file. This + resolves problems when running on file systems with low-resolution + timestamps, e.g., HFS under MacOS X. 
(Christoph Goller) + + 7. Fix QueryParser so that TokenMgrError is not thrown, only + ParseException. (Erik Hatcher) + + 8. Fix some bugs introduced by change 11 of RC2. (Christoph Goller) + + 9. Fixed a problem compiling TestRussianStem. (Christoph Goller) + +10. Cleaned up some build stuff. (Erik Hatcher) + + +1.3 RC2 + + 1. Added getFieldNames(boolean) to IndexReader, SegmentReader, and + SegmentsReader. (Julien Nioche via otis) + + 2. Changed file locking to place lock files in + System.getProperty("java.io.tmpdir"), where all users are + permitted to write files. This way folks can open and correctly + lock indexes which are read-only to them. + + 3. IndexWriter: added a new method, addDocument(Document, Analyzer), + permitting one to easily use different analyzers for different + documents in the same index. + + 4. Minor enhancements to FuzzyTermEnum. + (Christoph Goller via Otis) + + 5. PriorityQueue: added insert(Object) method and adjusted IndexSearcher + and MultiIndexSearcher to use it. + (Christoph Goller via Otis) + + 6. Fixed a bug in IndexWriter that returned incorrect docCount(). + (Christoph Goller via Otis) + + 7. Fixed SegmentsReader to eliminate the confusing and slightly different + behaviour of TermEnum when dealing with an enumeration of all terms, + versus an enumeration starting from a specific term. + This patch also fixes incorrect term document frequencies when the same term + is present in multiple segments. + (Christoph Goller via Otis) + + 8. Added CachingWrapperFilter and PerFieldAnalyzerWrapper. (Erik Hatcher) + + 9. Added support for the new "compound file" index format (Dmitry + Serebrennikov) + +10. Added Locale setting to QueryParser, for use by date range parsing. + +11. Changed IndexReader so that it can be subclassed by classes + outside of its package. Previously it had package-private + abstract methods. Also modified the index merging code so that it + can work on an arbitrary IndexReader implementation, and added a + new method, IndexWriter.addIndexes(IndexReader[]), to take + advantage of this. (cutting) + +12. Added a limit to the number of clauses which may be added to a + BooleanQuery. The default limit is 1024 clauses. This should + stop most OutOfMemoryExceptions by prefix, wildcard and fuzzy + queries which run amok. (cutting) + +13. Add new method: IndexReader.undeleteAll(). This undeletes all + deleted documents which still remain in the index. (cutting) + + +1.3 RC1 + + 1. Fixed PriorityQueue's clear() method. + Fix for bug 9454, http://nagoya.apache.org/bugzilla/show_bug.cgi?id=9454 + (Matthijs Bomhoff via otis) + + 2. Changed StandardTokenizer.jj grammar for EMAIL tokens. + Fix for bug 9015, http://nagoya.apache.org/bugzilla/show_bug.cgi?id=9015 + (Dale Anson via otis) + + 3. Added the ability to disable lock creation by using disableLuceneLocks + system property. This is useful for read-only media, such as CD-ROMs. + (otis) + + 4. Added id method to Hits to be able to access the index global id. + Required for sorting options. + (carlson) + + 5. Added support for new range query syntax to QueryParser.jj. + (briangoetz) + + 6. Added the ability to retrieve HTML documents' META tag values to + HTMLParser.jj. + (Mark Harwood via otis) + + 7. Modified QueryParser to make it possible to programmatically specify the + default Boolean operator (OR or AND). + (Péter Halácsy via otis) + + 8. Made many search methods and classes non-final, per requests. + This includes IndexWriter and IndexSearcher, among others. + (cutting) + + 9. 
Added class RemoteSearchable, providing support for remote
+ searching via RMI. The test class RemoteSearchableTest.java
+ provides an example of how this can be used. (cutting)
+
+ 10. Added PhrasePrefixQuery (and supporting MultipleTermPositions). The
+ test class TestPhrasePrefixQuery provides the usage example.
+ (Anders Nielsen via otis)
+
+ 11. Changed the German stemming algorithm to ignore case while
+ stripping. The new algorithm is faster and produces more equal
+ stems from nouns and verbs derived from the same word.
+ (gschwarz)
+
+ 12. Added support for boosting the score of documents and fields via
+ the new methods Document.setBoost(float) and Field.setBoost(float).
+
+ Note: This changes the encoding of an indexed value. Indexes
+ should be re-created from scratch in order for search scores to
+ be correct. With the new code and an old index, searches will
+ yield very large scores for shorter fields, and very small scores
+ for longer fields. Once the index is re-created, scores will be
+ as before. (cutting)
+
+ 13. Added new method Token.setPositionIncrement().
+
+ This permits, for the purpose of phrase searching, placing
+ multiple terms in a single position. This is useful with
+ stemmers that produce multiple possible stems for a word.
+
+ This also permits the introduction of gaps between terms, so that
+ terms which are adjacent in a token stream will not be matched by
+ an exact phrase query. This makes it possible, e.g., to build
+ an analyzer where phrases are not matched over stop words which
+ have been removed.
+
+ Finally, repeating a token with an increment of zero can also be
+ used to boost scores of matches on that token (see the sketch
+ after this list). (cutting)
+
+ 14. Added new Filter class, QueryFilter. This constrains search
+ results to only match those which also match a provided query.
+ Results are cached, so that searches after the first on the same
+ index using this filter are very fast.
+
+ This could be used, for example, with a RangeQuery on a formatted
+ date field to implement date filtering. One could re-use a
+ single QueryFilter that matches, e.g., only documents modified
+ within the last week. The QueryFilter and RangeQuery would only
+ need to be reconstructed once per day. (cutting)
+
+ 15. Added a new IndexWriter method, getAnalyzer(). This returns the
+ analyzer used when adding documents to this index. (cutting)
+
+ 16. Fixed a bug with IndexReader.lastModified(). Before, document
+ deletion did not update this. Now it does. (cutting)
+
+ 17. Added Russian Analyzer.
+ (Boris Okner via otis)
+
+ 18. Added a public, extensible scoring API. For details, see the
+ javadoc for org.apache.lucene.search.Similarity.
+
+ 19. Fixed return of Hits.id() from float to int. (Terry Steichen via Peter).
+
+ 20. Added getFieldNames() to IndexReader and Segment(s)Reader classes.
+ (Peter Mularien via otis)
+
+ 21. Added getFields(String) and getValues(String) methods.
+ Contributed by Rasik Pandey on 2002-10-09
+ (Rasik Pandey via otis)
+
+ 22. Revised internal search APIs. Changes include:
+
+ a. Queries are no longer modified during a search. This makes
+ it possible, e.g., to reuse the same query instance with
+ multiple indexes from multiple threads.
+
+ b. Term-expanding queries (e.g. PrefixQuery, WildcardQuery,
+ etc.) now work correctly with MultiSearcher, fixing bugs 12619
+ and 12667.
+
+ c. Boosting BooleanQuery's now works, and is supported by the
+ query parser (problem reported by Lee Mallabone). Thus a query
+ like "(+foo +bar)^2 +baz" is now supported and equivalent to
+ "(+foo^2 +bar^2) +baz".
+
+ d. New method: Query.rewrite(IndexReader). This permits a
+ query to re-write itself as an alternate, more primitive query.
+ Most of the term-expanding query classes (PrefixQuery,
+ WildcardQuery, etc.) are now implemented using this method.
+
+ e. New method: Searchable.explain(Query q, int doc). This
+ returns an Explanation instance that describes how a particular
+ document is scored against a query. An explanation can be
+ displayed as either plain text, with the toString() method, or
+ as HTML, with the toHtml() method. Note that computing an
+ explanation is as expensive as executing the query over the
+ entire index. This is intended to be used in developing
+ Similarity implementations, and, for good performance, should
+ not be displayed with every hit.
+
+ f. Scorer and Weight are public, not package protected. It is now
+ possible for someone to write a Scorer implementation that is
+ not in the org.apache.lucene.search package. This is still
+ fairly advanced programming, and I don't expect anyone to do
+ this anytime soon, but at least now it is possible.
+
+ g. Added public accessors to the primitive query classes
+ (TermQuery, PhraseQuery and BooleanQuery), permitting access to
+ their terms and clauses.
+
+ Caution: These are extensive changes and they have not yet been
+ tested extensively. Bug reports are appreciated.
+ (cutting)
+
+ 23. Added convenience RAMDirectory constructors taking File and String
+ arguments, for easy FSDirectory to RAMDirectory conversion.
+ (otis)
+
+ 24. Added code for manual renaming of files in FSDirectory, since it
+ has been reported that java.io.File's renameTo(File) method sometimes
+ fails on Windows JVMs.
+ (Matt Tucker via otis)
+
+ 25. Refactored QueryParser to make it easier for people to extend it.
+ Added the ability to automatically lower-case Wildcard terms in
+ the QueryParser.
+ (Tatu Saloranta via otis)
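+
+A sketch of item 13: a filter that injects "automobile" at the same
+position as "car" by giving the injected token a position increment of
+zero. The class and terms are made up for the example, and it is written
+against the Token/TokenFilter API of this era, so check the exact
+constructors against the javadocs:
+
+    import java.io.IOException;
+    import org.apache.lucene.analysis.Token;
+    import org.apache.lucene.analysis.TokenFilter;
+    import org.apache.lucene.analysis.TokenStream;
+
+    public class CarSynonymFilter extends TokenFilter {
+      private Token pending; // synonym waiting to be emitted
+
+      public CarSynonymFilter(TokenStream input) {
+        super(input);
+      }
+
+      public Token next() throws IOException {
+        if (pending != null) {
+          Token synonym = pending;
+          pending = null;
+          return synonym;
+        }
+        Token token = input.next();
+        if (token != null && token.termText().equals("car")) {
+          pending = new Token("automobile", token.startOffset(), token.endOffset());
+          pending.setPositionIncrement(0); // same position as "car"
+        }
+        return token;
+      }
+    }
+
+With this filter a phrase query for "fast automobile" also matches text
+that reads "fast car".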
Thus a query
+    like "(+foo +bar)^2 +baz" is now supported and equivalent to
+    "(+foo^2 +bar^2) +baz".
+
+    d. New method: Query.rewrite(IndexReader). This permits a
+    query to re-write itself as an alternate, more primitive query.
+    Most of the term-expanding query classes (PrefixQuery,
+    WildcardQuery, etc.) are now implemented using this method.
+
+    e. New method: Searchable.explain(Query q, int doc). This
+    returns an Explanation instance that describes how a particular
+    document is scored against a query. An explanation can be
+    displayed as either plain text, with the toString() method, or
+    as HTML, with the toHtml() method. Note that computing an
+    explanation is as expensive as executing the query over the
+    entire index. This is intended to be used in developing
+    Similarity implementations, and, for good performance, should
+    not be displayed with every hit.
+
+    f. Scorer and Weight are public, not package protected. It is now
+    possible for someone to write a Scorer implementation that is
+    not in the org.apache.lucene.search package. This is still
+    fairly advanced programming, and I don't expect anyone to do
+    this anytime soon, but at least now it is possible.
+
+    g. Added public accessors to the primitive query classes
+    (TermQuery, PhraseQuery and BooleanQuery), permitting access to
+    their terms and clauses.
+
+    Caution: These are extensive changes and they have not yet been
+    tested extensively. Bug reports are appreciated.
+    (cutting)
+
+ 23. Added convenience RAMDirectory constructors taking File and String
+    arguments, for easy FSDirectory to RAMDirectory conversion.
+    (otis)
+
+ 24. Added code for manual renaming of files in FSDirectory, since it
+    has been reported that java.io.File's renameTo(File) method sometimes
+    fails on Windows JVMs.
+    (Matt Tucker via otis)
+
+ 25. Refactored QueryParser to make it easier for people to extend it.
+    Added the ability to automatically lower-case Wildcard terms in
+    the QueryParser.
+    (Tatu Saloranta via otis)
+
+
+1.2 RC6
+
+ 1. Changed QueryParser.jj to have "?" be a special character,
+    allowing it to be used as a wildcard term. Updated TestWildcard
+    unit test also. (Ralf Hettesheimer via carlson)
+
+1.2 RC5
+
+ 1. Renamed build.properties to default.properties and updated
+    the BUILD.txt document to describe how to override the
+    default.property settings without having to edit the file. This
+    brings the build process closer to Scarab's build process.
+    (jon)
+
+ 2. Added MultiFieldQueryParser class. (Kelvin Tan, via otis)
+
+ 3. Updated "powered by" links. (otis)
+
+ 4. Fixed instruction for setting up JavaCC - Bug #7017 (otis)
+
+ 5. Added an exception thrown if FSDirectory could not create a directory
+    - Bug #6914 (Eugene Gluzberg via otis)
+
+ 6. Update MultiSearcher, MultiFieldParse, Constants, DateFilter,
+    LowerCaseTokenizer javadoc (otis)
+
+ 7. Added fix to avoid NullPointerException in results.jsp
+    (Mark Hayes via otis)
+
+ 8. Changed Wildcard search to find 0 or more characters instead of 1 or more
+    (Lee Mallabone, via otis)
+
+ 9. Fixed an offset error in GermanStemFilter - Bug #7412
+    (Rodrigo Reyes, via otis)
+
+ 10. Added unit tests for wildcard search and DateFilter (otis)
+
+ 11. Allow co-existence of indexed and non-indexed fields with the same name
+    (cutting/casper, via otis)
+
+ 12. Add escape character to query parser.
+    (briangoetz)
+
+ 13. Applied a patch that ensures that searches that use DateFilter
+    don't throw an exception when no matches are found. (David Smiley, via
+    otis)
+
+ 14.
Fixed bugs in DateFilter and WildcardQuery unit tests. (cutting, otis, carlson)
+
+
+1.2 RC4
+
+ 1. Updated contributions section of website.
+    Add XML Document #3 implementation to Document Section.
+    Also added Term Highlighting to Misc Section. (carlson)
+
+ 2. Fixed NullPointerException for phrase searches containing
+    unindexed terms, introduced in 1.2RC3. (cutting)
+
+ 3. Changed document deletion code to obtain the index write lock,
+    enforcing the fact that document addition and deletion cannot be
+    performed concurrently. (cutting)
+
+ 4. Various documentation cleanups. (otis, acoliver)
+
+ 5. Updated "powered by" links. (cutting, jon)
+
+ 6. Fixed a bug in the GermanStemmer. (Bernhard Messer, via otis)
+
+ 7. Changed Term and Query to implement Serializable. (scottganyo)
+
+ 8. Fixed to never delete indexes added with IndexWriter.addIndexes().
+    (cutting)
+
+ 9. Upgraded to JUnit 3.7. (otis)
+
+1.2 RC3
+
+ 1. IndexWriter: fixed a bug where adding an optimized index to an
+    empty index failed. This was encountered using addIndexes to copy
+    a RAMDirectory index to an FSDirectory.
+
+ 2. RAMDirectory: fixed a bug where RAMInputStream could not read
+    across more than a single buffer boundary.
+
+ 3. Fix query parser so it accepts queries with Unicode characters.
+    (briangoetz)
+
+ 4. Fix query parser so that PrefixQuery is used in preference to
+    WildcardQuery when there's only an asterisk at the end of the
+    term. Previously PrefixQuery would never be used.
+
+ 5. Fix tests so they compile; fix ant file so it compiles tests
+    properly. Added test cases for Analyzers and PriorityQueue.
+
+ 6. Updated demos, added Getting Started documentation. (acoliver)
+
+ 7. Added 'contributions' section to website & docs. (carlson)
+
+ 8. Removed JavaCC from source distribution for copyright reasons.
+    Folks must now download this separately from metamata in order to
+    compile Lucene. (cutting)
+
+ 9. Substantially improved the performance of DateFilter by adding the
+    ability to reuse TermDocs objects. (cutting)
+
+10. Added IndexReader methods:
+    public static boolean indexExists(String directory);
+    public static boolean indexExists(File directory);
+    public static boolean indexExists(Directory directory);
+    public static boolean isLocked(Directory directory);
+    public static void unlock(Directory directory);
+    (cutting, otis)
+
+11. Fixed bugs in GermanAnalyzer (gschwarz)
+
+
+1.2 RC2:
+ - added sources to distribution
+ - removed broken build scripts and libraries from distribution
+ - SegmentsReader: fixed potential race condition
+ - FSDirectory: fixed so that getDirectory(xxx,true) correctly
+   erases the directory contents, even when the directory
+   has already been accessed in this JVM.
+ - RangeQuery: Fix issue where an inclusive range query would
+   include the nearest term in the index above a non-existent
+   specified upper term.
+ - SegmentTermEnum: Fix NullPointerException in clone() method
+   when the Term is null.
+ - JDK 1.1 compatibility fix: disabled lock files for JDK 1.1,
+   since they rely on a feature added in JDK 1.2.
+
+1.2 RC1 (first Apache release):
+ - packages renamed from com.lucene to org.apache.lucene
+ - license switched from LGPL to Apache
+ - ant-only build -- no more makefiles
+ - addition of lock files--now fully thread & process safe
+ - addition of German stemmer
+ - MultiSearcher now supports low-level search API
+ - added RangeQuery, for term-range searching (see the sketch after this list)
+ - Analyzers can choose tokenizer based on field name
+ - misc bug fixes.
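The RangeQuery bullet in the 1.2 RC1 list above is where term-range searching
entered Lucene. As a rough illustration, here is a minimal PyLucene sketch; note
that in the Lucene 3.4 sources imported by this commit the class has long since
been renamed to TermRangeQuery, and the index path and "date" field below are
hypothetical placeholders, not anything taken from the changelog.

    import lucene
    lucene.initVM()

    from lucene import File, SimpleFSDirectory, IndexSearcher, TermRangeQuery

    # open a hypothetical on-disk index, read-only
    directory = SimpleFSDirectory(File("/tmp/test-index"))
    searcher = IndexSearcher(directory, True)

    # inclusive range over the terms of a "date" field (e.g. YYYYMMDD strings)
    query = TermRangeQuery("date", "20110101", "20111231", True, True)
    topDocs = searcher.search(query, 10)
    print "%d matching documents" % topDocs.totalHits

    searcher.close()

Range queries compare terms lexically, so numeric or date values need a
zero-padded, lexically ordered representation at index time for the range to
behave as expected.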
+
+1.01b (last Sourceforge release)
+ . a few bug fixes
+ . new Query Parser
+ . new prefix query (search for "foo*" matches "food")
+
+1.0
+
+This release fixes a few serious bugs and also includes some
+performance optimizations, a stemmer, and a few other minor
+enhancements.
+
+0.04
+
+Lucene now includes a grammar-based tokenizer, StandardTokenizer.
+
+The only tokenizer included in the previous release (LetterTokenizer)
+identified terms consisting entirely of alphabetic characters. The
+new tokenizer uses a regular-expression grammar to identify more
+complex classes of terms, including numbers, acronyms, email
+addresses, etc.
+
+StandardTokenizer serves two purposes:
+
+ 1. It is a much better, general purpose tokenizer for use by
+    applications as is.
+
+    The easiest way for applications to start using
+    StandardTokenizer is to use StandardAnalyzer.
+
+ 2. It provides a good example of grammar-based tokenization.
+
+    If an application has special tokenization requirements, it can
+    implement a custom tokenizer by copying the directory containing
+    the new tokenizer into the application and modifying it
+    accordingly.
+
+0.01
+
+First open source release.
+
+The code has been re-organized into a new package and directory
+structure for this release. It builds OK, but has not been tested
+beyond that since the re-organization.
diff --git a/lucene-java-3.4.0/lucene/JRE_VERSION_MIGRATION.txt b/lucene-java-3.4.0/lucene/JRE_VERSION_MIGRATION.txt
new file mode 100644
index 0000000..5889849
--- /dev/null
+++ b/lucene-java-3.4.0/lucene/JRE_VERSION_MIGRATION.txt
@@ -0,0 +1,36 @@
+If possible, use the same JRE major version at both index and search time.
+When upgrading to a different JRE major version, consider re-indexing.
+
+Different JRE major versions may implement different versions of Unicode,
+which will change the way some parts of Lucene treat your text.
+
+For example: with Java 1.4, LetterTokenizer will split around the character U+02C6,
+but with Java 5 it will not.
+This is because Java 1.4 implements Unicode 3, but Java 5 implements Unicode 4.
+
+For reference, JRE major versions with their corresponding Unicode versions:
+Java 1.4, Unicode 3.0
+Java 5, Unicode 4.0
+Java 6, Unicode 4.0
+Java 7, Unicode 6.0
+
+In general, whether or not you need to re-index largely depends upon the data that
+you are searching, and what was changed in any given Unicode version. For example,
+if you are completely sure that your content is limited to the "Basic Latin" range
+of Unicode, you can safely ignore this.
+
+Special Notes:
+
+LUCENE 2.9 TO 3.0, JAVA 1.4 TO JAVA 5 TRANSITION
+
+* StandardAnalyzer will return the same results under Java 5 as it did under
+Java 1.4. This is because it is largely independent of the runtime JRE for
+Unicode support (with the exception of lowercasing). However, no changes to
+casing have occurred in Unicode 4.0 that affect StandardAnalyzer, so if you are
+using this Analyzer you are NOT affected.
+
+* SimpleAnalyzer, StopAnalyzer, LetterTokenizer, LowerCaseFilter, and
+LowerCaseTokenizer may return different results, along with many other Analyzers
+and TokenStreams in Lucene's contrib area. If you are using one of these
+components, you may be affected.
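To make the LetterTokenizer example above concrete, here is a minimal PyLucene
probe, assuming a built PyLucene 3.4 environment: it tokenizes a string
containing U+02C6 and prints the resulting tokens, whose count depends on the
Unicode tables of the JRE that is running.

    import lucene
    lucene.initVM()

    from lucene import LetterTokenizer, StringReader, Version, CharTermAttribute

    # Under Unicode 4.0+ (Java 5 and later) U+02C6 counts as a letter, so a
    # single token is printed; under Unicode 3.0 (Java 1.4) the tokenizer
    # splits around it and prints "foo" and "bar" separately.
    stream = LetterTokenizer(Version.LUCENE_34, StringReader(u"foo\u02C6bar"))
    term = stream.addAttribute(CharTermAttribute.class_)
    stream.reset()
    while stream.incrementToken():
        print term.toString()
    stream.close()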
+ diff --git a/lucene-java-3.4.0/lucene/LICENSE.txt b/lucene-java-3.4.0/lucene/LICENSE.txt new file mode 100644 index 0000000..1645c87 --- /dev/null +++ b/lucene-java-3.4.0/lucene/LICENSE.txt @@ -0,0 +1,335 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + + +Some code in src/java/org/apache/lucene/util/UnicodeUtil.java was +derived from unicode conversion examples available at +http://www.unicode.org/Public/PROGRAMS/CVTUTF. Here is the copyright +from those sources: + +/* + * Copyright 2001-2004 Unicode, Inc. + * + * Disclaimer + * + * This source code is provided as is by Unicode, Inc. No claims are + * made as to fitness for any particular purpose. No warranties of any + * kind are expressed or implied. The recipient agrees to determine + * applicability of information provided. If this file has been + * purchased on magnetic or optical media from Unicode, Inc., the + * sole remedy for any claim will be exchange of defective media + * within 90 days of receipt. + * + * Limitations on Rights to Redistribute This Code + * + * Unicode, Inc. hereby grants the right to freely use the information + * supplied in this file in the creation of products supporting the + * Unicode Standard, and to make copies of this file in any form + * for internal or external distribution as long as this notice + * remains attached. + */ + + +Some code in src/java/org/apache/lucene/util/ArrayUtil.java was +derived from Python 2.4.2 sources available at +http://www.python.org. Full license is here: + + http://www.python.org/download/releases/2.4.2/license/ + + +Some code in src/java/org/apache/lucene/util/UnicodeUtil.java was +derived from ICU (http://www.icu-project.org) +The full license is available here: + http://source.icu-project.org/repos/icu/icu/trunk/license.html + +/* + * Copyright (C) 1999-2010, International Business Machines + * Corporation and others. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, and/or sell copies of the + * Software, and to permit persons to whom the Software is furnished to do so, + * provided that the above copyright notice(s) and this permission notice appear + * in all copies of the Software and that both the above copyright notice(s) and + * this permission notice appear in supporting documentation. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. + * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE + * LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR + * ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER + * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT + * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ * + * Except as contained in this notice, the name of a copyright holder shall not + * be used in advertising or otherwise to promote the sale, use or other + * dealings in this Software without prior written authorization of the + * copyright holder. + */ + +The following license applies to the Snowball stemmers: + +Copyright (c) 2001, Dr Martin Porter +Copyright (c) 2002, Richard Boulton +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holders nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +The following license applies to the KStemmer: + +Copyright © 2003, +Center for Intelligent Information Retrieval, +University of Massachusetts, Amherst. +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + +3. The names "Center for Intelligent Information Retrieval" and +"University of Massachusetts" must not be used to endorse or promote products +derived from this software without prior written permission. To obtain +permission, contact info@ciir.cs.umass.edu. + +THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF MASSACHUSETTS AND OTHER CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGE.
diff --git a/lucene-java-3.4.0/lucene/NOTICE.txt b/lucene-java-3.4.0/lucene/NOTICE.txt
new file mode 100644
index 0000000..a72da59
--- /dev/null
+++ b/lucene-java-3.4.0/lucene/NOTICE.txt
@@ -0,0 +1,94 @@
+Apache Lucene
+Copyright 2011 The Apache Software Foundation
+
+This product includes software developed by
+The Apache Software Foundation (http://www.apache.org/).
+
+The snowball stemmers in
+  contrib/analyzers/common/src/java/net/sf/snowball
+were developed by Martin Porter and Richard Boulton.
+The snowball stopword lists in
+  contrib/analyzers/common/src/resources/org/apache/lucene/analysis/snowball
+were developed by Martin Porter and Richard Boulton.
+The full snowball package is available from
+  http://snowball.tartarus.org/
+
+The KStem stemmer in
+  common/src/org/apache/lucene/analysis/en
+was developed by Bob Krovetz and Sergio Guzman-Lara (CIIR-UMass Amherst)
+under the BSD license.
+
+The Arabic, Persian, Romanian, Bulgarian, and Hindi analyzers (contrib/analyzers)
+come with a BSD-licensed default stopword list created by Jacques Savoy.
+These files reside in:
+contrib/analyzers/common/src/resources/org/apache/lucene/analysis/ar/stopwords.txt,
+contrib/analyzers/common/src/resources/org/apache/lucene/analysis/fa/stopwords.txt,
+contrib/analyzers/common/src/resources/org/apache/lucene/analysis/ro/stopwords.txt,
+contrib/analyzers/common/src/resources/org/apache/lucene/analysis/bg/stopwords.txt,
+contrib/analyzers/common/src/resources/org/apache/lucene/analysis/hi/stopwords.txt
+See http://members.unine.ch/jacques.savoy/clef/index.html.
+
+The German, Spanish, Finnish, French, Hungarian, Italian, Portuguese, Russian,
+and Swedish light stemmers (common) are based on BSD-licensed reference
+implementations created by Jacques Savoy and Ljiljana Dolamic. These files reside in:
+contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanLightStemmer.java
+contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemmer.java
+contrib/analyzers/common/src/java/org/apache/lucene/analysis/es/SpanishLightStemmer.java
+contrib/analyzers/common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemmer.java
+contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemmer.java
+contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchMinimalStemmer.java
+contrib/analyzers/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemmer.java
+contrib/analyzers/common/src/java/org/apache/lucene/analysis/it/ItalianLightStemmer.java
+contrib/analyzers/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemmer.java
+contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemmer.java
+contrib/analyzers/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemmer.java
+
+The Stempel analyzer (contrib/analyzers) includes BSD-licensed software developed
+by the Egothor project http://egothor.sf.net/, created by Leo Galambos, Martin Kvapil,
+and Edmond Nolan.
+
+The Polish analyzer (contrib/analyzers) comes with a BSD-licensed default
+stopword list created by the Carrot2 project. The file resides
+in contrib/analyzers/stempel/src/resources/org/apache/lucene/analysis/pl/stopwords.txt.
+See http://project.carrot2.org/license.html.
+
+Includes lib/servlet-api-2.4.jar from Apache Tomcat
+Includes lib/ant-1.7.1.jar and lib/ant-junit-1.7.1.jar from Apache Ant
+Includes contrib/queries/lib/jakarta-regexp-1.4.jar from Apache Jakarta Regexp
+Includes software from other Apache Software Foundation projects,
+including, but not limited to:
+ - Commons Beanutils (contrib/benchmark/lib/commons-beanutils-1.7.0.jar)
+ - Commons Collections (contrib/benchmark/lib/commons-collections-3.1.jar)
+ - Commons Compress (contrib/benchmark/lib/commons-compress-1.0.jar)
+ - Commons Digester (contrib/benchmark/lib/commons-digester-1.7.jar)
+ - Commons Logging (contrib/benchmark/lib/commons-logging-1.0.4.jar)
+ - Xerces (contrib/benchmark/lib/xercesImpl-2.9.1-patched-XERCESJ-1257.jar)
+
+The SmartChineseAnalyzer source code (under contrib/analyzers) was
+provided by Xiaoping Gao and is copyright 2009 by www.imdict.net.
+
+ICU4J (under contrib/icu) is licensed under an MIT-style license
+(contrib/icu/lib/ICU-LICENSE.txt) and is Copyright (c) 1995-2008
+International Business Machines Corporation and others.
+
+Some files (contrib/analyzers/common/src/test/.../WordBreakTestUnicode_*.java
+and data files under contrib/icu/src/data/) are derived from Unicode data such
+as the Unicode Character Database. See http://unicode.org/copyright.html for more
+details.
+
+The class org.apache.lucene.SorterTemplate was inspired by CGLIB's class with
+the same name. The implementation part is mainly done using pre-existing
+Lucene sorting code. In-place stable mergesort was borrowed from CGLIB,
+which is Apache-licensed.
+
+Google Code Prettify is licensed under the Apache License 2.0.
+See http://code.google.com/p/google-code-prettify/
+
+JUnit (under lib/junit-4.7.jar) is licensed under the Common Public License v. 1.0
+See http://junit.sourceforge.net/cpl-v10.html
+
+JLine (under contrib/lucli/lib/jline.jar) is licensed under the BSD License.
+See http://jline.sourceforge.net/
+
+This product includes code (JaspellTernarySearchTrie) from the Java Spelling Checking Package (jaspell): http://jaspell.sourceforge.net/
+License: The BSD License (http://www.opensource.org/licenses/bsd-license.php)
+
diff --git a/lucene-java-3.4.0/lucene/README.txt b/lucene-java-3.4.0/lucene/README.txt
new file mode 100644
index 0000000..7bc336b
--- /dev/null
+++ b/lucene-java-3.4.0/lucene/README.txt
@@ -0,0 +1,43 @@
+Apache Lucene README file
+
+INTRODUCTION
+
+Lucene is a Java full-text search engine. Lucene is not a complete
+application, but rather a code library and API that can easily be used
+to add search capabilities to applications.
+
+The Lucene web site is at:
+  http://lucene.apache.org/
+
+Please join the Lucene-User mailing list by sending a message to:
+  java-user-subscribe@lucene.apache.org
+
+Files in a binary distribution:
+
+lucene-core-XX.jar
+  The compiled Lucene library.
+
+lucene-core-XX-javadoc.jar
+  The Javadoc jar for the compiled Lucene library.
+
+lucene-test-framework-XX.jar
+  The compiled Lucene test-framework library.
+  Depends on junit 4.7.x (not 4.6.x, not 4.8.x), and Apache Ant 1.7.x (not 1.6.x, not 1.8.x)
+
+lucene-test-framework-XX-javadoc.jar
+  The Javadoc jar for the compiled Lucene test-framework library.
+
+contrib/demo/lucene-demo-XX.jar
+  The compiled simple example code.
+
+contrib/*
+  Contributed code which extends and enhances Lucene, but is not
+  part of the core library. Of special note are the JAR files in the analyzers directory, which
+  contain various analyzers that people may find useful in place of the StandardAnalyzer.
+
+docs/index.html
+  The contents of the Lucene website.
+
+docs/api/index.html
+  The Javadoc Lucene API documentation. This includes the core library,
+  the test framework, and the demo, as well as all of the contrib modules.
diff --git a/lucene-java-3.4.0/lucene/backwards/backwards-readme.txt b/lucene-java-3.4.0/lucene/backwards/backwards-readme.txt
new file mode 100644
index 0000000..14d7616
--- /dev/null
+++ b/lucene-java-3.4.0/lucene/backwards/backwards-readme.txt
@@ -0,0 +1,21 @@
+This folder contains the src/ folder of the previous Lucene major version.
+
+The test-backwards ANT task compiles the previous version's tests (bundled) against the
+previous released lucene-core.jar file (bundled). After that, the compiled test classes
+are run against the new lucene-core.jar file created by ANT before.
+
+After tagging a new Lucene *major* version (tag name "lucene_solr_X_Y_0"), do the following
+(never do this for minor versions). Always use the x.y.0 version for the backwards folder;
+later bugfix releases should not be tested (the new version must be backwards
+compatible with the last base version, so bugfixes should not be taken into account):
+
+* cd lucene/backwards
+* svn rm src/test src/test-framework lib/lucene-core*.jar
+* svn commit (1st commit; you must do this, else you will corrupt your checkout)
+* svn cp https://svn.apache.org/repos/asf/lucene/dev/tags/lucene_solr_X_Y_0/lucene/src/test-framework src
+* svn cp https://svn.apache.org/repos/asf/lucene/dev/tags/lucene_solr_X_Y_0/lucene/src/test src
+* Copy the lucene-core.jar from the last release tarball to lib.
+* Check that everything is correct: the backwards folder should contain a src/ folder
+  that now contains "test" and "test-framework". The files should be the ones from the last version.
+* Run "ant test-backwards"
+* Commit the stuff again (2nd commit)
diff --git a/lucene-java-3.4.0/lucene/backwards/lib/lucene-core-3.3.0.jar b/lucene-java-3.4.0/lucene/backwards/lib/lucene-core-3.3.0.jar
new file mode 100644
index 0000000..f628e1b
Binary files /dev/null and b/lucene-java-3.4.0/lucene/backwards/lib/lucene-core-3.3.0.jar differ
diff --git a/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/analysis/BaseTokenStreamTestCase.java b/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
new file mode 100644
index 0000000..382e2f4
--- /dev/null
+++ b/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
@@ -0,0 +1,280 @@
+package org.apache.lucene.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.StringReader; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; + +import org.apache.lucene.analysis.tokenattributes.*; +import org.apache.lucene.util.Attribute; +import org.apache.lucene.util.AttributeImpl; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; + +/** + * Base class for all Lucene unit tests that use TokenStreams. + *
+ * <p>
+ * When writing unit tests for analysis components, its highly recommended + * to use the helper methods here (especially in conjunction with {@link MockAnalyzer} or + * {@link MockTokenizer}), as they contain many assertions and checks to + * catch bugs. + * + * @see MockAnalyzer + * @see MockTokenizer + */ +public abstract class BaseTokenStreamTestCase extends LuceneTestCase { + // some helpers to test Analyzers and TokenStreams: + + public static interface CheckClearAttributesAttribute extends Attribute { + boolean getAndResetClearCalled(); + } + + public static final class CheckClearAttributesAttributeImpl extends AttributeImpl implements CheckClearAttributesAttribute { + private boolean clearCalled = false; + + public boolean getAndResetClearCalled() { + try { + return clearCalled; + } finally { + clearCalled = false; + } + } + + @Override + public void clear() { + clearCalled = true; + } + + @Override + public boolean equals(Object other) { + return ( + other instanceof CheckClearAttributesAttributeImpl && + ((CheckClearAttributesAttributeImpl) other).clearCalled == this.clearCalled + ); + } + + @Override + public int hashCode() { + return 76137213 ^ Boolean.valueOf(clearCalled).hashCode(); + } + + @Override + public void copyTo(AttributeImpl target) { + ((CheckClearAttributesAttributeImpl) target).clear(); + } + } + + public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[], Integer finalOffset) throws IOException { + assertNotNull(output); + CheckClearAttributesAttribute checkClearAtt = ts.addAttribute(CheckClearAttributesAttribute.class); + + assertTrue("has no CharTermAttribute", ts.hasAttribute(CharTermAttribute.class)); + CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class); + + OffsetAttribute offsetAtt = null; + if (startOffsets != null || endOffsets != null || finalOffset != null) { + assertTrue("has no OffsetAttribute", ts.hasAttribute(OffsetAttribute.class)); + offsetAtt = ts.getAttribute(OffsetAttribute.class); + } + + TypeAttribute typeAtt = null; + if (types != null) { + assertTrue("has no TypeAttribute", ts.hasAttribute(TypeAttribute.class)); + typeAtt = ts.getAttribute(TypeAttribute.class); + } + + PositionIncrementAttribute posIncrAtt = null; + if (posIncrements != null) { + assertTrue("has no PositionIncrementAttribute", ts.hasAttribute(PositionIncrementAttribute.class)); + posIncrAtt = ts.getAttribute(PositionIncrementAttribute.class); + } + + ts.reset(); + for (int i = 0; i < output.length; i++) { + // extra safety to enforce, that the state is not preserved and also assign bogus values + ts.clearAttributes(); + termAtt.setEmpty().append("bogusTerm"); + if (offsetAtt != null) offsetAtt.setOffset(14584724,24683243); + if (typeAtt != null) typeAtt.setType("bogusType"); + if (posIncrAtt != null) posIncrAtt.setPositionIncrement(45987657); + + checkClearAtt.getAndResetClearCalled(); // reset it, because we called clearAttribute() before + assertTrue("token "+i+" does not exist", ts.incrementToken()); + assertTrue("clearAttributes() was not called correctly in TokenStream chain", checkClearAtt.getAndResetClearCalled()); + + assertEquals("term "+i, output[i], termAtt.toString()); + if (startOffsets != null) + assertEquals("startOffset "+i, startOffsets[i], offsetAtt.startOffset()); + if (endOffsets != null) + assertEquals("endOffset "+i, endOffsets[i], offsetAtt.endOffset()); + if (types != null) + assertEquals("type "+i, types[i], typeAtt.type()); + if 
(posIncrements != null) + assertEquals("posIncrement "+i, posIncrements[i], posIncrAtt.getPositionIncrement()); + + // we can enforce some basic things about a few attributes even if the caller doesn't check: + if (offsetAtt != null) { + assertTrue("startOffset must be >= 0", offsetAtt.startOffset() >= 0); + assertTrue("endOffset must be >= 0", offsetAtt.endOffset() >= 0); + assertTrue("endOffset must be >= startOffset", offsetAtt.endOffset() >= offsetAtt.startOffset()); + } + if (posIncrAtt != null) { + assertTrue("posIncrement must be >= 0", posIncrAtt.getPositionIncrement() >= 0); + } + } + assertFalse("end of stream", ts.incrementToken()); + ts.end(); + if (finalOffset != null) + assertEquals("finalOffset ", finalOffset.intValue(), offsetAtt.endOffset()); + if (offsetAtt != null) { + assertTrue("finalOffset must be >= 0", offsetAtt.endOffset() >= 0); + } + ts.close(); + } + + public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[]) throws IOException { + assertTokenStreamContents(ts, output, startOffsets, endOffsets, types, posIncrements, null); + } + + public static void assertTokenStreamContents(TokenStream ts, String[] output) throws IOException { + assertTokenStreamContents(ts, output, null, null, null, null, null); + } + + public static void assertTokenStreamContents(TokenStream ts, String[] output, String[] types) throws IOException { + assertTokenStreamContents(ts, output, null, null, types, null, null); + } + + public static void assertTokenStreamContents(TokenStream ts, String[] output, int[] posIncrements) throws IOException { + assertTokenStreamContents(ts, output, null, null, null, posIncrements, null); + } + + public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[]) throws IOException { + assertTokenStreamContents(ts, output, startOffsets, endOffsets, null, null, null); + } + + public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], Integer finalOffset) throws IOException { + assertTokenStreamContents(ts, output, startOffsets, endOffsets, null, null, finalOffset); + } + + public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], int[] posIncrements) throws IOException { + assertTokenStreamContents(ts, output, startOffsets, endOffsets, null, posIncrements, null); + } + + public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], int[] posIncrements, Integer finalOffset) throws IOException { + assertTokenStreamContents(ts, output, startOffsets, endOffsets, null, posIncrements, finalOffset); + } + + public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[]) throws IOException { + assertTokenStreamContents(a.tokenStream("dummy", new StringReader(input)), output, startOffsets, endOffsets, types, posIncrements, input.length()); + } + + public static void assertAnalyzesTo(Analyzer a, String input, String[] output) throws IOException { + assertAnalyzesTo(a, input, output, null, null, null, null); + } + + public static void assertAnalyzesTo(Analyzer a, String input, String[] output, String[] types) throws IOException { + assertAnalyzesTo(a, input, output, null, null, types, null); + } + + public static void assertAnalyzesTo(Analyzer a, String input, String[] 
output, int[] posIncrements) throws IOException { + assertAnalyzesTo(a, input, output, null, null, null, posIncrements); + } + + public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[]) throws IOException { + assertAnalyzesTo(a, input, output, startOffsets, endOffsets, null, null); + } + + public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], int[] posIncrements) throws IOException { + assertAnalyzesTo(a, input, output, startOffsets, endOffsets, null, posIncrements); + } + + + public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[]) throws IOException { + assertTokenStreamContents(a.reusableTokenStream("dummy", new StringReader(input)), output, startOffsets, endOffsets, types, posIncrements, input.length()); + } + + public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output) throws IOException { + assertAnalyzesToReuse(a, input, output, null, null, null, null); + } + + public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output, String[] types) throws IOException { + assertAnalyzesToReuse(a, input, output, null, null, types, null); + } + + public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output, int[] posIncrements) throws IOException { + assertAnalyzesToReuse(a, input, output, null, null, null, posIncrements); + } + + public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[]) throws IOException { + assertAnalyzesToReuse(a, input, output, startOffsets, endOffsets, null, null); + } + + public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], int[] posIncrements) throws IOException { + assertAnalyzesToReuse(a, input, output, startOffsets, endOffsets, null, posIncrements); + } + + // simple utility method for testing stemmers + + public static void checkOneTerm(Analyzer a, final String input, final String expected) throws IOException { + assertAnalyzesTo(a, input, new String[]{expected}); + } + + public static void checkOneTermReuse(Analyzer a, final String input, final String expected) throws IOException { + assertAnalyzesToReuse(a, input, new String[]{expected}); + } + + // simple utility method for blasting tokenstreams with data to make sure they don't do anything crazy + + public static void checkRandomData(Random random, Analyzer a, int iterations) throws IOException { + checkRandomData(random, a, iterations, 20); + } + + public static void checkRandomData(Random random, Analyzer a, int iterations, int maxWordLength) throws IOException { + for (int i = 0; i < iterations; i++) { + String text; + switch(_TestUtil.nextInt(random, 0, 3)) { + case 0: + text = _TestUtil.randomSimpleString(random); + break; + case 1: + text = _TestUtil.randomRealisticUnicodeString(random, maxWordLength); + break; + default: + text = _TestUtil.randomUnicodeString(random, maxWordLength); + } + + TokenStream ts = a.reusableTokenStream("dummy", new StringReader(text)); + assertTrue("has no CharTermAttribute", ts.hasAttribute(CharTermAttribute.class)); + CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class); + List tokens = new ArrayList(); + ts.reset(); + while (ts.incrementToken()) { + tokens.add(termAtt.toString()); + // TODO: we could collect offsets etc here for better 
checking that reset() really works.
+      }
+      ts.end();
+      ts.close();
+      // verify reusing is "reproducible" and also get the normal tokenstream sanity checks
+      if (!tokens.isEmpty())
+        assertAnalyzesToReuse(a, text, tokens.toArray(new String[tokens.size()]));
+    }
+  }
+}
diff --git a/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/analysis/MockAnalyzer.java b/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/analysis/MockAnalyzer.java
new file mode 100644
index 0000000..857c095
--- /dev/null
+++ b/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/analysis/MockAnalyzer.java
@@ -0,0 +1,170 @@
+package org.apache.lucene.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Random;
+
+import org.apache.lucene.util.LuceneTestCase;
+
+/**
+ * Analyzer for testing
+ *
+ * <p>
+ * This analyzer is a replacement for Whitespace/Simple/KeywordAnalyzers
+ * for unit tests. If you are testing a custom component such as a query parser
+ * or analyzer-wrapper that consumes analysis streams, it's a great idea to test
+ * it with this analyzer instead. MockAnalyzer has the following behavior:
+ * <ul>
+ *   <li>By default, the assertions in {@link MockTokenizer} are turned on for extra
+ *       checks that the consumer is consuming properly. These checks can be disabled
+ *       with {@link #setEnableChecks(boolean)}.
+ *   <li>Payload data is randomly injected into the stream for more thorough testing
+ *       of payloads.
+ * </ul>
+ * @see MockTokenizer + */ +public final class MockAnalyzer extends Analyzer { + private final int pattern; + private final boolean lowerCase; + private final CharArraySet filter; + private final boolean enablePositionIncrements; + private int positionIncrementGap; + private final Random random; + private Map previousMappings = new HashMap(); + private boolean enableChecks = true; + + /** + * Creates a new MockAnalyzer. + * + * @param random Random for payloads behavior + * @param pattern pattern constant describing how tokenization should happen + * @param lowerCase true if the tokenizer should lowercase terms + * @param filter CharArraySet describing how terms should be filtered (set of stopwords, etc) + * @param enablePositionIncrements true if position increments should reflect filtered terms. + */ + public MockAnalyzer(Random random, int pattern, boolean lowerCase, CharArraySet filter, boolean enablePositionIncrements) { + this.random = random; + this.pattern = pattern; + this.lowerCase = lowerCase; + this.filter = filter; + this.enablePositionIncrements = enablePositionIncrements; + } + + /** + * Calls {@link #MockAnalyzer(Random, int, boolean, CharArraySet, boolean) + * MockAnalyzer(random, pattern, lowerCase, CharArraySet.EMPTY_STOPSET, false}). + */ + public MockAnalyzer(Random random, int pattern, boolean lowerCase) { + this(random, pattern, lowerCase, CharArraySet.EMPTY_SET, false); + } + + /** + * Create a Whitespace-lowercasing analyzer with no stopwords removal. + *
+ * <p>
+ * Calls {@link #MockAnalyzer(Random, int, boolean) + * MockAnalyzer(random, MockTokenizer.WHITESPACE, true)}. + */ + public MockAnalyzer(Random random) { + this(random, MockTokenizer.WHITESPACE, true); + } + + @Override + public TokenStream tokenStream(String fieldName, Reader reader) { + MockTokenizer tokenizer = new MockTokenizer(reader, pattern, lowerCase); + tokenizer.setEnableChecks(enableChecks); + StopFilter filt = new StopFilter(LuceneTestCase.TEST_VERSION_CURRENT, tokenizer, filter); + filt.setEnablePositionIncrements(enablePositionIncrements); + return maybePayload(filt, fieldName); + } + + private class SavedStreams { + MockTokenizer tokenizer; + TokenFilter filter; + } + + @Override + public TokenStream reusableTokenStream(String fieldName, Reader reader) + throws IOException { + @SuppressWarnings("unchecked") Map map = (Map) getPreviousTokenStream(); + if (map == null) { + map = new HashMap(); + setPreviousTokenStream(map); + } + + SavedStreams saved = map.get(fieldName); + if (saved == null) { + saved = new SavedStreams(); + saved.tokenizer = new MockTokenizer(reader, pattern, lowerCase); + saved.tokenizer.setEnableChecks(enableChecks); + StopFilter filt = new StopFilter(LuceneTestCase.TEST_VERSION_CURRENT, saved.tokenizer, filter); + filt.setEnablePositionIncrements(enablePositionIncrements); + saved.filter = filt; + saved.filter = maybePayload(saved.filter, fieldName); + map.put(fieldName, saved); + return saved.filter; + } else { + saved.tokenizer.reset(reader); + return saved.filter; + } + } + + private synchronized TokenFilter maybePayload(TokenFilter stream, String fieldName) { + Integer val = previousMappings.get(fieldName); + if (val == null) { + val = -1; // no payloads + if (LuceneTestCase.rarely(random)) { + switch(random.nextInt(3)) { + case 0: val = -1; // no payloads + break; + case 1: val = Integer.MAX_VALUE; // variable length payload + break; + case 2: val = random.nextInt(12); // fixed length payload + break; + } + } + previousMappings.put(fieldName, val); // save it so we are consistent for this field + } + + if (val == -1) + return stream; + else if (val == Integer.MAX_VALUE) + return new MockVariableLengthPayloadFilter(random, stream); + else + return new MockFixedLengthPayloadFilter(random, stream, val); + } + + public void setPositionIncrementGap(int positionIncrementGap){ + this.positionIncrementGap = positionIncrementGap; + } + + @Override + public int getPositionIncrementGap(String fieldName){ + return positionIncrementGap; + } + + /** + * Toggle consumer workflow checking: if your test consumes tokenstreams normally you + * should leave this enabled. + */ + public void setEnableChecks(boolean enableChecks) { + this.enableChecks = enableChecks; + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/analysis/MockFixedLengthPayloadFilter.java b/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/analysis/MockFixedLengthPayloadFilter.java new file mode 100644 index 0000000..af0c364 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/analysis/MockFixedLengthPayloadFilter.java @@ -0,0 +1,49 @@ +package org.apache.lucene.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Random; + +import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; +import org.apache.lucene.index.Payload; + +public final class MockFixedLengthPayloadFilter extends TokenFilter { + private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class); + private final Random random; + private final byte[] bytes; + private final Payload payload; + + public MockFixedLengthPayloadFilter(Random random, TokenStream in, int length) { + super(in); + this.random = random; + this.bytes = new byte[length]; + this.payload = new Payload(bytes); + } + + @Override + public boolean incrementToken() throws IOException { + if (input.incrementToken()) { + random.nextBytes(bytes); + payloadAtt.setPayload(payload); + return true; + } else { + return false; + } + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java b/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java new file mode 100644 index 0000000..fee3c5d --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java @@ -0,0 +1,195 @@ +package org.apache.lucene.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.io.Reader; + +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.util.AttributeSource.AttributeFactory; + +/** + * Tokenizer for testing. + *
+ * <p>
+ * This tokenizer is a replacement for {@link #WHITESPACE}, {@link #SIMPLE}, and {@link #KEYWORD}
+ * tokenizers. If you are writing a component such as a TokenFilter, it's a great idea to test
+ * it wrapping this tokenizer instead for extra checks. This tokenizer has the following behavior:
+ * <ul>
+ *   <li>An internal state-machine is used for checking consumer consistency. These checks can
+ *       be disabled with {@link #setEnableChecks(boolean)}.
+ *   <li>For convenience, optionally lowercases terms that it outputs.
+ * </ul>
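+ * <p>
+ * For example, a TokenFilter test might wrap it like this (a sketch; the
+ * {@code MyFilter} class is hypothetical):
+ * <pre>
+ *   Reader reader = new StringReader("some test text");
+ *   MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ *   TokenStream stream = new MyFilter(tokenizer);
+ *   // consuming stream now also exercises the tokenizer's workflow checks
+ * </pre>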
+ */ +public class MockTokenizer extends Tokenizer { + /** Acts Similar to WhitespaceTokenizer */ + public static final int WHITESPACE = 0; + /** Acts Similar to KeywordTokenizer. + * TODO: Keyword returns an "empty" token for an empty reader... + */ + public static final int KEYWORD = 1; + /** Acts like LetterTokenizer. */ + public static final int SIMPLE = 2; + + private final int pattern; + private final boolean lowerCase; + private final int maxTokenLength; + public static final int DEFAULT_MAX_TOKEN_LENGTH = Integer.MAX_VALUE; + + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); + int off = 0; + + // TODO: "register" with LuceneTestCase to ensure all streams are closed() ? + // currently, we can only check that the lifecycle is correct if someone is reusing, + // but not for "one-offs". + private static enum State { + SETREADER, // consumer set a reader input either via ctor or via reset(Reader) + RESET, // consumer has called reset() + INCREMENT, // consumer is consuming, has called incrementToken() == true + INCREMENT_FALSE, // consumer has called incrementToken() which returned false + END, // consumer has called end() to perform end of stream operations + CLOSE // consumer has called close() to release any resources + }; + + private State streamState = State.CLOSE; + private boolean enableChecks = true; + + public MockTokenizer(AttributeFactory factory, Reader input, int pattern, boolean lowerCase, int maxTokenLength) { + super(factory, input); + this.pattern = pattern; + this.lowerCase = lowerCase; + this.streamState = State.SETREADER; + this.maxTokenLength = maxTokenLength; + } + + public MockTokenizer(Reader input, int pattern, boolean lowerCase, int maxTokenLength) { + this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input, pattern, lowerCase, maxTokenLength); + } + + public MockTokenizer(Reader input, int pattern, boolean lowerCase) { + this(input, pattern, lowerCase, DEFAULT_MAX_TOKEN_LENGTH); + } + + @Override + public final boolean incrementToken() throws IOException { + assert !enableChecks || (streamState == State.RESET || streamState == State.INCREMENT) + : "incrementToken() called while in wrong state: " + streamState; + clearAttributes(); + for (;;) { + int startOffset = off; + int cp = readCodePoint(); + if (cp < 0) { + break; + } else if (isTokenChar(cp)) { + int endOffset; + do { + char chars[] = Character.toChars(normalize(cp)); + for (int i = 0; i < chars.length; i++) + termAtt.append(chars[i]); + endOffset = off; + if (termAtt.length() >= maxTokenLength) { + break; + } + cp = readCodePoint(); + } while (cp >= 0 && isTokenChar(cp)); + offsetAtt.setOffset(correctOffset(startOffset), correctOffset(endOffset)); + streamState = State.INCREMENT; + return true; + } + } + streamState = State.INCREMENT_FALSE; + return false; + } + + protected int readCodePoint() throws IOException { + int ch = input.read(); + if (ch < 0) { + return ch; + } else { + assert ch != 0xffff; /* only on 3.x */ + assert !Character.isLowSurrogate((char) ch); + off++; + if (Character.isHighSurrogate((char) ch)) { + int ch2 = input.read(); + if (ch2 >= 0) { + off++; + assert Character.isLowSurrogate((char) ch2); + return Character.toCodePoint((char) ch, (char) ch2); + } + } + return ch; + } + } + + protected boolean isTokenChar(int c) { + switch(pattern) { + case WHITESPACE: return !Character.isWhitespace(c); + case KEYWORD: return true; + case SIMPLE: return Character.isLetter(c); + 
default: throw new RuntimeException("invalid pattern constant:" + pattern); + } + } + + protected int normalize(int c) { + return lowerCase ? Character.toLowerCase(c) : c; + } + + @Override + public void reset() throws IOException { + super.reset(); + off = 0; + assert !enableChecks || streamState != State.RESET : "double reset()"; + streamState = State.RESET; + } + + @Override + public void close() throws IOException { + super.close(); + // in some exceptional cases (e.g. TestIndexWriterExceptions) a test can prematurely close() + // these tests should disable this check, by default we check the normal workflow. + // TODO: investigate the CachingTokenFilter "double-close"... for now we ignore this + assert !enableChecks || streamState == State.END || streamState == State.CLOSE : "close() called in wrong state: " + streamState; + streamState = State.CLOSE; + } + + @Override + public void reset(Reader input) throws IOException { + super.reset(input); + assert !enableChecks || streamState == State.CLOSE : "setReader() called in wrong state: " + streamState; + streamState = State.SETREADER; + } + + @Override + public void end() throws IOException { + int finalOffset = correctOffset(off); + offsetAtt.setOffset(finalOffset, finalOffset); + // some tokenizers, such as limiting tokenizers, call end() before incrementToken() returns false. + // these tests should disable this check (in general you should consume the entire stream) + assert !enableChecks || streamState == State.INCREMENT_FALSE : "end() called before incrementToken() returned false!"; + streamState = State.END; + } + + /** + * Toggle consumer workflow checking: if your test consumes tokenstreams normally you + * should leave this enabled. + */ + public void setEnableChecks(boolean enableChecks) { + this.enableChecks = enableChecks; + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/analysis/MockVariableLengthPayloadFilter.java b/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/analysis/MockVariableLengthPayloadFilter.java new file mode 100644 index 0000000..f7b5361 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/analysis/MockVariableLengthPayloadFilter.java @@ -0,0 +1,51 @@ +package org.apache.lucene.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.util.Random; + +import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; +import org.apache.lucene.index.Payload; + +public final class MockVariableLengthPayloadFilter extends TokenFilter { + private static final int MAXLENGTH = 129; + + private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class); + private final Random random; + private final byte[] bytes = new byte[MAXLENGTH]; + private final Payload payload; + + public MockVariableLengthPayloadFilter(Random random, TokenStream in) { + super(in); + this.random = random; + this.payload = new Payload(bytes); + } + + @Override + public boolean incrementToken() throws IOException { + if (input.incrementToken()) { + random.nextBytes(bytes); + payload.setData(bytes, 0, random.nextInt(MAXLENGTH)); + payloadAtt.setPayload(payload); + return true; + } else { + return false; + } + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/index/DocHelper.java b/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/index/DocHelper.java new file mode 100644 index 0000000..b8cdd75 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/index/DocHelper.java @@ -0,0 +1,254 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.util.HashMap; +import java.util.Map; +import java.util.Random; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.Fieldable; +import org.apache.lucene.search.Similarity; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; + +import static org.apache.lucene.util.LuceneTestCase.TEST_VERSION_CURRENT; + +class DocHelper { + public static final String FIELD_1_TEXT = "field one text"; + public static final String TEXT_FIELD_1_KEY = "textField1"; + public static Field textField1 = new Field(TEXT_FIELD_1_KEY, FIELD_1_TEXT, + Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO); + + public static final String FIELD_2_TEXT = "field field field two text"; + //Fields will be lexicographically sorted. 
So, the order is: field, text, two + public static final int [] FIELD_2_FREQS = {3, 1, 1}; + public static final String TEXT_FIELD_2_KEY = "textField2"; + public static Field textField2 = new Field(TEXT_FIELD_2_KEY, FIELD_2_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); + + public static final String FIELD_3_TEXT = "aaaNoNorms aaaNoNorms bbbNoNorms"; + public static final String TEXT_FIELD_3_KEY = "textField3"; + public static Field textField3 = new Field(TEXT_FIELD_3_KEY, FIELD_3_TEXT, Field.Store.YES, Field.Index.ANALYZED); + static { textField3.setOmitNorms(true); } + + public static final String KEYWORD_TEXT = "Keyword"; + public static final String KEYWORD_FIELD_KEY = "keyField"; + public static Field keyField = new Field(KEYWORD_FIELD_KEY, KEYWORD_TEXT, + Field.Store.YES, Field.Index.NOT_ANALYZED); + + public static final String NO_NORMS_TEXT = "omitNormsText"; + public static final String NO_NORMS_KEY = "omitNorms"; + public static Field noNormsField = new Field(NO_NORMS_KEY, NO_NORMS_TEXT, + Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); + + public static final String NO_TF_TEXT = "analyzed with no tf and positions"; + public static final String NO_TF_KEY = "omitTermFreqAndPositions"; + public static Field noTFField = new Field(NO_TF_KEY, NO_TF_TEXT, + Field.Store.YES, Field.Index.ANALYZED); + static { + noTFField.setOmitTermFreqAndPositions(true); + } + + public static final String UNINDEXED_FIELD_TEXT = "unindexed field text"; + public static final String UNINDEXED_FIELD_KEY = "unIndField"; + public static Field unIndField = new Field(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT, + Field.Store.YES, Field.Index.NO); + + + public static final String UNSTORED_1_FIELD_TEXT = "unstored field text"; + public static final String UNSTORED_FIELD_1_KEY = "unStoredField1"; + public static Field unStoredField1 = new Field(UNSTORED_FIELD_1_KEY, UNSTORED_1_FIELD_TEXT, + Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO); + + public static final String UNSTORED_2_FIELD_TEXT = "unstored field text"; + public static final String UNSTORED_FIELD_2_KEY = "unStoredField2"; + public static Field unStoredField2 = new Field(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT, + Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES); + + public static final String LAZY_FIELD_BINARY_KEY = "lazyFieldBinary"; + public static byte [] LAZY_FIELD_BINARY_BYTES; + public static Field lazyFieldBinary; + + public static final String LAZY_FIELD_KEY = "lazyField"; + public static final String LAZY_FIELD_TEXT = "These are some field bytes"; + public static Field lazyField = new Field(LAZY_FIELD_KEY, LAZY_FIELD_TEXT, Field.Store.YES, Field.Index.ANALYZED); + + public static final String LARGE_LAZY_FIELD_KEY = "largeLazyField"; + public static String LARGE_LAZY_FIELD_TEXT; + public static Field largeLazyField; + + //From Issue 509 + public static final String FIELD_UTF1_TEXT = "field one \u4e00text"; + public static final String TEXT_FIELD_UTF1_KEY = "textField1Utf8"; + public static Field textUtfField1 = new Field(TEXT_FIELD_UTF1_KEY, FIELD_UTF1_TEXT, + Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO); + + public static final String FIELD_UTF2_TEXT = "field field field \u4e00two text"; + //Fields will be lexicographically sorted. 
So, the order is: field, text, two + public static final int [] FIELD_UTF2_FREQS = {3, 1, 1}; + public static final String TEXT_FIELD_UTF2_KEY = "textField2Utf8"; + public static Field textUtfField2 = new Field(TEXT_FIELD_UTF2_KEY, FIELD_UTF2_TEXT, Field.Store.YES, + Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); + + + + + public static Map nameValues = null; + + // ordered list of all the fields... + // could use LinkedHashMap for this purpose if Java1.4 is OK + public static Field[] fields = new Field[] { + textField1, + textField2, + textField3, + keyField, + noNormsField, + noTFField, + unIndField, + unStoredField1, + unStoredField2, + textUtfField1, + textUtfField2, + lazyField, + lazyFieldBinary,//placeholder for binary field, since this is null. It must be second to last. + largeLazyField//placeholder for large field, since this is null. It must always be last + }; + + public static Map all =new HashMap(); + public static Map indexed =new HashMap(); + public static Map stored =new HashMap(); + public static Map unstored=new HashMap(); + public static Map unindexed=new HashMap(); + public static Map termvector=new HashMap(); + public static Map notermvector=new HashMap(); + public static Map lazy= new HashMap(); + public static Map noNorms=new HashMap(); + public static Map noTf=new HashMap(); + + static { + //Initialize the large Lazy Field + StringBuilder buffer = new StringBuilder(); + for (int i = 0; i < 10000; i++) + { + buffer.append("Lazily loading lengths of language in lieu of laughing "); + } + + try { + LAZY_FIELD_BINARY_BYTES = "These are some binary field bytes".getBytes("UTF8"); + } catch (UnsupportedEncodingException e) { + } + lazyFieldBinary = new Field(LAZY_FIELD_BINARY_KEY, LAZY_FIELD_BINARY_BYTES); + fields[fields.length - 2] = lazyFieldBinary; + LARGE_LAZY_FIELD_TEXT = buffer.toString(); + largeLazyField = new Field(LARGE_LAZY_FIELD_KEY, LARGE_LAZY_FIELD_TEXT, Field.Store.YES, Field.Index.ANALYZED); + fields[fields.length - 1] = largeLazyField; + for (int i=0; i map, Fieldable field) { + map.put(field.name(), field); + } + + + static + { + nameValues = new HashMap(); + nameValues.put(TEXT_FIELD_1_KEY, FIELD_1_TEXT); + nameValues.put(TEXT_FIELD_2_KEY, FIELD_2_TEXT); + nameValues.put(TEXT_FIELD_3_KEY, FIELD_3_TEXT); + nameValues.put(KEYWORD_FIELD_KEY, KEYWORD_TEXT); + nameValues.put(NO_NORMS_KEY, NO_NORMS_TEXT); + nameValues.put(NO_TF_KEY, NO_TF_TEXT); + nameValues.put(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT); + nameValues.put(UNSTORED_FIELD_1_KEY, UNSTORED_1_FIELD_TEXT); + nameValues.put(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT); + nameValues.put(LAZY_FIELD_KEY, LAZY_FIELD_TEXT); + nameValues.put(LAZY_FIELD_BINARY_KEY, LAZY_FIELD_BINARY_BYTES); + nameValues.put(LARGE_LAZY_FIELD_KEY, LARGE_LAZY_FIELD_TEXT); + nameValues.put(TEXT_FIELD_UTF1_KEY, FIELD_UTF1_TEXT); + nameValues.put(TEXT_FIELD_UTF2_KEY, FIELD_UTF2_TEXT); + } + + /** + * Adds the fields above to a document + * @param doc The document to write + */ + public static void setupDoc(Document doc) { + for (int i=0; i= remainder ? 
remainder : bytesInBuffer; + System.arraycopy(buffer, bufferOffset, dest, destOffset, bytesToCopy); + destOffset += bytesToCopy; + start += bytesToCopy; + remainder -= bytesToCopy; + } + pointer += len; + } + + @Override + public void close() { + // ignore + } + + @Override + protected void seekInternal(long pos) { + pointer = (int) pos; + } + + @Override + public long length() { + return length; + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/index/MockRandomMergePolicy.java b/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/index/MockRandomMergePolicy.java new file mode 100644 index 0000000..1ff3543 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/index/MockRandomMergePolicy.java @@ -0,0 +1,111 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Random; +import java.util.Map; + +import org.apache.lucene.util._TestUtil; + +public class MockRandomMergePolicy extends MergePolicy { + private final Random random; + + public MockRandomMergePolicy(Random random) { + // fork a private random, since we are called + // unpredictably from threads: + this.random = new Random(random.nextLong()); + } + + @Override + public MergeSpecification findMerges(SegmentInfos segmentInfos) { + MergeSpecification mergeSpec = null; + //System.out.println("MRMP: findMerges sis=" + segmentInfos); + + if (segmentInfos.size() > 1 && random.nextInt(5) == 3) { + + List segments = new ArrayList(segmentInfos.asList()); + Collections.shuffle(segments, random); + + // TODO: sometimes make more than 1 merge? 
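+      // (what follows merges a random prefix of the shuffled segment list:
+      // anywhere from a single segment up to all of them)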
+ mergeSpec = new MergeSpecification(); + final int segsToMerge = _TestUtil.nextInt(random, 1, segmentInfos.size()); + mergeSpec.add(new OneMerge(segments.subList(0, segsToMerge))); + } + + return mergeSpec; + } + + @Override + public MergeSpecification findMergesForOptimize( + SegmentInfos segmentInfos, int maxSegmentCount, Map segmentsToOptimize) + throws CorruptIndexException, IOException { + + final List eligibleSegments = new ArrayList(); + for(SegmentInfo info : segmentInfos) { + if (segmentsToOptimize.containsKey(info)) { + eligibleSegments.add(info); + } + } + + //System.out.println("MRMP: findMergesForOptimize sis=" + segmentInfos + " eligible=" + eligibleSegments); + MergeSpecification mergeSpec = null; + if (eligibleSegments.size() > 1 || (eligibleSegments.size() == 1 && eligibleSegments.get(0).hasDeletions())) { + mergeSpec = new MergeSpecification(); + // Already shuffled having come out of a set but + // shuffle again for good measure: + Collections.shuffle(eligibleSegments, random); + int upto = 0; + while(upto < eligibleSegments.size()) { + int max = Math.min(10, eligibleSegments.size()-upto); + int inc = max <= 2 ? max : _TestUtil.nextInt(random, 2, max); + mergeSpec.add(new OneMerge(eligibleSegments.subList(upto, upto+inc))); + upto += inc; + } + } + + if (mergeSpec != null) { + for(OneMerge merge : mergeSpec.merges) { + for(SegmentInfo info : merge.segments) { + assert segmentsToOptimize.containsKey(info); + } + } + } + return mergeSpec; + } + + @Override + public MergeSpecification findMergesToExpungeDeletes( + SegmentInfos segmentInfos) + throws CorruptIndexException, IOException { + return findMerges(segmentInfos); + } + + @Override + public void close() { + } + + @Override + public boolean useCompoundFile(SegmentInfos infos, SegmentInfo mergedInfo) throws IOException { + // 80% of the time we create CFS: + return random.nextInt(5) != 1; + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java b/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java new file mode 100644 index 0000000..b821b93 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java @@ -0,0 +1,241 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.Closeable; +import java.io.IOException; +import java.util.Collection; +import java.util.Collections; +import java.util.Random; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.IndexWriter; // javadoc +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.Version; +import org.apache.lucene.util._TestUtil; + +/** Silly class that randomizes the indexing experience. EG + * it may swap in a different merge policy/scheduler; may + * commit periodically; may or may not optimize in the end, + * may flush by doc count instead of RAM, etc. + */ + +public class RandomIndexWriter implements Closeable { + + public IndexWriter w; + private final Random r; + int docCount; + int flushAt; + private double flushAtFactor = 1.0; + private boolean getReaderCalled; + + // Randomly calls Thread.yield so we mixup thread scheduling + private static final class MockIndexWriter extends IndexWriter { + + private final Random r; + + public MockIndexWriter(Random r,Directory dir, IndexWriterConfig conf) throws IOException { + super(dir, conf); + // must make a private random since our methods are + // called from different threads; else test failures may + // not be reproducible from the original seed + this.r = new Random(r.nextInt()); + } + + @Override + boolean testPoint(String name) { + if (r.nextInt(4) == 2) + Thread.yield(); + return true; + } + } + + /** create a RandomIndexWriter with a random config: Uses TEST_VERSION_CURRENT and Whitespace+LowercasingAnalyzer */ + public RandomIndexWriter(Random r, Directory dir) throws IOException { + this(r, dir, LuceneTestCase.newIndexWriterConfig(r, LuceneTestCase.TEST_VERSION_CURRENT, new MockAnalyzer(r))); + } + + /** create a RandomIndexWriter with a random config: Uses TEST_VERSION_CURRENT */ + public RandomIndexWriter(Random r, Directory dir, Analyzer a) throws IOException { + this(r, dir, LuceneTestCase.newIndexWriterConfig(r, LuceneTestCase.TEST_VERSION_CURRENT, a)); + } + + /** create a RandomIndexWriter with a random config */ + public RandomIndexWriter(Random r, Directory dir, Version v, Analyzer a) throws IOException { + this(r, dir, LuceneTestCase.newIndexWriterConfig(r, v, a)); + } + + /** create a RandomIndexWriter with the provided config */ + public RandomIndexWriter(Random r, Directory dir, IndexWriterConfig c) throws IOException { + this.r = r; + w = new MockIndexWriter(r, dir, c); + flushAt = _TestUtil.nextInt(r, 10, 1000); + if (LuceneTestCase.VERBOSE) { + System.out.println("RIW config=" + w.getConfig()); + } + } + + /** + * Adds a Document. + * @see IndexWriter#addDocument(Document) + */ + public void addDocument(final Document doc) throws IOException { + if (r.nextInt(5) == 3) { + // TODO: maybe, we should simply buffer up added docs + // (but we need to clone them), and only when + // getReader, commit, etc. are called, we do an + // addDocuments? Would be better testing. 
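+      // (note: a single-element list goes through the bulk addDocuments
+      // path, so random runs exercise both addDocument and addDocuments)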
+ w.addDocuments(Collections.singletonList(doc)); + } else { + w.addDocument(doc); + } + maybeCommit(); + } + + public void addDocuments(Collection docs) throws IOException { + w.addDocuments(docs); + maybeCommit(); + } + + public void updateDocuments(Term delTerm, Collection docs) throws IOException { + w.updateDocuments(delTerm, docs); + maybeCommit(); + } + + private void maybeCommit() throws IOException { + if (docCount++ == flushAt) { + if (LuceneTestCase.VERBOSE) { + System.out.println("RIW.add/updateDocument: now doing a commit at docCount=" + docCount); + } + w.commit(); + flushAt += _TestUtil.nextInt(r, (int) (flushAtFactor * 10), (int) (flushAtFactor * 1000)); + if (flushAtFactor < 2e6) { + // gradually but exponentially increase time b/w flushes + flushAtFactor *= 1.05; + } + } + } + + /** + * Updates a document. + * @see IndexWriter#updateDocument(Term, Document) + */ + public void updateDocument(Term t, final Document doc) throws IOException { + if (r.nextInt(5) == 3) { + w.updateDocuments(t, Collections.singletonList(doc)); + } else { + w.updateDocument(t, doc); + } + maybeCommit(); + } + + public void addIndexes(Directory... dirs) throws CorruptIndexException, IOException { + w.addIndexes(dirs); + } + + public void deleteDocuments(Term term) throws CorruptIndexException, IOException { + w.deleteDocuments(term); + } + + public void commit() throws CorruptIndexException, IOException { + w.commit(); + } + + public int numDocs() throws IOException { + return w.numDocs(); + } + + public int maxDoc() { + return w.maxDoc(); + } + + public void deleteAll() throws IOException { + w.deleteAll(); + } + + private boolean doRandomOptimize = true; + + public void setDoRandomOptimize(boolean v) { + doRandomOptimize = v; + } + + private void doRandomOptimize() throws IOException { + if (doRandomOptimize) { + final int segCount = w.getSegmentCount(); + if (r.nextBoolean() || segCount == 0) { + // full optimize + w.optimize(); + } else { + // partial optimize + final int limit = _TestUtil.nextInt(r, 1, segCount); + w.optimize(limit); + assert w.getSegmentCount() <= limit: "limit=" + limit + " actual=" + w.getSegmentCount(); + } + } + } + + public IndexReader getReader() throws IOException { + return getReader(true); + } + + public IndexReader getReader(boolean applyDeletions) throws IOException { + getReaderCalled = true; + if (r.nextInt(4) == 2) { + doRandomOptimize(); + } + if (r.nextBoolean()) { + if (LuceneTestCase.VERBOSE) { + System.out.println("RIW.getReader: use NRT reader"); + } + return w.getReader(applyDeletions); + } else { + if (LuceneTestCase.VERBOSE) { + System.out.println("RIW.getReader: open new reader"); + } + w.commit(); + return IndexReader.open(w.getDirectory(), new KeepOnlyLastCommitDeletionPolicy(), r.nextBoolean(), _TestUtil.nextInt(r, 1, 10)); + } + } + + /** + * Close this writer. + * @see IndexWriter#close() + */ + public void close() throws IOException { + // if someone isn't using getReader() API, we want to be sure to + // maybeOptimize since presumably they might open a reader on the dir. + if (getReaderCalled == false && r.nextInt(8) == 2) { + doRandomOptimize(); + } + w.close(); + } + + /** + * Forces an optimize. + *
+   * <p>
+   * NOTE: this should be avoided in tests unless absolutely necessary,
+   * as it will result in less test coverage.
+   * @see IndexWriter#optimize()
+   */
+  public void optimize() throws IOException {
+    w.optimize();
+  }
+}
diff --git a/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/index/SlowMultiReaderWrapper.java b/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/index/SlowMultiReaderWrapper.java
new file mode 100644
index 0000000..be93a34
--- /dev/null
+++ b/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/index/SlowMultiReaderWrapper.java
@@ -0,0 +1,49 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ArrayList;
+
+import org.apache.lucene.util.ReaderUtil;
+
+/**
+ * Acts like Lucene 4.x's SlowMultiReaderWrapper for testing
+ * of top-level MultiTermEnum, MultiTermDocs, ...
+ */
+public class SlowMultiReaderWrapper extends MultiReader {
+
+  public SlowMultiReaderWrapper(IndexReader reader) {
+    super(subReaders(reader));
+  }
+
+  private static IndexReader[] subReaders(IndexReader reader) {
+    ArrayList<IndexReader> list = new ArrayList<IndexReader>();
+    ReaderUtil.gatherSubReaders(list, reader);
+    return list.toArray(new IndexReader[list.size()]);
+  }
+
+  @Override
+  public IndexReader[] getSequentialSubReaders() {
+    return null;
+  }
+
+  @Override
+  public String toString() {
+    return "SlowMultiReaderWrapper(" + super.toString() + ")";
+  }
+}
diff --git a/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/search/AssertingIndexSearcher.java b/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/search/AssertingIndexSearcher.java
new file mode 100644
index 0000000..c347eb7
--- /dev/null
+++ b/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/search/AssertingIndexSearcher.java
@@ -0,0 +1,89 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.concurrent.ExecutorService;
+import java.io.IOException;
+
+import org.apache.lucene.index.IndexReader;
+
+/**
+ * Helper class that adds some extra checks to ensure correct
+ * usage of {@code IndexSearcher} and {@code Weight}.
+ * TODO: Extend this by more checks, that's just a start.
+ */
+public class AssertingIndexSearcher extends IndexSearcher {
+  public AssertingIndexSearcher(IndexReader r) {
+    super(r);
+  }
+
+  public AssertingIndexSearcher(IndexReader r, ExecutorService ex) {
+    super(r, ex);
+  }
+
+  // not anonymous, because otherwise it would not be serializable (compare trunk)
+  private static final class UnmodifiableWeight extends Weight {
+    private final Weight w;
+
+    UnmodifiableWeight(Weight w) {
+      this.w = w;
+    }
+
+    @Override
+    public Explanation explain(IndexReader reader, int doc) throws IOException {
+      return w.explain(reader, doc);
+    }
+
+    @Override
+    public Query getQuery() {
+      return w.getQuery();
+    }
+
+    @Override
+    public float getValue() {
+      return w.getValue();
+    }
+
+    @Override
+    public void normalize(float norm) {
+      throw new IllegalStateException("Weight already normalized.");
+    }
+
+    @Override
+    public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) throws IOException {
+      return w.scorer(reader, scoreDocsInOrder, topScorer);
+    }
+
+    @Override
+    public float sumOfSquaredWeights() throws IOException {
+      throw new IllegalStateException("Weight already normalized.");
+    }
+
+    @Override
+    public boolean scoresDocsOutOfOrder() {
+      return w.scoresDocsOutOfOrder();
+    }
+  }
+
+  /** Ensures that the returned {@code Weight} is not normalized again, which may produce wrong scores. */
+  @Override
+  public Weight createNormalizedWeight(Query query) throws IOException {
+    final Weight w = super.createNormalizedWeight(query);
+    return new UnmodifiableWeight(w);
+  }
+}
diff --git a/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/search/CheckHits.java b/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/search/CheckHits.java
new file mode 100644
index 0000000..858d84d
--- /dev/null
+++ b/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/search/CheckHits.java
@@ -0,0 +1,517 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.Random;
+
+import junit.framework.Assert;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.store.Directory;
+
+public class CheckHits {
+
+  /**
+   * Some explains methods calculate their values through a slightly
+   * different order of operations from the actual scoring method ...
+   * this allows for a small amount of variation
+   */
+  public static float EXPLAIN_SCORE_TOLERANCE_DELTA = 0.0002f;
+
+  /**
+   * Tests that all documents up to maxDoc which are *not* in the
+   * expected result set have an explanation which indicates that
+   * the document does not match
+   */
+  public static void checkNoMatchExplanations(Query q, String defaultFieldName,
+                                              Searcher searcher, int[] results)
+    throws IOException {
+
+    String d = q.toString(defaultFieldName);
+    Set<Integer> ignore = new TreeSet<Integer>();
+    for (int i = 0; i < results.length; i++) {
+      ignore.add(Integer.valueOf(results[i]));
+    }
+
+    int maxDoc = searcher.maxDoc();
+    for (int doc = 0; doc < maxDoc; doc++) {
+      if (ignore.contains(Integer.valueOf(doc))) continue;
+
+      Explanation exp = searcher.explain(q, doc);
+      Assert.assertNotNull("Explanation of [["+d+"]] for #"+doc+" is null",
+                           exp);
+      Assert.assertFalse("Explanation of [["+d+"]] for #"+doc+
+                         " doesn't indicate non-match: " + exp.toString(),
+                         exp.isMatch());
+    }
+
+  }
+
+  /**
+   * Tests that a query matches an expected set of documents using a
+   * HitCollector.
+   *
+   * <p>
+   * Note that when using the HitCollector API, documents will be collected
+   * if they "match" regardless of what their score is.
+   * </p>
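+   * <p>
+   * A typical call looks like this (a sketch; the query, searcher, and
+   * expected document ids are hypothetical):
+   * <pre>
+   *   Query q = new TermQuery(new Term("field", "value"));
+   *   CheckHits.checkHitCollector(random, q, "field", searcher, new int[] {0, 2, 5});
+   * </pre>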
+   * @param query the query to test
+   * @param searcher the searcher to test the query against
+   * @param defaultFieldName used for displaying the query in assertion messages
+   * @param results a list of documentIds that must match the query
+   * @see Searcher#search(Query,Collector)
+   * @see #checkHits
+   */
+  public static void checkHitCollector(Random random, Query query, String defaultFieldName,
+                                       Searcher searcher, int[] results)
+    throws IOException {
+
+    QueryUtils.check(random, query, searcher);
+
+    Set<Integer> correct = new TreeSet<Integer>();
+    for (int i = 0; i < results.length; i++) {
+      correct.add(Integer.valueOf(results[i]));
+    }
+    final Set<Integer> actual = new TreeSet<Integer>();
+    final Collector c = new SetCollector(actual);
+
+    searcher.search(query, c);
+    Assert.assertEquals("Simple: " + query.toString(defaultFieldName),
+                        correct, actual);
+
+    for (int i = -1; i < 2; i++) {
+      actual.clear();
+      QueryUtils.wrapSearcher(random, searcher, i).search(query, c);
+      Assert.assertEquals("Wrap Searcher " + i + ": " +
+                          query.toString(defaultFieldName),
+                          correct, actual);
+    }
+
+    if ( ! ( searcher instanceof IndexSearcher ) ) return;
+
+    for (int i = -1; i < 2; i++) {
+      actual.clear();
+      QueryUtils.wrapUnderlyingReader
+        (random, (IndexSearcher)searcher, i).search(query, c);
+      Assert.assertEquals("Wrap Reader " + i + ": " +
+                          query.toString(defaultFieldName),
+                          correct, actual);
+    }
+  }
+
+  public static class SetCollector extends Collector {
+    final Set<Integer> bag;
+    public SetCollector(Set<Integer> bag) {
+      this.bag = bag;
+    }
+    private int base = 0;
+    @Override
+    public void setScorer(Scorer scorer) throws IOException {}
+    @Override
+    public void collect(int doc) {
+      bag.add(Integer.valueOf(doc + base));
+    }
+    @Override
+    public void setNextReader(IndexReader reader, int docBase) {
+      base = docBase;
+    }
+    @Override
+    public boolean acceptsDocsOutOfOrder() {
+      return true;
+    }
+  }
+
+  /**
+   * Tests that a query matches an expected set of documents using Hits.
+   *
+   * <p>
+   * Note that when using the Hits API, documents will only be returned
+   * if they have a positive normalized score.
+   * </p>
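+   * <p>
+   * For example (a sketch; the query, searcher, and expected ids are
+   * hypothetical):
+   * <pre>
+   *   CheckHits.checkHits(random, query, "field", searcher, new int[] {1, 4});
+   * </pre>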
+   * @param query the query to test
+   * @param searcher the searcher to test the query against
+   * @param defaultFieldName used for displaying the query in assertion messages
+   * @param results a list of documentIds that must match the query
+   * @see Searcher#search(Query, int)
+   * @see #checkHitCollector
+   */
+  public static void checkHits(
+        Random random,
+        Query query,
+        String defaultFieldName,
+        Searcher searcher,
+        int[] results)
+          throws IOException {
+
+    ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
+
+    Set<Integer> correct = new TreeSet<Integer>();
+    for (int i = 0; i < results.length; i++) {
+      correct.add(Integer.valueOf(results[i]));
+    }
+
+    Set<Integer> actual = new TreeSet<Integer>();
+    for (int i = 0; i < hits.length; i++) {
+      actual.add(Integer.valueOf(hits[i].doc));
+    }
+
+    Assert.assertEquals(query.toString(defaultFieldName), correct, actual);
+
+    QueryUtils.check(random, query, searcher);
+  }
+
+  /** Tests that a Hits has an expected order of documents */
+  public static void checkDocIds(String mes, int[] results, ScoreDoc[] hits)
+      throws IOException {
+    Assert.assertEquals(mes + " nr of hits", hits.length, results.length);
+    for (int i = 0; i < results.length; i++) {
+      Assert.assertEquals(mes + " doc nrs for hit " + i, results[i], hits[i].doc);
+    }
+  }
+
+  /** Tests that two queries have an expected order of documents,
+   * and that the two queries have the same score values.
+   */
+  public static void checkHitsQuery(
+        Query query,
+        ScoreDoc[] hits1,
+        ScoreDoc[] hits2,
+        int[] results)
+          throws IOException {
+
+    checkDocIds("hits1", results, hits1);
+    checkDocIds("hits2", results, hits2);
+    checkEqual(query, hits1, hits2);
+  }
+
+  public static void checkEqual(Query query, ScoreDoc[] hits1, ScoreDoc[] hits2) throws IOException {
+    final float scoreTolerance = 1.0e-6f;
+    if (hits1.length != hits2.length) {
+      Assert.fail("Unequal lengths: hits1="+hits1.length+",hits2="+hits2.length);
+    }
+    for (int i = 0; i < hits1.length; i++) {
+      if (hits1[i].doc != hits2[i].doc) {
+        Assert.fail("Hit " + i + " docnumbers don't match\n"
+                    + hits2str(hits1, hits2, 0, 0)
+                    + "for query:" + query.toString());
+      }
+
+      if ((hits1[i].doc != hits2[i].doc)
+          || Math.abs(hits1[i].score - hits2[i].score) > scoreTolerance)
+      {
+        Assert.fail("Hit " + i + ", doc nrs " + hits1[i].doc + " and " + hits2[i].doc
+                    + "\nunequal       : " + hits1[i].score
+                    + "\n           and: " + hits2[i].score
+                    + "\nfor query:" + query.toString());
+      }
+    }
+  }
+
+  public static String hits2str(ScoreDoc[] hits1, ScoreDoc[] hits2, int start, int end) throws IOException {
+    StringBuilder sb = new StringBuilder();
+    int len1=hits1==null ? 0 : hits1.length;
+    int len2=hits2==null ? 0 : hits2.length;
+    if (end<=0) {
+      end = Math.max(len1,len2);
+    }
+
+    sb.append("Hits length1=").append(len1).append("\tlength2=").append(len2);
+
+    sb.append('\n');
+    for (int i=start; i times others" (where is float).
+ float x = 0; + String descr = expl.getDescription().toLowerCase(); + boolean productOf = descr.endsWith("product of:"); + boolean sumOf = descr.endsWith("sum of:"); + boolean maxOf = descr.endsWith("max of:"); + boolean maxTimesOthers = false; + if (!(productOf || sumOf || maxOf)) { + // maybe 'max plus x times others' + int k1 = descr.indexOf("max plus "); + if (k1>=0) { + k1 += "max plus ".length(); + int k2 = descr.indexOf(" ",k1); + try { + x = Float.parseFloat(descr.substring(k1,k2).trim()); + if (descr.substring(k2).trim().equals("times others of:")) { + maxTimesOthers = true; + } + } catch (NumberFormatException e) { + } + } + } + Assert.assertTrue( + q+": multi valued explanation description=\""+descr + +"\" must be 'max of plus x times others' or end with 'product of'" + +" or 'sum of:' or 'max of:' - "+expl, + productOf || sumOf || maxOf || maxTimesOthers); + float sum = 0; + float product = 1; + float max = 0; + for (int i=0; i maxDiff + || scorerDiff > maxDiff) { + StringBuilder sbord = new StringBuilder(); + for (int i = 0; i < order.length; i++) + sbord.append(order[i] == skip_op ? " skip()" : " next()"); + throw new RuntimeException("ERROR matching docs:" + "\n\t" + + (doc != scorerDoc ? "--> " : "") + "doc=" + doc + ", scorerDoc=" + scorerDoc + + "\n\t" + (!more ? "--> " : "") + "tscorer.more=" + more + + "\n\t" + (scoreDiff > maxDiff ? "--> " : "") + + "scorerScore=" + scorerScore + " scoreDiff=" + scoreDiff + + " maxDiff=" + maxDiff + "\n\t" + + (scorerDiff > maxDiff ? "--> " : "") + "scorerScore2=" + + scorerScore2 + " scorerDiff=" + scorerDiff + + "\n\thitCollector.doc=" + doc + " score=" + score + + "\n\t Scorer=" + scorer + "\n\t Query=" + q + " " + + q.getClass().getName() + "\n\t Searcher=" + s + + "\n\t Order=" + sbord + "\n\t Op=" + + (op == skip_op ? 
" skip()" : " next()")); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public void setNextReader(IndexReader reader, int docBase) throws IOException { + // confirm that skipping beyond the last doc, on the + // previous reader, hits NO_MORE_DOCS + if (lastReader[0] != null) { + final IndexReader previousReader = lastReader[0]; + Weight w = new IndexSearcher(previousReader).createNormalizedWeight(q); + Scorer scorer = w.scorer(previousReader, true, false); + if (scorer != null) { + boolean more = scorer.advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS; + Assert.assertFalse("query's last doc was "+ lastDoc[0] +" but skipTo("+(lastDoc[0]+1)+") got to "+scorer.docID(),more); + } + } + this.reader = lastReader[0] = reader; + this.scorer = null; + lastDoc[0] = -1; + } + + @Override + public boolean acceptsDocsOutOfOrder() { + return true; + } + }); + + if (lastReader[0] != null) { + // confirm that skipping beyond the last doc, on the + // previous reader, hits NO_MORE_DOCS + final IndexReader previousReader = lastReader[0]; + Weight w = new IndexSearcher(previousReader).createNormalizedWeight(q); + Scorer scorer = w.scorer(previousReader, true, false); + if (scorer != null) { + boolean more = scorer.advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS; + Assert.assertFalse("query's last doc was "+ lastDoc[0] +" but skipTo("+(lastDoc[0]+1)+") got to "+scorer.docID(),more); + } + } + } + } + + // check that first skip on just created scorers always goes to the right doc + private static void checkFirstSkipTo(final Query q, final IndexSearcher s) throws IOException { + //System.out.println("checkFirstSkipTo: "+q); + final float maxDiff = 1e-3f; + final int lastDoc[] = {-1}; + final IndexReader lastReader[] = {null}; + + s.search(q,new Collector() { + private Scorer scorer; + private IndexReader reader; + @Override + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + } + @Override + public void collect(int doc) throws IOException { + //System.out.println("doc="+doc); + float score = scorer.score(); + try { + + for (int i=lastDoc[0]+1; i<=doc; i++) { + Weight w = s.createNormalizedWeight(q); + Scorer scorer = w.scorer(reader, true, false); + Assert.assertTrue("query collected "+doc+" but skipTo("+i+") says no more docs!",scorer.advance(i) != DocIdSetIterator.NO_MORE_DOCS); + Assert.assertEquals("query collected "+doc+" but skipTo("+i+") got to "+scorer.docID(),doc,scorer.docID()); + float skipToScore = scorer.score(); + Assert.assertEquals("unstable skipTo("+i+") score!",skipToScore,scorer.score(),maxDiff); + Assert.assertEquals("query assigned doc "+doc+" a score of <"+score+"> but skipTo("+i+") has <"+skipToScore+">!",score,skipToScore,maxDiff); + } + lastDoc[0] = doc; + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public void setNextReader(IndexReader reader, int docBase) throws IOException { + // confirm that skipping beyond the last doc, on the + // previous reader, hits NO_MORE_DOCS + if (lastReader[0] != null) { + final IndexReader previousReader = lastReader[0]; + Weight w = new IndexSearcher(previousReader).createNormalizedWeight(q); + Scorer scorer = w.scorer(previousReader, true, false); + + if (scorer != null) { + boolean more = scorer.advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS; + Assert.assertFalse("query's last doc was "+ lastDoc[0] +" but skipTo("+(lastDoc[0]+1)+") got to "+scorer.docID(),more); + } + } + + this.reader = lastReader[0] = reader; + 
lastDoc[0] = -1; + } + @Override + public boolean acceptsDocsOutOfOrder() { + return false; + } + }); + + if (lastReader[0] != null) { + // confirm that skipping beyond the last doc, on the + // previous reader, hits NO_MORE_DOCS + final IndexReader previousReader = lastReader[0]; + Weight w = new IndexSearcher(previousReader).createNormalizedWeight(q); + Scorer scorer = w.scorer(previousReader, true, false); + if (scorer != null) { + boolean more = scorer.advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS; + Assert.assertFalse("query's last doc was "+ lastDoc[0] +" but skipTo("+(lastDoc[0]+1)+") got to "+scorer.docID(),more); + } + } + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java b/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java new file mode 100644 index 0000000..0d12bd8 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java @@ -0,0 +1,648 @@ +package org.apache.lucene.store; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.Closeable; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.IdentityHashMap; +import java.util.Iterator; +import java.util.Map; +import java.util.Random; +import java.util.Set; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.ThrottledIndexOutput; +import org.apache.lucene.util._TestUtil; + +/** + * This is a Directory Wrapper that adds methods + * intended to be used only by unit tests. + * It also adds a number of features useful for testing: + *
+ * <ul>
+ *   <li> Instances created by {@link LuceneTestCase#newDirectory()} are tracked
+ *        to ensure they are closed by the test.
+ *   <li> When a MockDirectoryWrapper is closed, it will throw an exception if
+ *        it has any open files against it (with a stacktrace indicating where
+ *        they were opened from).
+ *   <li> When a MockDirectoryWrapper is closed, it runs CheckIndex to test if
+ *        the index was corrupted.
+ *   <li> MockDirectoryWrapper simulates some "features" of Windows, such as
+ *        refusing to write/delete to open files.
+ * </ul>
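+ * <p>
+ * Typical usage is to wrap a real directory (a sketch; {@code random} is the
+ * test's Random instance, and RAMDirectory is just one possible delegate):
+ * <pre>
+ *   MockDirectoryWrapper dir = new MockDirectoryWrapper(random, new RAMDirectory());
+ *   // ... index and search as usual; dir.close() will also run CheckIndex
+ * </pre>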
+ */
+
+public class MockDirectoryWrapper extends Directory {
+  final Directory delegate;
+  long maxSize;
+
+  // Max actual bytes used. This is set by MockRAMOutputStream:
+  long maxUsedSize;
+  double randomIOExceptionRate;
+  Random randomState;
+  boolean noDeleteOpenFile = true;
+  boolean preventDoubleWrite = true;
+  boolean checkIndexOnClose = true;
+  boolean trackDiskUsage = false;
+  private Set<String> unSyncedFiles;
+  private Set<String> createdFiles;
+  private Set<String> openFilesForWrite = new HashSet<String>();
+  Set<String> openLocks = Collections.synchronizedSet(new HashSet<String>());
+  volatile boolean crashed;
+  private ThrottledIndexOutput throttledOutput;
+  private Throttling throttling = Throttling.SOMETIMES;
+
+  // use this for tracking files for crash.
+  // additionally: provides debugging information in case you leave one open
+  private Map<Closeable,Exception> openFileHandles = Collections.synchronizedMap(new IdentityHashMap<Closeable,Exception>());
+
+  // NOTE: we cannot initialize the Map here due to the
+  // order in which our constructor actually does this
+  // member initialization vs when it calls super.  It seems
+  // like super is called, then our members are initialized:
+  private Map<String,Integer> openFiles;
+
+  // Only tracked if noDeleteOpenFile is true: if an attempt
+  // is made to delete an open file, we enroll it here.
+  private Set<String> openFilesDeleted;
+
+  private synchronized void init() {
+    if (openFiles == null) {
+      openFiles = new HashMap<String,Integer>();
+      openFilesDeleted = new HashSet<String>();
+    }
+
+    if (createdFiles == null)
+      createdFiles = new HashSet<String>();
+    if (unSyncedFiles == null)
+      unSyncedFiles = new HashSet<String>();
+  }
+
+  public MockDirectoryWrapper(Random random, Directory delegate) {
+    this.delegate = delegate;
+    // must make a private random since our methods are
+    // called from different threads; else test failures may
+    // not be reproducible from the original seed
+    this.randomState = new Random(random.nextInt());
+    this.throttledOutput = new ThrottledIndexOutput(ThrottledIndexOutput
+        .mBitsToBytes(40 + randomState.nextInt(10)), 5 + randomState.nextInt(5), null);
+    // force wrapping of lockfactory
+    try {
+      setLockFactory(new MockLockFactoryWrapper(this, delegate.getLockFactory()));
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+    init();
+  }
+
+  public void setTrackDiskUsage(boolean v) {
+    trackDiskUsage = v;
+  }
+
+  /** If set to true, we throw an IOException if the same
+   *  file is opened by createOutput, ever. */
+  public void setPreventDoubleWrite(boolean value) {
+    preventDoubleWrite = value;
+  }
+
+  @Deprecated
+  @Override
+  public void sync(String name) throws IOException {
+    maybeYield();
+    maybeThrowDeterministicException();
+    if (crashed)
+      throw new IOException("cannot sync after crash");
+    unSyncedFiles.remove(name);
+    delegate.sync(name);
+  }
+
+  public static enum Throttling {
+    /** always emulate a slow hard disk. could be very slow! */
+    ALWAYS,
+    /** sometimes (2% of the time) emulate a slow hard disk.
     */
+    SOMETIMES,
+    /** never throttle output */
+    NEVER
+  }
+
+  public void setThrottling(Throttling throttling) {
+    this.throttling = throttling;
+  }
+
+  @Override
+  public synchronized void sync(Collection<String> names) throws IOException {
+    maybeYield();
+    for (String name : names)
+      maybeThrowDeterministicException();
+    if (crashed)
+      throw new IOException("cannot sync after crash");
+    unSyncedFiles.removeAll(names);
+    delegate.sync(names);
+  }
+
+  @Override
+  public String toString() {
+    maybeYield();
+    return "MockDirWrapper(" + delegate + ")";
+  }
+
+  public synchronized final long sizeInBytes() throws IOException {
+    if (delegate instanceof RAMDirectory)
+      return ((RAMDirectory) delegate).sizeInBytes();
+    else {
+      // hack
+      long size = 0;
+      for (String file : delegate.listAll())
+        size += delegate.fileLength(file);
+      return size;
+    }
+  }
+
+  /** Simulates a crash of OS or machine by overwriting
+   *  unsynced files. */
+  public synchronized void crash() throws IOException {
+    crashed = true;
+    openFiles = new HashMap<String,Integer>();
+    openFilesForWrite = new HashSet<String>();
+    openFilesDeleted = new HashSet<String>();
+    Iterator<String> it = unSyncedFiles.iterator();
+    unSyncedFiles = new HashSet<String>();
+    // first force-close all files, so we can corrupt on windows etc.
+    // clone the file map, as these guys want to remove themselves on close.
+    Map<Closeable,Exception> m = new IdentityHashMap<Closeable,Exception>(openFileHandles);
+    for (Closeable f : m.keySet())
+      try {
+        f.close();
+      } catch (Exception ignored) {}
+
+    int count = 0;
+    while(it.hasNext()) {
+      String name = it.next();
+      if (count % 3 == 0) {
+        deleteFile(name, true);
+      } else if (count % 3 == 1) {
+        // Zero out file entirely
+        long length = fileLength(name);
+        byte[] zeroes = new byte[256];
+        long upto = 0;
+        IndexOutput out = delegate.createOutput(name);
+        while(upto < length) {
+          final int limit = (int) Math.min(length-upto, zeroes.length);
+          out.writeBytes(zeroes, 0, limit);
+          upto += limit;
+        }
+        out.close();
+      } else if (count % 3 == 2) {
+        // Truncate the file:
+        IndexOutput out = delegate.createOutput(name);
+        out.setLength(fileLength(name)/2);
+        out.close();
+      }
+      count++;
+    }
+  }
+
+  public synchronized void clearCrash() throws IOException {
+    crashed = false;
+    openLocks.clear();
+  }
+
+  public void setMaxSizeInBytes(long maxSize) {
+    this.maxSize = maxSize;
+  }
+  public long getMaxSizeInBytes() {
+    return this.maxSize;
+  }
+
+  /**
+   * Returns the peak actual storage used (bytes) in this
+   * directory.
+   */
+  public long getMaxUsedSizeInBytes() {
+    return this.maxUsedSize;
+  }
+  public void resetMaxUsedSizeInBytes() throws IOException {
+    this.maxUsedSize = getRecomputedActualSizeInBytes();
+  }
+
+  /**
+   * Emulate windows whereby deleting an open file is not
+   * allowed (raise IOException).
+   */
+  public void setNoDeleteOpenFile(boolean value) {
+    this.noDeleteOpenFile = value;
+  }
+  public boolean getNoDeleteOpenFile() {
+    return noDeleteOpenFile;
+  }
+
+  /**
+   * Set whether or not checkindex should be run
+   * on close
+   */
+  public void setCheckIndexOnClose(boolean value) {
+    this.checkIndexOnClose = value;
+  }
+
+  public boolean getCheckIndexOnClose() {
+    return checkIndexOnClose;
+  }
+  /**
+   * If 0.0, no exceptions will be thrown.  Else this should
+   * be a double 0.0 - 1.0.  We will randomly throw an
+   * IOException on the first write to an OutputStream based
+   * on this probability.
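+   * <p>
+   * For example (a sketch; {@code dir} is a MockDirectoryWrapper):
+   * <pre>
+   *   dir.setRandomIOExceptionRate(0.1); // roughly 10% of first writes throw
+   * </pre>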
+   */
+  public void setRandomIOExceptionRate(double rate) {
+    randomIOExceptionRate = rate;
+  }
+  public double getRandomIOExceptionRate() {
+    return randomIOExceptionRate;
+  }
+
+  void maybeThrowIOException() throws IOException {
+    if (randomIOExceptionRate > 0.0) {
+      int number = Math.abs(randomState.nextInt() % 1000);
+      if (number < randomIOExceptionRate*1000) {
+        if (LuceneTestCase.VERBOSE) {
+          System.out.println(Thread.currentThread().getName() + ": MockDirectoryWrapper: now throw random exception");
+          new Throwable().printStackTrace(System.out);
+        }
+        throw new IOException("a random IOException");
+      }
+    }
+  }
+
+  @Override
+  public synchronized void deleteFile(String name) throws IOException {
+    maybeYield();
+    deleteFile(name, false);
+  }
+
+  // sets the cause of the incoming ioe to be the stack
+  // trace when the offending file name was opened
+  private synchronized IOException fillOpenTrace(IOException ioe, String name, boolean input) {
+    for(Map.Entry<Closeable,Exception> ent : openFileHandles.entrySet()) {
+      if (input && ent.getKey() instanceof MockIndexInputWrapper && ((MockIndexInputWrapper) ent.getKey()).name.equals(name)) {
+        ioe.initCause(ent.getValue());
+        break;
+      } else if (!input && ent.getKey() instanceof MockIndexOutputWrapper && ((MockIndexOutputWrapper) ent.getKey()).name.equals(name)) {
+        ioe.initCause(ent.getValue());
+        break;
+      }
+    }
+    return ioe;
+  }
+
+  private void maybeYield() {
+    if (randomState.nextBoolean()) {
+      Thread.yield();
+    }
+  }
+
+  private synchronized void deleteFile(String name, boolean forced) throws IOException {
+    maybeYield();
+
+    maybeThrowDeterministicException();
+
+    if (crashed && !forced)
+      throw new IOException("cannot delete after crash");
+
+    if (unSyncedFiles.contains(name))
+      unSyncedFiles.remove(name);
+    if (!forced && noDeleteOpenFile) {
+      if (openFiles.containsKey(name)) {
+        openFilesDeleted.add(name);
+        throw fillOpenTrace(new IOException("MockDirectoryWrapper: file \"" + name + "\" is still open: cannot delete"), name, true);
+      } else {
+        openFilesDeleted.remove(name);
+      }
+    }
+    delegate.deleteFile(name);
+  }
+
+  public synchronized Set<String> getOpenDeletedFiles() {
+    return new HashSet<String>(openFilesDeleted);
+  }
+
+  @Override
+  public synchronized IndexOutput createOutput(String name) throws IOException {
+    maybeYield();
+    if (crashed)
+      throw new IOException("cannot createOutput after crash");
+    init();
+    synchronized(this) {
+      if (preventDoubleWrite && createdFiles.contains(name) && !name.equals("segments.gen"))
+        throw new IOException("file \"" + name + "\" was already written to");
+    }
+    if (noDeleteOpenFile && openFiles.containsKey(name))
+      throw new IOException("MockDirectoryWrapper: file \"" + name + "\" is still open: cannot overwrite");
+
+    if (crashed)
+      throw new IOException("cannot createOutput after crash");
+    unSyncedFiles.add(name);
+    createdFiles.add(name);
+
+    if (delegate instanceof RAMDirectory) {
+      RAMDirectory ramdir = (RAMDirectory) delegate;
+      RAMFile file = new RAMFile(ramdir);
+      RAMFile existing = ramdir.fileMap.get(name);
+
+      // Enforce write once:
+      if (existing!=null && !name.equals("segments.gen") && preventDoubleWrite)
+        throw new IOException("file " + name + " already exists");
+      else {
+        if (existing!=null) {
+          ramdir.sizeInBytes.getAndAdd(-existing.sizeInBytes);
+          existing.directory = null;
+        }
+        ramdir.fileMap.put(name, file);
+      }
+    }
+
+    //System.out.println(Thread.currentThread().getName() + ": MDW: create " + name);
+    IndexOutput io = new MockIndexOutputWrapper(this, delegate.createOutput(name), name);
addFileHandle(io, name, false); + openFilesForWrite.add(name); + + // throttling REALLY slows down tests, so don't do it very often for SOMETIMES. + if (throttling == Throttling.ALWAYS || + (throttling == Throttling.SOMETIMES && randomState.nextInt(50) == 0)) { + if (LuceneTestCase.VERBOSE) { + System.out.println("MockDirectoryWrapper: throttling indexOutput"); + } + return throttledOutput.newFromDelegate(io); + } else { + return io; + } + } + + private void addFileHandle(Closeable c, String name, boolean input) { + Integer v = openFiles.get(name); + if (v != null) { + v = Integer.valueOf(v.intValue()+1); + openFiles.put(name, v); + } else { + openFiles.put(name, Integer.valueOf(1)); + } + + openFileHandles.put(c, new RuntimeException("unclosed Index" + (input ? "Input" : "Output") + ": " + name)); + } + + @Override + public synchronized IndexInput openInput(String name) throws IOException { + maybeYield(); + if (!delegate.fileExists(name)) + throw new FileNotFoundException(name); + + // cannot open a file for input if it's still open for + // output, except for segments.gen and segments_N + if (openFilesForWrite.contains(name) && !name.startsWith("segments")) { + throw fillOpenTrace(new IOException("MockDirectoryWrapper: file \"" + name + "\" is still open for writing"), name, false); + } + + IndexInput ii = new MockIndexInputWrapper(this, name, delegate.openInput(name)); + addFileHandle(ii, name, true); + return ii; + } + + /** Provided for testing purposes. Use sizeInBytes() instead. */ + public synchronized final long getRecomputedSizeInBytes() throws IOException { + if (!(delegate instanceof RAMDirectory)) + return sizeInBytes(); + long size = 0; + for(final RAMFile file: ((RAMDirectory)delegate).fileMap.values()) { + size += file.getSizeInBytes(); + } + return size; + } + + /** Like getRecomputedSizeInBytes(), but, uses actual file + * lengths rather than buffer allocations (which are + * quantized up to nearest + * RAMOutputStream.BUFFER_SIZE (now 1024) bytes. 
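+   *
+   * For example (illustrative): a file holding 10 bytes still occupies one
+   * full 1024-byte buffer, so getRecomputedSizeInBytes() counts 1024 for it
+   * while this method counts its actual length of 10.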
+ */ + + public final synchronized long getRecomputedActualSizeInBytes() throws IOException { + if (!(delegate instanceof RAMDirectory)) + return sizeInBytes(); + long size = 0; + for (final RAMFile file : ((RAMDirectory)delegate).fileMap.values()) + size += file.length; + return size; + } + + @Override + public synchronized void close() throws IOException { + maybeYield(); + if (openFiles == null) { + openFiles = new HashMap(); + openFilesDeleted = new HashSet(); + } + if (noDeleteOpenFile && openFiles.size() > 0) { + // print the first one as its very verbose otherwise + Exception cause = null; + Iterator stacktraces = openFileHandles.values().iterator(); + if (stacktraces.hasNext()) + cause = stacktraces.next(); + // RuntimeException instead of IOException because + // super() does not throw IOException currently: + throw new RuntimeException("MockDirectoryWrapper: cannot close: there are still open files: " + openFiles, cause); + } + if (noDeleteOpenFile && openLocks.size() > 0) { + throw new RuntimeException("MockDirectoryWrapper: cannot close: there are still open locks: " + openLocks); + } + open = false; + if (checkIndexOnClose && IndexReader.indexExists(this)) { + if (LuceneTestCase.VERBOSE) { + System.out.println("\nNOTE: MockDirectoryWrapper: now run CheckIndex"); + } + _TestUtil.checkIndex(this); + } + delegate.close(); + } + + private synchronized void removeOpenFile(Closeable c, String name) { + Integer v = openFiles.get(name); + // Could be null when crash() was called + if (v != null) { + if (v.intValue() == 1) { + openFiles.remove(name); + openFilesDeleted.remove(name); + } else { + v = Integer.valueOf(v.intValue()-1); + openFiles.put(name, v); + } + } + + openFileHandles.remove(c); + } + + public synchronized void removeIndexOutput(IndexOutput out, String name) { + openFilesForWrite.remove(name); + removeOpenFile(out, name); + } + + public synchronized void removeIndexInput(IndexInput in, String name) { + removeOpenFile(in, name); + } + + boolean open = true; + + public synchronized boolean isOpen() { + return open; + } + + /** + * Objects that represent fail-able conditions. Objects of a derived + * class are created and registered with the mock directory. After + * register, each object will be invoked once for each first write + * of a file, giving the object a chance to throw an IOException. + */ + public static class Failure { + /** + * eval is called on the first write of every new file. + */ + public void eval(MockDirectoryWrapper dir) throws IOException { } + + /** + * reset should set the state of the failure to its default + * (freshly constructed) state. Reset is convenient for tests + * that want to create one failure object and then reuse it in + * multiple cases. This, combined with the fact that Failure + * subclasses are often anonymous classes makes reset difficult to + * do otherwise. + * + * A typical example of use is + * Failure failure = new Failure() { ... }; + * ... 
+ * mock.failOn(failure.reset()) + */ + public Failure reset() { return this; } + + protected boolean doFail; + + public void setDoFail() { + doFail = true; + } + + public void clearDoFail() { + doFail = false; + } + } + + ArrayList failures; + + /** + * add a Failure object to the list of objects to be evaluated + * at every potential failure point + */ + synchronized public void failOn(Failure fail) { + if (failures == null) { + failures = new ArrayList(); + } + failures.add(fail); + } + + /** + * Iterate through the failures list, giving each object a + * chance to throw an IOE + */ + synchronized void maybeThrowDeterministicException() throws IOException { + if (failures != null) { + for(int i = 0; i < failures.size(); i++) { + failures.get(i).eval(this); + } + } + } + + @Override + public synchronized String[] listAll() throws IOException { + maybeYield(); + return delegate.listAll(); + } + + @Override + public synchronized boolean fileExists(String name) throws IOException { + maybeYield(); + return delegate.fileExists(name); + } + + @Override + public synchronized long fileModified(String name) throws IOException { + maybeYield(); + return delegate.fileModified(name); + } + + @Override + @Deprecated + /* @deprecated Lucene never uses this API; it will be + * removed in 4.0. */ + public synchronized void touchFile(String name) throws IOException { + maybeYield(); + delegate.touchFile(name); + } + + @Override + public synchronized long fileLength(String name) throws IOException { + maybeYield(); + return delegate.fileLength(name); + } + + @Override + public synchronized Lock makeLock(String name) { + maybeYield(); + return delegate.makeLock(name); + } + + @Override + public synchronized void clearLock(String name) throws IOException { + maybeYield(); + delegate.clearLock(name); + } + + @Override + public synchronized void setLockFactory(LockFactory lockFactory) throws IOException { + maybeYield(); + delegate.setLockFactory(lockFactory); + } + + @Override + public synchronized LockFactory getLockFactory() { + maybeYield(); + return delegate.getLockFactory(); + } + + @Override + public synchronized String getLockID() { + maybeYield(); + return delegate.getLockID(); + } + + @Override + public synchronized void copy(Directory to, String src, String dest) throws IOException { + maybeYield(); + delegate.copy(to, src, dest); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/store/MockIndexInputWrapper.java b/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/store/MockIndexInputWrapper.java new file mode 100644 index 0000000..32d8e6f --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/store/MockIndexInputWrapper.java @@ -0,0 +1,161 @@ +package org.apache.lucene.store; + +import java.io.IOException; +import java.util.Map; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Used by MockDirectoryWrapper to create an input stream that + * keeps track of when it's been closed. + */ + +public class MockIndexInputWrapper extends IndexInput { + private MockDirectoryWrapper dir; + final String name; + private IndexInput delegate; + private boolean isClone; + + /** Construct an empty output buffer. */ + public MockIndexInputWrapper(MockDirectoryWrapper dir, String name, IndexInput delegate) { + this.name = name; + this.dir = dir; + this.delegate = delegate; + } + + @Override + public void close() throws IOException { + try { + // turn on the following to look for leaks closing inputs, + // after fixing TestTransactions + // dir.maybeThrowDeterministicException(); + } finally { + delegate.close(); + // Pending resolution on LUCENE-686 we may want to + // remove the conditional check so we also track that + // all clones get closed: + if (!isClone) { + dir.removeIndexInput(this, name); + } + } + } + + @Override + public Object clone() { + IndexInput iiclone = (IndexInput) delegate.clone(); + MockIndexInputWrapper clone = new MockIndexInputWrapper(dir, name, iiclone); + clone.isClone = true; + // Pending resolution on LUCENE-686 we may want to + // uncomment this code so that we also track that all + // clones get closed: + /* + synchronized(dir.openFiles) { + if (dir.openFiles.containsKey(name)) { + Integer v = (Integer) dir.openFiles.get(name); + v = Integer.valueOf(v.intValue()+1); + dir.openFiles.put(name, v); + } else { + throw new RuntimeException("BUG: cloned file was not open?"); + } + } + */ + return clone; + } + + @Override + public long getFilePointer() { + return delegate.getFilePointer(); + } + + @Override + public void seek(long pos) throws IOException { + delegate.seek(pos); + } + + @Override + public long length() { + return delegate.length(); + } + + @Override + public byte readByte() throws IOException { + return delegate.readByte(); + } + + @Override + public void readBytes(byte[] b, int offset, int len) throws IOException { + delegate.readBytes(b, offset, len); + } + + @Override + public void copyBytes(IndexOutput out, long numBytes) throws IOException { + delegate.copyBytes(out, numBytes); + } + + @Override + public void readBytes(byte[] b, int offset, int len, boolean useBuffer) + throws IOException { + delegate.readBytes(b, offset, len, useBuffer); + } + + @Override + public int readInt() throws IOException { + return delegate.readInt(); + } + + @Override + public int readVInt() throws IOException { + return delegate.readVInt(); + } + + @Override + public long readLong() throws IOException { + return delegate.readLong(); + } + + @Override + public long readVLong() throws IOException { + return delegate.readVLong(); + } + + @Override + public String readString() throws IOException { + return delegate.readString(); + } + + @Override + public Map readStringStringMap() throws IOException { + return delegate.readStringStringMap(); + } + + @Override + public void setModifiedUTF8StringsMode() { + delegate.setModifiedUTF8StringsMode(); + } + + @Override + public void readChars(char[] buffer, int start, int length) + throws 
IOException { + delegate.readChars(buffer, start, length); + } + + @Override + public void skipChars(int length) throws IOException { + delegate.skipChars(length); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/store/MockIndexOutputWrapper.java b/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/store/MockIndexOutputWrapper.java new file mode 100644 index 0000000..0f93567 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/store/MockIndexOutputWrapper.java @@ -0,0 +1,159 @@ +package org.apache.lucene.store; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.util.LuceneTestCase; + +/** + * Used by MockRAMDirectory to create an output stream that + * will throw an IOException on fake disk full, track max + * disk space actually used, and maybe throw random + * IOExceptions. + */ + +public class MockIndexOutputWrapper extends IndexOutput { + private MockDirectoryWrapper dir; + private final IndexOutput delegate; + private boolean first=true; + final String name; + + byte[] singleByte = new byte[1]; + + /** Construct an empty output buffer. */ + public MockIndexOutputWrapper(MockDirectoryWrapper dir, IndexOutput delegate, String name) { + this.dir = dir; + this.name = name; + this.delegate = delegate; + } + + @Override + public void close() throws IOException { + try { + dir.maybeThrowDeterministicException(); + } finally { + delegate.close(); + if (dir.trackDiskUsage) { + // Now compute actual disk usage & track the maxUsedSize + // in the MockDirectoryWrapper: + long size = dir.getRecomputedActualSizeInBytes(); + if (size > dir.maxUsedSize) { + dir.maxUsedSize = size; + } + } + dir.removeIndexOutput(this, name); + } + } + + @Override + public void flush() throws IOException { + dir.maybeThrowDeterministicException(); + delegate.flush(); + } + + @Override + public void writeByte(byte b) throws IOException { + singleByte[0] = b; + writeBytes(singleByte, 0, 1); + } + + @Override + public void writeBytes(byte[] b, int offset, int len) throws IOException { + long freeSpace = dir.maxSize == 0 ? 0 : dir.maxSize - dir.sizeInBytes(); + long realUsage = 0; + + // If MockRAMDir crashed since we were opened, then + // don't write anything: + if (dir.crashed) + throw new IOException("MockRAMDirectory was crashed; cannot write to " + name); + + // Enforce disk full: + if (dir.maxSize != 0 && freeSpace <= len) { + // Compute the real disk free. 
This will greatly slow + // down our test but makes it more accurate: + realUsage = dir.getRecomputedActualSizeInBytes(); + freeSpace = dir.maxSize - realUsage; + } + + if (dir.maxSize != 0 && freeSpace <= len) { + if (freeSpace > 0) { + realUsage += freeSpace; + delegate.writeBytes(b, offset, (int) freeSpace); + } + if (realUsage > dir.maxUsedSize) { + dir.maxUsedSize = realUsage; + } + String message = "fake disk full at " + dir.getRecomputedActualSizeInBytes() + " bytes when writing " + name + " (file length=" + delegate.length(); + if (freeSpace > 0) { + message += "; wrote " + freeSpace + " of " + len + " bytes"; + } + message += ")"; + if (LuceneTestCase.VERBOSE) { + System.out.println(Thread.currentThread().getName() + ": MDW: now throw fake disk full"); + new Throwable().printStackTrace(System.out); + } + throw new IOException(message); + } else { + if (dir.randomState.nextInt(200) == 0) { + final int half = len/2; + delegate.writeBytes(b, offset, half); + Thread.yield(); + delegate.writeBytes(b, offset+half, len-half); + } else { + delegate.writeBytes(b, offset, len); + } + } + + dir.maybeThrowDeterministicException(); + + if (first) { + // Maybe throw random exception; only do this on first + // write to a new file: + first = false; + dir.maybeThrowIOException(); + } + } + + @Override + public long getFilePointer() { + return delegate.getFilePointer(); + } + + @Override + public void seek(long pos) throws IOException { + delegate.seek(pos); + } + + @Override + public long length() throws IOException { + return delegate.length(); + } + + @Override + public void setLength(long length) throws IOException { + delegate.setLength(length); + } + + @Override + public void copyBytes(DataInput input, long numBytes) throws IOException { + delegate.copyBytes(input, numBytes); + // TODO: we may need to check disk full here as well + dir.maybeThrowDeterministicException(); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/store/MockLockFactoryWrapper.java b/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/store/MockLockFactoryWrapper.java new file mode 100644 index 0000000..b51889f --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/store/MockLockFactoryWrapper.java @@ -0,0 +1,87 @@ +package org.apache.lucene.store; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; + +public class MockLockFactoryWrapper extends LockFactory { + MockDirectoryWrapper dir; + LockFactory delegate; + + public MockLockFactoryWrapper(MockDirectoryWrapper dir, LockFactory delegate) { + this.dir = dir; + this.delegate = delegate; + } + + @Override + public void setLockPrefix(String lockPrefix) { + delegate.setLockPrefix(lockPrefix); + } + + @Override + public String getLockPrefix() { + return delegate.getLockPrefix(); + } + + @Override + public Lock makeLock(String lockName) { + return new MockLock(delegate.makeLock(lockName), lockName); + } + + @Override + public void clearLock(String lockName) throws IOException { + delegate.clearLock(lockName); + dir.openLocks.remove(lockName); + } + + @Override + public String toString() { + return "MockLockFactoryWrapper(" + delegate.toString() + ")"; + } + + private class MockLock extends Lock { + private Lock delegateLock; + private String name; + + MockLock(Lock delegate, String name) { + this.delegateLock = delegate; + this.name = name; + } + + @Override + public boolean obtain() throws IOException { + if (delegateLock.obtain()) { + dir.openLocks.add(name); + return true; + } else { + return false; + } + } + + @Override + public void release() throws IOException { + delegateLock.release(); + dir.openLocks.remove(name); + } + + @Override + public boolean isLocked() throws IOException { + return delegateLock.isLocked(); + } + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/store/_TestHelper.java b/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/store/_TestHelper.java new file mode 100644 index 0000000..fb90a87 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/store/_TestHelper.java @@ -0,0 +1,65 @@ +package org.apache.lucene.store; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.store.SimpleFSDirectory.SimpleFSIndexInput; + +/** This class provides access to package-level features defined in the + * store package. It is used for testing only. + */ +public class _TestHelper { + + /** Returns true if the instance of the provided input stream is actually + * an SimpleFSIndexInput. + */ + public static boolean isSimpleFSIndexInput(IndexInput is) { + return is instanceof SimpleFSIndexInput; + } + + /** Returns true if the provided input stream is an SimpleFSIndexInput and + * is a clone, that is it does not own its underlying file descriptor. 
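+   *
+   * Illustrative sketch (hypothetical file name; assumes dir is a SimpleFSDirectory):
+   *
+   *   IndexInput in = dir.openInput("_0.fdt");   // owns its file descriptor
+   *   IndexInput dup = (IndexInput) in.clone();  // shares in's descriptor
+   *   // isSimpleFSIndexInputClone(in) == false; isSimpleFSIndexInputClone(dup) == true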
+ */ + public static boolean isSimpleFSIndexInputClone(IndexInput is) { + if (isSimpleFSIndexInput(is)) { + return ((SimpleFSIndexInput) is).isClone; + } else { + return false; + } + } + + /** Given an instance of SimpleFSDirectory.SimpleFSIndexInput, this method returns + * true if the underlying file descriptor is valid, and false otherwise. + * This can be used to determine if the OS file has been closed. + * The descriptor becomes invalid when the non-clone instance of the + * SimpleFSIndexInput that owns this descriptor is closed. However, the + * descriptor may possibly become invalid in other ways as well. + */ + public static boolean isSimpleFSIndexInputOpen(IndexInput is) + throws IOException + { + if (isSimpleFSIndexInput(is)) { + SimpleFSIndexInput fis = (SimpleFSIndexInput) is; + return fis.isFDValid(); + } else { + return false; + } + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/util/LineFileDocs.java b/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/util/LineFileDocs.java new file mode 100644 index 0000000..a4cd41f --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/util/LineFileDocs.java @@ -0,0 +1,178 @@ +package org.apache.lucene.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.Closeable; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.BufferedReader; +import java.io.InputStreamReader; +import java.io.InputStream; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.zip.GZIPInputStream; +import java.util.Random; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; + +/** Minimal port of contrib/benchmark's LneDocSource + + * DocMaker, so tests can enum docs from a line file created + * by contrib/benchmark's WriteLineDoc task */ +public class LineFileDocs implements Closeable { + + private BufferedReader reader; + private final static int BUFFER_SIZE = 1 << 16; // 64K + private final AtomicInteger id = new AtomicInteger(); + private final String path; + + /** If forever is true, we rewind the file at EOF (repeat + * the docs over and over) */ + public LineFileDocs(Random random, String path) throws IOException { + this.path = path; + open(random); + } + + public LineFileDocs(Random random) throws IOException { + this(random, LuceneTestCase.TEST_LINE_DOCS_FILE); + } + + public synchronized void close() throws IOException { + if (reader != null) { + reader.close(); + reader = null; + } + } + + private synchronized void open(Random random) throws IOException { + InputStream is = getClass().getResourceAsStream(path); + if (is == null) { + // if its not in classpath, we load it as absolute filesystem path (e.g. Hudson's home dir) + is = new FileInputStream(path); + } + File file = new File(path); + long size; + if (file.exists()) { + size = file.length(); + } else { + size = is.available(); + } + if (path.endsWith(".gz")) { + is = new GZIPInputStream(is); + // guestimate: + size *= 2.8; + } + + reader = new BufferedReader(new InputStreamReader(is, "UTF-8"), BUFFER_SIZE); + + // Override sizes for currently "known" line files: + if (path.equals("europarl.lines.txt.gz")) { + size = 15129506L; + } else if (path.equals("/home/hudson/lucene-data/enwiki.random.lines.txt.gz")) { + size = 3038178822L; + } + + // Randomly seek to starting point: + if (random != null && size > 3) { + final long seekTo = (random.nextLong()&Long.MAX_VALUE) % (size/3); + if (LuceneTestCase.VERBOSE) { + System.out.println("TEST: LineFileDocs: seek to fp=" + seekTo + " on open"); + } + reader.skip(seekTo); + reader.readLine(); + } + } + + public synchronized void reset(Random random) throws IOException { + close(); + open(random); + id.set(0); + } + + private final static char SEP = '\t'; + + private static final class DocState { + final Document doc; + final Field titleTokenized; + final Field title; + final Field body; + final Field id; + final Field date; + + public DocState() { + doc = new Document(); + + title = new Field("title", "", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS); + doc.add(title); + + titleTokenized = new Field("titleTokenized", "", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); + doc.add(titleTokenized); + + body = new Field("body", "", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); + doc.add(body); + + id = new Field("docid", "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); + doc.add(id); + + date = new Field("date", "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); + doc.add(date); + } + } + + private final ThreadLocal threadDocs = new ThreadLocal(); + + /** Note: Document instance is re-used per-thread */ + public Document nextDoc() throws IOException { + String 
line; + synchronized(this) { + line = reader.readLine(); + if (line == null) { + // Always rewind at end: + if (LuceneTestCase.VERBOSE) { + System.out.println("TEST: LineFileDocs: now rewind file..."); + } + close(); + open(null); + line = reader.readLine(); + } + } + + DocState docState = threadDocs.get(); + if (docState == null) { + docState = new DocState(); + threadDocs.set(docState); + } + + int spot = line.indexOf(SEP); + if (spot == -1) { + throw new RuntimeException("line: [" + line + "] is in an invalid format !"); + } + int spot2 = line.indexOf(SEP, 1 + spot); + if (spot2 == -1) { + throw new RuntimeException("line: [" + line + "] is in an invalid format !"); + } + + docState.body.setValue(line.substring(1+spot2, line.length())); + final String title = line.substring(0, spot); + docState.title.setValue(title); + docState.titleTokenized.setValue(title); + docState.date.setValue(line.substring(1+spot, spot2)); + docState.id.setValue(Integer.toString(id.getAndIncrement())); + return docState.doc; + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/util/LuceneJUnitDividingSelector.java b/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/util/LuceneJUnitDividingSelector.java new file mode 100644 index 0000000..5a9509c --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/util/LuceneJUnitDividingSelector.java @@ -0,0 +1,66 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.lucene.util; +import java.io.File; + +import org.apache.tools.ant.BuildException; +import org.apache.tools.ant.types.Parameter; +import org.apache.tools.ant.types.selectors.BaseExtendSelector; + +/** Divides filesets into equal groups */ +public class LuceneJUnitDividingSelector extends BaseExtendSelector { + private int counter; + /** Number of total parts to split. */ + private int divisor; + /** Current part to accept. 
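+   * For example, with divisor=3 and part=2, isSelected accepts the 2nd,
+   * 5th, 8th, ... file it is offered (an illustrative reading of the
+   * round-robin counter below).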
*/ + private int part; + + @Override + public void setParameters(Parameter[] pParameters) { + super.setParameters(pParameters); + for (int j = 0; j < pParameters.length; j++) { + Parameter p = pParameters[j]; + if ("divisor".equalsIgnoreCase(p.getName())) { + divisor = Integer.parseInt(p.getValue()); + } + else if ("part".equalsIgnoreCase(p.getName())) { + part = Integer.parseInt(p.getValue()); + } + else { + throw new BuildException("unknown " + p.getName()); + } + } + } + + @Override + public void verifySettings() { + super.verifySettings(); + if (divisor <= 0 || part <= 0) { + throw new BuildException("part or divisor not set"); + } + if (part > divisor) { + throw new BuildException("part must be <= divisor"); + } + } + + @Override + public boolean isSelected(File dir, String name, File path) { + counter = counter % divisor + 1; + return counter == part; + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/util/LuceneJUnitResultFormatter.java b/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/util/LuceneJUnitResultFormatter.java new file mode 100644 index 0000000..a03f780 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/util/LuceneJUnitResultFormatter.java @@ -0,0 +1,293 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.lucene.util; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.text.NumberFormat; +import java.util.logging.LogManager; + +import junit.framework.AssertionFailedError; +import junit.framework.Test; + +import org.apache.lucene.store.LockReleaseFailedException; +import org.apache.lucene.store.NativeFSLockFactory; +import org.apache.tools.ant.taskdefs.optional.junit.JUnitResultFormatter; +import org.apache.tools.ant.taskdefs.optional.junit.JUnitTest; +import org.apache.tools.ant.taskdefs.optional.junit.JUnitTestRunner; +import org.apache.tools.ant.util.FileUtils; +import org.apache.tools.ant.util.StringUtils; +import org.junit.Ignore; + +/** + * Just like BriefJUnitResultFormatter "brief" bundled with ant, + * except all formatted text is buffered until the test suite is finished. + * At this point, the output is written at once in synchronized fashion. + * This way tests can run in parallel without interleaving output. + */ +public class LuceneJUnitResultFormatter implements JUnitResultFormatter { + private static final double ONE_SECOND = 1000.0; + + private static final NativeFSLockFactory lockFactory; + + /** Where to write the log to. */ + private OutputStream out; + + /** Formatter for timings. 
*/ + private NumberFormat numberFormat = NumberFormat.getInstance(); + + /** Output suite has written to System.out */ + private String systemOutput = null; + + /** Output suite has written to System.err */ + private String systemError = null; + + /** Buffer output until the end of the test */ + private ByteArrayOutputStream sb; // use a BOS for our mostly ascii-output + + private static final org.apache.lucene.store.Lock lock; + + static { + File lockDir = new File(System.getProperty("java.io.tmpdir"), + "lucene_junit_lock"); + lockDir.mkdirs(); + if (!lockDir.exists()) { + throw new RuntimeException("Could not make Lock directory:" + lockDir); + } + try { + lockFactory = new NativeFSLockFactory(lockDir); + lock = lockFactory.makeLock("junit_lock"); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + /** Constructor for LuceneJUnitResultFormatter. */ + public LuceneJUnitResultFormatter() { + } + + /** + * Sets the stream the formatter is supposed to write its results to. + * @param out the output stream to write to + */ + public void setOutput(OutputStream out) { + this.out = out; + } + + /** + * @see JUnitResultFormatter#setSystemOutput(String) + */ + /** {@inheritDoc}. */ + public void setSystemOutput(String out) { + systemOutput = out; + } + + /** + * @see JUnitResultFormatter#setSystemError(String) + */ + /** {@inheritDoc}. */ + public void setSystemError(String err) { + systemError = err; + } + + + /** + * The whole testsuite started. + * @param suite the test suite + */ + public synchronized void startTestSuite(JUnitTest suite) { + if (out == null) { + return; // Quick return - no output do nothing. + } + sb = new ByteArrayOutputStream(); // don't reuse, so its gc'ed + try { + LogManager.getLogManager().readConfiguration(); + } catch (Exception e) {} + append("Testsuite: "); + append(suite.getName()); + append(StringUtils.LINE_SEP); + } + + /** + * The whole testsuite ended. 
+ * @param suite the test suite + */ + public synchronized void endTestSuite(JUnitTest suite) { + append("Tests run: "); + append(suite.runCount()); + append(", Failures: "); + append(suite.failureCount()); + append(", Errors: "); + append(suite.errorCount()); + append(", Time elapsed: "); + append(numberFormat.format(suite.getRunTime() / ONE_SECOND)); + append(" sec"); + append(StringUtils.LINE_SEP); + append(StringUtils.LINE_SEP); + + // append the err and output streams to the log + if (systemOutput != null && systemOutput.length() > 0) { + append("------------- Standard Output ---------------") + .append(StringUtils.LINE_SEP) + .append(systemOutput) + .append("------------- ---------------- ---------------") + .append(StringUtils.LINE_SEP); + } + + // HACK: junit gives us no way to do this in LuceneTestCase + try { + Class clazz = Class.forName(suite.getName()); + Ignore ignore = clazz.getAnnotation(Ignore.class); + if (ignore != null) { + if (systemError == null) systemError = ""; + systemError += "NOTE: Ignoring test class '" + clazz.getSimpleName() + "': " + + ignore.value() + StringUtils.LINE_SEP; + } + } catch (ClassNotFoundException e) { /* no problem */ } + // END HACK + + if (systemError != null && systemError.length() > 0) { + append("------------- Standard Error -----------------") + .append(StringUtils.LINE_SEP) + .append(systemError) + .append("------------- ---------------- ---------------") + .append(StringUtils.LINE_SEP); + } + + if (out != null) { + try { + lock.obtain(5000); + try { + sb.writeTo(out); + out.flush(); + } finally { + try { + lock.release(); + } catch(LockReleaseFailedException e) { + // well lets pretend its released anyway + } + } + } catch (IOException e) { + throw new RuntimeException("unable to write results", e); + } finally { + if (out != System.out && out != System.err) { + FileUtils.close(out); + } + } + } + } + + /** + * A test started. + * @param test a test + */ + public void startTest(Test test) { + } + + /** + * A test ended. + * @param test a test + */ + public void endTest(Test test) { + } + + /** + * Interface TestListener for JUnit <= 3.4. + * + *
A Test failed. + * @param test a test + * @param t the exception thrown by the test + */ + public void addFailure(Test test, Throwable t) { + formatError("\tFAILED", test, t); + } + + /** + * Interface TestListener for JUnit > 3.4. + * + *
A Test failed. + * @param test a test + * @param t the assertion failed by the test + */ + public void addFailure(Test test, AssertionFailedError t) { + addFailure(test, (Throwable) t); + } + + /** + * A test caused an error. + * @param test a test + * @param error the error thrown by the test + */ + public void addError(Test test, Throwable error) { + formatError("\tCaused an ERROR", test, error); + } + + /** + * Format the test for printing.. + * @param test a test + * @return the formatted testname + */ + protected String formatTest(Test test) { + if (test == null) { + return "Null Test: "; + } else { + return "Testcase: " + test.toString() + ":"; + } + } + + /** + * Format an error and print it. + * @param type the type of error + * @param test the test that failed + * @param error the exception that the test threw + */ + protected synchronized void formatError(String type, Test test, + Throwable error) { + if (test != null) { + endTest(test); + } + + append(formatTest(test) + type); + append(StringUtils.LINE_SEP); + append(error.getMessage()); + append(StringUtils.LINE_SEP); + String strace = JUnitTestRunner.getFilteredTrace(error); + append(strace); + append(StringUtils.LINE_SEP); + append(StringUtils.LINE_SEP); + } + + public LuceneJUnitResultFormatter append(String s) { + if (s == null) + s = "(null)"; + try { + sb.write(s.getBytes()); // intentionally use default charset, its a console. + } catch (IOException e) { + throw new RuntimeException(e); + } + return this; + } + + public LuceneJUnitResultFormatter append(long l) { + return append(Long.toString(l)); + } +} + diff --git a/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/util/LuceneTestCase.java b/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/util/LuceneTestCase.java new file mode 100644 index 0000000..662fd99 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/util/LuceneTestCase.java @@ -0,0 +1,1309 @@ +package org.apache.lucene.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.File; +import java.io.IOException; +import java.io.PrintStream; +import java.lang.annotation.Documented; +import java.lang.annotation.Inherited; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.reflect.Constructor; +import java.lang.reflect.Method; +import java.lang.reflect.Modifier; +import java.util.*; +import java.util.Map.Entry; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.document.Field.Index; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.Field.TermVector; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.LogByteSizeMergePolicy; +import org.apache.lucene.index.LogDocMergePolicy; +import org.apache.lucene.index.LogMergePolicy; +import org.apache.lucene.index.MergePolicy; +import org.apache.lucene.index.MockRandomMergePolicy; +import org.apache.lucene.index.SerialMergeScheduler; +import org.apache.lucene.index.SlowMultiReaderWrapper; +import org.apache.lucene.index.TieredMergePolicy; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.FieldCache.CacheEntry; +import org.apache.lucene.search.AssertingIndexSearcher; +import org.apache.lucene.search.FieldCache; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.store.LockFactory; +import org.apache.lucene.store.MockDirectoryWrapper; +import org.apache.lucene.util.FieldCacheSanityChecker.Insanity; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.Assume; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Ignore; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TestWatchman; +import org.junit.runner.Description; +import org.junit.runner.RunWith; +import org.junit.runner.manipulation.Filter; +import org.junit.runner.manipulation.NoTestsRemainException; +import org.junit.runner.notification.Failure; +import org.junit.runner.notification.RunListener; +import org.junit.runner.notification.RunNotifier; +import org.junit.runners.BlockJUnit4ClassRunner; +import org.junit.runners.model.FrameworkMethod; +import org.junit.runners.model.InitializationError; + +/** + * Base class for all Lucene unit tests, Junit3 or Junit4 variant. + *
+ *
+ *
+ * If you + * override either setUp() or + * tearDown() in your unit test, make sure you + * call super.setUp() and + * super.tearDown() + *
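+ *
+ * For example (illustrative):
+ *
+ *   @Override
+ *   public void setUp() throws Exception {
+ *     super.setUp();
+ *     // ... test-specific setup ...
+ *   }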
+ * + * @Before - replaces setUp + * @After - replaces tearDown + * @Test - any public method with this annotation is a test case, regardless + * of its name + *
+ *
+ * See Junit4 documentation for a complete list of features. + *
+ * Import from org.junit rather than junit.framework. + *
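+ * For example, use org.junit.Test and org.junit.Assert rather than
+ * junit.framework.TestCase.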
+ * You should be able to use this class anywhere you used LuceneTestCase + * if you annotate your derived class correctly with the annotations above + * @see #assertSaneFieldCaches(String) + */ + +@RunWith(LuceneTestCase.LuceneTestCaseRunner.class) +public abstract class LuceneTestCase extends Assert { + + /** + * true iff tests are run in verbose mode. Note: if it is false, tests are not + * expected to print any messages. + */ + public static final boolean VERBOSE = Boolean.getBoolean("tests.verbose"); + + /** Use this constant when creating Analyzers and any other version-dependent stuff. + *
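+ * For example (illustrative): new StandardAnalyzer(TEST_VERSION_CURRENT).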
NOTE: Change this when development starts for new Lucene version: + */ + public static final Version TEST_VERSION_CURRENT = Version.LUCENE_33; + + /** + * If this is set, it is the only method that should run. + */ + static final String TEST_METHOD; + + /** Create indexes in this directory, optimally use a subdir, named after the test */ + public static final File TEMP_DIR; + static { + String method = System.getProperty("testmethod", "").trim(); + TEST_METHOD = method.length() == 0 ? null : method; + String s = System.getProperty("tempDir", System.getProperty("java.io.tmpdir")); + if (s == null) + throw new RuntimeException("To run tests, you need to define system property 'tempDir' or 'java.io.tmpdir'."); + TEMP_DIR = new File(s); + TEMP_DIR.mkdirs(); + } + + /** set of directories we created, in afterclass we try to clean these up */ + private static final Map tempDirs = Collections.synchronizedMap(new HashMap()); + + // by default we randomly pick a different codec for + // each test case (non-J4 tests) and each test class (J4 + // tests) + /** Gets the locale to run tests with */ + public static final String TEST_LOCALE = System.getProperty("tests.locale", "random"); + /** Gets the timezone to run tests with */ + public static final String TEST_TIMEZONE = System.getProperty("tests.timezone", "random"); + /** Gets the directory to run tests with */ + public static final String TEST_DIRECTORY = System.getProperty("tests.directory", "random"); + /** Get the number of times to run tests */ + public static final int TEST_ITER = Integer.parseInt(System.getProperty("tests.iter", "1")); + /** Get the minimum number of times to run tests until a failure happens */ + public static final int TEST_ITER_MIN = Integer.parseInt(System.getProperty("tests.iter.min", Integer.toString(TEST_ITER))); + /** Get the random seed for tests */ + public static final String TEST_SEED = System.getProperty("tests.seed", "random"); + /** whether or not nightly tests should run */ + public static final boolean TEST_NIGHTLY = Boolean.parseBoolean(System.getProperty("tests.nightly", "false")); + /** the line file used by LineFileDocs */ + public static final String TEST_LINE_DOCS_FILE = System.getProperty("tests.linedocsfile", "europarl.lines.txt.gz"); + /** whether or not to clean threads between test invocations: "false", "perMethod", "perClass" */ + public static final String TEST_CLEAN_THREADS = System.getProperty("tests.cleanthreads", "perClass"); + + /** + * A random multiplier which you should use when writing random tests: + * multiply it by the number of iterations + */ + public static final int RANDOM_MULTIPLIER = Integer.parseInt(System.getProperty("tests.multiplier", "1")); + + private int savedBoolMaxClauseCount; + + private volatile Thread.UncaughtExceptionHandler savedUncaughtExceptionHandler = null; + + /** Used to track if setUp and tearDown are called correctly from subclasses */ + private static State state = State.INITIAL; + + private static enum State { + INITIAL, // no tests ran yet + SETUP, // test has called setUp() + RANTEST, // test is running + TEARDOWN // test has called tearDown() + } + + private static class UncaughtExceptionEntry { + public final Thread thread; + public final Throwable exception; + + public UncaughtExceptionEntry(Thread thread, Throwable exception) { + this.thread = thread; + this.exception = exception; + } + } + private List uncaughtExceptions = Collections.synchronizedList(new ArrayList()); + + private static Locale locale; + private static Locale savedLocale; + 
private static TimeZone timeZone; + private static TimeZone savedTimeZone; + + protected static Map stores; + + private static class TwoLongs { + public final long l1, l2; + + public TwoLongs(long l1, long l2) { + this.l1 = l1; + this.l2 = l2; + } + + @Override + public String toString() { + return l1 + ":" + l2; + } + + public static TwoLongs fromString(String s) { + final int i = s.indexOf(':'); + assert i != -1; + return new TwoLongs(Long.parseLong(s.substring(0, i)), + Long.parseLong(s.substring(1+i))); + } + } + + /** @deprecated: until we fix no-fork problems in solr tests */ + @Deprecated + private static List testClassesRun = new ArrayList(); + + @BeforeClass + public static void beforeClassLuceneTestCaseJ4() { + state = State.INITIAL; + staticSeed = "random".equals(TEST_SEED) ? seedRand.nextLong() : TwoLongs.fromString(TEST_SEED).l1; + random.setSeed(staticSeed); + tempDirs.clear(); + stores = Collections.synchronizedMap(new IdentityHashMap()); + // enable this by default, for IDE consistency with ant tests (as its the default from ant) + // TODO: really should be in solr base classes, but some extend LTC directly. + // we do this in beforeClass, because some tests currently disable it + if (System.getProperty("solr.directoryFactory") == null) { + System.setProperty("solr.directoryFactory", "org.apache.solr.core.MockDirectoryFactory"); + } + // this code consumes randoms where 4.0's lucenetestcase would: to make seeds work across both branches. + // TODO: doesn't completely work, because what if we get mockrandom codec?! + if (random.nextInt(4) != 0) { + random.nextInt(); // consume RandomCodecProvider's seed. + } + // end compatibility random-consumption + savedLocale = Locale.getDefault(); + locale = TEST_LOCALE.equals("random") ? randomLocale(random) : localeForName(TEST_LOCALE); + Locale.setDefault(locale); + savedTimeZone = TimeZone.getDefault(); + timeZone = TEST_TIMEZONE.equals("random") ? randomTimeZone(random) : TimeZone.getTimeZone(TEST_TIMEZONE); + TimeZone.setDefault(timeZone); + testsFailed = false; + } + + @AfterClass + public static void afterClassLuceneTestCaseJ4() { + if (!testsFailed) { + assertTrue("ensure your setUp() calls super.setUp() and your tearDown() calls super.tearDown()!!!", + state == State.INITIAL || state == State.TEARDOWN); + } + state = State.INITIAL; + if (! "false".equals(TEST_CLEAN_THREADS)) { + int rogueThreads = threadCleanup("test class"); + if (rogueThreads > 0) { + // TODO: fail here once the leaks are fixed. + System.err.println("RESOURCE LEAK: test class left " + rogueThreads + " thread(s) running"); + } + } + Locale.setDefault(savedLocale); + TimeZone.setDefault(savedTimeZone); + System.clearProperty("solr.solr.home"); + System.clearProperty("solr.data.dir"); + // now look for unclosed resources + if (!testsFailed) + for (MockDirectoryWrapper d : stores.keySet()) { + if (d.isOpen()) { + StackTraceElement elements[] = stores.get(d); + // Look for the first class that is not LuceneTestCase that requested + // a Directory. The first two items are of Thread's, so skipping over + // them. 
+ StackTraceElement element = null; + for (int i = 2; i < elements.length; i++) { + StackTraceElement ste = elements[i]; + if (ste.getClassName().indexOf("LuceneTestCase") == -1) { + element = ste; + break; + } + } + fail("directory of test was not closed, opened from: " + element); + } + } + stores = null; + // if verbose or tests failed, report some information back + if (VERBOSE || testsFailed) + System.err.println("NOTE: test params are: " + + "locale=" + locale + + ", timezone=" + (timeZone == null ? "(null)" : timeZone.getID())); + if (testsFailed) { + System.err.println("NOTE: all tests run in this JVM:"); + System.err.println(Arrays.toString(testClassesRun.toArray())); + System.err.println("NOTE: " + System.getProperty("os.name") + " " + + System.getProperty("os.version") + " " + + System.getProperty("os.arch") + "/" + + System.getProperty("java.vendor") + " " + + System.getProperty("java.version") + " " + + (Constants.JRE_IS_64BIT ? "(64-bit)" : "(32-bit)") + "/" + + "cpus=" + Runtime.getRuntime().availableProcessors() + "," + + "threads=" + Thread.activeCount() + "," + + "free=" + Runtime.getRuntime().freeMemory() + "," + + "total=" + Runtime.getRuntime().totalMemory()); + } + // clear out any temp directories if we can + if (!testsFailed) { + for (Entry entry : tempDirs.entrySet()) { + try { + _TestUtil.rmDir(entry.getKey()); + } catch (IOException e) { + e.printStackTrace(); + System.err.println("path " + entry.getKey() + " allocated from"); + // first two STE's are Java's + StackTraceElement[] elements = entry.getValue(); + for (int i = 2; i < elements.length; i++) { + StackTraceElement ste = elements[i]; + // print only our code's stack information + if (ste.getClassName().indexOf("org.apache.lucene") == -1) break; + System.err.println("\t" + ste); + } + fail("could not remove temp dir: " + entry.getKey()); + } + } + } + } + + private static boolean testsFailed; /* true if any tests failed */ + + // This is how we get control when errors occur. + // Think of this as start/end/success/failed + // events. + @Rule + public final TestWatchman intercept = new TestWatchman() { + + @Override + public void failed(Throwable e, FrameworkMethod method) { + // org.junit.internal.AssumptionViolatedException in older releases + // org.junit.Assume.AssumptionViolatedException in recent ones + if (e.getClass().getName().endsWith("AssumptionViolatedException")) { + if (e.getCause() instanceof TestIgnoredException) + e = e.getCause(); + System.err.print("NOTE: Assume failed in '" + method.getName() + "' (ignored):"); + if (VERBOSE) { + System.err.println(); + e.printStackTrace(System.err); + } else { + System.err.print(" "); + System.err.println(e.getMessage()); + } + } else { + testsFailed = true; + reportAdditionalFailureInfo(); + } + super.failed(e, method); + } + + @Override + public void starting(FrameworkMethod method) { + // set current method name for logging + LuceneTestCase.this.name = method.getName(); + if (!testsFailed) { + assertTrue("ensure your setUp() calls super.setUp()!!!", state == State.SETUP); + } + state = State.RANTEST; + super.starting(method); + } + }; + + @Before + public void setUp() throws Exception { + seed = "random".equals(TEST_SEED) ? 
seedRand.nextLong() : TwoLongs.fromString(TEST_SEED).l2; + random.setSeed(seed); + if (!testsFailed) { + assertTrue("ensure your tearDown() calls super.tearDown()!!!", (state == State.INITIAL || state == State.TEARDOWN)); + } + state = State.SETUP; + savedUncaughtExceptionHandler = Thread.getDefaultUncaughtExceptionHandler(); + Thread.setDefaultUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler() { + public void uncaughtException(Thread t, Throwable e) { + testsFailed = true; + uncaughtExceptions.add(new UncaughtExceptionEntry(t, e)); + if (savedUncaughtExceptionHandler != null) + savedUncaughtExceptionHandler.uncaughtException(t, e); + } + }); + + savedBoolMaxClauseCount = BooleanQuery.getMaxClauseCount(); + } + + + /** + * Forcible purges all cache entries from the FieldCache. + *
+ * This method will be called by tearDown to clean up FieldCache.DEFAULT. + * If a (poorly written) test has some expectation that the FieldCache + * will persist across test methods (i.e. a static IndexReader), this + * method can be overridden to do nothing. + *
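+ *
+ * Override sketch for such a test (illustrative):
+ *
+ *   @Override
+ *   protected void purgeFieldCache(FieldCache fc) {
+ *     // deliberately a no-op: entries for the static IndexReader survive
+ *   }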
+ * + * @see FieldCache#purgeAllCaches() + */ + protected void purgeFieldCache(final FieldCache fc) { + fc.purgeAllCaches(); + } + + protected String getTestLabel() { + return getClass().getName() + "." + getName(); + } + + public static void setUseCompoundFile(MergePolicy mp, boolean useCompound) { + if (mp instanceof LogMergePolicy) { + ((LogMergePolicy) mp).setUseCompoundFile(useCompound); + } else if (mp instanceof TieredMergePolicy) { + ((TieredMergePolicy) mp).setUseCompoundFile(useCompound); + } else { + fail("MergePolicy (compound-file) not supported " + mp); + } + } + + public static void setMergeFactor(MergePolicy mp, int mergeFactor) { + if (mp instanceof LogMergePolicy) { + ((LogMergePolicy) mp).setMergeFactor(mergeFactor); + } else if (mp instanceof TieredMergePolicy) { + ((TieredMergePolicy) mp).setMaxMergeAtOnce(mergeFactor); + ((TieredMergePolicy) mp).setMaxMergeAtOnceExplicit(mergeFactor); + } else { + fail("MergePolicy not supported " + mp); + } + } + + @After + public void tearDown() throws Exception { + if (!testsFailed) { + // Note: we allow a test to go straight from SETUP -> TEARDOWN (without ever entering the RANTEST state) + // because if you assume() inside setUp(), it skips the test and the TestWatchman has no way to know... + assertTrue("ensure your setUp() calls super.setUp()!!!", state == State.RANTEST || state == State.SETUP); + } + state = State.TEARDOWN; + BooleanQuery.setMaxClauseCount(savedBoolMaxClauseCount); + if ("perMethod".equals(TEST_CLEAN_THREADS)) { + int rogueThreads = threadCleanup("test method: '" + getName() + "'"); + if (rogueThreads > 0) { + System.err.println("RESOURCE LEAK: test method: '" + getName() + + "' left " + rogueThreads + " thread(s) running"); + // TODO: fail, but print seed for now. + if (!testsFailed && uncaughtExceptions.isEmpty()) { + reportAdditionalFailureInfo(); + } + } + } + Thread.setDefaultUncaughtExceptionHandler(savedUncaughtExceptionHandler); + try { + + if (!uncaughtExceptions.isEmpty()) { + testsFailed = true; + reportAdditionalFailureInfo(); + System.err.println("The following exceptions were thrown by threads:"); + for (UncaughtExceptionEntry entry : uncaughtExceptions) { + System.err.println("*** Thread: " + entry.thread.getName() + " ***"); + entry.exception.printStackTrace(System.err); + } + fail("Some threads threw uncaught exceptions!"); + } + + // calling assertSaneFieldCaches here isn't as useful as having test + // classes call it directly from the scope where the index readers + // are used, because they could be gc'ed just before this tearDown + // method is called. + // + // But it's better then nothing. + // + // If you are testing functionality that you know for a fact + // "violates" FieldCache sanity, then you should either explicitly + // call purgeFieldCache at the end of your test method, or refactor + // your Test class so that the inconsistant FieldCache usages are + // isolated in distinct test methods + assertSaneFieldCaches(getTestLabel()); + + } finally { + purgeFieldCache(FieldCache.DEFAULT); + } + } + + private final static int THREAD_STOP_GRACE_MSEC = 50; + // jvm-wide list of 'rogue threads' we found, so they only get reported once. 
+ private final static IdentityHashMap rogueThreads = new IdentityHashMap(); + + static { + // just a hack for things like eclipse test-runner threads + for (Thread t : Thread.getAllStackTraces().keySet()) { + rogueThreads.put(t, true); + } + + if (TEST_ITER > 1) { + System.out.println("WARNING: you are using -Dtests.iter=n where n > 1, not all tests support this option."); + System.out.println("Some may crash or fail: this is not a bug."); + } + } + + /** + * Looks for leftover running threads, trying to kill them off, + * so they don't fail future tests. + * returns the number of rogue threads that it found. + */ + private static int threadCleanup(String context) { + // educated guess + Thread[] stillRunning = new Thread[Thread.activeCount()+1]; + int threadCount = 0; + int rogueCount = 0; + + if ((threadCount = Thread.enumerate(stillRunning)) > 1) { + while (threadCount == stillRunning.length) { + // truncated response + stillRunning = new Thread[stillRunning.length*2]; + threadCount = Thread.enumerate(stillRunning); + } + + for (int i = 0; i < threadCount; i++) { + Thread t = stillRunning[i]; + + if (t.isAlive() && + !rogueThreads.containsKey(t) && + t != Thread.currentThread() && + // TODO: TimeLimitingCollector starts a thread statically.... WTF?! + !t.getName().equals("TimeLimitedCollector timer thread") && + /* its ok to keep your searcher across test cases */ + (t.getName().startsWith("LuceneTestCase") && context.startsWith("test method")) == false) { + System.err.println("WARNING: " + context + " left thread running: " + t); + rogueThreads.put(t, true); + rogueCount++; + if (t.getName().startsWith("LuceneTestCase")) { + System.err.println("PLEASE CLOSE YOUR INDEXSEARCHERS IN YOUR TEST!!!!"); + continue; + } else { + // wait on the thread to die of natural causes + try { + t.join(THREAD_STOP_GRACE_MSEC); + } catch (InterruptedException e) { e.printStackTrace(); } + } + // try to stop the thread: + t.setUncaughtExceptionHandler(null); + Thread.setDefaultUncaughtExceptionHandler(null); + t.interrupt(); + } + } + } + return rogueCount; + } + + /** + * Asserts that FieldCacheSanityChecker does not detect any + * problems with FieldCache.DEFAULT. + *
+   * If any problems are found, they are logged to System.err
+   * (along with the msg) when the Assertion is thrown.
+   * </p>
+   * <p>
+ * This method is called by tearDown after every test method, + * however IndexReaders scoped inside test methods may be garbage + * collected prior to this method being called, causing errors to + * be overlooked. Tests are encouraged to keep their IndexReaders + * scoped at the class level, or to explicitly call this method + * directly in the same scope as the IndexReader. + *
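(Sketch of the recommended in-scope call, with assumed names; the surrounding class is presumed to extend LuceneTestCase and "dir" to hold an existing index:)

    public void testFieldCacheSanity() throws Exception {
      IndexReader reader = IndexReader.open(dir, true);
      try {
        // ... run code that populates FieldCache.DEFAULT ...
        // check sanity while the reader is still strongly reachable:
        assertSaneFieldCaches("testFieldCacheSanity");
      } finally {
        reader.close();
      }
    }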
+   *
+   * @see org.apache.lucene.util.FieldCacheSanityChecker
+   */
+  protected void assertSaneFieldCaches(final String msg) {
+    final CacheEntry[] entries = FieldCache.DEFAULT.getCacheEntries();
+    Insanity[] insanity = null;
+    try {
+      try {
+        insanity = FieldCacheSanityChecker.checkSanity(entries);
+      } catch (RuntimeException e) {
+        dumpArray(msg + ": FieldCache", entries, System.err);
+        throw e;
+      }
+
+      assertEquals(msg + ": Insane FieldCache usage(s) found",
+                   0, insanity.length);
+      insanity = null;
+    } finally {
+
+      // report this in the event of any exception/failure
+      // if no failure, then insanity will be null anyway
+      if (null != insanity) {
+        dumpArray(msg + ": Insane FieldCache usage(s)", insanity, System.err);
+      }
+
+    }
+  }
+
+  /**
+   * Returns a number that is at least i
+   * <p>
+   * The actual number returned will be influenced by whether {@link #TEST_NIGHTLY}
+   * is active and {@link #RANDOM_MULTIPLIER}, but also with some random fudge.
+   */
+  public static int atLeast(Random random, int i) {
+    int min = (TEST_NIGHTLY ? 5*i : i) * RANDOM_MULTIPLIER;
+    int max = min+(min/2);
+    return _TestUtil.nextInt(random, min, max);
+  }
+
+  public static int atLeast(int i) {
+    return atLeast(random, i);
+  }
+
+  /**
+   * Returns true if something should happen rarely.
+   * <p>
+   * The actual probability will be influenced by whether {@link #TEST_NIGHTLY}
+   * is active and {@link #RANDOM_MULTIPLIER}.
+   */
+  public static boolean rarely(Random random) {
+    int p = TEST_NIGHTLY ? 25 : 5;
+    p += (p * Math.log(RANDOM_MULTIPLIER));
+    int min = 100 - Math.min(p, 90); // never more than 90
+    return random.nextInt(100) >= min;
+  }
+
+  public static boolean rarely() {
+    return rarely(random);
+  }
+
+  public static boolean usually(Random random) {
+    return !rarely(random);
+  }
+
+  public static boolean usually() {
+    return usually(random);
+  }
+
+  // These deprecated methods should be removed soon, when all tests using no Epsilon are fixed:
+
+  @Deprecated
+  static public void assertEquals(double expected, double actual) {
+    assertEquals(null, expected, actual);
+  }
+
+  @Deprecated
+  static public void assertEquals(String message, double expected, double actual) {
+    assertEquals(message, Double.valueOf(expected), Double.valueOf(actual));
+  }
+
+  @Deprecated
+  static public void assertEquals(float expected, float actual) {
+    assertEquals(null, expected, actual);
+  }
+
+  @Deprecated
+  static public void assertEquals(String message, float expected, float actual) {
+    assertEquals(message, Float.valueOf(expected), Float.valueOf(actual));
+  }
+
+  // Replacement for Assume jUnit class, so we can add a message with explanation:
+
+  private static final class TestIgnoredException extends RuntimeException {
+    TestIgnoredException(String msg) {
+      super(msg);
+    }
+
+    TestIgnoredException(String msg, Throwable t) {
+      super(msg, t);
+    }
+
+    @Override
+    public String getMessage() {
+      StringBuilder sb = new StringBuilder(super.getMessage());
+      if (getCause() != null)
+        sb.append(" - ").append(getCause());
+      return sb.toString();
+    }
+
+    // only this one is called by our code, exception is not used outside this class:
+    @Override
+    public void printStackTrace(PrintStream s) {
+      if (getCause() != null) {
+        s.println(super.toString() + " - Caused by:");
+        getCause().printStackTrace(s);
+      } else {
+        super.printStackTrace(s);
+      }
+    }
+  }
+
+  public static void assumeTrue(String msg, boolean b) {
+    Assume.assumeNoException(b ? null : new TestIgnoredException(msg));
+  }
+
+  public static void assumeFalse(String msg, boolean b) {
+    assumeTrue(msg, !b);
+  }
+
+  public static void assumeNoException(String msg, Exception e) {
+    Assume.assumeNoException(e == null ? null : new TestIgnoredException(msg, e));
+  }
+
+  /**
+   * Convenience method for logging an iterator.
+   *
+   * @param label  String logged before/after the items in the iterator
+   * @param iter   Each next() is toString()ed and logged on its own line. If iter is null this is logged differently than an empty iterator.
+   * @param stream Stream to log messages to.
+   */
+  public static void dumpIterator(String label, Iterator iter,
+                                  PrintStream stream) {
+    stream.println("*** BEGIN " + label + " ***");
+    if (null == iter) {
+      stream.println(" ... NULL ...");
+    } else {
+      while (iter.hasNext()) {
+        stream.println(iter.next().toString());
+      }
+    }
+    stream.println("*** END " + label + " ***");
+  }
+
+  /**
+   * Convenience method for logging an array. Wraps the array in an iterator and delegates.
+   *
+   * @see #dumpIterator(String,Iterator,PrintStream)
+   */
+  public static void dumpArray(String label, Object[] objs,
+                               PrintStream stream) {
+    Iterator iter = (null == objs) ?
null : Arrays.asList(objs).iterator(); + dumpIterator(label, iter, stream); + } + + /** create a new index writer config with random defaults */ + public static IndexWriterConfig newIndexWriterConfig(Version v, Analyzer a) { + return newIndexWriterConfig(random, v, a); + } + + /** create a new index writer config with random defaults using the specified random */ + public static IndexWriterConfig newIndexWriterConfig(Random r, Version v, Analyzer a) { + IndexWriterConfig c = new IndexWriterConfig(v, a); + if (r.nextBoolean()) { + c.setMergePolicy(newTieredMergePolicy()); + } else if (r.nextBoolean()) { + c.setMergePolicy(newLogMergePolicy()); + } else { + c.setMergePolicy(new MockRandomMergePolicy(r)); + } + + if (r.nextBoolean()) { + c.setMergeScheduler(new SerialMergeScheduler()); + } + if (r.nextBoolean()) { + if (rarely(r)) { + // crazy value + c.setMaxBufferedDocs(_TestUtil.nextInt(r, 2, 7)); + } else { + // reasonable value + c.setMaxBufferedDocs(_TestUtil.nextInt(r, 8, 1000)); + } + } + if (r.nextBoolean()) { + if (rarely(r)) { + // crazy value + c.setTermIndexInterval(random.nextBoolean() ? _TestUtil.nextInt(r, 1, 31) : _TestUtil.nextInt(r, 129, 1000)); + } else { + // reasonable value + c.setTermIndexInterval(_TestUtil.nextInt(r, 32, 128)); + } + } + if (r.nextBoolean()) { + c.setMaxThreadStates(_TestUtil.nextInt(r, 1, 20)); + } + + if (r.nextBoolean()) { + c.setMergePolicy(new MockRandomMergePolicy(r)); + } else { + c.setMergePolicy(newLogMergePolicy()); + } + + c.setReaderPooling(r.nextBoolean()); + c.setReaderTermsIndexDivisor(_TestUtil.nextInt(r, 1, 4)); + return c; + } + + public static LogMergePolicy newLogMergePolicy() { + return newLogMergePolicy(random); + } + + public static TieredMergePolicy newTieredMergePolicy() { + return newTieredMergePolicy(random); + } + + public static LogMergePolicy newLogMergePolicy(Random r) { + LogMergePolicy logmp = r.nextBoolean() ? 
new LogDocMergePolicy() : new LogByteSizeMergePolicy(); + logmp.setUseCompoundFile(r.nextBoolean()); + logmp.setCalibrateSizeByDeletes(r.nextBoolean()); + if (rarely(r)) { + logmp.setMergeFactor(_TestUtil.nextInt(r, 2, 4)); + } else { + logmp.setMergeFactor(_TestUtil.nextInt(r, 5, 50)); + } + return logmp; + } + + public static TieredMergePolicy newTieredMergePolicy(Random r) { + TieredMergePolicy tmp = new TieredMergePolicy(); + if (rarely(r)) { + tmp.setMaxMergeAtOnce(_TestUtil.nextInt(r, 2, 4)); + tmp.setMaxMergeAtOnceExplicit(_TestUtil.nextInt(r, 2, 4)); + } else { + tmp.setMaxMergeAtOnce(_TestUtil.nextInt(r, 5, 50)); + tmp.setMaxMergeAtOnceExplicit(_TestUtil.nextInt(r, 5, 50)); + } + tmp.setMaxMergedSegmentMB(0.2 + r.nextDouble() * 2.0); + tmp.setFloorSegmentMB(0.2 + r.nextDouble() * 2.0); + tmp.setExpungeDeletesPctAllowed(0.0 + r.nextDouble() * 30.0); + tmp.setSegmentsPerTier(_TestUtil.nextInt(r, 2, 20)); + tmp.setUseCompoundFile(r.nextBoolean()); + tmp.setNoCFSRatio(0.1 + r.nextDouble()*0.8); + tmp.setReclaimDeletesWeight(r.nextDouble()*4); + return tmp; + } + + public static LogMergePolicy newLogMergePolicy(boolean useCFS) { + LogMergePolicy logmp = newLogMergePolicy(); + logmp.setUseCompoundFile(useCFS); + return logmp; + } + + public static LogMergePolicy newLogMergePolicy(boolean useCFS, int mergeFactor) { + LogMergePolicy logmp = newLogMergePolicy(); + logmp.setUseCompoundFile(useCFS); + logmp.setMergeFactor(mergeFactor); + return logmp; + } + + public static LogMergePolicy newLogMergePolicy(int mergeFactor) { + LogMergePolicy logmp = newLogMergePolicy(); + logmp.setMergeFactor(mergeFactor); + return logmp; + } + + /** + * Returns a new Directory instance. Use this when the test does not + * care about the specific Directory implementation (most tests). + *
+ * The Directory is wrapped with {@link MockDirectoryWrapper}. + * By default this means it will be picky, such as ensuring that you + * properly close it and all open files in your test. It will emulate + * some features of Windows, such as not allowing open files to be + * overwritten. + */ + public static MockDirectoryWrapper newDirectory() throws IOException { + return newDirectory(random); + } + + /** + * Returns a new Directory instance, using the specified random. + * See {@link #newDirectory()} for more information. + */ + public static MockDirectoryWrapper newDirectory(Random r) throws IOException { + Directory impl = newDirectoryImpl(r, TEST_DIRECTORY); + MockDirectoryWrapper dir = new MockDirectoryWrapper(r, impl); + stores.put(dir, Thread.currentThread().getStackTrace()); + return dir; + } + + /** + * Returns a new Directory instance, with contents copied from the + * provided directory. See {@link #newDirectory()} for more + * information. + */ + public static MockDirectoryWrapper newDirectory(Directory d) throws IOException { + return newDirectory(random, d); + } + + /** Returns a new FSDirectory instance over the given file, which must be a folder. */ + public static MockDirectoryWrapper newFSDirectory(File f) throws IOException { + return newFSDirectory(f, null); + } + + /** Returns a new FSDirectory instance over the given file, which must be a folder. */ + public static MockDirectoryWrapper newFSDirectory(File f, LockFactory lf) throws IOException { + String fsdirClass = TEST_DIRECTORY; + if (fsdirClass.equals("random")) { + fsdirClass = FS_DIRECTORIES[random.nextInt(FS_DIRECTORIES.length)]; + } + + if (fsdirClass.indexOf(".") == -1) {// if not fully qualified, assume .store + fsdirClass = "org.apache.lucene.store." + fsdirClass; + } + + Class clazz; + try { + try { + clazz = Class.forName(fsdirClass).asSubclass(FSDirectory.class); + } catch (ClassCastException e) { + // TEST_DIRECTORY is not a sub-class of FSDirectory, so draw one at random + fsdirClass = FS_DIRECTORIES[random.nextInt(FS_DIRECTORIES.length)]; + + if (fsdirClass.indexOf(".") == -1) {// if not fully qualified, assume .store + fsdirClass = "org.apache.lucene.store." + fsdirClass; + } + + clazz = Class.forName(fsdirClass).asSubclass(FSDirectory.class); + } + MockDirectoryWrapper dir = new MockDirectoryWrapper(random, newFSDirectoryImpl(clazz, f)); + if (lf != null) { + dir.setLockFactory(lf); + } + stores.put(dir, Thread.currentThread().getStackTrace()); + return dir; + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + /** + * Returns a new Directory instance, using the specified random + * with contents copied from the provided directory. See + * {@link #newDirectory()} for more information. + */ + public static MockDirectoryWrapper newDirectory(Random r, Directory d) throws IOException { + Directory impl = newDirectoryImpl(r, TEST_DIRECTORY); + for (String file : d.listAll()) { + d.copy(impl, file, file); + } + MockDirectoryWrapper dir = new MockDirectoryWrapper(r, impl); + stores.put(dir, Thread.currentThread().getStackTrace()); + return dir; + } + + /** Returns a new field instance. + * See {@link #newField(String, String, Field.Store, Field.Index, Field.TermVector)} for more information */ + public static Field newField(String name, String value, Index index) { + return newField(random, name, value, index); + } + + /** Returns a new field instance. 
+   * See {@link #newField(String, String, Field.Store, Field.Index, Field.TermVector)} for more information */
+  public static Field newField(String name, String value, Store store, Index index) {
+    return newField(random, name, value, store, index);
+  }
+
+  /**
+   * Returns a new Field instance. Use this when the test does not
+   * care about some specific field settings (most tests)
+   * <ul>
+   *  <li>If the store value is set to Store.NO, sometimes the field will be randomly stored.
+   *  <li>More term vector data than you ask for might be indexed, for example if you choose YES
+   * it might index term vectors with offsets too.
+   * </ul>
+ */ + public static Field newField(String name, String value, Store store, Index index, TermVector tv) { + return newField(random, name, value, store, index, tv); + } + + /** Returns a new field instance, using the specified random. + * See {@link #newField(String, String, Field.Store, Field.Index, Field.TermVector)} for more information */ + public static Field newField(Random random, String name, String value, Index index) { + return newField(random, name, value, Store.NO, index); + } + + /** Returns a new field instance, using the specified random. + * See {@link #newField(String, String, Field.Store, Field.Index, Field.TermVector)} for more information */ + public static Field newField(Random random, String name, String value, Store store, Index index) { + return newField(random, name, value, store, index, TermVector.NO); + } + + /** Returns a new field instance, using the specified random. + * See {@link #newField(String, String, Field.Store, Field.Index, Field.TermVector)} for more information */ + public static Field newField(Random random, String name, String value, Store store, Index index, TermVector tv) { + if (usually(random)) { + // most of the time, don't modify the params + return new Field(name, value, store, index, tv); + } + + if (!index.isIndexed()) + return new Field(name, value, store, index, tv); + + if (!store.isStored() && random.nextBoolean()) + store = Store.YES; // randomly store it + + tv = randomTVSetting(random, tv); + + return new Field(name, value, store, index, tv); + } + + static final TermVector tvSettings[] = { + TermVector.NO, TermVector.YES, TermVector.WITH_OFFSETS, + TermVector.WITH_POSITIONS, TermVector.WITH_POSITIONS_OFFSETS + }; + + private static TermVector randomTVSetting(Random random, TermVector minimum) { + switch(minimum) { + case NO: return tvSettings[_TestUtil.nextInt(random, 0, tvSettings.length-1)]; + case YES: return tvSettings[_TestUtil.nextInt(random, 1, tvSettings.length-1)]; + case WITH_OFFSETS: return random.nextBoolean() ? TermVector.WITH_OFFSETS + : TermVector.WITH_POSITIONS_OFFSETS; + case WITH_POSITIONS: return random.nextBoolean() ? 
TermVector.WITH_POSITIONS + : TermVector.WITH_POSITIONS_OFFSETS; + default: return TermVector.WITH_POSITIONS_OFFSETS; + } + } + + /** return a random Locale from the available locales on the system */ + public static Locale randomLocale(Random random) { + Locale locales[] = Locale.getAvailableLocales(); + return locales[random.nextInt(locales.length)]; + } + + /** return a random TimeZone from the available timezones on the system */ + public static TimeZone randomTimeZone(Random random) { + String tzIds[] = TimeZone.getAvailableIDs(); + return TimeZone.getTimeZone(tzIds[random.nextInt(tzIds.length)]); + } + + /** return a Locale object equivalent to its programmatic name */ + public static Locale localeForName(String localeName) { + String elements[] = localeName.split("\\_"); + switch(elements.length) { + case 3: return new Locale(elements[0], elements[1], elements[2]); + case 2: return new Locale(elements[0], elements[1]); + case 1: return new Locale(elements[0]); + default: throw new IllegalArgumentException("Invalid Locale: " + localeName); + } + } + + private static final String FS_DIRECTORIES[] = { + "SimpleFSDirectory", + "NIOFSDirectory", + "MMapDirectory" + }; + + private static final String CORE_DIRECTORIES[] = { + "RAMDirectory", + FS_DIRECTORIES[0], FS_DIRECTORIES[1], FS_DIRECTORIES[2] + }; + + public static String randomDirectory(Random random) { + if (rarely(random)) { + return CORE_DIRECTORIES[random.nextInt(CORE_DIRECTORIES.length)]; + } else { + return "RAMDirectory"; + } + } + + private static Directory newFSDirectoryImpl( + Class clazz, File file) + throws IOException { + FSDirectory d = null; + try { + // Assuming every FSDirectory has a ctor(File), but not all may take a + // LockFactory too, so setting it afterwards. + Constructor ctor = clazz.getConstructor(File.class); + d = ctor.newInstance(file); + } catch (Exception e) { + d = FSDirectory.open(file); + } + return d; + } + + /** Registers a temp file that will be deleted when tests are done. */ + public static void registerTempFile(File tmpFile) { + tempDirs.put(tmpFile.getAbsoluteFile(), Thread.currentThread().getStackTrace()); + } + + static Directory newDirectoryImpl(Random random, String clazzName) { + if (clazzName.equals("random")) + clazzName = randomDirectory(random); + if (clazzName.indexOf(".") == -1) // if not fully qualified, assume .store + clazzName = "org.apache.lucene.store." + clazzName; + try { + final Class clazz = Class.forName(clazzName).asSubclass(Directory.class); + // If it is a FSDirectory type, try its ctor(File) + if (FSDirectory.class.isAssignableFrom(clazz)) { + final File tmpFile = _TestUtil.createTempFile("test", "tmp", TEMP_DIR); + tmpFile.delete(); + tmpFile.mkdir(); + registerTempFile(tmpFile); + return newFSDirectoryImpl(clazz.asSubclass(FSDirectory.class), tmpFile); + } + + // try empty ctor + return clazz.newInstance(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + /** create a new searcher over the reader. + * This searcher might randomly use threads. */ + public static IndexSearcher newSearcher(IndexReader r) throws IOException { + return newSearcher(r, true); + } + + /** create a new searcher over the reader. + * This searcher might randomly use threads. + * if maybeWrap is true, this searcher might wrap the reader + * with one that returns null for getSequentialSubReaders. 
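(Taken together, the factories above give randomized tests their usual shape. A minimal sketch, assuming a class extending LuceneTestCase; the test name and field values are illustrative:)

    public void testOneDocument() throws Exception {
      Directory dir = newDirectory();               // MockDirectoryWrapper, leak-checked on close()
      RandomIndexWriter writer = new RandomIndexWriter(random, dir);
      Document doc = new Document();
      doc.add(newField("body", "hello world", Field.Store.NO, Field.Index.ANALYZED));
      writer.addDocument(doc);
      IndexReader reader = writer.getReader();
      writer.close();
      IndexSearcher searcher = newSearcher(reader); // may randomly wrap the reader or use threads
      assertEquals(1, searcher.search(new TermQuery(new Term("body", "hello")), 1).totalHits);
      searcher.close();
      reader.close();
      dir.close();
    }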
+ */ + public static IndexSearcher newSearcher(IndexReader r, boolean maybeWrap) throws IOException { + if (random.nextBoolean()) { + if (maybeWrap && rarely()) { + r = new SlowMultiReaderWrapper(r); + } + return new AssertingIndexSearcher(r); + } else { + int threads = 0; + final ExecutorService ex = (random.nextBoolean()) ? null + : Executors.newFixedThreadPool(threads = _TestUtil.nextInt(random, 1, 8), + new NamedThreadFactory("LuceneTestCase")); + if (ex != null && VERBOSE) { + System.out.println("NOTE: newSearcher using ExecutorService with " + threads + " threads"); + } + return new AssertingIndexSearcher(r, ex) { + @Override + public void close() throws IOException { + super.close(); + shutdownExecutorService(ex); + } + }; + } + } + + static void shutdownExecutorService(ExecutorService ex) { + if (ex != null) { + ex.shutdown(); + try { + ex.awaitTermination(1000, TimeUnit.MILLISECONDS); + } catch (InterruptedException e) { + e.printStackTrace(); + } + } + } + + public String getName() { + return this.name; + } + + /** Gets a resource from the classpath as {@link File}. This method should only be used, + * if a real file is needed. To get a stream, code should prefer + * {@link Class#getResourceAsStream} using {@code this.getClass()}. + */ + + protected File getDataFile(String name) throws IOException { + try { + return new File(this.getClass().getResource(name).toURI()); + } catch (Exception e) { + throw new IOException("Cannot find resource: " + name); + } + } + + // We get here from InterceptTestCaseEvents on the 'failed' event.... + public void reportAdditionalFailureInfo() { + System.err.println("NOTE: reproduce with: ant test -Dtestcase=" + getClass().getSimpleName() + + " -Dtestmethod=" + getName() + " -Dtests.seed=" + new TwoLongs(staticSeed, seed) + + reproduceWithExtraParams()); + } + + // extra params that were overridden needed to reproduce the command + private String reproduceWithExtraParams() { + StringBuilder sb = new StringBuilder(); + if (!TEST_LOCALE.equals("random")) sb.append(" -Dtests.locale=").append(TEST_LOCALE); + if (!TEST_TIMEZONE.equals("random")) sb.append(" -Dtests.timezone=").append(TEST_TIMEZONE); + if (!TEST_DIRECTORY.equals("random")) sb.append(" -Dtests.directory=").append(TEST_DIRECTORY); + if (RANDOM_MULTIPLIER > 1) sb.append(" -Dtests.multiplier=").append(RANDOM_MULTIPLIER); + if (TEST_NIGHTLY) sb.append(" -Dtests.nightly=true"); + return sb.toString(); + } + + // recorded seed: for beforeClass + private static long staticSeed; + // seed for individual test methods, changed in @before + private long seed; + + private static final Random seedRand = new Random(); + protected static final Random random = new Random(0); + + private String name = ""; + + /** + * Annotation for tests that should only be run during nightly builds. 
+ */ + @Documented + @Inherited + @Retention(RetentionPolicy.RUNTIME) + public @interface Nightly {} + + /** optionally filters the tests to be run by TEST_METHOD */ + public static class LuceneTestCaseRunner extends BlockJUnit4ClassRunner { + private List testMethods; + + @Override + protected List computeTestMethods() { + if (testMethods != null) + return testMethods; + testClassesRun.add(getTestClass().getJavaClass().getSimpleName()); + testMethods = new ArrayList(); + for (Method m : getTestClass().getJavaClass().getMethods()) { + // check if the current test's class has methods annotated with @Ignore + final Ignore ignored = m.getAnnotation(Ignore.class); + if (ignored != null && !m.getName().equals("alwaysIgnoredTestMethod")) { + System.err.println("NOTE: Ignoring test method '" + m.getName() + "': " + ignored.value()); + } + // add methods starting with "test" + final int mod = m.getModifiers(); + if (m.getAnnotation(Test.class) != null || + (m.getName().startsWith("test") && + !Modifier.isAbstract(mod) && + m.getParameterTypes().length == 0 && + m.getReturnType() == Void.TYPE)) + { + if (Modifier.isStatic(mod)) + throw new RuntimeException("Test methods must not be static."); + testMethods.add(new FrameworkMethod(m)); + } + } + + if (testMethods.isEmpty()) { + throw new RuntimeException("No runnable methods!"); + } + + if (TEST_NIGHTLY == false) { + if (getTestClass().getJavaClass().isAnnotationPresent(Nightly.class)) { + /* the test class is annotated with nightly, remove all methods */ + String className = getTestClass().getJavaClass().getSimpleName(); + System.err.println("NOTE: Ignoring nightly-only test class '" + className + "'"); + testMethods.clear(); + } else { + /* remove all nightly-only methods */ + for (int i = 0; i < testMethods.size(); i++) { + final FrameworkMethod m = testMethods.get(i); + if (m.getAnnotation(Nightly.class) != null) { + System.err.println("NOTE: Ignoring nightly-only test method '" + m.getName() + "'"); + testMethods.remove(i--); + } + } + } + /* dodge a possible "no-runnable methods" exception by adding a fake ignored test */ + if (testMethods.isEmpty()) { + try { + testMethods.add(new FrameworkMethod(LuceneTestCase.class.getMethod("alwaysIgnoredTestMethod"))); + } catch (Exception e) { throw new RuntimeException(e); } + } + } + return testMethods; + } + + @Override + protected void runChild(FrameworkMethod arg0, RunNotifier arg1) { + if (VERBOSE) { + System.out.println("\nNOTE: running test " + arg0.getName()); + } + + // only print iteration info if the user requested more than one iterations + final boolean verbose = VERBOSE && TEST_ITER > 1; + + final int currentIter[] = new int[1]; + arg1.addListener(new RunListener() { + @Override + public void testFailure(Failure failure) throws Exception { + if (verbose) { + System.out.println("\nNOTE: iteration " + currentIter[0] + " failed! "); + } + } + }); + for (int i = 0; i < TEST_ITER; i++) { + currentIter[0] = i; + if (verbose) { + System.out.println("\nNOTE: running iter=" + (1+i) + " of " + TEST_ITER); + } + super.runChild(arg0, arg1); + if (testsFailed) { + if (i >= TEST_ITER_MIN - 1) { // XXX is this still off-by-one? 
+ break; + } + } + } + } + + public LuceneTestCaseRunner(Class clazz) throws InitializationError { + super(clazz); + Filter f = new Filter() { + + @Override + public String describe() { return "filters according to TEST_METHOD"; } + + @Override + public boolean shouldRun(Description d) { + return TEST_METHOD == null || d.getMethodName().equals(TEST_METHOD); + } + }; + + try { + f.apply(this); + } catch (NoTestsRemainException e) { + throw new RuntimeException(e); + } + } + } + + @Ignore("just a hack") + public final void alwaysIgnoredTestMethod() {} +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/util/ThrottledIndexOutput.java b/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/util/ThrottledIndexOutput.java new file mode 100644 index 0000000..fcded42 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/util/ThrottledIndexOutput.java @@ -0,0 +1,149 @@ +package org.apache.lucene.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +import java.io.IOException; + +import org.apache.lucene.store.DataInput; +import org.apache.lucene.store.IndexOutput; + +public class ThrottledIndexOutput extends IndexOutput { + public static final int DEFAULT_MIN_WRITTEN_BYTES = 1024; + private final int bytesPerSecond; + private IndexOutput delegate; + private long flushDelayMillis; + private long closeDelayMillis; + private long seekDelayMillis; + private long pendingBytes; + private long minBytesWritten; + private long timeElapsed; + private final byte[] bytes = new byte[1]; + + public ThrottledIndexOutput newFromDelegate(IndexOutput output) { + return new ThrottledIndexOutput(bytesPerSecond, flushDelayMillis, + closeDelayMillis, seekDelayMillis, minBytesWritten, output); + } + + public ThrottledIndexOutput(int bytesPerSecond, long delayInMillis, + IndexOutput delegate) { + this(bytesPerSecond, delayInMillis, delayInMillis, delayInMillis, + DEFAULT_MIN_WRITTEN_BYTES, delegate); + } + + public ThrottledIndexOutput(int bytesPerSecond, long delays, + int minBytesWritten, IndexOutput delegate) { + this(bytesPerSecond, delays, delays, delays, minBytesWritten, delegate); + } + + public static final int mBitsToBytes(int mbits) { + return mbits * 125000; + } + + public ThrottledIndexOutput(int bytesPerSecond, long flushDelayMillis, + long closeDelayMillis, long seekDelayMillis, long minBytesWritten, + IndexOutput delegate) { + assert bytesPerSecond > 0; + this.delegate = delegate; + this.bytesPerSecond = bytesPerSecond; + this.flushDelayMillis = flushDelayMillis; + this.closeDelayMillis = closeDelayMillis; + this.seekDelayMillis = seekDelayMillis; + this.minBytesWritten = minBytesWritten; + } + + @Override + public void flush() throws IOException { + sleep(flushDelayMillis); + delegate.flush(); + } + + @Override + public void close() throws IOException { + try { + sleep(closeDelayMillis + getDelay(true)); + } finally { + delegate.close(); + } + } + + @Override + public long getFilePointer() { + return delegate.getFilePointer(); + } + + @Override + public void seek(long pos) throws IOException { + sleep(seekDelayMillis); + delegate.seek(pos); + } + + @Override + public long length() throws IOException { + return delegate.length(); + } + + @Override + public void writeByte(byte b) throws IOException { + bytes[0] = b; + writeBytes(bytes, 0, 1); + } + + @Override + public void writeBytes(byte[] b, int offset, int length) throws IOException { + final long before = System.nanoTime(); + delegate.writeBytes(b, offset, length); + timeElapsed += System.nanoTime() - before; + pendingBytes += length; + sleep(getDelay(false)); + + } + + protected long getDelay(boolean closing) { + if (pendingBytes > 0 && (closing || pendingBytes > minBytesWritten)) { + long actualBps = (timeElapsed / pendingBytes) * 1000000000l; // nano to sec + if (actualBps > bytesPerSecond) { + long expected = (pendingBytes * 1000l / bytesPerSecond) ; + final long delay = expected - (timeElapsed / 1000000l) ; + pendingBytes = 0; + timeElapsed = 0; + return delay; + } + } + return 0; + + } + + private static final void sleep(long ms) { + if (ms <= 0) + return; + try { + Thread.sleep(ms); + } catch (InterruptedException e) { + throw new ThreadInterruptedException(e); + } + } + + @Override + public void setLength(long length) throws IOException { + delegate.setLength(length); + } + + @Override + public void copyBytes(DataInput input, long numBytes) throws IOException { + delegate.copyBytes(input, numBytes); + } +} diff --git 
a/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/util/_TestUtil.java b/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/util/_TestUtil.java new file mode 100644 index 0000000..0cf26a2 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test-framework/org/apache/lucene/util/_TestUtil.java @@ -0,0 +1,443 @@ +package org.apache.lucene.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.BufferedOutputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.PrintStream; +import java.lang.reflect.Method; +import java.util.Enumeration; +import java.util.HashMap; +import java.util.Map; +import java.util.Random; +import java.util.zip.ZipEntry; +import java.util.zip.ZipFile; + +import org.junit.Assert; + +import org.apache.lucene.index.CheckIndex; +import org.apache.lucene.index.ConcurrentMergeScheduler; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.LogMergePolicy; +import org.apache.lucene.index.MergePolicy; +import org.apache.lucene.index.MergeScheduler; +import org.apache.lucene.index.TieredMergePolicy; +import org.apache.lucene.search.FieldDoc; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.store.Directory; + +public class _TestUtil { + + /** Returns temp dir, based on String arg in its name; + * does not create the directory. */ + public static File getTempDir(String desc) { + try { + File f = createTempFile(desc, "tmp", LuceneTestCase.TEMP_DIR); + f.delete(); + LuceneTestCase.registerTempFile(f); + return f; + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + /** + * Deletes a directory and everything underneath it. 
+   */
+  public static void rmDir(File dir) throws IOException {
+    if (dir.exists()) {
+      for (File f : dir.listFiles()) {
+        if (f.isDirectory()) {
+          rmDir(f);
+        } else {
+          if (!f.delete()) {
+            throw new IOException("could not delete " + f);
+          }
+        }
+      }
+      if (!dir.delete()) {
+        throw new IOException("could not delete " + dir);
+      }
+    }
+  }
+
+  /**
+   * Convenience method: Unzip zipName + ".zip" under destDir, removing destDir first
+   */
+  public static void unzip(File zipName, File destDir) throws IOException {
+
+    ZipFile zipFile = new ZipFile(zipName);
+
+    Enumeration<? extends ZipEntry> entries = zipFile.entries();
+
+    rmDir(destDir);
+
+    destDir.mkdir();
+    LuceneTestCase.registerTempFile(destDir);
+
+    while (entries.hasMoreElements()) {
+      ZipEntry entry = entries.nextElement();
+
+      InputStream in = zipFile.getInputStream(entry);
+      File targetFile = new File(destDir, entry.getName());
+      if (entry.isDirectory()) {
+        // allow unzipping with directory structure
+        targetFile.mkdirs();
+      } else {
+        if (targetFile.getParentFile()!=null) {
+          // be on the safe side: do not rely on directories always being extracted
+          // before their children (although this makes sense, but is it guaranteed?)
+          targetFile.getParentFile().mkdirs();
+        }
+        OutputStream out = new BufferedOutputStream(new FileOutputStream(targetFile));
+
+        byte[] buffer = new byte[8192];
+        int len;
+        while((len = in.read(buffer)) >= 0) {
+          out.write(buffer, 0, len);
+        }
+
+        in.close();
+        out.close();
+      }
+    }
+
+    zipFile.close();
+  }
+
+  public static void syncConcurrentMerges(IndexWriter writer) {
+    syncConcurrentMerges(writer.getConfig().getMergeScheduler());
+  }
+
+  public static void syncConcurrentMerges(MergeScheduler ms) {
+    if (ms instanceof ConcurrentMergeScheduler)
+      ((ConcurrentMergeScheduler) ms).sync();
+  }
+
+  /** This runs the CheckIndex tool on the index in <code>dir</code>. If any
+   *  issues are hit, a RuntimeException is thrown; else,
+   *  the CheckIndex.Status is returned. */
+  public static CheckIndex.Status checkIndex(Directory dir) throws IOException {
+    ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
+
+    CheckIndex checker = new CheckIndex(dir);
+    checker.setInfoStream(new PrintStream(bos));
+    CheckIndex.Status indexStatus = checker.checkIndex();
+    if (indexStatus == null || indexStatus.clean == false) {
+      System.out.println("CheckIndex failed");
+      System.out.println(bos.toString());
+      throw new RuntimeException("CheckIndex failed");
+    } else {
+      return indexStatus;
+    }
+  }
+
+  /** Use only for testing.
+   *  @deprecated -- in 3.0 we can use Arrays.toString
+   *  instead */
+  @Deprecated
+  public static String arrayToString(int[] array) {
+    StringBuilder buf = new StringBuilder();
+    buf.append("[");
+    for(int i=0;i<array.length;i++) {
+      if (i > 0) {
+        buf.append(" ");
+      }
+      buf.append(array[i]);
+    }
+    buf.append("]");
+    return buf.toString();
+  }
+
+  /** Use only for testing.
+   *  @deprecated -- in 3.0 we can use Arrays.toString
+   *  instead */
+  @Deprecated
+  public static String arrayToString(Object[] array) {
+    StringBuilder buf = new StringBuilder();
+    buf.append("[");
+    for(int i=0;i<array.length;i++) {
+      if (i > 0) {
+        buf.append(" ");
+      }
+      buf.append(array[i]);
+    }
+    buf.append("]");
+    return buf.toString();
+  }
+
+  public static String randomSimpleString(Random r) {
+    final int end = r.nextInt(10);
+    if (end == 0) {
+      // allow 0 length
+      return "";
+    }
+    final char[] buffer = new char[end];
+    for (int i = 0; i < end; i++) {
+      buffer[i] = (char) _TestUtil.nextInt(r, 97, 102);
+    }
+    return new String(buffer, 0, end);
+  }
+
+  /** Returns random string, including full unicode range.
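(A sketch of how these generators and checks are typically combined in a test; it assumes a Directory named "dir" left behind by a closed IndexWriter, and the loop bound is arbitrary:)

    Random r = new Random(0);
    for (int i = 0; i < 100; i++) {
      String simple  = _TestUtil.randomSimpleString(r);   // short, ascii 'a'..'f'
      String unicode = _TestUtil.randomUnicodeString(r);  // full range, may contain surrogate pairs
      // ... index both strings, search for them, assert on the hits ...
    }
    _TestUtil.checkIndex(dir);  // throws RuntimeException if CheckIndex reports problems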
*/ + public static String randomUnicodeString(Random r) { + return randomUnicodeString(r, 20); + } + + /** + * Returns a random string up to a certain length. + */ + public static String randomUnicodeString(Random r, int maxLength) { + final int end = r.nextInt(maxLength); + if (end == 0) { + // allow 0 length + return ""; + } + final char[] buffer = new char[end]; + randomFixedLengthUnicodeString(r, buffer, 0, buffer.length); + return new String(buffer, 0, end); + } + + /** + * Fills provided char[] with valid random unicode code + * unit sequence. + */ + public static void randomFixedLengthUnicodeString(Random random, char[] chars, int offset, int length) { + int i = offset; + final int end = offset + length; + while(i < end) { + final int t = random.nextInt(5); + if (0 == t && i < length - 1) { + // Make a surrogate pair + // High surrogate + chars[i++] = (char) nextInt(random, 0xd800, 0xdbff); + // Low surrogate + chars[i++] = (char) nextInt(random, 0xdc00, 0xdfff); + } else if (t <= 1) { + chars[i++] = (char) random.nextInt(0x80); + } else if (2 == t) { + chars[i++] = (char) nextInt(random, 0x80, 0x800); + } else if (3 == t) { + chars[i++] = (char) nextInt(random, 0x800, 0xd7ff); + } else if (4 == t) { + chars[i++] = (char) nextInt(random, 0xe000, 0xfffe); + } + } + } + + private static final int[] blockStarts = { + 0x0000, 0x0080, 0x0100, 0x0180, 0x0250, 0x02B0, 0x0300, 0x0370, 0x0400, + 0x0500, 0x0530, 0x0590, 0x0600, 0x0700, 0x0750, 0x0780, 0x07C0, 0x0800, + 0x0900, 0x0980, 0x0A00, 0x0A80, 0x0B00, 0x0B80, 0x0C00, 0x0C80, 0x0D00, + 0x0D80, 0x0E00, 0x0E80, 0x0F00, 0x1000, 0x10A0, 0x1100, 0x1200, 0x1380, + 0x13A0, 0x1400, 0x1680, 0x16A0, 0x1700, 0x1720, 0x1740, 0x1760, 0x1780, + 0x1800, 0x18B0, 0x1900, 0x1950, 0x1980, 0x19E0, 0x1A00, 0x1A20, 0x1B00, + 0x1B80, 0x1C00, 0x1C50, 0x1CD0, 0x1D00, 0x1D80, 0x1DC0, 0x1E00, 0x1F00, + 0x2000, 0x2070, 0x20A0, 0x20D0, 0x2100, 0x2150, 0x2190, 0x2200, 0x2300, + 0x2400, 0x2440, 0x2460, 0x2500, 0x2580, 0x25A0, 0x2600, 0x2700, 0x27C0, + 0x27F0, 0x2800, 0x2900, 0x2980, 0x2A00, 0x2B00, 0x2C00, 0x2C60, 0x2C80, + 0x2D00, 0x2D30, 0x2D80, 0x2DE0, 0x2E00, 0x2E80, 0x2F00, 0x2FF0, 0x3000, + 0x3040, 0x30A0, 0x3100, 0x3130, 0x3190, 0x31A0, 0x31C0, 0x31F0, 0x3200, + 0x3300, 0x3400, 0x4DC0, 0x4E00, 0xA000, 0xA490, 0xA4D0, 0xA500, 0xA640, + 0xA6A0, 0xA700, 0xA720, 0xA800, 0xA830, 0xA840, 0xA880, 0xA8E0, 0xA900, + 0xA930, 0xA960, 0xA980, 0xAA00, 0xAA60, 0xAA80, 0xABC0, 0xAC00, 0xD7B0, + 0xE000, 0xF900, 0xFB00, 0xFB50, 0xFE00, 0xFE10, + 0xFE20, 0xFE30, 0xFE50, 0xFE70, 0xFF00, 0xFFF0, + 0x10000, 0x10080, 0x10100, 0x10140, 0x10190, 0x101D0, 0x10280, 0x102A0, + 0x10300, 0x10330, 0x10380, 0x103A0, 0x10400, 0x10450, 0x10480, 0x10800, + 0x10840, 0x10900, 0x10920, 0x10A00, 0x10A60, 0x10B00, 0x10B40, 0x10B60, + 0x10C00, 0x10E60, 0x11080, 0x12000, 0x12400, 0x13000, 0x1D000, 0x1D100, + 0x1D200, 0x1D300, 0x1D360, 0x1D400, 0x1F000, 0x1F030, 0x1F100, 0x1F200, + 0x20000, 0x2A700, 0x2F800, 0xE0000, 0xE0100, 0xF0000, 0x100000 + }; + + private static final int[] blockEnds = { + 0x007F, 0x00FF, 0x017F, 0x024F, 0x02AF, 0x02FF, 0x036F, 0x03FF, 0x04FF, + 0x052F, 0x058F, 0x05FF, 0x06FF, 0x074F, 0x077F, 0x07BF, 0x07FF, 0x083F, + 0x097F, 0x09FF, 0x0A7F, 0x0AFF, 0x0B7F, 0x0BFF, 0x0C7F, 0x0CFF, 0x0D7F, + 0x0DFF, 0x0E7F, 0x0EFF, 0x0FFF, 0x109F, 0x10FF, 0x11FF, 0x137F, 0x139F, + 0x13FF, 0x167F, 0x169F, 0x16FF, 0x171F, 0x173F, 0x175F, 0x177F, 0x17FF, + 0x18AF, 0x18FF, 0x194F, 0x197F, 0x19DF, 0x19FF, 0x1A1F, 0x1AAF, 0x1B7F, + 0x1BBF, 0x1C4F, 0x1C7F, 0x1CFF, 0x1D7F, 0x1DBF, 0x1DFF, 0x1EFF, 0x1FFF, 
+ 0x206F, 0x209F, 0x20CF, 0x20FF, 0x214F, 0x218F, 0x21FF, 0x22FF, 0x23FF, + 0x243F, 0x245F, 0x24FF, 0x257F, 0x259F, 0x25FF, 0x26FF, 0x27BF, 0x27EF, + 0x27FF, 0x28FF, 0x297F, 0x29FF, 0x2AFF, 0x2BFF, 0x2C5F, 0x2C7F, 0x2CFF, + 0x2D2F, 0x2D7F, 0x2DDF, 0x2DFF, 0x2E7F, 0x2EFF, 0x2FDF, 0x2FFF, 0x303F, + 0x309F, 0x30FF, 0x312F, 0x318F, 0x319F, 0x31BF, 0x31EF, 0x31FF, 0x32FF, + 0x33FF, 0x4DBF, 0x4DFF, 0x9FFF, 0xA48F, 0xA4CF, 0xA4FF, 0xA63F, 0xA69F, + 0xA6FF, 0xA71F, 0xA7FF, 0xA82F, 0xA83F, 0xA87F, 0xA8DF, 0xA8FF, 0xA92F, + 0xA95F, 0xA97F, 0xA9DF, 0xAA5F, 0xAA7F, 0xAADF, 0xABFF, 0xD7AF, 0xD7FF, + 0xF8FF, 0xFAFF, 0xFB4F, 0xFDFF, 0xFE0F, 0xFE1F, + 0xFE2F, 0xFE4F, 0xFE6F, 0xFEFF, 0xFFEF, 0xFFFE, /* avoid 0xFFFF on 3.x */ + 0x1007F, 0x100FF, 0x1013F, 0x1018F, 0x101CF, 0x101FF, 0x1029F, 0x102DF, + 0x1032F, 0x1034F, 0x1039F, 0x103DF, 0x1044F, 0x1047F, 0x104AF, 0x1083F, + 0x1085F, 0x1091F, 0x1093F, 0x10A5F, 0x10A7F, 0x10B3F, 0x10B5F, 0x10B7F, + 0x10C4F, 0x10E7F, 0x110CF, 0x123FF, 0x1247F, 0x1342F, 0x1D0FF, 0x1D1FF, + 0x1D24F, 0x1D35F, 0x1D37F, 0x1D7FF, 0x1F02F, 0x1F09F, 0x1F1FF, 0x1F2FF, + 0x2A6DF, 0x2B73F, 0x2FA1F, 0xE007F, 0xE01EF, 0xFFFFF, 0x10FFFF + }; + + /** Returns random string, all codepoints within the same unicode block. */ + public static String randomRealisticUnicodeString(Random r) { + return randomRealisticUnicodeString(r, 20); + } + + /** Returns random string, all codepoints within the same unicode block. */ + public static String randomRealisticUnicodeString(Random r, int maxLength) { + final int end = r.nextInt(maxLength); + final int block = r.nextInt(blockStarts.length); + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < end; i++) + sb.appendCodePoint(nextInt(r, blockStarts[block], blockEnds[block])); + return sb.toString(); + } + + /** start and end are BOTH inclusive */ + public static int nextInt(Random r, int start, int end) { + return start + r.nextInt(end-start+1); + } + + public static boolean anyFilesExceptWriteLock(Directory dir) throws IOException { + String[] files = dir.listAll(); + if (files.length > 1 || (files.length == 1 && !files[0].equals("write.lock"))) { + return true; + } else { + return false; + } + } + + /** just tries to configure things to keep the open file + * count lowish */ + public static void reduceOpenFiles(IndexWriter w) { + // keep number of open files lowish + MergePolicy mp = w.getConfig().getMergePolicy(); + if (mp instanceof LogMergePolicy) { + LogMergePolicy lmp = (LogMergePolicy) mp; + lmp.setMergeFactor(Math.min(5, lmp.getMergeFactor())); + } else if (mp instanceof TieredMergePolicy) { + TieredMergePolicy tmp = (TieredMergePolicy) mp; + tmp.setMaxMergeAtOnce(Math.min(5, tmp.getMaxMergeAtOnce())); + tmp.setSegmentsPerTier(Math.min(5, tmp.getSegmentsPerTier())); + } + + MergeScheduler ms = w.getConfig().getMergeScheduler(); + if (ms instanceof ConcurrentMergeScheduler) { + ((ConcurrentMergeScheduler) ms).setMaxThreadCount(2); + ((ConcurrentMergeScheduler) ms).setMaxMergeCount(3); + } + } + + /** Checks some basic behaviour of an AttributeImpl + * @param reflectedValues contains a map with "AttributeClass#key" as values + */ + public static void assertAttributeReflection(final AttributeImpl att, Map reflectedValues) { + final Map map = new HashMap(); + att.reflectWith(new AttributeReflector() { + public void reflect(Class attClass, String key, Object value) { + map.put(attClass.getName() + '#' + key, value); + } + }); + Assert.assertEquals("Reflection does not produce same map", reflectedValues, map); + } + + public static void 
keepFullyDeletedSegments(IndexWriter w) {
+    try {
+      // Carefully invoke what is a package-private (test
+      // only, internal) method on IndexWriter:
+      Method m = IndexWriter.class.getDeclaredMethod("keepFullyDeletedSegments");
+      m.setAccessible(true);
+      m.invoke(w);
+    } catch (Exception e) {
+      // Should not happen?
+      throw new RuntimeException(e);
+    }
+  }
+
+  /**
+   * insecure, fast version of File.createTempFile
+   * uses Random instead of SecureRandom.
+   */
+  public static File createTempFile(String prefix, String suffix, File directory)
+      throws IOException {
+    // Force a prefix null check first
+    if (prefix.length() < 3) {
+      throw new IllegalArgumentException("prefix must be 3");
+    }
+    String newSuffix = suffix == null ? ".tmp" : suffix;
+    File result;
+    do {
+      result = genTempFile(prefix, newSuffix, directory);
+    } while (!result.createNewFile());
+    return result;
+  }
+
+  /* Temp file counter */
+  private static int counter = 0;
+
+  /* identifier for different VM processes */
+  private static int counterBase = 0;
+
+  private static class TempFileLocker {};
+  private static TempFileLocker tempFileLocker = new TempFileLocker();
+
+  private static File genTempFile(String prefix, String suffix, File directory) {
+    int identify = 0;
+
+    synchronized (tempFileLocker) {
+      if (counter == 0) {
+        int newInt = new Random().nextInt();
+        counter = ((newInt / 65535) & 0xFFFF) + 0x2710;
+        counterBase = counter;
+      }
+      identify = counter++;
+    }
+
+    StringBuilder newName = new StringBuilder();
+    newName.append(prefix);
+    newName.append(counterBase);
+    newName.append(identify);
+    newName.append(suffix);
+    return new File(directory, newName.toString());
+  }
+
+  public static void assertEquals(TopDocs expected, TopDocs actual) {
+    Assert.assertEquals("wrong total hits", expected.totalHits, actual.totalHits);
+    Assert.assertEquals("wrong maxScore", expected.getMaxScore(), actual.getMaxScore(), 0.0);
+    Assert.assertEquals("wrong hit count", expected.scoreDocs.length, actual.scoreDocs.length);
+    for(int hitIDX=0;hitIDX<expected.scoreDocs.length;hitIDX++) {
+      final ScoreDoc expectedSD = expected.scoreDocs[hitIDX];
+      final ScoreDoc actualSD = actual.scoreDocs[hitIDX];
+      Assert.assertEquals("wrong hit docID", expectedSD.doc, actualSD.doc);
+      Assert.assertEquals("wrong hit score", expectedSD.score, actualSD.score, 0.0);
+      if (expectedSD instanceof FieldDoc) {
+        Assert.assertTrue(actualSD instanceof FieldDoc);
+        Assert.assertEquals("wrong sort field values",
+                            ((FieldDoc) expectedSD).fields,
+                            ((FieldDoc) actualSD).fields);
+      } else {
+        Assert.assertFalse(actualSD instanceof FieldDoc);
+      }
+    }
+  }
+}
diff --git a/lucene-java-3.4.0/lucene/backwards/src/test-framework/overview.html b/lucene-java-3.4.0/lucene/backwards/src/test-framework/overview.html
new file mode 100644
--- /dev/null
+++ b/lucene-java-3.4.0/lucene/backwards/src/test-framework/overview.html
+<html>
+<head>
+  <title>Apache Lucene Test Framework API</title>
+</head>
+<body>
+<p>
+ The Lucene Test Framework is used by Lucene as the basis for its tests. + The framework can also be used for testing third-party code that uses + the Lucene API. +
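(For example, a third-party project could write something like the following, assuming the test-framework jar is on the classpath; the class below is hypothetical:)

    import org.apache.lucene.analysis.MockAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.index.RandomIndexWriter;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.util.LuceneTestCase;

    public class ThirdPartyTest extends LuceneTestCase {
      public void testIndexRoundTrip() throws Exception {
        Directory dir = newDirectory();  // randomized, checks for unclosed files
        RandomIndexWriter writer = new RandomIndexWriter(random, dir, new MockAnalyzer(random));
        writer.addDocument(new Document());
        writer.close();
        dir.close();
      }
    }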
+ + diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/TestAssertions.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/TestAssertions.java new file mode 100644 index 0000000..ce51fd3 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/TestAssertions.java @@ -0,0 +1,108 @@ +package org.apache.lucene; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.Reader; + +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; + +public class TestAssertions extends LuceneTestCase { + + public void testBasics() { + try { + assert Boolean.FALSE.booleanValue(); + fail("assertions are not enabled!"); + } catch (AssertionError e) { + assert Boolean.TRUE.booleanValue(); + } + } + + static class TestAnalyzer1 extends Analyzer { + @Override + public final TokenStream tokenStream(String s, Reader r) { return null; } + @Override + public final TokenStream reusableTokenStream(String s, Reader r) { return null; } + } + + static final class TestAnalyzer2 extends Analyzer { + @Override + public TokenStream tokenStream(String s, Reader r) { return null; } + @Override + public TokenStream reusableTokenStream(String s, Reader r) { return null; } + } + + static class TestAnalyzer3 extends Analyzer { + @Override + public TokenStream tokenStream(String s, Reader r) { return null; } + @Override + public TokenStream reusableTokenStream(String s, Reader r) { return null; } + } + + static class TestAnalyzer4 extends Analyzer { + @Override + public final TokenStream tokenStream(String s, Reader r) { return null; } + @Override + public TokenStream reusableTokenStream(String s, Reader r) { return null; } + } + + static class TestTokenStream1 extends TokenStream { + @Override + public final boolean incrementToken() { return false; } + } + + static final class TestTokenStream2 extends TokenStream { + @Override + public boolean incrementToken() { return false; } + } + + static class TestTokenStream3 extends TokenStream { + @Override + public boolean incrementToken() { return false; } + } + + public void testTokenStreams() { + new TestAnalyzer1(); + + new TestAnalyzer2(); + + try { + new TestAnalyzer3(); + fail("TestAnalyzer3 should fail assertion"); + } catch (AssertionError e) { + } + + try { + new TestAnalyzer4(); + fail("TestAnalyzer4 should fail assertion"); + } catch (AssertionError e) { + } + + new TestTokenStream1(); + + new TestTokenStream2(); + + try { + new TestTokenStream3(); + fail("TestTokenStream3 should fail assertion"); + } catch (AssertionError e) { + } + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/TestDemo.java 
b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/TestDemo.java new file mode 100644 index 0000000..3414997 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/TestDemo.java @@ -0,0 +1,79 @@ +package org.apache.lucene; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.queryParser.ParseException; +import org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; + +/** + * A very simple demo used in the API documentation (src/java/overview.html). + * + * Please try to keep src/java/overview.html up-to-date when making changes + * to this class. + */ +public class TestDemo extends LuceneTestCase { + + public void testDemo() throws IOException, ParseException { + Analyzer analyzer = new MockAnalyzer(random); + + // Store the index in memory: + Directory directory = newDirectory(); + // To store an index on disk, use this instead: + //Directory directory = FSDirectory.open("/tmp/testindex"); + RandomIndexWriter iwriter = new RandomIndexWriter(random, directory, analyzer); + iwriter.w.setInfoStream(VERBOSE ? 
System.out : null);
+    Document doc = new Document();
+    String text = "This is the text to be indexed.";
+    doc.add(newField("fieldname", text, Field.Store.YES,
+        Field.Index.ANALYZED));
+    iwriter.addDocument(doc);
+    iwriter.close();
+
+    // Now search the index:
+    IndexSearcher isearcher = new IndexSearcher(directory, true); // read-only=true
+    // Parse a simple query that searches for "text":
+    QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, "fieldname", analyzer);
+    Query query = parser.parse("text");
+    TopDocs hits = isearcher.search(query, null, 1);
+    assertEquals(1, hits.totalHits);
+    // Iterate through the results:
+    for (int i = 0; i < hits.scoreDocs.length; i++) {
+      Document hitDoc = isearcher.doc(hits.scoreDocs[i].doc);
+      assertEquals("This is the text to be indexed.", hitDoc.get("fieldname"));
+    }
+
+    // Test simple phrase query
+    query = parser.parse("\"to be\"");
+    assertEquals(1, isearcher.search(query, null, 1).totalHits);
+
+    isearcher.close();
+    directory.close();
+  }
+}
diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/TestMergeSchedulerExternal.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/TestMergeSchedulerExternal.java
new file mode 100644
index 0000000..e477ae9
--- /dev/null
+++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/TestMergeSchedulerExternal.java
@@ -0,0 +1,149 @@
+package org.apache.lucene;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.IOException;
+
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.MockDirectoryWrapper;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.LogMergePolicy;
+import org.apache.lucene.index.MergePolicy;
+import org.apache.lucene.index.ConcurrentMergeScheduler;
+import org.apache.lucene.index.MergeScheduler;
+import org.apache.lucene.index.MergePolicy.OneMerge;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+
+/**
+ * Holds test cases to verify external APIs are accessible
+ * while not being in org.apache.lucene.index package.
+ */ +public class TestMergeSchedulerExternal extends LuceneTestCase { + + volatile boolean mergeCalled; + volatile boolean mergeThreadCreated; + volatile boolean excCalled; + + private class MyMergeScheduler extends ConcurrentMergeScheduler { + + private class MyMergeThread extends ConcurrentMergeScheduler.MergeThread { + public MyMergeThread(IndexWriter writer, MergePolicy.OneMerge merge) throws IOException { + super(writer, merge); + mergeThreadCreated = true; + } + } + + @Override + protected MergeThread getMergeThread(IndexWriter writer, MergePolicy.OneMerge merge) throws IOException { + MergeThread thread = new MyMergeThread(writer, merge); + thread.setThreadPriority(getMergeThreadPriority()); + thread.setDaemon(true); + thread.setName("MyMergeThread"); + return thread; + } + + @Override + protected void handleMergeException(Throwable t) { + excCalled = true; + } + + @Override + protected void doMerge(MergePolicy.OneMerge merge) throws IOException { + mergeCalled = true; + super.doMerge(merge); + } + } + + private static class FailOnlyOnMerge extends MockDirectoryWrapper.Failure { + @Override + public void eval(MockDirectoryWrapper dir) throws IOException { + StackTraceElement[] trace = new Exception().getStackTrace(); + for (int i = 0; i < trace.length; i++) { + if ("doMerge".equals(trace[i].getMethodName())) + throw new IOException("now failing during merge"); + } + } + } + + public void testSubclassConcurrentMergeScheduler() throws IOException { + MockDirectoryWrapper dir = newDirectory(); + dir.failOn(new FailOnlyOnMerge()); + + Document doc = new Document(); + Field idField = newField("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED); + doc.add(idField); + + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergeScheduler(new MyMergeScheduler()) + .setMaxBufferedDocs(2).setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH) + .setMergePolicy(newLogMergePolicy())); + LogMergePolicy logMP = (LogMergePolicy) writer.getConfig().getMergePolicy(); + logMP.setMergeFactor(10); + for(int i=0;i<20;i++) + writer.addDocument(doc); + + ((MyMergeScheduler) writer.getConfig().getMergeScheduler()).sync(); + writer.close(); + + assertTrue(mergeThreadCreated); + assertTrue(mergeCalled); + assertTrue(excCalled); + dir.close(); + } + + private static class ReportingMergeScheduler extends MergeScheduler { + + @Override + public void merge(IndexWriter writer) throws CorruptIndexException, IOException { + OneMerge merge = null; + while ((merge = writer.getNextMerge()) != null) { + if (VERBOSE) { + System.out.println("executing merge " + merge.segString(writer.getDirectory())); + } + writer.merge(merge); + } + } + + @Override + public void close() throws CorruptIndexException, IOException {} + + } + + public void testCustomMergeScheduler() throws Exception { + // we don't really need to execute anything, just to make sure the custom MS + // compiles. But ensure that it can be used as well, e.g., no other hidden + // dependencies or something. Therefore, don't use any random API ! 
+ Directory dir = new RAMDirectory(); + IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, null); + conf.setMergeScheduler(new ReportingMergeScheduler()); + IndexWriter writer = new IndexWriter(dir, conf); + writer.addDocument(new Document()); + writer.commit(); // trigger flush + writer.addDocument(new Document()); + writer.commit(); // trigger flush + writer.optimize(); + writer.close(); + dir.close(); + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/TestSearch.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/TestSearch.java new file mode 100644 index 0000000..7697878 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/TestSearch.java @@ -0,0 +1,144 @@ +package org.apache.lucene; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.GregorianCalendar; +import java.util.Random; +import java.io.PrintWriter; +import java.io.StringWriter; + +import org.apache.lucene.util.LuceneTestCase; +import junit.framework.TestSuite; +import junit.textui.TestRunner; + +import org.apache.lucene.store.*; +import org.apache.lucene.document.*; +import org.apache.lucene.analysis.*; +import org.apache.lucene.index.*; +import org.apache.lucene.search.*; +import org.apache.lucene.queryParser.*; + +/** JUnit adaptation of an older test case SearchTest. */ +public class TestSearch extends LuceneTestCase { + + /** Main for running test case by itself. */ + public static void main(String args[]) { + TestRunner.run (new TestSuite(TestSearch.class)); + } + + /** This test performs a number of searches. It also compares output + * of searches using multi-file index segments with single-file + * index segments. + * + * TODO: someone should check that the results of the searches are + * still correct by adding assert statements. Right now, the test + * passes if the results are the same between multi-file and + * single-file formats, even if the results are wrong. 
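+   * ("Single-file" refers to the compound file format; doTestSearch below
+   * toggles it via LogMergePolicy.setUseCompoundFile().)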
+ */ + public void testSearch() throws Exception { + StringWriter sw = new StringWriter(); + PrintWriter pw = new PrintWriter(sw, true); + doTestSearch(random, pw, false); + pw.close(); + sw.close(); + String multiFileOutput = sw.getBuffer().toString(); + //System.out.println(multiFileOutput); + + sw = new StringWriter(); + pw = new PrintWriter(sw, true); + doTestSearch(random, pw, true); + pw.close(); + sw.close(); + String singleFileOutput = sw.getBuffer().toString(); + + assertEquals(multiFileOutput, singleFileOutput); + } + + + private void doTestSearch(Random random, PrintWriter out, boolean useCompoundFile) + throws Exception { + Directory directory = newDirectory(); + Analyzer analyzer = new MockAnalyzer(random); + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); + MergePolicy mp = conf.getMergePolicy(); + if (mp instanceof LogMergePolicy) { + ((LogMergePolicy) mp).setUseCompoundFile(useCompoundFile); + } + + IndexWriter writer = new IndexWriter(directory, conf); + + String[] docs = { + "a b c d e", + "a b c d e a b c d e", + "a b c d e f g h i j", + "a c e", + "e c a", + "a c e a c e", + "a c e a b c" + }; + for (int j = 0; j < docs.length; j++) { + Document d = new Document(); + d.add(newField("contents", docs[j], Field.Store.YES, Field.Index.ANALYZED)); + d.add(newField("id", ""+j, Field.Index.NOT_ANALYZED_NO_NORMS)); + writer.addDocument(d); + } + writer.close(); + + Searcher searcher = new IndexSearcher(directory, true); + + String[] queries = { + "a b", + "\"a b\"", + "\"a b c\"", + "a c", + "\"a c\"", + "\"a c e\"", + }; + ScoreDoc[] hits = null; + + Sort sort = new Sort(new SortField[] { + SortField.FIELD_SCORE, + new SortField("id", SortField.INT)}); + + QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, "contents", analyzer); + parser.setPhraseSlop(4); + for (int j = 0; j < queries.length; j++) { + Query query = parser.parse(queries[j]); + out.println("Query: " + query.toString("contents")); + + hits = searcher.search(query, null, 1000, sort).scoreDocs; + + out.println(hits.length + " total results"); + for (int i = 0 ; i < hits.length && i < 10; i++) { + Document d = searcher.doc(hits[i].doc); + out.println(i + " " + hits[i].score +// + " " + DateField.stringToDate(d.get("modified")) + + " " + d.get("contents")); + } + } + searcher.close(); + directory.close(); + } + + static long Time(int year, int month, int day) { + GregorianCalendar calendar = new GregorianCalendar(); + calendar.clear(); + calendar.set(year, month, day); + return calendar.getTime().getTime(); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/TestSearchForDuplicates.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/TestSearchForDuplicates.java new file mode 100644 index 0000000..32a74ce --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/TestSearchForDuplicates.java @@ -0,0 +1,158 @@ +package org.apache.lucene; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.util.Random; + +import org.apache.lucene.store.*; +import org.apache.lucene.document.*; +import org.apache.lucene.analysis.*; +import org.apache.lucene.index.*; +import org.apache.lucene.search.*; +import org.apache.lucene.queryParser.*; +import org.apache.lucene.util.LuceneTestCase; +import junit.framework.TestSuite; +import junit.textui.TestRunner; + +public class TestSearchForDuplicates extends LuceneTestCase { + + /** Main for running test case by itself. */ + public static void main(String args[]) { + TestRunner.run (new TestSuite(TestSearchForDuplicates.class)); + } + + + + static final String PRIORITY_FIELD ="priority"; + static final String ID_FIELD ="id"; + static final String HIGH_PRIORITY ="high"; + static final String MED_PRIORITY ="medium"; + static final String LOW_PRIORITY ="low"; + + + /** This test compares search results when using and not using compound + * files. + * + * TODO: There is rudimentary search result validation as well, but it is + * simply based on asserting the output observed in the old test case, + * without really knowing if the output is correct. Someone needs to + * validate this output and make any changes to the checkHits method. + */ + public void testRun() throws Exception { + StringWriter sw = new StringWriter(); + PrintWriter pw = new PrintWriter(sw, true); + final int MAX_DOCS = atLeast(225); + doTest(random, pw, false, MAX_DOCS); + pw.close(); + sw.close(); + String multiFileOutput = sw.getBuffer().toString(); + //System.out.println(multiFileOutput); + + sw = new StringWriter(); + pw = new PrintWriter(sw, true); + doTest(random, pw, true, MAX_DOCS); + pw.close(); + sw.close(); + String singleFileOutput = sw.getBuffer().toString(); + + assertEquals(multiFileOutput, singleFileOutput); + } + + + private void doTest(Random random, PrintWriter out, boolean useCompoundFiles, int MAX_DOCS) throws Exception { + Directory directory = newDirectory(); + Analyzer analyzer = new MockAnalyzer(random); + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); + final MergePolicy mp = conf.getMergePolicy(); + if (mp instanceof LogMergePolicy) { + ((LogMergePolicy) mp).setUseCompoundFile(useCompoundFiles); + } + IndexWriter writer = new IndexWriter(directory, conf); + if (VERBOSE) { + System.out.println("TEST: now build index"); + writer.setInfoStream(System.out); + } + + for (int j = 0; j < MAX_DOCS; j++) { + Document d = new Document(); + d.add(newField(PRIORITY_FIELD, HIGH_PRIORITY, Field.Store.YES, Field.Index.ANALYZED)); + d.add(newField(ID_FIELD, Integer.toString(j), Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(d); + } + writer.close(); + + // try a search without OR + Searcher searcher = new IndexSearcher(directory, true); + + QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, PRIORITY_FIELD, analyzer); + + Query query = parser.parse(HIGH_PRIORITY); + out.println("Query: " + query.toString(PRIORITY_FIELD)); + + final Sort sort = new Sort(new SortField[] { + SortField.FIELD_SCORE, + new 
SortField(ID_FIELD, SortField.INT)}); + + ScoreDoc[] hits = searcher.search(query, null, MAX_DOCS, sort).scoreDocs; + printHits(out, hits, searcher); + checkHits(hits, MAX_DOCS, searcher); + + searcher.close(); + + // try a new search with OR + searcher = new IndexSearcher(directory, true); + hits = null; + + parser = new QueryParser(TEST_VERSION_CURRENT, PRIORITY_FIELD, analyzer); + + query = parser.parse(HIGH_PRIORITY + " OR " + MED_PRIORITY); + out.println("Query: " + query.toString(PRIORITY_FIELD)); + + hits = searcher.search(query, null, MAX_DOCS, sort).scoreDocs; + printHits(out, hits, searcher); + checkHits(hits, MAX_DOCS, searcher); + + searcher.close(); + directory.close(); + } + + + private void printHits(PrintWriter out, ScoreDoc[] hits, Searcher searcher ) throws IOException { + out.println(hits.length + " total results\n"); + for (int i = 0 ; i < hits.length; i++) { + if ( i < 10 || (i > 94 && i < 105) ) { + Document d = searcher.doc(hits[i].doc); + out.println(i + " " + d.get(ID_FIELD)); + } + } + } + + private void checkHits(ScoreDoc[] hits, int expectedCount, Searcher searcher) throws IOException { + assertEquals("total results", expectedCount, hits.length); + for (int i = 0 ; i < hits.length; i++) { + if (i < 10 || (i > 94 && i < 105) ) { + Document d = searcher.doc(hits[i].doc); + assertEquals("check " + i, String.valueOf(i), d.get(ID_FIELD)); + } + } + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/LuceneResourcesWikiPage.html b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/LuceneResourcesWikiPage.html new file mode 100644 index 0000000..cc23b3d --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/LuceneResourcesWikiPage.html @@ -0,0 +1,267 @@ + + + + + + +Resources - Lucene-java Wiki + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+<!-- Saved MoinMoin wiki page "Resources - Lucene-java Wiki", used as tokenizer
+     test input. Section headings: Introductions, Blogs, Books, Articles,
+     Interviews, Papers, Presentations, Training, Corpora, Other. -->
+  • Lucene Resources - Articles, Books, FAQs, Forums, Presentations, Wiki.
+  • Lucene Search Forum - hosted by Nabble archiving all Lucene and Nutch mailing lists into a searchable archive/forum. The search is coded using Lucene.
+  • LuceneTutorial.com - Tips and tricks, sample applications, code samples, best practices.
+  Resources (last edited 2010-05-03 22:31:43 by SteveRowe)
+ + + + + + diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/LuceneResourcesWikiPageURLs.txt b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/LuceneResourcesWikiPageURLs.txt new file mode 100644 index 0000000..e8ca5aa --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/LuceneResourcesWikiPageURLs.txt @@ -0,0 +1,105 @@ +http://www.w3.org/TR/html4/strict.dtd +http://lucene.apache.org/java/3_0_1/api/all/overview-summary.html#overview_description +http://lucene.apache.org/java/3_0_1/gettingstarted.html +http://lucene.grantingersoll.com +http://www.lucidimagination.com/blog/ +http://blog.sematext.com/ +http://www.manning.com/hatcher3/hatcher3_cover150.jpg +http://www.manning.com/hatcher3/hatcher3_cover150.jpg +http://www.manning.com/hatcher3/hatcher3_cover150.jpg +http://www.manning.com/hatcher3/ +http://www.amazon.com/Building-Search-Applications-Lucene-Lingpipe/dp/0615204252/ +http://www.amazon.co.jp/exec/obidos/ASIN/4774127809/503-9461699-1775907 +http://www.lucenebook.com +http://www.amazon.com/exec/obidos/ASIN/1932394281 +Amazon.com +http://www.amazon.de/Suchmaschinen-entwickeln-mit-Apache-Lucene/dp/3935042450 +http://www.lucidimagination.com/Community/Hear-from-the-Experts/Articles/Getting-Started-with-Lucene/ +http://www.lucidimagination.com/Community/Hear-from-the-Experts/Articles/Optimizing-Findability-in-Lucene-and-Solr/ +http://www.lucidimagination.com/Community/Hear-from-the-Experts/Articles/Debugging-Relevance-Issues-in-Search/ +http://www.lucidimagination.com/Community/Hear-from-the-Experts/Articles/Scaling-Lucene-and-Solr/ +http://www.lucidimagination.com/Community/Hear-from-the-Experts/Articles/Introduction-to-Apache-Lucene-and-Solr/ +http://cephas.net/blog/2008/03/30/how-morelikethis-works-in-lucene/ +http://schmidt.devlib.org/software/lucene-wikipedia.html +http://marceloochoa.blogspot.com/2007/09/running-lucene-inside-your-oracle-jvm.html +http://www.onjava.com/pub/a/onjava/2007/05/24/using-the-lucene-query-parser-without-lucene.html +http://www.javaworld.com/javaworld/jw-09-2006/jw-0925-lucene.html +http://www-128.ibm.com/developerworks/java/library/wa-lucene2/index.html?ca=drs- +http://www.freesearch.pe.kr/tag/Lucene +http://www-128.ibm.com/developerworks/java/library/wa-lucene/index.html +http://www.onjava.com/pub/a/onjava/2006/01/18/using-lucene-to-search-java-source.html +http://www.jroller.com/page/wakaleo/?anchor=lucene_a_tutorial_introduction_to +http://blog.dev.sf.net/index.php?/archives/10-Behind-the-Scenes-of-the-SourceForge.net-Search-System.html +SourceForge.net +http://today.java.net/pub/a/today/2005/08/09/didyoumean.html +http://www.developer.com/java/other/article.php/3490471 +http://www.theserverside.com/tt/articles/article.tss?l=ILoveLucene +http://javaboutique.internet.com/tutorials/HTMLParser/article.html +http://bilgidata.com/localhost/bilgidata/yazi.jsp@dosya=a_lucene.xml.html +http://www.chedong.com/tech/lucene.html +http://javatechniques.com/public/java/docs/basics/lucene-memory-search.html +http://www.javaranch.com/newsletter/200404/Lucene.html +http://www.darksleep.com/lucene/ +http://www-igm.univ-mlv.fr/~dr/XPOSE2003/lucene/articleLucene.html +http://today.java.net/pub/a/today/2003/11/07/QueryParserRules.html +http://builder.com.com/5100-6389-5054799.html +http://today.java.net/pub/a/today/2003/07/30/LuceneIntro.html +http://www-106.ibm.com/developerworks/library/j-lucene/ +http://www.xml.com/pub/a/ws/2003/05/13/email.html 
+http://www.onjava.com/pub/a/onjava/2003/03/05/lucene.html +http://www.onjava.com/pub/a/onjava/2003/01/15/lucene.html +http://javangelist.snipsnap.org/space/Lucene-Mini-Tutorial +http://www.javaworld.com/javaworld/jw-09-2000/jw-0915-lucene.html +http://www.lucidimagination.com/index.php?option=com_content&task=view&id=109 +http://www.lucidimagination.com/index.php?option=com_content&task=view&id=108 +http://www.lucidimagination.com/index.php?option=com_content&task=view&id=113 +http://lucene.sourceforge.net/publications.html +http://lucene.sourceforge.net/publications.html +http://people.apache.org/~buschmi/apachecon/AdvancedIndexingLuceneAtlanta07.ppt +http://www.us.apachecon.com/us2007/ +http://people.apache.org/~yonik/presentations/lucene_intro.pdf +http://www.eu.apachecon.com +http://www.cnlp.org/presentations/slides/AdvancedLuceneEU.pdf +http://www.cnlp.org +http://www.eu.apachecon.com +http://blogs.atlassian.com/rebelutionary/downloads/tssjs2007-lucene-generic-data-indexing.pdf +http://www.atlassian.com/ +http://javasymposium.techtarget.com/lasvegas/index.html +http://www.cnlp.org/apachecon2005/AdvancedLucene.ppt +http://www.cnlp.org +http://www.apachecon.com +http://www.cnlp.org +http://www.cnlp.org +http://www.cnlp.org/apachecon2005 +http://lucene.sourceforge.net/talks/pisa/ +http://conferences.oreillynet.com/presentations/os2003/hatcher_erik_lucene.pdf +http://lucene.sourceforge.net/talks/inktomi/ +http://www.lucidimagination.com/How-We-Can-Help/Training/ +http://www.lucidimagination.com/How-We-Can-Help/Training/ +http://www.lucenebootcamp.com +http://www.apachecon.com +http://rdf.dmoz.org/ +http://rdf.dmoz.org/ +http://www-2.cs.cmu.edu/afs/cs.cmu.edu/project/theo-20/www/data/news20.html +http://www-2.cs.cmu.edu/afs/cs.cmu.edu/project/theo-20/www/data/news20.html +http://www-2.cs.cmu.edu/afs/cs.cmu.edu/project/theo-20/www/data/ +http://www-2.cs.cmu.edu/afs/cs.cmu.edu/project/theo-20/www/data/ +http://www.daviddlewis.com/resources/testcollections/reuters21578 +http://www.daviddlewis.com/resources/testcollections/reuters21578 +http://www-2.cs.cmu.edu/~enron/ +http://www-2.cs.cmu.edu/~enron/ +http://wt.jrc.it/lt/Acquis/ +http://wt.jrc.it/lt/Acquis/ +http://www.java201.com/resources/browse/38-all.html +http://www.nabble.com/Web-Search-f2787.html +http://www.nabble.com +http://www.lucenetutorial.com +LuceneTutorial.com +ist-h335-d03.syr.edu +128.230.84.100 +ist-h335-d03.syr.edu +128.230.84.100 +http://moinmo.in/ +http://moinmo.in/Python +http://moinmo.in/GPL +http://validator.w3.org/check?uri=referer diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestASCIIFoldingFilter.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestASCIIFoldingFilter.java new file mode 100644 index 0000000..ca90ad3 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestASCIIFoldingFilter.java @@ -0,0 +1,1907 @@ +package org.apache.lucene.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import java.io.StringReader; +import java.util.List; +import java.util.ArrayList; +import java.util.Iterator; + +public class TestASCIIFoldingFilter extends BaseTokenStreamTestCase { + + // testLatin1Accents() is a copy of TestLatin1AccentFilter.testU(). + public void testLatin1Accents() throws Exception { + TokenStream stream = new MockTokenizer(new StringReader + ("Des mot clés À LA CHAÎNE À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï IJ Ð Ñ" + +" Ò Ó Ô Õ Ö Ø Œ Þ Ù Ú Û Ü Ý Ÿ à á â ã ä å æ ç è é ê ë ì í î ï ij" + +" ð ñ ò ó ô õ ö ø œ ß þ ù ú û ü ý ÿ fi fl"), MockTokenizer.WHITESPACE, false); + ASCIIFoldingFilter filter = new ASCIIFoldingFilter(stream); + + CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class); + filter.reset(); + assertTermEquals("Des", filter, termAtt); + assertTermEquals("mot", filter, termAtt); + assertTermEquals("cles", filter, termAtt); + assertTermEquals("A", filter, termAtt); + assertTermEquals("LA", filter, termAtt); + assertTermEquals("CHAINE", filter, termAtt); + assertTermEquals("A", filter, termAtt); + assertTermEquals("A", filter, termAtt); + assertTermEquals("A", filter, termAtt); + assertTermEquals("A", filter, termAtt); + assertTermEquals("A", filter, termAtt); + assertTermEquals("A", filter, termAtt); + assertTermEquals("AE", filter, termAtt); + assertTermEquals("C", filter, termAtt); + assertTermEquals("E", filter, termAtt); + assertTermEquals("E", filter, termAtt); + assertTermEquals("E", filter, termAtt); + assertTermEquals("E", filter, termAtt); + assertTermEquals("I", filter, termAtt); + assertTermEquals("I", filter, termAtt); + assertTermEquals("I", filter, termAtt); + assertTermEquals("I", filter, termAtt); + assertTermEquals("IJ", filter, termAtt); + assertTermEquals("D", filter, termAtt); + assertTermEquals("N", filter, termAtt); + assertTermEquals("O", filter, termAtt); + assertTermEquals("O", filter, termAtt); + assertTermEquals("O", filter, termAtt); + assertTermEquals("O", filter, termAtt); + assertTermEquals("O", filter, termAtt); + assertTermEquals("O", filter, termAtt); + assertTermEquals("OE", filter, termAtt); + assertTermEquals("TH", filter, termAtt); + assertTermEquals("U", filter, termAtt); + assertTermEquals("U", filter, termAtt); + assertTermEquals("U", filter, termAtt); + assertTermEquals("U", filter, termAtt); + assertTermEquals("Y", filter, termAtt); + assertTermEquals("Y", filter, termAtt); + assertTermEquals("a", filter, termAtt); + assertTermEquals("a", filter, termAtt); + assertTermEquals("a", filter, termAtt); + assertTermEquals("a", filter, termAtt); + assertTermEquals("a", filter, termAtt); + assertTermEquals("a", filter, termAtt); + assertTermEquals("ae", filter, termAtt); + assertTermEquals("c", filter, termAtt); + assertTermEquals("e", filter, termAtt); + assertTermEquals("e", filter, termAtt); + assertTermEquals("e", filter, termAtt); + assertTermEquals("e", filter, termAtt); + assertTermEquals("i", filter, termAtt); + assertTermEquals("i", filter, termAtt); +
assertTermEquals("i", filter, termAtt); + assertTermEquals("i", filter, termAtt); + assertTermEquals("ij", filter, termAtt); + assertTermEquals("d", filter, termAtt); + assertTermEquals("n", filter, termAtt); + assertTermEquals("o", filter, termAtt); + assertTermEquals("o", filter, termAtt); + assertTermEquals("o", filter, termAtt); + assertTermEquals("o", filter, termAtt); + assertTermEquals("o", filter, termAtt); + assertTermEquals("o", filter, termAtt); + assertTermEquals("oe", filter, termAtt); + assertTermEquals("ss", filter, termAtt); + assertTermEquals("th", filter, termAtt); + assertTermEquals("u", filter, termAtt); + assertTermEquals("u", filter, termAtt); + assertTermEquals("u", filter, termAtt); + assertTermEquals("u", filter, termAtt); + assertTermEquals("y", filter, termAtt); + assertTermEquals("y", filter, termAtt); + assertTermEquals("fi", filter, termAtt); + assertTermEquals("fl", filter, termAtt); + assertFalse(filter.incrementToken()); + } + + + // The following Perl script generated the foldings[] array automatically + // from ASCIIFoldingFilter.java: + // + // ============== begin get.test.cases.pl ============== + // + // use strict; + // use warnings; + // + // my $file = "ASCIIFoldingFilter.java"; + // my $output = "testcases.txt"; + // my %codes = (); + // my $folded = ''; + // + // open IN, "<:utf8", $file || die "Error opening input file '$file': $!"; + // open OUT, ">:utf8", $output || die "Error opening output file '$output': $!"; + // + // while (my $line = <IN>) { + // chomp($line); + // # case '\u0133': // [ description ] + // if ($line =~ /case\s+'\\u(....)':.*\[([^\]]+)\]/) { + // my $code = $1; + // my $desc = $2; + // $codes{$code} = $desc; + // } + // # output[outputPos++] = 'A'; + // elsif ($line =~ /output\[outputPos\+\+\] = '(.+)';/) { + // my $output_char = $1; + // $folded .= $output_char; + // } + // elsif ($line =~ /break;/ && length($folded) > 0) { + // my $first = 1; + // for my $code (sort { hex($a) <=> hex($b) } keys %codes) { + // my $desc = $codes{$code}; + // print OUT ' '; + // print OUT '+ ' if (not $first); + // $first = 0; + // print OUT '"', chr(hex($code)), qq!" // U+$code: $desc\n!; + // } + // print OUT qq! ,"$folded", // Folded result\n\n!; + // %codes = (); + // $folded = ''; + // } + // } + // close OUT; + // + // ============== end get.test.cases.pl ============== + // + public void testAllFoldings() throws Exception { + // Alternating strings of: + // 1. All non-ASCII characters to be folded, concatenated together as a + // single string. + // 2. The string of ASCII characters to which each of the above + // characters should be folded.
+ String[] foldings = { + "À" // U+00C0: LATIN CAPITAL LETTER A WITH GRAVE + + "Á" // U+00C1: LATIN CAPITAL LETTER A WITH ACUTE + + "Â" // U+00C2: LATIN CAPITAL LETTER A WITH CIRCUMFLEX + + "Ã" // U+00C3: LATIN CAPITAL LETTER A WITH TILDE + + "Ä" // U+00C4: LATIN CAPITAL LETTER A WITH DIAERESIS + + "Å" // U+00C5: LATIN CAPITAL LETTER A WITH RING ABOVE + + "Ā" // U+0100: LATIN CAPITAL LETTER A WITH MACRON + + "Ă" // U+0102: LATIN CAPITAL LETTER A WITH BREVE + + "Ą" // U+0104: LATIN CAPITAL LETTER A WITH OGONEK + + "Ə" // U+018F: LATIN CAPITAL LETTER SCHWA + + "Ǎ" // U+01CD: LATIN CAPITAL LETTER A WITH CARON + + "Ǟ" // U+01DE: LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON + + "Ǡ" // U+01E0: LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON + + "Ǻ" // U+01FA: LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE + + "Ȁ" // U+0200: LATIN CAPITAL LETTER A WITH DOUBLE GRAVE + + "Ȃ" // U+0202: LATIN CAPITAL LETTER A WITH INVERTED BREVE + + "Ȧ" // U+0226: LATIN CAPITAL LETTER A WITH DOT ABOVE + + "Ⱥ" // U+023A: LATIN CAPITAL LETTER A WITH STROKE + + "ᴀ" // U+1D00: LATIN LETTER SMALL CAPITAL A + + "Ḁ" // U+1E00: LATIN CAPITAL LETTER A WITH RING BELOW + + "Ạ" // U+1EA0: LATIN CAPITAL LETTER A WITH DOT BELOW + + "Ả" // U+1EA2: LATIN CAPITAL LETTER A WITH HOOK ABOVE + + "Ấ" // U+1EA4: LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE + + "Ầ" // U+1EA6: LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE + + "Ẩ" // U+1EA8: LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE + + "Ẫ" // U+1EAA: LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE + + "Ậ" // U+1EAC: LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW + + "Ắ" // U+1EAE: LATIN CAPITAL LETTER A WITH BREVE AND ACUTE + + "Ằ" // U+1EB0: LATIN CAPITAL LETTER A WITH BREVE AND GRAVE + + "Ẳ" // U+1EB2: LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE + + "Ẵ" // U+1EB4: LATIN CAPITAL LETTER A WITH BREVE AND TILDE + + "Ặ" // U+1EB6: LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW + + "Ⓐ" // U+24B6: CIRCLED LATIN CAPITAL LETTER A + + "A" // U+FF21: FULLWIDTH LATIN CAPITAL LETTER A + ,"A", // Folded result + + "à" // U+00E0: LATIN SMALL LETTER A WITH GRAVE + + "á" // U+00E1: LATIN SMALL LETTER A WITH ACUTE + + "â" // U+00E2: LATIN SMALL LETTER A WITH CIRCUMFLEX + + "ã" // U+00E3: LATIN SMALL LETTER A WITH TILDE + + "ä" // U+00E4: LATIN SMALL LETTER A WITH DIAERESIS + + "å" // U+00E5: LATIN SMALL LETTER A WITH RING ABOVE + + "ā" // U+0101: LATIN SMALL LETTER A WITH MACRON + + "ă" // U+0103: LATIN SMALL LETTER A WITH BREVE + + "ą" // U+0105: LATIN SMALL LETTER A WITH OGONEK + + "ǎ" // U+01CE: LATIN SMALL LETTER A WITH CARON + + "ǟ" // U+01DF: LATIN SMALL LETTER A WITH DIAERESIS AND MACRON + + "ǡ" // U+01E1: LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON + + "ǻ" // U+01FB: LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE + + "ȁ" // U+0201: LATIN SMALL LETTER A WITH DOUBLE GRAVE + + "ȃ" // U+0203: LATIN SMALL LETTER A WITH INVERTED BREVE + + "ȧ" // U+0227: LATIN SMALL LETTER A WITH DOT ABOVE + + "ɐ" // U+0250: LATIN SMALL LETTER TURNED A + + "ə" // U+0259: LATIN SMALL LETTER SCHWA + + "ɚ" // U+025A: LATIN SMALL LETTER SCHWA WITH HOOK + + "ᶏ" // U+1D8F: LATIN SMALL LETTER A WITH RETROFLEX HOOK + + "ḁ" // U+1E01: LATIN SMALL LETTER A WITH RING BELOW + + "ᶕ" // U+1D95: LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK + + "ẚ" // U+1E9A: LATIN SMALL LETTER A WITH RIGHT HALF RING + + "ạ" // U+1EA1: LATIN SMALL LETTER A WITH DOT BELOW + + "ả" // U+1EA3: LATIN SMALL LETTER A WITH HOOK ABOVE + + "ấ" // U+1EA5: LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE + + "ầ" // U+1EA7: 
LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE + + "ẩ" // U+1EA9: LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE + + "ẫ" // U+1EAB: LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE + + "ậ" // U+1EAD: LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW + + "ắ" // U+1EAF: LATIN SMALL LETTER A WITH BREVE AND ACUTE + + "ằ" // U+1EB1: LATIN SMALL LETTER A WITH BREVE AND GRAVE + + "ẳ" // U+1EB3: LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE + + "ẵ" // U+1EB5: LATIN SMALL LETTER A WITH BREVE AND TILDE + + "ặ" // U+1EB7: LATIN SMALL LETTER A WITH BREVE AND DOT BELOW + + "ₐ" // U+2090: LATIN SUBSCRIPT SMALL LETTER A + + "ₔ" // U+2094: LATIN SUBSCRIPT SMALL LETTER SCHWA + + "ⓐ" // U+24D0: CIRCLED LATIN SMALL LETTER A + + "ⱥ" // U+2C65: LATIN SMALL LETTER A WITH STROKE + + "Ɐ" // U+2C6F: LATIN CAPITAL LETTER TURNED A + + "a" // U+FF41: FULLWIDTH LATIN SMALL LETTER A + ,"a", // Folded result + + "Ꜳ" // U+A732: LATIN CAPITAL LETTER AA + ,"AA", // Folded result + + "Æ" // U+00C6: LATIN CAPITAL LETTER AE + + "Ǣ" // U+01E2: LATIN CAPITAL LETTER AE WITH MACRON + + "Ǽ" // U+01FC: LATIN CAPITAL LETTER AE WITH ACUTE + + "ᴁ" // U+1D01: LATIN LETTER SMALL CAPITAL AE + ,"AE", // Folded result + + "Ꜵ" // U+A734: LATIN CAPITAL LETTER AO + ,"AO", // Folded result + + "Ꜷ" // U+A736: LATIN CAPITAL LETTER AU + ,"AU", // Folded result + + "Ꜹ" // U+A738: LATIN CAPITAL LETTER AV + + "Ꜻ" // U+A73A: LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR + ,"AV", // Folded result + + "Ꜽ" // U+A73C: LATIN CAPITAL LETTER AY + ,"AY", // Folded result + + "⒜" // U+249C: PARENTHESIZED LATIN SMALL LETTER A + ,"(a)", // Folded result + + "ꜳ" // U+A733: LATIN SMALL LETTER AA + ,"aa", // Folded result + + "æ" // U+00E6: LATIN SMALL LETTER AE + + "ǣ" // U+01E3: LATIN SMALL LETTER AE WITH MACRON + + "ǽ" // U+01FD: LATIN SMALL LETTER AE WITH ACUTE + + "ᴂ" // U+1D02: LATIN SMALL LETTER TURNED AE + ,"ae", // Folded result + + "ꜵ" // U+A735: LATIN SMALL LETTER AO + ,"ao", // Folded result + + "ꜷ" // U+A737: LATIN SMALL LETTER AU + ,"au", // Folded result + + "ꜹ" // U+A739: LATIN SMALL LETTER AV + + "ꜻ" // U+A73B: LATIN SMALL LETTER AV WITH HORIZONTAL BAR + ,"av", // Folded result + + "ꜽ" // U+A73D: LATIN SMALL LETTER AY + ,"ay", // Folded result + + "Ɓ" // U+0181: LATIN CAPITAL LETTER B WITH HOOK + + "Ƃ" // U+0182: LATIN CAPITAL LETTER B WITH TOPBAR + + "Ƀ" // U+0243: LATIN CAPITAL LETTER B WITH STROKE + + "ʙ" // U+0299: LATIN LETTER SMALL CAPITAL B + + "ᴃ" // U+1D03: LATIN LETTER SMALL CAPITAL BARRED B + + "Ḃ" // U+1E02: LATIN CAPITAL LETTER B WITH DOT ABOVE + + "Ḅ" // U+1E04: LATIN CAPITAL LETTER B WITH DOT BELOW + + "Ḇ" // U+1E06: LATIN CAPITAL LETTER B WITH LINE BELOW + + "Ⓑ" // U+24B7: CIRCLED LATIN CAPITAL LETTER B + + "B" // U+FF22: FULLWIDTH LATIN CAPITAL LETTER B + ,"B", // Folded result + + "ƀ" // U+0180: LATIN SMALL LETTER B WITH STROKE + + "ƃ" // U+0183: LATIN SMALL LETTER B WITH TOPBAR + + "ɓ" // U+0253: LATIN SMALL LETTER B WITH HOOK + + "ᵬ" // U+1D6C: LATIN SMALL LETTER B WITH MIDDLE TILDE + + "ᶀ" // U+1D80: LATIN SMALL LETTER B WITH PALATAL HOOK + + "ḃ" // U+1E03: LATIN SMALL LETTER B WITH DOT ABOVE + + "ḅ" // U+1E05: LATIN SMALL LETTER B WITH DOT BELOW + + "ḇ" // U+1E07: LATIN SMALL LETTER B WITH LINE BELOW + + "ⓑ" // U+24D1: CIRCLED LATIN SMALL LETTER B + + "b" // U+FF42: FULLWIDTH LATIN SMALL LETTER B + ,"b", // Folded result + + "⒝" // U+249D: PARENTHESIZED LATIN SMALL LETTER B + ,"(b)", // Folded result + + "Ç" // U+00C7: LATIN CAPITAL LETTER C WITH CEDILLA + + "Ć" // U+0106: LATIN CAPITAL LETTER C WITH ACUTE + + "Ĉ" // 
U+0108: LATIN CAPITAL LETTER C WITH CIRCUMFLEX + + "Ċ" // U+010A: LATIN CAPITAL LETTER C WITH DOT ABOVE + + "Č" // U+010C: LATIN CAPITAL LETTER C WITH CARON + + "Ƈ" // U+0187: LATIN CAPITAL LETTER C WITH HOOK + + "Ȼ" // U+023B: LATIN CAPITAL LETTER C WITH STROKE + + "ʗ" // U+0297: LATIN LETTER STRETCHED C + + "ᴄ" // U+1D04: LATIN LETTER SMALL CAPITAL C + + "Ḉ" // U+1E08: LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE + + "Ⓒ" // U+24B8: CIRCLED LATIN CAPITAL LETTER C + + "C" // U+FF23: FULLWIDTH LATIN CAPITAL LETTER C + ,"C", // Folded result + + "ç" // U+00E7: LATIN SMALL LETTER C WITH CEDILLA + + "ć" // U+0107: LATIN SMALL LETTER C WITH ACUTE + + "ĉ" // U+0109: LATIN SMALL LETTER C WITH CIRCUMFLEX + + "ċ" // U+010B: LATIN SMALL LETTER C WITH DOT ABOVE + + "č" // U+010D: LATIN SMALL LETTER C WITH CARON + + "ƈ" // U+0188: LATIN SMALL LETTER C WITH HOOK + + "ȼ" // U+023C: LATIN SMALL LETTER C WITH STROKE + + "ɕ" // U+0255: LATIN SMALL LETTER C WITH CURL + + "ḉ" // U+1E09: LATIN SMALL LETTER C WITH CEDILLA AND ACUTE + + "ↄ" // U+2184: LATIN SMALL LETTER REVERSED C + + "ⓒ" // U+24D2: CIRCLED LATIN SMALL LETTER C + + "Ꜿ" // U+A73E: LATIN CAPITAL LETTER REVERSED C WITH DOT + + "ꜿ" // U+A73F: LATIN SMALL LETTER REVERSED C WITH DOT + + "c" // U+FF43: FULLWIDTH LATIN SMALL LETTER C + ,"c", // Folded result + + "⒞" // U+249E: PARENTHESIZED LATIN SMALL LETTER C + ,"(c)", // Folded result + + "Ð" // U+00D0: LATIN CAPITAL LETTER ETH + + "Ď" // U+010E: LATIN CAPITAL LETTER D WITH CARON + + "Đ" // U+0110: LATIN CAPITAL LETTER D WITH STROKE + + "Ɖ" // U+0189: LATIN CAPITAL LETTER AFRICAN D + + "Ɗ" // U+018A: LATIN CAPITAL LETTER D WITH HOOK + + "Ƌ" // U+018B: LATIN CAPITAL LETTER D WITH TOPBAR + + "ᴅ" // U+1D05: LATIN LETTER SMALL CAPITAL D + + "ᴆ" // U+1D06: LATIN LETTER SMALL CAPITAL ETH + + "Ḋ" // U+1E0A: LATIN CAPITAL LETTER D WITH DOT ABOVE + + "Ḍ" // U+1E0C: LATIN CAPITAL LETTER D WITH DOT BELOW + + "Ḏ" // U+1E0E: LATIN CAPITAL LETTER D WITH LINE BELOW + + "Ḑ" // U+1E10: LATIN CAPITAL LETTER D WITH CEDILLA + + "Ḓ" // U+1E12: LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW + + "Ⓓ" // U+24B9: CIRCLED LATIN CAPITAL LETTER D + + "Ꝺ" // U+A779: LATIN CAPITAL LETTER INSULAR D + + "D" // U+FF24: FULLWIDTH LATIN CAPITAL LETTER D + ,"D", // Folded result + + "ð" // U+00F0: LATIN SMALL LETTER ETH + + "ď" // U+010F: LATIN SMALL LETTER D WITH CARON + + "đ" // U+0111: LATIN SMALL LETTER D WITH STROKE + + "ƌ" // U+018C: LATIN SMALL LETTER D WITH TOPBAR + + "ȡ" // U+0221: LATIN SMALL LETTER D WITH CURL + + "ɖ" // U+0256: LATIN SMALL LETTER D WITH TAIL + + "ɗ" // U+0257: LATIN SMALL LETTER D WITH HOOK + + "ᵭ" // U+1D6D: LATIN SMALL LETTER D WITH MIDDLE TILDE + + "ᶁ" // U+1D81: LATIN SMALL LETTER D WITH PALATAL HOOK + + "ᶑ" // U+1D91: LATIN SMALL LETTER D WITH HOOK AND TAIL + + "ḋ" // U+1E0B: LATIN SMALL LETTER D WITH DOT ABOVE + + "ḍ" // U+1E0D: LATIN SMALL LETTER D WITH DOT BELOW + + "ḏ" // U+1E0F: LATIN SMALL LETTER D WITH LINE BELOW + + "ḑ" // U+1E11: LATIN SMALL LETTER D WITH CEDILLA + + "ḓ" // U+1E13: LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW + + "ⓓ" // U+24D3: CIRCLED LATIN SMALL LETTER D + + "ꝺ" // U+A77A: LATIN SMALL LETTER INSULAR D + + "d" // U+FF44: FULLWIDTH LATIN SMALL LETTER D + ,"d", // Folded result + + "DŽ" // U+01C4: LATIN CAPITAL LETTER DZ WITH CARON + + "DZ" // U+01F1: LATIN CAPITAL LETTER DZ + ,"DZ", // Folded result + + "Dž" // U+01C5: LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON + + "Dz" // U+01F2: LATIN CAPITAL LETTER D WITH SMALL LETTER Z + ,"Dz", // Folded result + + "⒟" // U+249F: 
PARENTHESIZED LATIN SMALL LETTER D + ,"(d)", // Folded result + + "ȸ" // U+0238: LATIN SMALL LETTER DB DIGRAPH + ,"db", // Folded result + + "dž" // U+01C6: LATIN SMALL LETTER DZ WITH CARON + + "dz" // U+01F3: LATIN SMALL LETTER DZ + + "ʣ" // U+02A3: LATIN SMALL LETTER DZ DIGRAPH + + "ʥ" // U+02A5: LATIN SMALL LETTER DZ DIGRAPH WITH CURL + ,"dz", // Folded result + + "È" // U+00C8: LATIN CAPITAL LETTER E WITH GRAVE + + "É" // U+00C9: LATIN CAPITAL LETTER E WITH ACUTE + + "Ê" // U+00CA: LATIN CAPITAL LETTER E WITH CIRCUMFLEX + + "Ë" // U+00CB: LATIN CAPITAL LETTER E WITH DIAERESIS + + "Ē" // U+0112: LATIN CAPITAL LETTER E WITH MACRON + + "Ĕ" // U+0114: LATIN CAPITAL LETTER E WITH BREVE + + "Ė" // U+0116: LATIN CAPITAL LETTER E WITH DOT ABOVE + + "Ę" // U+0118: LATIN CAPITAL LETTER E WITH OGONEK + + "Ě" // U+011A: LATIN CAPITAL LETTER E WITH CARON + + "Ǝ" // U+018E: LATIN CAPITAL LETTER REVERSED E + + "Ɛ" // U+0190: LATIN CAPITAL LETTER OPEN E + + "Ȅ" // U+0204: LATIN CAPITAL LETTER E WITH DOUBLE GRAVE + + "Ȇ" // U+0206: LATIN CAPITAL LETTER E WITH INVERTED BREVE + + "Ȩ" // U+0228: LATIN CAPITAL LETTER E WITH CEDILLA + + "Ɇ" // U+0246: LATIN CAPITAL LETTER E WITH STROKE + + "ᴇ" // U+1D07: LATIN LETTER SMALL CAPITAL E + + "Ḕ" // U+1E14: LATIN CAPITAL LETTER E WITH MACRON AND GRAVE + + "Ḗ" // U+1E16: LATIN CAPITAL LETTER E WITH MACRON AND ACUTE + + "Ḙ" // U+1E18: LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW + + "Ḛ" // U+1E1A: LATIN CAPITAL LETTER E WITH TILDE BELOW + + "Ḝ" // U+1E1C: LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE + + "Ẹ" // U+1EB8: LATIN CAPITAL LETTER E WITH DOT BELOW + + "Ẻ" // U+1EBA: LATIN CAPITAL LETTER E WITH HOOK ABOVE + + "Ẽ" // U+1EBC: LATIN CAPITAL LETTER E WITH TILDE + + "Ế" // U+1EBE: LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE + + "Ề" // U+1EC0: LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE + + "Ể" // U+1EC2: LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE + + "Ễ" // U+1EC4: LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE + + "Ệ" // U+1EC6: LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW + + "Ⓔ" // U+24BA: CIRCLED LATIN CAPITAL LETTER E + + "ⱻ" // U+2C7B: LATIN LETTER SMALL CAPITAL TURNED E + + "E" // U+FF25: FULLWIDTH LATIN CAPITAL LETTER E + ,"E", // Folded result + + "è" // U+00E8: LATIN SMALL LETTER E WITH GRAVE + + "é" // U+00E9: LATIN SMALL LETTER E WITH ACUTE + + "ê" // U+00EA: LATIN SMALL LETTER E WITH CIRCUMFLEX + + "ë" // U+00EB: LATIN SMALL LETTER E WITH DIAERESIS + + "ē" // U+0113: LATIN SMALL LETTER E WITH MACRON + + "ĕ" // U+0115: LATIN SMALL LETTER E WITH BREVE + + "ė" // U+0117: LATIN SMALL LETTER E WITH DOT ABOVE + + "ę" // U+0119: LATIN SMALL LETTER E WITH OGONEK + + "ě" // U+011B: LATIN SMALL LETTER E WITH CARON + + "ǝ" // U+01DD: LATIN SMALL LETTER TURNED E + + "ȅ" // U+0205: LATIN SMALL LETTER E WITH DOUBLE GRAVE + + "ȇ" // U+0207: LATIN SMALL LETTER E WITH INVERTED BREVE + + "ȩ" // U+0229: LATIN SMALL LETTER E WITH CEDILLA + + "ɇ" // U+0247: LATIN SMALL LETTER E WITH STROKE + + "ɘ" // U+0258: LATIN SMALL LETTER REVERSED E + + "ɛ" // U+025B: LATIN SMALL LETTER OPEN E + + "ɜ" // U+025C: LATIN SMALL LETTER REVERSED OPEN E + + "ɝ" // U+025D: LATIN SMALL LETTER REVERSED OPEN E WITH HOOK + + "ɞ" // U+025E: LATIN SMALL LETTER CLOSED REVERSED OPEN E + + "ʚ" // U+029A: LATIN SMALL LETTER CLOSED OPEN E + + "ᴈ" // U+1D08: LATIN SMALL LETTER TURNED OPEN E + + "ᶒ" // U+1D92: LATIN SMALL LETTER E WITH RETROFLEX HOOK + + "ᶓ" // U+1D93: LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK + + "ᶔ" // U+1D94: LATIN SMALL LETTER REVERSED OPEN E WITH 
RETROFLEX HOOK + + "ḕ" // U+1E15: LATIN SMALL LETTER E WITH MACRON AND GRAVE + + "ḗ" // U+1E17: LATIN SMALL LETTER E WITH MACRON AND ACUTE + + "ḙ" // U+1E19: LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW + + "ḛ" // U+1E1B: LATIN SMALL LETTER E WITH TILDE BELOW + + "ḝ" // U+1E1D: LATIN SMALL LETTER E WITH CEDILLA AND BREVE + + "ẹ" // U+1EB9: LATIN SMALL LETTER E WITH DOT BELOW + + "ẻ" // U+1EBB: LATIN SMALL LETTER E WITH HOOK ABOVE + + "ẽ" // U+1EBD: LATIN SMALL LETTER E WITH TILDE + + "ế" // U+1EBF: LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE + + "ề" // U+1EC1: LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE + + "ể" // U+1EC3: LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE + + "ễ" // U+1EC5: LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE + + "ệ" // U+1EC7: LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW + + "ₑ" // U+2091: LATIN SUBSCRIPT SMALL LETTER E + + "ⓔ" // U+24D4: CIRCLED LATIN SMALL LETTER E + + "ⱸ" // U+2C78: LATIN SMALL LETTER E WITH NOTCH + + "e" // U+FF45: FULLWIDTH LATIN SMALL LETTER E + ,"e", // Folded result + + "⒠" // U+24A0: PARENTHESIZED LATIN SMALL LETTER E + ,"(e)", // Folded result + + "Ƒ" // U+0191: LATIN CAPITAL LETTER F WITH HOOK + + "Ḟ" // U+1E1E: LATIN CAPITAL LETTER F WITH DOT ABOVE + + "Ⓕ" // U+24BB: CIRCLED LATIN CAPITAL LETTER F + + "ꜰ" // U+A730: LATIN LETTER SMALL CAPITAL F + + "Ꝼ" // U+A77B: LATIN CAPITAL LETTER INSULAR F + + "ꟻ" // U+A7FB: LATIN EPIGRAPHIC LETTER REVERSED F + + "F" // U+FF26: FULLWIDTH LATIN CAPITAL LETTER F + ,"F", // Folded result + + "ƒ" // U+0192: LATIN SMALL LETTER F WITH HOOK + + "ᵮ" // U+1D6E: LATIN SMALL LETTER F WITH MIDDLE TILDE + + "ᶂ" // U+1D82: LATIN SMALL LETTER F WITH PALATAL HOOK + + "ḟ" // U+1E1F: LATIN SMALL LETTER F WITH DOT ABOVE + + "ẛ" // U+1E9B: LATIN SMALL LETTER LONG S WITH DOT ABOVE + + "ⓕ" // U+24D5: CIRCLED LATIN SMALL LETTER F + + "ꝼ" // U+A77C: LATIN SMALL LETTER INSULAR F + + "f" // U+FF46: FULLWIDTH LATIN SMALL LETTER F + ,"f", // Folded result + + "⒡" // U+24A1: PARENTHESIZED LATIN SMALL LETTER F + ,"(f)", // Folded result + + "ff" // U+FB00: LATIN SMALL LIGATURE FF + ,"ff", // Folded result + + "ffi" // U+FB03: LATIN SMALL LIGATURE FFI + ,"ffi", // Folded result + + "ffl" // U+FB04: LATIN SMALL LIGATURE FFL + ,"ffl", // Folded result + + "fi" // U+FB01: LATIN SMALL LIGATURE FI + ,"fi", // Folded result + + "fl" // U+FB02: LATIN SMALL LIGATURE FL + ,"fl", // Folded result + + "Ĝ" // U+011C: LATIN CAPITAL LETTER G WITH CIRCUMFLEX + + "Ğ" // U+011E: LATIN CAPITAL LETTER G WITH BREVE + + "Ġ" // U+0120: LATIN CAPITAL LETTER G WITH DOT ABOVE + + "Ģ" // U+0122: LATIN CAPITAL LETTER G WITH CEDILLA + + "Ɠ" // U+0193: LATIN CAPITAL LETTER G WITH HOOK + + "Ǥ" // U+01E4: LATIN CAPITAL LETTER G WITH STROKE + + "ǥ" // U+01E5: LATIN SMALL LETTER G WITH STROKE + + "Ǧ" // U+01E6: LATIN CAPITAL LETTER G WITH CARON + + "ǧ" // U+01E7: LATIN SMALL LETTER G WITH CARON + + "Ǵ" // U+01F4: LATIN CAPITAL LETTER G WITH ACUTE + + "ɢ" // U+0262: LATIN LETTER SMALL CAPITAL G + + "ʛ" // U+029B: LATIN LETTER SMALL CAPITAL G WITH HOOK + + "Ḡ" // U+1E20: LATIN CAPITAL LETTER G WITH MACRON + + "Ⓖ" // U+24BC: CIRCLED LATIN CAPITAL LETTER G + + "Ᵹ" // U+A77D: LATIN CAPITAL LETTER INSULAR G + + "Ꝿ" // U+A77E: LATIN CAPITAL LETTER TURNED INSULAR G + + "G" // U+FF27: FULLWIDTH LATIN CAPITAL LETTER G + ,"G", // Folded result + + "ĝ" // U+011D: LATIN SMALL LETTER G WITH CIRCUMFLEX + + "ğ" // U+011F: LATIN SMALL LETTER G WITH BREVE + + "ġ" // U+0121: LATIN SMALL LETTER G WITH DOT ABOVE + + "ģ" // U+0123: LATIN SMALL LETTER G WITH CEDILLA + + 
"ǵ" // U+01F5: LATIN SMALL LETTER G WITH ACUTE + + "ɠ" // U+0260: LATIN SMALL LETTER G WITH HOOK + + "ɡ" // U+0261: LATIN SMALL LETTER SCRIPT G + + "ᵷ" // U+1D77: LATIN SMALL LETTER TURNED G + + "ᵹ" // U+1D79: LATIN SMALL LETTER INSULAR G + + "ᶃ" // U+1D83: LATIN SMALL LETTER G WITH PALATAL HOOK + + "ḡ" // U+1E21: LATIN SMALL LETTER G WITH MACRON + + "ⓖ" // U+24D6: CIRCLED LATIN SMALL LETTER G + + "ꝿ" // U+A77F: LATIN SMALL LETTER TURNED INSULAR G + + "g" // U+FF47: FULLWIDTH LATIN SMALL LETTER G + ,"g", // Folded result + + "⒢" // U+24A2: PARENTHESIZED LATIN SMALL LETTER G + ,"(g)", // Folded result + + "Ĥ" // U+0124: LATIN CAPITAL LETTER H WITH CIRCUMFLEX + + "Ħ" // U+0126: LATIN CAPITAL LETTER H WITH STROKE + + "Ȟ" // U+021E: LATIN CAPITAL LETTER H WITH CARON + + "ʜ" // U+029C: LATIN LETTER SMALL CAPITAL H + + "Ḣ" // U+1E22: LATIN CAPITAL LETTER H WITH DOT ABOVE + + "Ḥ" // U+1E24: LATIN CAPITAL LETTER H WITH DOT BELOW + + "Ḧ" // U+1E26: LATIN CAPITAL LETTER H WITH DIAERESIS + + "Ḩ" // U+1E28: LATIN CAPITAL LETTER H WITH CEDILLA + + "Ḫ" // U+1E2A: LATIN CAPITAL LETTER H WITH BREVE BELOW + + "Ⓗ" // U+24BD: CIRCLED LATIN CAPITAL LETTER H + + "Ⱨ" // U+2C67: LATIN CAPITAL LETTER H WITH DESCENDER + + "Ⱶ" // U+2C75: LATIN CAPITAL LETTER HALF H + + "H" // U+FF28: FULLWIDTH LATIN CAPITAL LETTER H + ,"H", // Folded result + + "ĥ" // U+0125: LATIN SMALL LETTER H WITH CIRCUMFLEX + + "ħ" // U+0127: LATIN SMALL LETTER H WITH STROKE + + "ȟ" // U+021F: LATIN SMALL LETTER H WITH CARON + + "ɥ" // U+0265: LATIN SMALL LETTER TURNED H + + "ɦ" // U+0266: LATIN SMALL LETTER H WITH HOOK + + "ʮ" // U+02AE: LATIN SMALL LETTER TURNED H WITH FISHHOOK + + "ʯ" // U+02AF: LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL + + "ḣ" // U+1E23: LATIN SMALL LETTER H WITH DOT ABOVE + + "ḥ" // U+1E25: LATIN SMALL LETTER H WITH DOT BELOW + + "ḧ" // U+1E27: LATIN SMALL LETTER H WITH DIAERESIS + + "ḩ" // U+1E29: LATIN SMALL LETTER H WITH CEDILLA + + "ḫ" // U+1E2B: LATIN SMALL LETTER H WITH BREVE BELOW + + "ẖ" // U+1E96: LATIN SMALL LETTER H WITH LINE BELOW + + "ⓗ" // U+24D7: CIRCLED LATIN SMALL LETTER H + + "ⱨ" // U+2C68: LATIN SMALL LETTER H WITH DESCENDER + + "ⱶ" // U+2C76: LATIN SMALL LETTER HALF H + + "h" // U+FF48: FULLWIDTH LATIN SMALL LETTER H + ,"h", // Folded result + + "Ƕ" // U+01F6: LATIN CAPITAL LETTER HWAIR + ,"HV", // Folded result + + "⒣" // U+24A3: PARENTHESIZED LATIN SMALL LETTER H + ,"(h)", // Folded result + + "ƕ" // U+0195: LATIN SMALL LETTER HV + ,"hv", // Folded result + + "Ì" // U+00CC: LATIN CAPITAL LETTER I WITH GRAVE + + "Í" // U+00CD: LATIN CAPITAL LETTER I WITH ACUTE + + "Î" // U+00CE: LATIN CAPITAL LETTER I WITH CIRCUMFLEX + + "Ï" // U+00CF: LATIN CAPITAL LETTER I WITH DIAERESIS + + "Ĩ" // U+0128: LATIN CAPITAL LETTER I WITH TILDE + + "Ī" // U+012A: LATIN CAPITAL LETTER I WITH MACRON + + "Ĭ" // U+012C: LATIN CAPITAL LETTER I WITH BREVE + + "Į" // U+012E: LATIN CAPITAL LETTER I WITH OGONEK + + "İ" // U+0130: LATIN CAPITAL LETTER I WITH DOT ABOVE + + "Ɩ" // U+0196: LATIN CAPITAL LETTER IOTA + + "Ɨ" // U+0197: LATIN CAPITAL LETTER I WITH STROKE + + "Ǐ" // U+01CF: LATIN CAPITAL LETTER I WITH CARON + + "Ȉ" // U+0208: LATIN CAPITAL LETTER I WITH DOUBLE GRAVE + + "Ȋ" // U+020A: LATIN CAPITAL LETTER I WITH INVERTED BREVE + + "ɪ" // U+026A: LATIN LETTER SMALL CAPITAL I + + "ᵻ" // U+1D7B: LATIN SMALL CAPITAL LETTER I WITH STROKE + + "Ḭ" // U+1E2C: LATIN CAPITAL LETTER I WITH TILDE BELOW + + "Ḯ" // U+1E2E: LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE + + "Ỉ" // U+1EC8: LATIN CAPITAL LETTER I WITH HOOK 
ABOVE + + "Ị" // U+1ECA: LATIN CAPITAL LETTER I WITH DOT BELOW + + "Ⓘ" // U+24BE: CIRCLED LATIN CAPITAL LETTER I + + "ꟾ" // U+A7FE: LATIN EPIGRAPHIC LETTER I LONGA + + "I" // U+FF29: FULLWIDTH LATIN CAPITAL LETTER I + ,"I", // Folded result + + "ì" // U+00EC: LATIN SMALL LETTER I WITH GRAVE + + "í" // U+00ED: LATIN SMALL LETTER I WITH ACUTE + + "î" // U+00EE: LATIN SMALL LETTER I WITH CIRCUMFLEX + + "ï" // U+00EF: LATIN SMALL LETTER I WITH DIAERESIS + + "ĩ" // U+0129: LATIN SMALL LETTER I WITH TILDE + + "ī" // U+012B: LATIN SMALL LETTER I WITH MACRON + + "ĭ" // U+012D: LATIN SMALL LETTER I WITH BREVE + + "į" // U+012F: LATIN SMALL LETTER I WITH OGONEK + + "ı" // U+0131: LATIN SMALL LETTER DOTLESS I + + "ǐ" // U+01D0: LATIN SMALL LETTER I WITH CARON + + "ȉ" // U+0209: LATIN SMALL LETTER I WITH DOUBLE GRAVE + + "ȋ" // U+020B: LATIN SMALL LETTER I WITH INVERTED BREVE + + "ɨ" // U+0268: LATIN SMALL LETTER I WITH STROKE + + "ᴉ" // U+1D09: LATIN SMALL LETTER TURNED I + + "ᵢ" // U+1D62: LATIN SUBSCRIPT SMALL LETTER I + + "ᵼ" // U+1D7C: LATIN SMALL LETTER IOTA WITH STROKE + + "ᶖ" // U+1D96: LATIN SMALL LETTER I WITH RETROFLEX HOOK + + "ḭ" // U+1E2D: LATIN SMALL LETTER I WITH TILDE BELOW + + "ḯ" // U+1E2F: LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE + + "ỉ" // U+1EC9: LATIN SMALL LETTER I WITH HOOK ABOVE + + "ị" // U+1ECB: LATIN SMALL LETTER I WITH DOT BELOW + + "ⁱ" // U+2071: SUPERSCRIPT LATIN SMALL LETTER I + + "ⓘ" // U+24D8: CIRCLED LATIN SMALL LETTER I + + "i" // U+FF49: FULLWIDTH LATIN SMALL LETTER I + ,"i", // Folded result + + "IJ" // U+0132: LATIN CAPITAL LIGATURE IJ + ,"IJ", // Folded result + + "⒤" // U+24A4: PARENTHESIZED LATIN SMALL LETTER I + ,"(i)", // Folded result + + "ij" // U+0133: LATIN SMALL LIGATURE IJ + ,"ij", // Folded result + + "Ĵ" // U+0134: LATIN CAPITAL LETTER J WITH CIRCUMFLEX + + "Ɉ" // U+0248: LATIN CAPITAL LETTER J WITH STROKE + + "ᴊ" // U+1D0A: LATIN LETTER SMALL CAPITAL J + + "Ⓙ" // U+24BF: CIRCLED LATIN CAPITAL LETTER J + + "J" // U+FF2A: FULLWIDTH LATIN CAPITAL LETTER J + ,"J", // Folded result + + "ĵ" // U+0135: LATIN SMALL LETTER J WITH CIRCUMFLEX + + "ǰ" // U+01F0: LATIN SMALL LETTER J WITH CARON + + "ȷ" // U+0237: LATIN SMALL LETTER DOTLESS J + + "ɉ" // U+0249: LATIN SMALL LETTER J WITH STROKE + + "ɟ" // U+025F: LATIN SMALL LETTER DOTLESS J WITH STROKE + + "ʄ" // U+0284: LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK + + "ʝ" // U+029D: LATIN SMALL LETTER J WITH CROSSED-TAIL + + "ⓙ" // U+24D9: CIRCLED LATIN SMALL LETTER J + + "ⱼ" // U+2C7C: LATIN SUBSCRIPT SMALL LETTER J + + "j" // U+FF4A: FULLWIDTH LATIN SMALL LETTER J + ,"j", // Folded result + + "⒥" // U+24A5: PARENTHESIZED LATIN SMALL LETTER J + ,"(j)", // Folded result + + "Ķ" // U+0136: LATIN CAPITAL LETTER K WITH CEDILLA + + "Ƙ" // U+0198: LATIN CAPITAL LETTER K WITH HOOK + + "Ǩ" // U+01E8: LATIN CAPITAL LETTER K WITH CARON + + "ᴋ" // U+1D0B: LATIN LETTER SMALL CAPITAL K + + "Ḱ" // U+1E30: LATIN CAPITAL LETTER K WITH ACUTE + + "Ḳ" // U+1E32: LATIN CAPITAL LETTER K WITH DOT BELOW + + "Ḵ" // U+1E34: LATIN CAPITAL LETTER K WITH LINE BELOW + + "Ⓚ" // U+24C0: CIRCLED LATIN CAPITAL LETTER K + + "Ⱪ" // U+2C69: LATIN CAPITAL LETTER K WITH DESCENDER + + "Ꝁ" // U+A740: LATIN CAPITAL LETTER K WITH STROKE + + "Ꝃ" // U+A742: LATIN CAPITAL LETTER K WITH DIAGONAL STROKE + + "Ꝅ" // U+A744: LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE + + "K" // U+FF2B: FULLWIDTH LATIN CAPITAL LETTER K + ,"K", // Folded result + + "ķ" // U+0137: LATIN SMALL LETTER K WITH CEDILLA + + "ƙ" // U+0199: LATIN SMALL LETTER K 
WITH HOOK + + "ǩ" // U+01E9: LATIN SMALL LETTER K WITH CARON + + "ʞ" // U+029E: LATIN SMALL LETTER TURNED K + + "ᶄ" // U+1D84: LATIN SMALL LETTER K WITH PALATAL HOOK + + "ḱ" // U+1E31: LATIN SMALL LETTER K WITH ACUTE + + "ḳ" // U+1E33: LATIN SMALL LETTER K WITH DOT BELOW + + "ḵ" // U+1E35: LATIN SMALL LETTER K WITH LINE BELOW + + "ⓚ" // U+24DA: CIRCLED LATIN SMALL LETTER K + + "ⱪ" // U+2C6A: LATIN SMALL LETTER K WITH DESCENDER + + "ꝁ" // U+A741: LATIN SMALL LETTER K WITH STROKE + + "ꝃ" // U+A743: LATIN SMALL LETTER K WITH DIAGONAL STROKE + + "ꝅ" // U+A745: LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE + + "k" // U+FF4B: FULLWIDTH LATIN SMALL LETTER K + ,"k", // Folded result + + "⒦" // U+24A6: PARENTHESIZED LATIN SMALL LETTER K + ,"(k)", // Folded result + + "Ĺ" // U+0139: LATIN CAPITAL LETTER L WITH ACUTE + + "Ļ" // U+013B: LATIN CAPITAL LETTER L WITH CEDILLA + + "Ľ" // U+013D: LATIN CAPITAL LETTER L WITH CARON + + "Ŀ" // U+013F: LATIN CAPITAL LETTER L WITH MIDDLE DOT + + "Ł" // U+0141: LATIN CAPITAL LETTER L WITH STROKE + + "Ƚ" // U+023D: LATIN CAPITAL LETTER L WITH BAR + + "ʟ" // U+029F: LATIN LETTER SMALL CAPITAL L + + "ᴌ" // U+1D0C: LATIN LETTER SMALL CAPITAL L WITH STROKE + + "Ḷ" // U+1E36: LATIN CAPITAL LETTER L WITH DOT BELOW + + "Ḹ" // U+1E38: LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON + + "Ḻ" // U+1E3A: LATIN CAPITAL LETTER L WITH LINE BELOW + + "Ḽ" // U+1E3C: LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW + + "Ⓛ" // U+24C1: CIRCLED LATIN CAPITAL LETTER L + + "Ⱡ" // U+2C60: LATIN CAPITAL LETTER L WITH DOUBLE BAR + + "Ɫ" // U+2C62: LATIN CAPITAL LETTER L WITH MIDDLE TILDE + + "Ꝇ" // U+A746: LATIN CAPITAL LETTER BROKEN L + + "Ꝉ" // U+A748: LATIN CAPITAL LETTER L WITH HIGH STROKE + + "Ꞁ" // U+A780: LATIN CAPITAL LETTER TURNED L + + "L" // U+FF2C: FULLWIDTH LATIN CAPITAL LETTER L + ,"L", // Folded result + + "ĺ" // U+013A: LATIN SMALL LETTER L WITH ACUTE + + "ļ" // U+013C: LATIN SMALL LETTER L WITH CEDILLA + + "ľ" // U+013E: LATIN SMALL LETTER L WITH CARON + + "ŀ" // U+0140: LATIN SMALL LETTER L WITH MIDDLE DOT + + "ł" // U+0142: LATIN SMALL LETTER L WITH STROKE + + "ƚ" // U+019A: LATIN SMALL LETTER L WITH BAR + + "ȴ" // U+0234: LATIN SMALL LETTER L WITH CURL + + "ɫ" // U+026B: LATIN SMALL LETTER L WITH MIDDLE TILDE + + "ɬ" // U+026C: LATIN SMALL LETTER L WITH BELT + + "ɭ" // U+026D: LATIN SMALL LETTER L WITH RETROFLEX HOOK + + "ᶅ" // U+1D85: LATIN SMALL LETTER L WITH PALATAL HOOK + + "ḷ" // U+1E37: LATIN SMALL LETTER L WITH DOT BELOW + + "ḹ" // U+1E39: LATIN SMALL LETTER L WITH DOT BELOW AND MACRON + + "ḻ" // U+1E3B: LATIN SMALL LETTER L WITH LINE BELOW + + "ḽ" // U+1E3D: LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW + + "ⓛ" // U+24DB: CIRCLED LATIN SMALL LETTER L + + "ⱡ" // U+2C61: LATIN SMALL LETTER L WITH DOUBLE BAR + + "ꝇ" // U+A747: LATIN SMALL LETTER BROKEN L + + "ꝉ" // U+A749: LATIN SMALL LETTER L WITH HIGH STROKE + + "ꞁ" // U+A781: LATIN SMALL LETTER TURNED L + + "l" // U+FF4C: FULLWIDTH LATIN SMALL LETTER L + ,"l", // Folded result + + "LJ" // U+01C7: LATIN CAPITAL LETTER LJ + ,"LJ", // Folded result + + "Ỻ" // U+1EFA: LATIN CAPITAL LETTER MIDDLE-WELSH LL + ,"LL", // Folded result + + "Lj" // U+01C8: LATIN CAPITAL LETTER L WITH SMALL LETTER J + ,"Lj", // Folded result + + "⒧" // U+24A7: PARENTHESIZED LATIN SMALL LETTER L + ,"(l)", // Folded result + + "lj" // U+01C9: LATIN SMALL LETTER LJ + ,"lj", // Folded result + + "ỻ" // U+1EFB: LATIN SMALL LETTER MIDDLE-WELSH LL + ,"ll", // Folded result + + "ʪ" // U+02AA: LATIN SMALL LETTER LS DIGRAPH + ,"ls", // Folded result 
+ + "ʫ" // U+02AB: LATIN SMALL LETTER LZ DIGRAPH + ,"lz", // Folded result + + "Ɯ" // U+019C: LATIN CAPITAL LETTER TURNED M + + "ᴍ" // U+1D0D: LATIN LETTER SMALL CAPITAL M + + "Ḿ" // U+1E3E: LATIN CAPITAL LETTER M WITH ACUTE + + "Ṁ" // U+1E40: LATIN CAPITAL LETTER M WITH DOT ABOVE + + "Ṃ" // U+1E42: LATIN CAPITAL LETTER M WITH DOT BELOW + + "Ⓜ" // U+24C2: CIRCLED LATIN CAPITAL LETTER M + + "Ɱ" // U+2C6E: LATIN CAPITAL LETTER M WITH HOOK + + "ꟽ" // U+A7FD: LATIN EPIGRAPHIC LETTER INVERTED M + + "ꟿ" // U+A7FF: LATIN EPIGRAPHIC LETTER ARCHAIC M + + "M" // U+FF2D: FULLWIDTH LATIN CAPITAL LETTER M + ,"M", // Folded result + + "ɯ" // U+026F: LATIN SMALL LETTER TURNED M + + "ɰ" // U+0270: LATIN SMALL LETTER TURNED M WITH LONG LEG + + "ɱ" // U+0271: LATIN SMALL LETTER M WITH HOOK + + "ᵯ" // U+1D6F: LATIN SMALL LETTER M WITH MIDDLE TILDE + + "ᶆ" // U+1D86: LATIN SMALL LETTER M WITH PALATAL HOOK + + "ḿ" // U+1E3F: LATIN SMALL LETTER M WITH ACUTE + + "ṁ" // U+1E41: LATIN SMALL LETTER M WITH DOT ABOVE + + "ṃ" // U+1E43: LATIN SMALL LETTER M WITH DOT BELOW + + "ⓜ" // U+24DC: CIRCLED LATIN SMALL LETTER M + + "m" // U+FF4D: FULLWIDTH LATIN SMALL LETTER M + ,"m", // Folded result + + "⒨" // U+24A8: PARENTHESIZED LATIN SMALL LETTER M + ,"(m)", // Folded result + + "Ñ" // U+00D1: LATIN CAPITAL LETTER N WITH TILDE + + "Ń" // U+0143: LATIN CAPITAL LETTER N WITH ACUTE + + "Ņ" // U+0145: LATIN CAPITAL LETTER N WITH CEDILLA + + "Ň" // U+0147: LATIN CAPITAL LETTER N WITH CARON + + "Ŋ" // U+014A: LATIN CAPITAL LETTER ENG + + "Ɲ" // U+019D: LATIN CAPITAL LETTER N WITH LEFT HOOK + + "Ǹ" // U+01F8: LATIN CAPITAL LETTER N WITH GRAVE + + "Ƞ" // U+0220: LATIN CAPITAL LETTER N WITH LONG RIGHT LEG + + "ɴ" // U+0274: LATIN LETTER SMALL CAPITAL N + + "ᴎ" // U+1D0E: LATIN LETTER SMALL CAPITAL REVERSED N + + "Ṅ" // U+1E44: LATIN CAPITAL LETTER N WITH DOT ABOVE + + "Ṇ" // U+1E46: LATIN CAPITAL LETTER N WITH DOT BELOW + + "Ṉ" // U+1E48: LATIN CAPITAL LETTER N WITH LINE BELOW + + "Ṋ" // U+1E4A: LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW + + "Ⓝ" // U+24C3: CIRCLED LATIN CAPITAL LETTER N + + "N" // U+FF2E: FULLWIDTH LATIN CAPITAL LETTER N + ,"N", // Folded result + + "ñ" // U+00F1: LATIN SMALL LETTER N WITH TILDE + + "ń" // U+0144: LATIN SMALL LETTER N WITH ACUTE + + "ņ" // U+0146: LATIN SMALL LETTER N WITH CEDILLA + + "ň" // U+0148: LATIN SMALL LETTER N WITH CARON + + "ʼn" // U+0149: LATIN SMALL LETTER N PRECEDED BY APOSTROPHE + + "ŋ" // U+014B: LATIN SMALL LETTER ENG + + "ƞ" // U+019E: LATIN SMALL LETTER N WITH LONG RIGHT LEG + + "ǹ" // U+01F9: LATIN SMALL LETTER N WITH GRAVE + + "ȵ" // U+0235: LATIN SMALL LETTER N WITH CURL + + "ɲ" // U+0272: LATIN SMALL LETTER N WITH LEFT HOOK + + "ɳ" // U+0273: LATIN SMALL LETTER N WITH RETROFLEX HOOK + + "ᵰ" // U+1D70: LATIN SMALL LETTER N WITH MIDDLE TILDE + + "ᶇ" // U+1D87: LATIN SMALL LETTER N WITH PALATAL HOOK + + "ṅ" // U+1E45: LATIN SMALL LETTER N WITH DOT ABOVE + + "ṇ" // U+1E47: LATIN SMALL LETTER N WITH DOT BELOW + + "ṉ" // U+1E49: LATIN SMALL LETTER N WITH LINE BELOW + + "ṋ" // U+1E4B: LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW + + "ⁿ" // U+207F: SUPERSCRIPT LATIN SMALL LETTER N + + "ⓝ" // U+24DD: CIRCLED LATIN SMALL LETTER N + + "n" // U+FF4E: FULLWIDTH LATIN SMALL LETTER N + ,"n", // Folded result + + "NJ" // U+01CA: LATIN CAPITAL LETTER NJ + ,"NJ", // Folded result + + "Nj" // U+01CB: LATIN CAPITAL LETTER N WITH SMALL LETTER J + ,"Nj", // Folded result + + "⒩" // U+24A9: PARENTHESIZED LATIN SMALL LETTER N + ,"(n)", // Folded result + + "nj" // U+01CC: LATIN SMALL LETTER NJ + 
,"nj", // Folded result + + "Ò" // U+00D2: LATIN CAPITAL LETTER O WITH GRAVE + + "Ó" // U+00D3: LATIN CAPITAL LETTER O WITH ACUTE + + "Ô" // U+00D4: LATIN CAPITAL LETTER O WITH CIRCUMFLEX + + "Õ" // U+00D5: LATIN CAPITAL LETTER O WITH TILDE + + "Ö" // U+00D6: LATIN CAPITAL LETTER O WITH DIAERESIS + + "Ø" // U+00D8: LATIN CAPITAL LETTER O WITH STROKE + + "Ō" // U+014C: LATIN CAPITAL LETTER O WITH MACRON + + "Ŏ" // U+014E: LATIN CAPITAL LETTER O WITH BREVE + + "Ő" // U+0150: LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + + "Ɔ" // U+0186: LATIN CAPITAL LETTER OPEN O + + "Ɵ" // U+019F: LATIN CAPITAL LETTER O WITH MIDDLE TILDE + + "Ơ" // U+01A0: LATIN CAPITAL LETTER O WITH HORN + + "Ǒ" // U+01D1: LATIN CAPITAL LETTER O WITH CARON + + "Ǫ" // U+01EA: LATIN CAPITAL LETTER O WITH OGONEK + + "Ǭ" // U+01EC: LATIN CAPITAL LETTER O WITH OGONEK AND MACRON + + "Ǿ" // U+01FE: LATIN CAPITAL LETTER O WITH STROKE AND ACUTE + + "Ȍ" // U+020C: LATIN CAPITAL LETTER O WITH DOUBLE GRAVE + + "Ȏ" // U+020E: LATIN CAPITAL LETTER O WITH INVERTED BREVE + + "Ȫ" // U+022A: LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON + + "Ȭ" // U+022C: LATIN CAPITAL LETTER O WITH TILDE AND MACRON + + "Ȯ" // U+022E: LATIN CAPITAL LETTER O WITH DOT ABOVE + + "Ȱ" // U+0230: LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON + + "ᴏ" // U+1D0F: LATIN LETTER SMALL CAPITAL O + + "ᴐ" // U+1D10: LATIN LETTER SMALL CAPITAL OPEN O + + "Ṍ" // U+1E4C: LATIN CAPITAL LETTER O WITH TILDE AND ACUTE + + "Ṏ" // U+1E4E: LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS + + "Ṑ" // U+1E50: LATIN CAPITAL LETTER O WITH MACRON AND GRAVE + + "Ṓ" // U+1E52: LATIN CAPITAL LETTER O WITH MACRON AND ACUTE + + "Ọ" // U+1ECC: LATIN CAPITAL LETTER O WITH DOT BELOW + + "Ỏ" // U+1ECE: LATIN CAPITAL LETTER O WITH HOOK ABOVE + + "Ố" // U+1ED0: LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE + + "Ồ" // U+1ED2: LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE + + "Ổ" // U+1ED4: LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE + + "Ỗ" // U+1ED6: LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE + + "Ộ" // U+1ED8: LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW + + "Ớ" // U+1EDA: LATIN CAPITAL LETTER O WITH HORN AND ACUTE + + "Ờ" // U+1EDC: LATIN CAPITAL LETTER O WITH HORN AND GRAVE + + "Ở" // U+1EDE: LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE + + "Ỡ" // U+1EE0: LATIN CAPITAL LETTER O WITH HORN AND TILDE + + "Ợ" // U+1EE2: LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW + + "Ⓞ" // U+24C4: CIRCLED LATIN CAPITAL LETTER O + + "Ꝋ" // U+A74A: LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY + + "Ꝍ" // U+A74C: LATIN CAPITAL LETTER O WITH LOOP + + "O" // U+FF2F: FULLWIDTH LATIN CAPITAL LETTER O + ,"O", // Folded result + + "ò" // U+00F2: LATIN SMALL LETTER O WITH GRAVE + + "ó" // U+00F3: LATIN SMALL LETTER O WITH ACUTE + + "ô" // U+00F4: LATIN SMALL LETTER O WITH CIRCUMFLEX + + "õ" // U+00F5: LATIN SMALL LETTER O WITH TILDE + + "ö" // U+00F6: LATIN SMALL LETTER O WITH DIAERESIS + + "ø" // U+00F8: LATIN SMALL LETTER O WITH STROKE + + "ō" // U+014D: LATIN SMALL LETTER O WITH MACRON + + "ŏ" // U+014F: LATIN SMALL LETTER O WITH BREVE + + "ő" // U+0151: LATIN SMALL LETTER O WITH DOUBLE ACUTE + + "ơ" // U+01A1: LATIN SMALL LETTER O WITH HORN + + "ǒ" // U+01D2: LATIN SMALL LETTER O WITH CARON + + "ǫ" // U+01EB: LATIN SMALL LETTER O WITH OGONEK + + "ǭ" // U+01ED: LATIN SMALL LETTER O WITH OGONEK AND MACRON + + "ǿ" // U+01FF: LATIN SMALL LETTER O WITH STROKE AND ACUTE + + "ȍ" // U+020D: LATIN SMALL LETTER O WITH DOUBLE GRAVE + + "ȏ" // U+020F: LATIN SMALL LETTER O WITH INVERTED BREVE 
+ + "ȫ" // U+022B: LATIN SMALL LETTER O WITH DIAERESIS AND MACRON + + "ȭ" // U+022D: LATIN SMALL LETTER O WITH TILDE AND MACRON + + "ȯ" // U+022F: LATIN SMALL LETTER O WITH DOT ABOVE + + "ȱ" // U+0231: LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON + + "ɔ" // U+0254: LATIN SMALL LETTER OPEN O + + "ɵ" // U+0275: LATIN SMALL LETTER BARRED O + + "ᴖ" // U+1D16: LATIN SMALL LETTER TOP HALF O + + "ᴗ" // U+1D17: LATIN SMALL LETTER BOTTOM HALF O + + "ᶗ" // U+1D97: LATIN SMALL LETTER OPEN O WITH RETROFLEX HOOK + + "ṍ" // U+1E4D: LATIN SMALL LETTER O WITH TILDE AND ACUTE + + "ṏ" // U+1E4F: LATIN SMALL LETTER O WITH TILDE AND DIAERESIS + + "ṑ" // U+1E51: LATIN SMALL LETTER O WITH MACRON AND GRAVE + + "ṓ" // U+1E53: LATIN SMALL LETTER O WITH MACRON AND ACUTE + + "ọ" // U+1ECD: LATIN SMALL LETTER O WITH DOT BELOW + + "ỏ" // U+1ECF: LATIN SMALL LETTER O WITH HOOK ABOVE + + "ố" // U+1ED1: LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE + + "ồ" // U+1ED3: LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE + + "ổ" // U+1ED5: LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE + + "ỗ" // U+1ED7: LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE + + "ộ" // U+1ED9: LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW + + "ớ" // U+1EDB: LATIN SMALL LETTER O WITH HORN AND ACUTE + + "ờ" // U+1EDD: LATIN SMALL LETTER O WITH HORN AND GRAVE + + "ở" // U+1EDF: LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE + + "ỡ" // U+1EE1: LATIN SMALL LETTER O WITH HORN AND TILDE + + "ợ" // U+1EE3: LATIN SMALL LETTER O WITH HORN AND DOT BELOW + + "ₒ" // U+2092: LATIN SUBSCRIPT SMALL LETTER O + + "ⓞ" // U+24DE: CIRCLED LATIN SMALL LETTER O + + "ⱺ" // U+2C7A: LATIN SMALL LETTER O WITH LOW RING INSIDE + + "ꝋ" // U+A74B: LATIN SMALL LETTER O WITH LONG STROKE OVERLAY + + "ꝍ" // U+A74D: LATIN SMALL LETTER O WITH LOOP + + "o" // U+FF4F: FULLWIDTH LATIN SMALL LETTER O + ,"o", // Folded result + + "Œ" // U+0152: LATIN CAPITAL LIGATURE OE + + "ɶ" // U+0276: LATIN LETTER SMALL CAPITAL OE + ,"OE", // Folded result + + "Ꝏ" // U+A74E: LATIN CAPITAL LETTER OO + ,"OO", // Folded result + + "Ȣ" // U+0222: LATIN CAPITAL LETTER OU + + "ᴕ" // U+1D15: LATIN LETTER SMALL CAPITAL OU + ,"OU", // Folded result + + "⒪" // U+24AA: PARENTHESIZED LATIN SMALL LETTER O + ,"(o)", // Folded result + + "œ" // U+0153: LATIN SMALL LIGATURE OE + + "ᴔ" // U+1D14: LATIN SMALL LETTER TURNED OE + ,"oe", // Folded result + + "ꝏ" // U+A74F: LATIN SMALL LETTER OO + ,"oo", // Folded result + + "ȣ" // U+0223: LATIN SMALL LETTER OU + ,"ou", // Folded result + + "Ƥ" // U+01A4: LATIN CAPITAL LETTER P WITH HOOK + + "ᴘ" // U+1D18: LATIN LETTER SMALL CAPITAL P + + "Ṕ" // U+1E54: LATIN CAPITAL LETTER P WITH ACUTE + + "Ṗ" // U+1E56: LATIN CAPITAL LETTER P WITH DOT ABOVE + + "Ⓟ" // U+24C5: CIRCLED LATIN CAPITAL LETTER P + + "Ᵽ" // U+2C63: LATIN CAPITAL LETTER P WITH STROKE + + "Ꝑ" // U+A750: LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER + + "Ꝓ" // U+A752: LATIN CAPITAL LETTER P WITH FLOURISH + + "Ꝕ" // U+A754: LATIN CAPITAL LETTER P WITH SQUIRREL TAIL + + "P" // U+FF30: FULLWIDTH LATIN CAPITAL LETTER P + ,"P", // Folded result + + "ƥ" // U+01A5: LATIN SMALL LETTER P WITH HOOK + + "ᵱ" // U+1D71: LATIN SMALL LETTER P WITH MIDDLE TILDE + + "ᵽ" // U+1D7D: LATIN SMALL LETTER P WITH STROKE + + "ᶈ" // U+1D88: LATIN SMALL LETTER P WITH PALATAL HOOK + + "ṕ" // U+1E55: LATIN SMALL LETTER P WITH ACUTE + + "ṗ" // U+1E57: LATIN SMALL LETTER P WITH DOT ABOVE + + "ⓟ" // U+24DF: CIRCLED LATIN SMALL LETTER P + + "ꝑ" // U+A751: LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER + + "ꝓ" // U+A753: LATIN 
SMALL LETTER P WITH FLOURISH + + "ꝕ" // U+A755: LATIN SMALL LETTER P WITH SQUIRREL TAIL + + "ꟼ" // U+A7FC: LATIN EPIGRAPHIC LETTER REVERSED P + + "p" // U+FF50: FULLWIDTH LATIN SMALL LETTER P + ,"p", // Folded result + + "⒫" // U+24AB: PARENTHESIZED LATIN SMALL LETTER P + ,"(p)", // Folded result + + "Ɋ" // U+024A: LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL + + "Ⓠ" // U+24C6: CIRCLED LATIN CAPITAL LETTER Q + + "Ꝗ" // U+A756: LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER + + "Ꝙ" // U+A758: LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE + + "Q" // U+FF31: FULLWIDTH LATIN CAPITAL LETTER Q + ,"Q", // Folded result + + "ĸ" // U+0138: LATIN SMALL LETTER KRA + + "ɋ" // U+024B: LATIN SMALL LETTER Q WITH HOOK TAIL + + "ʠ" // U+02A0: LATIN SMALL LETTER Q WITH HOOK + + "ⓠ" // U+24E0: CIRCLED LATIN SMALL LETTER Q + + "ꝗ" // U+A757: LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER + + "ꝙ" // U+A759: LATIN SMALL LETTER Q WITH DIAGONAL STROKE + + "q" // U+FF51: FULLWIDTH LATIN SMALL LETTER Q + ,"q", // Folded result + + "⒬" // U+24AC: PARENTHESIZED LATIN SMALL LETTER Q + ,"(q)", // Folded result + + "ȹ" // U+0239: LATIN SMALL LETTER QP DIGRAPH + ,"qp", // Folded result + + "Ŕ" // U+0154: LATIN CAPITAL LETTER R WITH ACUTE + + "Ŗ" // U+0156: LATIN CAPITAL LETTER R WITH CEDILLA + + "Ř" // U+0158: LATIN CAPITAL LETTER R WITH CARON + + "Ȑ" // U+0210: LATIN CAPITAL LETTER R WITH DOUBLE GRAVE + + "Ȓ" // U+0212: LATIN CAPITAL LETTER R WITH INVERTED BREVE + + "Ɍ" // U+024C: LATIN CAPITAL LETTER R WITH STROKE + + "ʀ" // U+0280: LATIN LETTER SMALL CAPITAL R + + "ʁ" // U+0281: LATIN LETTER SMALL CAPITAL INVERTED R + + "ᴙ" // U+1D19: LATIN LETTER SMALL CAPITAL REVERSED R + + "ᴚ" // U+1D1A: LATIN LETTER SMALL CAPITAL TURNED R + + "Ṙ" // U+1E58: LATIN CAPITAL LETTER R WITH DOT ABOVE + + "Ṛ" // U+1E5A: LATIN CAPITAL LETTER R WITH DOT BELOW + + "Ṝ" // U+1E5C: LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON + + "Ṟ" // U+1E5E: LATIN CAPITAL LETTER R WITH LINE BELOW + + "Ⓡ" // U+24C7: CIRCLED LATIN CAPITAL LETTER R + + "Ɽ" // U+2C64: LATIN CAPITAL LETTER R WITH TAIL + + "Ꝛ" // U+A75A: LATIN CAPITAL LETTER R ROTUNDA + + "Ꞃ" // U+A782: LATIN CAPITAL LETTER INSULAR R + + "R" // U+FF32: FULLWIDTH LATIN CAPITAL LETTER R + ,"R", // Folded result + + "ŕ" // U+0155: LATIN SMALL LETTER R WITH ACUTE + + "ŗ" // U+0157: LATIN SMALL LETTER R WITH CEDILLA + + "ř" // U+0159: LATIN SMALL LETTER R WITH CARON + + "ȑ" // U+0211: LATIN SMALL LETTER R WITH DOUBLE GRAVE + + "ȓ" // U+0213: LATIN SMALL LETTER R WITH INVERTED BREVE + + "ɍ" // U+024D: LATIN SMALL LETTER R WITH STROKE + + "ɼ" // U+027C: LATIN SMALL LETTER R WITH LONG LEG + + "ɽ" // U+027D: LATIN SMALL LETTER R WITH TAIL + + "ɾ" // U+027E: LATIN SMALL LETTER R WITH FISHHOOK + + "ɿ" // U+027F: LATIN SMALL LETTER REVERSED R WITH FISHHOOK + + "ᵣ" // U+1D63: LATIN SUBSCRIPT SMALL LETTER R + + "ᵲ" // U+1D72: LATIN SMALL LETTER R WITH MIDDLE TILDE + + "ᵳ" // U+1D73: LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE + + "ᶉ" // U+1D89: LATIN SMALL LETTER R WITH PALATAL HOOK + + "ṙ" // U+1E59: LATIN SMALL LETTER R WITH DOT ABOVE + + "ṛ" // U+1E5B: LATIN SMALL LETTER R WITH DOT BELOW + + "ṝ" // U+1E5D: LATIN SMALL LETTER R WITH DOT BELOW AND MACRON + + "ṟ" // U+1E5F: LATIN SMALL LETTER R WITH LINE BELOW + + "ⓡ" // U+24E1: CIRCLED LATIN SMALL LETTER R + + "ꝛ" // U+A75B: LATIN SMALL LETTER R ROTUNDA + + "ꞃ" // U+A783: LATIN SMALL LETTER INSULAR R + + "r" // U+FF52: FULLWIDTH LATIN SMALL LETTER R + ,"r", // Folded result + + "⒭" // U+24AD: PARENTHESIZED LATIN SMALL LETTER R + ,"(r)", // 
Folded result + + "Ś" // U+015A: LATIN CAPITAL LETTER S WITH ACUTE + + "Ŝ" // U+015C: LATIN CAPITAL LETTER S WITH CIRCUMFLEX + + "Ş" // U+015E: LATIN CAPITAL LETTER S WITH CEDILLA + + "Š" // U+0160: LATIN CAPITAL LETTER S WITH CARON + + "Ș" // U+0218: LATIN CAPITAL LETTER S WITH COMMA BELOW + + "Ṡ" // U+1E60: LATIN CAPITAL LETTER S WITH DOT ABOVE + + "Ṣ" // U+1E62: LATIN CAPITAL LETTER S WITH DOT BELOW + + "Ṥ" // U+1E64: LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE + + "Ṧ" // U+1E66: LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE + + "Ṩ" // U+1E68: LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE + + "Ⓢ" // U+24C8: CIRCLED LATIN CAPITAL LETTER S + + "ꜱ" // U+A731: LATIN LETTER SMALL CAPITAL S + + "ꞅ" // U+A785: LATIN SMALL LETTER INSULAR S + + "S" // U+FF33: FULLWIDTH LATIN CAPITAL LETTER S + ,"S", // Folded result + + "ś" // U+015B: LATIN SMALL LETTER S WITH ACUTE + + "ŝ" // U+015D: LATIN SMALL LETTER S WITH CIRCUMFLEX + + "ş" // U+015F: LATIN SMALL LETTER S WITH CEDILLA + + "š" // U+0161: LATIN SMALL LETTER S WITH CARON + + "ſ" // U+017F: LATIN SMALL LETTER LONG S + + "ș" // U+0219: LATIN SMALL LETTER S WITH COMMA BELOW + + "ȿ" // U+023F: LATIN SMALL LETTER S WITH SWASH TAIL + + "ʂ" // U+0282: LATIN SMALL LETTER S WITH HOOK + + "ᵴ" // U+1D74: LATIN SMALL LETTER S WITH MIDDLE TILDE + + "ᶊ" // U+1D8A: LATIN SMALL LETTER S WITH PALATAL HOOK + + "ṡ" // U+1E61: LATIN SMALL LETTER S WITH DOT ABOVE + + "ṣ" // U+1E63: LATIN SMALL LETTER S WITH DOT BELOW + + "ṥ" // U+1E65: LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE + + "ṧ" // U+1E67: LATIN SMALL LETTER S WITH CARON AND DOT ABOVE + + "ṩ" // U+1E69: LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE + + "ẜ" // U+1E9C: LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE + + "ẝ" // U+1E9D: LATIN SMALL LETTER LONG S WITH HIGH STROKE + + "ⓢ" // U+24E2: CIRCLED LATIN SMALL LETTER S + + "Ꞅ" // U+A784: LATIN CAPITAL LETTER INSULAR S + + "s" // U+FF53: FULLWIDTH LATIN SMALL LETTER S + ,"s", // Folded result + + "ẞ" // U+1E9E: LATIN CAPITAL LETTER SHARP S + ,"SS", // Folded result + + "⒮" // U+24AE: PARENTHESIZED LATIN SMALL LETTER S + ,"(s)", // Folded result + + "ß" // U+00DF: LATIN SMALL LETTER SHARP S + ,"ss", // Folded result + + "st" // U+FB06: LATIN SMALL LIGATURE ST + ,"st", // Folded result + + "Ţ" // U+0162: LATIN CAPITAL LETTER T WITH CEDILLA + + "Ť" // U+0164: LATIN CAPITAL LETTER T WITH CARON + + "Ŧ" // U+0166: LATIN CAPITAL LETTER T WITH STROKE + + "Ƭ" // U+01AC: LATIN CAPITAL LETTER T WITH HOOK + + "Ʈ" // U+01AE: LATIN CAPITAL LETTER T WITH RETROFLEX HOOK + + "Ț" // U+021A: LATIN CAPITAL LETTER T WITH COMMA BELOW + + "Ⱦ" // U+023E: LATIN CAPITAL LETTER T WITH DIAGONAL STROKE + + "ᴛ" // U+1D1B: LATIN LETTER SMALL CAPITAL T + + "Ṫ" // U+1E6A: LATIN CAPITAL LETTER T WITH DOT ABOVE + + "Ṭ" // U+1E6C: LATIN CAPITAL LETTER T WITH DOT BELOW + + "Ṯ" // U+1E6E: LATIN CAPITAL LETTER T WITH LINE BELOW + + "Ṱ" // U+1E70: LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW + + "Ⓣ" // U+24C9: CIRCLED LATIN CAPITAL LETTER T + + "Ꞇ" // U+A786: LATIN CAPITAL LETTER INSULAR T + + "T" // U+FF34: FULLWIDTH LATIN CAPITAL LETTER T + ,"T", // Folded result + + "ţ" // U+0163: LATIN SMALL LETTER T WITH CEDILLA + + "ť" // U+0165: LATIN SMALL LETTER T WITH CARON + + "ŧ" // U+0167: LATIN SMALL LETTER T WITH STROKE + + "ƫ" // U+01AB: LATIN SMALL LETTER T WITH PALATAL HOOK + + "ƭ" // U+01AD: LATIN SMALL LETTER T WITH HOOK + + "ț" // U+021B: LATIN SMALL LETTER T WITH COMMA BELOW + + "ȶ" // U+0236: LATIN SMALL LETTER T WITH CURL + + "ʇ" // U+0287: LATIN SMALL LETTER TURNED T + 
+ "ʈ" // U+0288: LATIN SMALL LETTER T WITH RETROFLEX HOOK + + "ᵵ" // U+1D75: LATIN SMALL LETTER T WITH MIDDLE TILDE + + "ṫ" // U+1E6B: LATIN SMALL LETTER T WITH DOT ABOVE + + "ṭ" // U+1E6D: LATIN SMALL LETTER T WITH DOT BELOW + + "ṯ" // U+1E6F: LATIN SMALL LETTER T WITH LINE BELOW + + "ṱ" // U+1E71: LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW + + "ẗ" // U+1E97: LATIN SMALL LETTER T WITH DIAERESIS + + "ⓣ" // U+24E3: CIRCLED LATIN SMALL LETTER T + + "ⱦ" // U+2C66: LATIN SMALL LETTER T WITH DIAGONAL STROKE + + "t" // U+FF54: FULLWIDTH LATIN SMALL LETTER T + ,"t", // Folded result + + "Þ" // U+00DE: LATIN CAPITAL LETTER THORN + + "Ꝧ" // U+A766: LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER + ,"TH", // Folded result + + "Ꜩ" // U+A728: LATIN CAPITAL LETTER TZ + ,"TZ", // Folded result + + "⒯" // U+24AF: PARENTHESIZED LATIN SMALL LETTER T + ,"(t)", // Folded result + + "ʨ" // U+02A8: LATIN SMALL LETTER TC DIGRAPH WITH CURL + ,"tc", // Folded result + + "þ" // U+00FE: LATIN SMALL LETTER THORN + + "ᵺ" // U+1D7A: LATIN SMALL LETTER TH WITH STRIKETHROUGH + + "ꝧ" // U+A767: LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER + ,"th", // Folded result + + "ʦ" // U+02A6: LATIN SMALL LETTER TS DIGRAPH + ,"ts", // Folded result + + "ꜩ" // U+A729: LATIN SMALL LETTER TZ + ,"tz", // Folded result + + "Ù" // U+00D9: LATIN CAPITAL LETTER U WITH GRAVE + + "Ú" // U+00DA: LATIN CAPITAL LETTER U WITH ACUTE + + "Û" // U+00DB: LATIN CAPITAL LETTER U WITH CIRCUMFLEX + + "Ü" // U+00DC: LATIN CAPITAL LETTER U WITH DIAERESIS + + "Ũ" // U+0168: LATIN CAPITAL LETTER U WITH TILDE + + "Ū" // U+016A: LATIN CAPITAL LETTER U WITH MACRON + + "Ŭ" // U+016C: LATIN CAPITAL LETTER U WITH BREVE + + "Ů" // U+016E: LATIN CAPITAL LETTER U WITH RING ABOVE + + "Ű" // U+0170: LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + + "Ų" // U+0172: LATIN CAPITAL LETTER U WITH OGONEK + + "Ư" // U+01AF: LATIN CAPITAL LETTER U WITH HORN + + "Ǔ" // U+01D3: LATIN CAPITAL LETTER U WITH CARON + + "Ǖ" // U+01D5: LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON + + "Ǘ" // U+01D7: LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE + + "Ǚ" // U+01D9: LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON + + "Ǜ" // U+01DB: LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE + + "Ȕ" // U+0214: LATIN CAPITAL LETTER U WITH DOUBLE GRAVE + + "Ȗ" // U+0216: LATIN CAPITAL LETTER U WITH INVERTED BREVE + + "Ʉ" // U+0244: LATIN CAPITAL LETTER U BAR + + "ᴜ" // U+1D1C: LATIN LETTER SMALL CAPITAL U + + "ᵾ" // U+1D7E: LATIN SMALL CAPITAL LETTER U WITH STROKE + + "Ṳ" // U+1E72: LATIN CAPITAL LETTER U WITH DIAERESIS BELOW + + "Ṵ" // U+1E74: LATIN CAPITAL LETTER U WITH TILDE BELOW + + "Ṷ" // U+1E76: LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW + + "Ṹ" // U+1E78: LATIN CAPITAL LETTER U WITH TILDE AND ACUTE + + "Ṻ" // U+1E7A: LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS + + "Ụ" // U+1EE4: LATIN CAPITAL LETTER U WITH DOT BELOW + + "Ủ" // U+1EE6: LATIN CAPITAL LETTER U WITH HOOK ABOVE + + "Ứ" // U+1EE8: LATIN CAPITAL LETTER U WITH HORN AND ACUTE + + "Ừ" // U+1EEA: LATIN CAPITAL LETTER U WITH HORN AND GRAVE + + "Ử" // U+1EEC: LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE + + "Ữ" // U+1EEE: LATIN CAPITAL LETTER U WITH HORN AND TILDE + + "Ự" // U+1EF0: LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW + + "Ⓤ" // U+24CA: CIRCLED LATIN CAPITAL LETTER U + + "U" // U+FF35: FULLWIDTH LATIN CAPITAL LETTER U + ,"U", // Folded result + + "ù" // U+00F9: LATIN SMALL LETTER U WITH GRAVE + + "ú" // U+00FA: LATIN SMALL LETTER U WITH ACUTE + + "û" // U+00FB: LATIN SMALL LETTER U WITH CIRCUMFLEX + + 
"ü" // U+00FC: LATIN SMALL LETTER U WITH DIAERESIS + + "ũ" // U+0169: LATIN SMALL LETTER U WITH TILDE + + "ū" // U+016B: LATIN SMALL LETTER U WITH MACRON + + "ŭ" // U+016D: LATIN SMALL LETTER U WITH BREVE + + "ů" // U+016F: LATIN SMALL LETTER U WITH RING ABOVE + + "ű" // U+0171: LATIN SMALL LETTER U WITH DOUBLE ACUTE + + "ų" // U+0173: LATIN SMALL LETTER U WITH OGONEK + + "ư" // U+01B0: LATIN SMALL LETTER U WITH HORN + + "ǔ" // U+01D4: LATIN SMALL LETTER U WITH CARON + + "ǖ" // U+01D6: LATIN SMALL LETTER U WITH DIAERESIS AND MACRON + + "ǘ" // U+01D8: LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE + + "ǚ" // U+01DA: LATIN SMALL LETTER U WITH DIAERESIS AND CARON + + "ǜ" // U+01DC: LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE + + "ȕ" // U+0215: LATIN SMALL LETTER U WITH DOUBLE GRAVE + + "ȗ" // U+0217: LATIN SMALL LETTER U WITH INVERTED BREVE + + "ʉ" // U+0289: LATIN SMALL LETTER U BAR + + "ᵤ" // U+1D64: LATIN SUBSCRIPT SMALL LETTER U + + "ᶙ" // U+1D99: LATIN SMALL LETTER U WITH RETROFLEX HOOK + + "ṳ" // U+1E73: LATIN SMALL LETTER U WITH DIAERESIS BELOW + + "ṵ" // U+1E75: LATIN SMALL LETTER U WITH TILDE BELOW + + "ṷ" // U+1E77: LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW + + "ṹ" // U+1E79: LATIN SMALL LETTER U WITH TILDE AND ACUTE + + "ṻ" // U+1E7B: LATIN SMALL LETTER U WITH MACRON AND DIAERESIS + + "ụ" // U+1EE5: LATIN SMALL LETTER U WITH DOT BELOW + + "ủ" // U+1EE7: LATIN SMALL LETTER U WITH HOOK ABOVE + + "ứ" // U+1EE9: LATIN SMALL LETTER U WITH HORN AND ACUTE + + "ừ" // U+1EEB: LATIN SMALL LETTER U WITH HORN AND GRAVE + + "ử" // U+1EED: LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE + + "ữ" // U+1EEF: LATIN SMALL LETTER U WITH HORN AND TILDE + + "ự" // U+1EF1: LATIN SMALL LETTER U WITH HORN AND DOT BELOW + + "ⓤ" // U+24E4: CIRCLED LATIN SMALL LETTER U + + "u" // U+FF55: FULLWIDTH LATIN SMALL LETTER U + ,"u", // Folded result + + "⒰" // U+24B0: PARENTHESIZED LATIN SMALL LETTER U + ,"(u)", // Folded result + + "ᵫ" // U+1D6B: LATIN SMALL LETTER UE + ,"ue", // Folded result + + "Ʋ" // U+01B2: LATIN CAPITAL LETTER V WITH HOOK + + "Ʌ" // U+0245: LATIN CAPITAL LETTER TURNED V + + "ᴠ" // U+1D20: LATIN LETTER SMALL CAPITAL V + + "Ṽ" // U+1E7C: LATIN CAPITAL LETTER V WITH TILDE + + "Ṿ" // U+1E7E: LATIN CAPITAL LETTER V WITH DOT BELOW + + "Ỽ" // U+1EFC: LATIN CAPITAL LETTER MIDDLE-WELSH V + + "Ⓥ" // U+24CB: CIRCLED LATIN CAPITAL LETTER V + + "Ꝟ" // U+A75E: LATIN CAPITAL LETTER V WITH DIAGONAL STROKE + + "Ꝩ" // U+A768: LATIN CAPITAL LETTER VEND + + "V" // U+FF36: FULLWIDTH LATIN CAPITAL LETTER V + ,"V", // Folded result + + "ʋ" // U+028B: LATIN SMALL LETTER V WITH HOOK + + "ʌ" // U+028C: LATIN SMALL LETTER TURNED V + + "ᵥ" // U+1D65: LATIN SUBSCRIPT SMALL LETTER V + + "ᶌ" // U+1D8C: LATIN SMALL LETTER V WITH PALATAL HOOK + + "ṽ" // U+1E7D: LATIN SMALL LETTER V WITH TILDE + + "ṿ" // U+1E7F: LATIN SMALL LETTER V WITH DOT BELOW + + "ⓥ" // U+24E5: CIRCLED LATIN SMALL LETTER V + + "ⱱ" // U+2C71: LATIN SMALL LETTER V WITH RIGHT HOOK + + "ⱴ" // U+2C74: LATIN SMALL LETTER V WITH CURL + + "ꝟ" // U+A75F: LATIN SMALL LETTER V WITH DIAGONAL STROKE + + "v" // U+FF56: FULLWIDTH LATIN SMALL LETTER V + ,"v", // Folded result + + "Ꝡ" // U+A760: LATIN CAPITAL LETTER VY + ,"VY", // Folded result + + "⒱" // U+24B1: PARENTHESIZED LATIN SMALL LETTER V + ,"(v)", // Folded result + + "ꝡ" // U+A761: LATIN SMALL LETTER VY + ,"vy", // Folded result + + "Ŵ" // U+0174: LATIN CAPITAL LETTER W WITH CIRCUMFLEX + + "Ƿ" // U+01F7: LATIN CAPITAL LETTER WYNN + + "ᴡ" // U+1D21: LATIN LETTER SMALL CAPITAL W + + "Ẁ" // U+1E80: LATIN CAPITAL 
LETTER W WITH GRAVE + + "Ẃ" // U+1E82: LATIN CAPITAL LETTER W WITH ACUTE + + "Ẅ" // U+1E84: LATIN CAPITAL LETTER W WITH DIAERESIS + + "Ẇ" // U+1E86: LATIN CAPITAL LETTER W WITH DOT ABOVE + + "Ẉ" // U+1E88: LATIN CAPITAL LETTER W WITH DOT BELOW + + "Ⓦ" // U+24CC: CIRCLED LATIN CAPITAL LETTER W + + "Ⱳ" // U+2C72: LATIN CAPITAL LETTER W WITH HOOK + + "W" // U+FF37: FULLWIDTH LATIN CAPITAL LETTER W + ,"W", // Folded result + + "ŵ" // U+0175: LATIN SMALL LETTER W WITH CIRCUMFLEX + + "ƿ" // U+01BF: LATIN LETTER WYNN + + "ʍ" // U+028D: LATIN SMALL LETTER TURNED W + + "ẁ" // U+1E81: LATIN SMALL LETTER W WITH GRAVE + + "ẃ" // U+1E83: LATIN SMALL LETTER W WITH ACUTE + + "ẅ" // U+1E85: LATIN SMALL LETTER W WITH DIAERESIS + + "ẇ" // U+1E87: LATIN SMALL LETTER W WITH DOT ABOVE + + "ẉ" // U+1E89: LATIN SMALL LETTER W WITH DOT BELOW + + "ẘ" // U+1E98: LATIN SMALL LETTER W WITH RING ABOVE + + "ⓦ" // U+24E6: CIRCLED LATIN SMALL LETTER W + + "ⱳ" // U+2C73: LATIN SMALL LETTER W WITH HOOK + + "w" // U+FF57: FULLWIDTH LATIN SMALL LETTER W + ,"w", // Folded result + + "⒲" // U+24B2: PARENTHESIZED LATIN SMALL LETTER W + ,"(w)", // Folded result + + "Ẋ" // U+1E8A: LATIN CAPITAL LETTER X WITH DOT ABOVE + + "Ẍ" // U+1E8C: LATIN CAPITAL LETTER X WITH DIAERESIS + + "Ⓧ" // U+24CD: CIRCLED LATIN CAPITAL LETTER X + + "X" // U+FF38: FULLWIDTH LATIN CAPITAL LETTER X + ,"X", // Folded result + + "ᶍ" // U+1D8D: LATIN SMALL LETTER X WITH PALATAL HOOK + + "ẋ" // U+1E8B: LATIN SMALL LETTER X WITH DOT ABOVE + + "ẍ" // U+1E8D: LATIN SMALL LETTER X WITH DIAERESIS + + "ₓ" // U+2093: LATIN SUBSCRIPT SMALL LETTER X + + "ⓧ" // U+24E7: CIRCLED LATIN SMALL LETTER X + + "x" // U+FF58: FULLWIDTH LATIN SMALL LETTER X + ,"x", // Folded result + + "⒳" // U+24B3: PARENTHESIZED LATIN SMALL LETTER X + ,"(x)", // Folded result + + "Ý" // U+00DD: LATIN CAPITAL LETTER Y WITH ACUTE + + "Ŷ" // U+0176: LATIN CAPITAL LETTER Y WITH CIRCUMFLEX + + "Ÿ" // U+0178: LATIN CAPITAL LETTER Y WITH DIAERESIS + + "Ƴ" // U+01B3: LATIN CAPITAL LETTER Y WITH HOOK + + "Ȳ" // U+0232: LATIN CAPITAL LETTER Y WITH MACRON + + "Ɏ" // U+024E: LATIN CAPITAL LETTER Y WITH STROKE + + "ʏ" // U+028F: LATIN LETTER SMALL CAPITAL Y + + "Ẏ" // U+1E8E: LATIN CAPITAL LETTER Y WITH DOT ABOVE + + "Ỳ" // U+1EF2: LATIN CAPITAL LETTER Y WITH GRAVE + + "Ỵ" // U+1EF4: LATIN CAPITAL LETTER Y WITH DOT BELOW + + "Ỷ" // U+1EF6: LATIN CAPITAL LETTER Y WITH HOOK ABOVE + + "Ỹ" // U+1EF8: LATIN CAPITAL LETTER Y WITH TILDE + + "Ỿ" // U+1EFE: LATIN CAPITAL LETTER Y WITH LOOP + + "Ⓨ" // U+24CE: CIRCLED LATIN CAPITAL LETTER Y + + "Y" // U+FF39: FULLWIDTH LATIN CAPITAL LETTER Y + ,"Y", // Folded result + + "ý" // U+00FD: LATIN SMALL LETTER Y WITH ACUTE + + "ÿ" // U+00FF: LATIN SMALL LETTER Y WITH DIAERESIS + + "ŷ" // U+0177: LATIN SMALL LETTER Y WITH CIRCUMFLEX + + "ƴ" // U+01B4: LATIN SMALL LETTER Y WITH HOOK + + "ȳ" // U+0233: LATIN SMALL LETTER Y WITH MACRON + + "ɏ" // U+024F: LATIN SMALL LETTER Y WITH STROKE + + "ʎ" // U+028E: LATIN SMALL LETTER TURNED Y + + "ẏ" // U+1E8F: LATIN SMALL LETTER Y WITH DOT ABOVE + + "ẙ" // U+1E99: LATIN SMALL LETTER Y WITH RING ABOVE + + "ỳ" // U+1EF3: LATIN SMALL LETTER Y WITH GRAVE + + "ỵ" // U+1EF5: LATIN SMALL LETTER Y WITH DOT BELOW + + "ỷ" // U+1EF7: LATIN SMALL LETTER Y WITH HOOK ABOVE + + "ỹ" // U+1EF9: LATIN SMALL LETTER Y WITH TILDE + + "ỿ" // U+1EFF: LATIN SMALL LETTER Y WITH LOOP + + "ⓨ" // U+24E8: CIRCLED LATIN SMALL LETTER Y + + "y" // U+FF59: FULLWIDTH LATIN SMALL LETTER Y + ,"y", // Folded result + + "⒴" // U+24B4: PARENTHESIZED LATIN SMALL LETTER Y + 
,"(y)", // Folded result + + "Ź" // U+0179: LATIN CAPITAL LETTER Z WITH ACUTE + + "Ż" // U+017B: LATIN CAPITAL LETTER Z WITH DOT ABOVE + + "Ž" // U+017D: LATIN CAPITAL LETTER Z WITH CARON + + "Ƶ" // U+01B5: LATIN CAPITAL LETTER Z WITH STROKE + + "Ȝ" // U+021C: LATIN CAPITAL LETTER YOGH + + "Ȥ" // U+0224: LATIN CAPITAL LETTER Z WITH HOOK + + "ᴢ" // U+1D22: LATIN LETTER SMALL CAPITAL Z + + "Ẑ" // U+1E90: LATIN CAPITAL LETTER Z WITH CIRCUMFLEX + + "Ẓ" // U+1E92: LATIN CAPITAL LETTER Z WITH DOT BELOW + + "Ẕ" // U+1E94: LATIN CAPITAL LETTER Z WITH LINE BELOW + + "Ⓩ" // U+24CF: CIRCLED LATIN CAPITAL LETTER Z + + "Ⱬ" // U+2C6B: LATIN CAPITAL LETTER Z WITH DESCENDER + + "Ꝣ" // U+A762: LATIN CAPITAL LETTER VISIGOTHIC Z + + "Z" // U+FF3A: FULLWIDTH LATIN CAPITAL LETTER Z + ,"Z", // Folded result + + "ź" // U+017A: LATIN SMALL LETTER Z WITH ACUTE + + "ż" // U+017C: LATIN SMALL LETTER Z WITH DOT ABOVE + + "ž" // U+017E: LATIN SMALL LETTER Z WITH CARON + + "ƶ" // U+01B6: LATIN SMALL LETTER Z WITH STROKE + + "ȝ" // U+021D: LATIN SMALL LETTER YOGH + + "ȥ" // U+0225: LATIN SMALL LETTER Z WITH HOOK + + "ɀ" // U+0240: LATIN SMALL LETTER Z WITH SWASH TAIL + + "ʐ" // U+0290: LATIN SMALL LETTER Z WITH RETROFLEX HOOK + + "ʑ" // U+0291: LATIN SMALL LETTER Z WITH CURL + + "ᵶ" // U+1D76: LATIN SMALL LETTER Z WITH MIDDLE TILDE + + "ᶎ" // U+1D8E: LATIN SMALL LETTER Z WITH PALATAL HOOK + + "ẑ" // U+1E91: LATIN SMALL LETTER Z WITH CIRCUMFLEX + + "ẓ" // U+1E93: LATIN SMALL LETTER Z WITH DOT BELOW + + "ẕ" // U+1E95: LATIN SMALL LETTER Z WITH LINE BELOW + + "ⓩ" // U+24E9: CIRCLED LATIN SMALL LETTER Z + + "ⱬ" // U+2C6C: LATIN SMALL LETTER Z WITH DESCENDER + + "ꝣ" // U+A763: LATIN SMALL LETTER VISIGOTHIC Z + + "z" // U+FF5A: FULLWIDTH LATIN SMALL LETTER Z + ,"z", // Folded result + + "⒵" // U+24B5: PARENTHESIZED LATIN SMALL LETTER Z + ,"(z)", // Folded result + + "⁰" // U+2070: SUPERSCRIPT ZERO + + "₀" // U+2080: SUBSCRIPT ZERO + + "⓪" // U+24EA: CIRCLED DIGIT ZERO + + "⓿" // U+24FF: NEGATIVE CIRCLED DIGIT ZERO + + "0" // U+FF10: FULLWIDTH DIGIT ZERO + ,"0", // Folded result + + "¹" // U+00B9: SUPERSCRIPT ONE + + "₁" // U+2081: SUBSCRIPT ONE + + "①" // U+2460: CIRCLED DIGIT ONE + + "⓵" // U+24F5: DOUBLE CIRCLED DIGIT ONE + + "❶" // U+2776: DINGBAT NEGATIVE CIRCLED DIGIT ONE + + "➀" // U+2780: DINGBAT CIRCLED SANS-SERIF DIGIT ONE + + "➊" // U+278A: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE + + "1" // U+FF11: FULLWIDTH DIGIT ONE + ,"1", // Folded result + + "⒈" // U+2488: DIGIT ONE FULL STOP + ,"1.", // Folded result + + "⑴" // U+2474: PARENTHESIZED DIGIT ONE + ,"(1)", // Folded result + + "²" // U+00B2: SUPERSCRIPT TWO + + "₂" // U+2082: SUBSCRIPT TWO + + "②" // U+2461: CIRCLED DIGIT TWO + + "⓶" // U+24F6: DOUBLE CIRCLED DIGIT TWO + + "❷" // U+2777: DINGBAT NEGATIVE CIRCLED DIGIT TWO + + "➁" // U+2781: DINGBAT CIRCLED SANS-SERIF DIGIT TWO + + "➋" // U+278B: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO + + "2" // U+FF12: FULLWIDTH DIGIT TWO + ,"2", // Folded result + + "⒉" // U+2489: DIGIT TWO FULL STOP + ,"2.", // Folded result + + "⑵" // U+2475: PARENTHESIZED DIGIT TWO + ,"(2)", // Folded result + + "³" // U+00B3: SUPERSCRIPT THREE + + "₃" // U+2083: SUBSCRIPT THREE + + "③" // U+2462: CIRCLED DIGIT THREE + + "⓷" // U+24F7: DOUBLE CIRCLED DIGIT THREE + + "❸" // U+2778: DINGBAT NEGATIVE CIRCLED DIGIT THREE + + "➂" // U+2782: DINGBAT CIRCLED SANS-SERIF DIGIT THREE + + "➌" // U+278C: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE + + "3" // U+FF13: FULLWIDTH DIGIT THREE + ,"3", // Folded result + + "⒊" // U+248A: DIGIT 
THREE FULL STOP + ,"3.", // Folded result + + "⑶" // U+2476: PARENTHESIZED DIGIT THREE + ,"(3)", // Folded result + + "⁴" // U+2074: SUPERSCRIPT FOUR + + "₄" // U+2084: SUBSCRIPT FOUR + + "④" // U+2463: CIRCLED DIGIT FOUR + + "⓸" // U+24F8: DOUBLE CIRCLED DIGIT FOUR + + "❹" // U+2779: DINGBAT NEGATIVE CIRCLED DIGIT FOUR + + "➃" // U+2783: DINGBAT CIRCLED SANS-SERIF DIGIT FOUR + + "➍" // U+278D: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR + + "4" // U+FF14: FULLWIDTH DIGIT FOUR + ,"4", // Folded result + + "⒋" // U+248B: DIGIT FOUR FULL STOP + ,"4.", // Folded result + + "⑷" // U+2477: PARENTHESIZED DIGIT FOUR + ,"(4)", // Folded result + + "⁵" // U+2075: SUPERSCRIPT FIVE + + "₅" // U+2085: SUBSCRIPT FIVE + + "⑤" // U+2464: CIRCLED DIGIT FIVE + + "⓹" // U+24F9: DOUBLE CIRCLED DIGIT FIVE + + "❺" // U+277A: DINGBAT NEGATIVE CIRCLED DIGIT FIVE + + "➄" // U+2784: DINGBAT CIRCLED SANS-SERIF DIGIT FIVE + + "➎" // U+278E: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE + + "5" // U+FF15: FULLWIDTH DIGIT FIVE + ,"5", // Folded result + + "⒌" // U+248C: DIGIT FIVE FULL STOP + ,"5.", // Folded result + + "⑸" // U+2478: PARENTHESIZED DIGIT FIVE + ,"(5)", // Folded result + + "⁶" // U+2076: SUPERSCRIPT SIX + + "₆" // U+2086: SUBSCRIPT SIX + + "⑥" // U+2465: CIRCLED DIGIT SIX + + "⓺" // U+24FA: DOUBLE CIRCLED DIGIT SIX + + "❻" // U+277B: DINGBAT NEGATIVE CIRCLED DIGIT SIX + + "➅" // U+2785: DINGBAT CIRCLED SANS-SERIF DIGIT SIX + + "➏" // U+278F: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX + + "6" // U+FF16: FULLWIDTH DIGIT SIX + ,"6", // Folded result + + "⒍" // U+248D: DIGIT SIX FULL STOP + ,"6.", // Folded result + + "⑹" // U+2479: PARENTHESIZED DIGIT SIX + ,"(6)", // Folded result + + "⁷" // U+2077: SUPERSCRIPT SEVEN + + "₇" // U+2087: SUBSCRIPT SEVEN + + "⑦" // U+2466: CIRCLED DIGIT SEVEN + + "⓻" // U+24FB: DOUBLE CIRCLED DIGIT SEVEN + + "❼" // U+277C: DINGBAT NEGATIVE CIRCLED DIGIT SEVEN + + "➆" // U+2786: DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN + + "➐" // U+2790: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SEVEN + + "7" // U+FF17: FULLWIDTH DIGIT SEVEN + ,"7", // Folded result + + "⒎" // U+248E: DIGIT SEVEN FULL STOP + ,"7.", // Folded result + + "⑺" // U+247A: PARENTHESIZED DIGIT SEVEN + ,"(7)", // Folded result + + "⁸" // U+2078: SUPERSCRIPT EIGHT + + "₈" // U+2088: SUBSCRIPT EIGHT + + "⑧" // U+2467: CIRCLED DIGIT EIGHT + + "⓼" // U+24FC: DOUBLE CIRCLED DIGIT EIGHT + + "❽" // U+277D: DINGBAT NEGATIVE CIRCLED DIGIT EIGHT + + "➇" // U+2787: DINGBAT CIRCLED SANS-SERIF DIGIT EIGHT + + "➑" // U+2791: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT + + "8" // U+FF18: FULLWIDTH DIGIT EIGHT + ,"8", // Folded result + + "⒏" // U+248F: DIGIT EIGHT FULL STOP + ,"8.", // Folded result + + "⑻" // U+247B: PARENTHESIZED DIGIT EIGHT + ,"(8)", // Folded result + + "⁹" // U+2079: SUPERSCRIPT NINE + + "₉" // U+2089: SUBSCRIPT NINE + + "⑨" // U+2468: CIRCLED DIGIT NINE + + "⓽" // U+24FD: DOUBLE CIRCLED DIGIT NINE + + "❾" // U+277E: DINGBAT NEGATIVE CIRCLED DIGIT NINE + + "➈" // U+2788: DINGBAT CIRCLED SANS-SERIF DIGIT NINE + + "➒" // U+2792: DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE + + "9" // U+FF19: FULLWIDTH DIGIT NINE + ,"9", // Folded result + + "⒐" // U+2490: DIGIT NINE FULL STOP + ,"9.", // Folded result + + "⑼" // U+247C: PARENTHESIZED DIGIT NINE + ,"(9)", // Folded result + + "⑩" // U+2469: CIRCLED NUMBER TEN + + "⓾" // U+24FE: DOUBLE CIRCLED NUMBER TEN + + "❿" // U+277F: DINGBAT NEGATIVE CIRCLED NUMBER TEN + + "➉" // U+2789: DINGBAT CIRCLED SANS-SERIF NUMBER TEN + + "➓" // U+2793: DINGBAT NEGATIVE 
CIRCLED SANS-SERIF NUMBER TEN + ,"10", // Folded result + + "⒑" // U+2491: NUMBER TEN FULL STOP + ,"10.", // Folded result + + "⑽" // U+247D: PARENTHESIZED NUMBER TEN + ,"(10)", // Folded result + + "⑪" // U+246A: CIRCLED NUMBER ELEVEN + + "⓫" // U+24EB: NEGATIVE CIRCLED NUMBER ELEVEN + ,"11", // Folded result + + "⒒" // U+2492: NUMBER ELEVEN FULL STOP + ,"11.", // Folded result + + "⑾" // U+247E: PARENTHESIZED NUMBER ELEVEN + ,"(11)", // Folded result + + "⑫" // U+246B: CIRCLED NUMBER TWELVE + + "⓬" // U+24EC: NEGATIVE CIRCLED NUMBER TWELVE + ,"12", // Folded result + + "⒓" // U+2493: NUMBER TWELVE FULL STOP + ,"12.", // Folded result + + "⑿" // U+247F: PARENTHESIZED NUMBER TWELVE + ,"(12)", // Folded result + + "⑬" // U+246C: CIRCLED NUMBER THIRTEEN + + "⓭" // U+24ED: NEGATIVE CIRCLED NUMBER THIRTEEN + ,"13", // Folded result + + "⒔" // U+2494: NUMBER THIRTEEN FULL STOP + ,"13.", // Folded result + + "⒀" // U+2480: PARENTHESIZED NUMBER THIRTEEN + ,"(13)", // Folded result + + "⑭" // U+246D: CIRCLED NUMBER FOURTEEN + + "⓮" // U+24EE: NEGATIVE CIRCLED NUMBER FOURTEEN + ,"14", // Folded result + + "⒕" // U+2495: NUMBER FOURTEEN FULL STOP + ,"14.", // Folded result + + "⒁" // U+2481: PARENTHESIZED NUMBER FOURTEEN + ,"(14)", // Folded result + + "⑮" // U+246E: CIRCLED NUMBER FIFTEEN + + "⓯" // U+24EF: NEGATIVE CIRCLED NUMBER FIFTEEN + ,"15", // Folded result + + "⒖" // U+2496: NUMBER FIFTEEN FULL STOP + ,"15.", // Folded result + + "⒂" // U+2482: PARENTHESIZED NUMBER FIFTEEN + ,"(15)", // Folded result + + "⑯" // U+246F: CIRCLED NUMBER SIXTEEN + + "⓰" // U+24F0: NEGATIVE CIRCLED NUMBER SIXTEEN + ,"16", // Folded result + + "⒗" // U+2497: NUMBER SIXTEEN FULL STOP + ,"16.", // Folded result + + "⒃" // U+2483: PARENTHESIZED NUMBER SIXTEEN + ,"(16)", // Folded result + + "⑰" // U+2470: CIRCLED NUMBER SEVENTEEN + + "⓱" // U+24F1: NEGATIVE CIRCLED NUMBER SEVENTEEN + ,"17", // Folded result + + "⒘" // U+2498: NUMBER SEVENTEEN FULL STOP + ,"17.", // Folded result + + "⒄" // U+2484: PARENTHESIZED NUMBER SEVENTEEN + ,"(17)", // Folded result + + "⑱" // U+2471: CIRCLED NUMBER EIGHTEEN + + "⓲" // U+24F2: NEGATIVE CIRCLED NUMBER EIGHTEEN + ,"18", // Folded result + + "⒙" // U+2499: NUMBER EIGHTEEN FULL STOP + ,"18.", // Folded result + + "⒅" // U+2485: PARENTHESIZED NUMBER EIGHTEEN + ,"(18)", // Folded result + + "⑲" // U+2472: CIRCLED NUMBER NINETEEN + + "⓳" // U+24F3: NEGATIVE CIRCLED NUMBER NINETEEN + ,"19", // Folded result + + "⒚" // U+249A: NUMBER NINETEEN FULL STOP + ,"19.", // Folded result + + "⒆" // U+2486: PARENTHESIZED NUMBER NINETEEN + ,"(19)", // Folded result + + "⑳" // U+2473: CIRCLED NUMBER TWENTY + + "⓴" // U+24F4: NEGATIVE CIRCLED NUMBER TWENTY + ,"20", // Folded result + + "⒛" // U+249B: NUMBER TWENTY FULL STOP + ,"20.", // Folded result + + "⒇" // U+2487: PARENTHESIZED NUMBER TWENTY + ,"(20)", // Folded result + + "«" // U+00AB: LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + + "»" // U+00BB: RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + + "“" // U+201C: LEFT DOUBLE QUOTATION MARK + + "”" // U+201D: RIGHT DOUBLE QUOTATION MARK + + "„" // U+201E: DOUBLE LOW-9 QUOTATION MARK + + "″" // U+2033: DOUBLE PRIME + + "‶" // U+2036: REVERSED DOUBLE PRIME + + "❝" // U+275D: HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT + + "❞" // U+275E: HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT + + "❮" // U+276E: HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT + + "❯" // U+276F: HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT + + """ // U+FF02: FULLWIDTH QUOTATION MARK + ,"\"", // Folded result + + "‘" // 
U+2018: LEFT SINGLE QUOTATION MARK + + "’" // U+2019: RIGHT SINGLE QUOTATION MARK + + "‚" // U+201A: SINGLE LOW-9 QUOTATION MARK + + "‛" // U+201B: SINGLE HIGH-REVERSED-9 QUOTATION MARK + + "′" // U+2032: PRIME + + "‵" // U+2035: REVERSED PRIME + + "‹" // U+2039: SINGLE LEFT-POINTING ANGLE QUOTATION MARK + + "›" // U+203A: SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + + "❛" // U+275B: HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT + + "❜" // U+275C: HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT + + "'" // U+FF07: FULLWIDTH APOSTROPHE + ,"'", // Folded result + + "‐" // U+2010: HYPHEN + + "‑" // U+2011: NON-BREAKING HYPHEN + + "‒" // U+2012: FIGURE DASH + + "–" // U+2013: EN DASH + + "—" // U+2014: EM DASH + + "⁻" // U+207B: SUPERSCRIPT MINUS + + "₋" // U+208B: SUBSCRIPT MINUS + + "-" // U+FF0D: FULLWIDTH HYPHEN-MINUS + ,"-", // Folded result + + "⁅" // U+2045: LEFT SQUARE BRACKET WITH QUILL + + "❲" // U+2772: LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT + + "[" // U+FF3B: FULLWIDTH LEFT SQUARE BRACKET + ,"[", // Folded result + + "⁆" // U+2046: RIGHT SQUARE BRACKET WITH QUILL + + "❳" // U+2773: LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT + + "]" // U+FF3D: FULLWIDTH RIGHT SQUARE BRACKET + ,"]", // Folded result + + "⁽" // U+207D: SUPERSCRIPT LEFT PARENTHESIS + + "₍" // U+208D: SUBSCRIPT LEFT PARENTHESIS + + "❨" // U+2768: MEDIUM LEFT PARENTHESIS ORNAMENT + + "❪" // U+276A: MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT + + "(" // U+FF08: FULLWIDTH LEFT PARENTHESIS + ,"(", // Folded result + + "⸨" // U+2E28: LEFT DOUBLE PARENTHESIS + ,"((", // Folded result + + "⁾" // U+207E: SUPERSCRIPT RIGHT PARENTHESIS + + "₎" // U+208E: SUBSCRIPT RIGHT PARENTHESIS + + "❩" // U+2769: MEDIUM RIGHT PARENTHESIS ORNAMENT + + "❫" // U+276B: MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT + + ")" // U+FF09: FULLWIDTH RIGHT PARENTHESIS + ,")", // Folded result + + "⸩" // U+2E29: RIGHT DOUBLE PARENTHESIS + ,"))", // Folded result + + "❬" // U+276C: MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT + + "❰" // U+2770: HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT + + "<" // U+FF1C: FULLWIDTH LESS-THAN SIGN + ,"<", // Folded result + + "❭" // U+276D: MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT + + "❱" // U+2771: HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT + + ">" // U+FF1E: FULLWIDTH GREATER-THAN SIGN + ,">", // Folded result + + "❴" // U+2774: MEDIUM LEFT CURLY BRACKET ORNAMENT + + "{" // U+FF5B: FULLWIDTH LEFT CURLY BRACKET + ,"{", // Folded result + + "❵" // U+2775: MEDIUM RIGHT CURLY BRACKET ORNAMENT + + "}" // U+FF5D: FULLWIDTH RIGHT CURLY BRACKET + ,"}", // Folded result + + "⁺" // U+207A: SUPERSCRIPT PLUS SIGN + + "₊" // U+208A: SUBSCRIPT PLUS SIGN + + "+" // U+FF0B: FULLWIDTH PLUS SIGN + ,"+", // Folded result + + "⁼" // U+207C: SUPERSCRIPT EQUALS SIGN + + "₌" // U+208C: SUBSCRIPT EQUALS SIGN + + "=" // U+FF1D: FULLWIDTH EQUALS SIGN + ,"=", // Folded result + + "!" // U+FF01: FULLWIDTH EXCLAMATION MARK + ,"!", // Folded result + + "‼" // U+203C: DOUBLE EXCLAMATION MARK + ,"!!", // Folded result + + "⁉" // U+2049: EXCLAMATION QUESTION MARK + ,"!?", // Folded result + + "#" // U+FF03: FULLWIDTH NUMBER SIGN + ,"#", // Folded result + + "$" // U+FF04: FULLWIDTH DOLLAR SIGN + ,"$", // Folded result + + "⁒" // U+2052: COMMERCIAL MINUS SIGN + + "%" // U+FF05: FULLWIDTH PERCENT SIGN + ,"%", // Folded result + + "&" // U+FF06: FULLWIDTH AMPERSAND + ,"&", // Folded result + + "⁎" // U+204E: LOW ASTERISK + + "*" // U+FF0A: FULLWIDTH ASTERISK + ,"*", // Folded result + + "," // U+FF0C: FULLWIDTH COMMA + ,",", // Folded result + + "." 
// U+FF0E: FULLWIDTH FULL STOP + ,".", // Folded result + + "⁄" // U+2044: FRACTION SLASH + + "/" // U+FF0F: FULLWIDTH SOLIDUS + ,"/", // Folded result + + ":" // U+FF1A: FULLWIDTH COLON + ,":", // Folded result + + "⁏" // U+204F: REVERSED SEMICOLON + + ";" // U+FF1B: FULLWIDTH SEMICOLON + ,";", // Folded result + + "?" // U+FF1F: FULLWIDTH QUESTION MARK + ,"?", // Folded result + + "⁇" // U+2047: DOUBLE QUESTION MARK + ,"??", // Folded result + + "⁈" // U+2048: QUESTION EXCLAMATION MARK + ,"?!", // Folded result + + "@" // U+FF20: FULLWIDTH COMMERCIAL AT + ,"@", // Folded result + + "\" // U+FF3C: FULLWIDTH REVERSE SOLIDUS + ,"\\", // Folded result + + "‸" // U+2038: CARET + + "^" // U+FF3E: FULLWIDTH CIRCUMFLEX ACCENT + ,"^", // Folded result + + "_" // U+FF3F: FULLWIDTH LOW LINE + ,"_", // Folded result + + "⁓" // U+2053: SWUNG DASH + + "~" // U+FF5E: FULLWIDTH TILDE + ,"~", // Folded result + }; + + // Construct input text and expected output tokens + List expectedOutputTokens = new ArrayList(); + StringBuilder inputText = new StringBuilder(); + for (int n = 0 ; n < foldings.length ; n += 2) { + if (n > 0) { + inputText.append(' '); // Space between tokens + } + inputText.append(foldings[n]); + + // Construct the expected output token: the ASCII string to fold to, + // duplicated as many times as the number of characters in the input text. + StringBuilder expected = new StringBuilder(); + int numChars = foldings[n].length(); + for (int m = 0 ; m < numChars; ++m) { + expected.append(foldings[n + 1]); + } + expectedOutputTokens.add(expected.toString()); + } + + TokenStream stream = new MockTokenizer(new StringReader(inputText.toString()), MockTokenizer.WHITESPACE, false); + ASCIIFoldingFilter filter = new ASCIIFoldingFilter(stream); + CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class); + Iterator expectedIter = expectedOutputTokens.iterator(); + filter.reset(); + while (expectedIter.hasNext()) { + assertTermEquals(expectedIter.next(), filter, termAtt); + } + assertFalse(filter.incrementToken()); + } + + void assertTermEquals(String expected, TokenStream stream, CharTermAttribute termAtt) throws Exception { + assertTrue(stream.incrementToken()); + assertEquals(expected, termAtt.toString()); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestAnalyzers.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestAnalyzers.java new file mode 100644 index 0000000..c97da67 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestAnalyzers.java @@ -0,0 +1,251 @@ +package org.apache.lucene.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
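
The TestAnalyzers file just opened exercises, among other things, SimpleAnalyzer's letter-only, lowercasing tokenizer. A hedged sketch of the behavior its testSimple assertions pin down (3.x API; the field name "f" is arbitrary, and the fragment belongs inside a method that may throw IOException):

    Analyzer a = new SimpleAnalyzer(Version.LUCENE_34);
    TokenStream ts = a.tokenStream("f", new StringReader("U.S.A. C++ B2B"));
    CharTermAttribute term = ts.getAttribute(CharTermAttribute.class);
    while (ts.incrementToken()) {
      System.out.print(term + " ");  // u s a c b b: digits and punctuation dropped
    }
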
+ */ + +import java.io.IOException; +import java.io.StringReader; +import java.io.Reader; + +import org.apache.lucene.analysis.standard.StandardTokenizer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.index.Payload; + +public class TestAnalyzers extends BaseTokenStreamTestCase { + + public void testSimple() throws Exception { + Analyzer a = new SimpleAnalyzer(TEST_VERSION_CURRENT); + assertAnalyzesTo(a, "foo bar FOO BAR", + new String[] { "foo", "bar", "foo", "bar" }); + assertAnalyzesTo(a, "foo bar . FOO <> BAR", + new String[] { "foo", "bar", "foo", "bar" }); + assertAnalyzesTo(a, "foo.bar.FOO.BAR", + new String[] { "foo", "bar", "foo", "bar" }); + assertAnalyzesTo(a, "U.S.A.", + new String[] { "u", "s", "a" }); + assertAnalyzesTo(a, "C++", + new String[] { "c" }); + assertAnalyzesTo(a, "B2B", + new String[] { "b", "b" }); + assertAnalyzesTo(a, "2B", + new String[] { "b" }); + assertAnalyzesTo(a, "\"QUOTED\" word", + new String[] { "quoted", "word" }); + } + + public void testNull() throws Exception { + Analyzer a = new WhitespaceAnalyzer(TEST_VERSION_CURRENT); + assertAnalyzesTo(a, "foo bar FOO BAR", + new String[] { "foo", "bar", "FOO", "BAR" }); + assertAnalyzesTo(a, "foo bar . FOO <> BAR", + new String[] { "foo", "bar", ".", "FOO", "<>", "BAR" }); + assertAnalyzesTo(a, "foo.bar.FOO.BAR", + new String[] { "foo.bar.FOO.BAR" }); + assertAnalyzesTo(a, "U.S.A.", + new String[] { "U.S.A." }); + assertAnalyzesTo(a, "C++", + new String[] { "C++" }); + assertAnalyzesTo(a, "B2B", + new String[] { "B2B" }); + assertAnalyzesTo(a, "2B", + new String[] { "2B" }); + assertAnalyzesTo(a, "\"QUOTED\" word", + new String[] { "\"QUOTED\"", "word" }); + } + + public void testStop() throws Exception { + Analyzer a = new StopAnalyzer(TEST_VERSION_CURRENT); + assertAnalyzesTo(a, "foo bar FOO BAR", + new String[] { "foo", "bar", "foo", "bar" }); + assertAnalyzesTo(a, "foo a bar such FOO THESE BAR", + new String[] { "foo", "bar", "foo", "bar" }); + } + + void verifyPayload(TokenStream ts) throws IOException { + PayloadAttribute payloadAtt = ts.getAttribute(PayloadAttribute.class); + for(byte b=1;;b++) { + boolean hasNext = ts.incrementToken(); + if (!hasNext) break; + // System.out.println("id="+System.identityHashCode(nextToken) + " " + t); + // System.out.println("payload=" + (int)nextToken.getPayload().toByteArray()[0]); + assertEquals(b, payloadAtt.getPayload().toByteArray()[0]); + } + } + + // Make sure old style next() calls result in a new copy of payloads + public void testPayloadCopy() throws IOException { + String s = "how now brown cow"; + TokenStream ts; + ts = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(s)); + ts = new PayloadSetter(ts); + verifyPayload(ts); + + ts = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(s)); + ts = new PayloadSetter(ts); + verifyPayload(ts); + } + + // LUCENE-1150: Just a compile time test, to ensure the + // StandardAnalyzer constants remain publicly accessible + @SuppressWarnings("unused") + public void _testStandardConstants() { + int x = StandardTokenizer.ALPHANUM; + x = StandardTokenizer.APOSTROPHE; + x = StandardTokenizer.ACRONYM; + x = StandardTokenizer.COMPANY; + x = StandardTokenizer.EMAIL; + x = StandardTokenizer.HOST; + x = StandardTokenizer.NUM; + x = StandardTokenizer.CJ; + String[] y = StandardTokenizer.TOKEN_TYPES; + } + + private static class 
LowerCaseWhitespaceAnalyzer extends Analyzer { + + @Override + public TokenStream tokenStream(String fieldName, Reader reader) { + return new LowerCaseFilter(TEST_VERSION_CURRENT, + new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader)); + } + + } + + /** + * @deprecated remove this when lucene 3.0 "broken unicode 4" support + * is no longer needed. + */ + @Deprecated + private static class LowerCaseWhitespaceAnalyzerBWComp extends Analyzer { + + @Override + public TokenStream tokenStream(String fieldName, Reader reader) { + return new LowerCaseFilter(new WhitespaceTokenizer(reader)); + } + + } + + /** + * Test that LowercaseFilter handles entire unicode range correctly + */ + public void testLowerCaseFilter() throws IOException { + Analyzer a = new LowerCaseWhitespaceAnalyzer(); + // BMP + assertAnalyzesTo(a, "AbaCaDabA", new String[] { "abacadaba" }); + // supplementary + assertAnalyzesTo(a, "\ud801\udc16\ud801\udc16\ud801\udc16\ud801\udc16", + new String[] {"\ud801\udc3e\ud801\udc3e\ud801\udc3e\ud801\udc3e"}); + assertAnalyzesTo(a, "AbaCa\ud801\udc16DabA", + new String[] { "abaca\ud801\udc3edaba" }); + // unpaired lead surrogate + assertAnalyzesTo(a, "AbaC\uD801AdaBa", + new String [] { "abac\uD801adaba" }); + // unpaired trail surrogate + assertAnalyzesTo(a, "AbaC\uDC16AdaBa", + new String [] { "abac\uDC16adaba" }); + } + + /** + * Test that LowercaseFilter handles the lowercasing correctly if the term + * buffer has a trailing surrogate character leftover and the current term in + * the buffer ends with a corresponding leading surrogate. + */ + public void testLowerCaseFilterLowSurrogateLeftover() throws IOException { + // test if the limit of the termbuffer is correctly used with supplementary + // chars + WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, + new StringReader("BogustermBogusterm\udc16")); + LowerCaseFilter filter = new LowerCaseFilter(TEST_VERSION_CURRENT, + tokenizer); + assertTokenStreamContents(filter, new String[] {"bogustermbogusterm\udc16"}); + filter.reset(); + String highSurEndingUpper = "BogustermBoguster\ud801"; + String highSurEndingLower = "bogustermboguster\ud801"; + tokenizer.reset(new StringReader(highSurEndingUpper)); + assertTokenStreamContents(filter, new String[] {highSurEndingLower}); + assertTrue(filter.hasAttribute(CharTermAttribute.class)); + char[] termBuffer = filter.getAttribute(CharTermAttribute.class).buffer(); + int length = highSurEndingLower.length(); + assertEquals('\ud801', termBuffer[length - 1]); + assertEquals('\udc3e', termBuffer[length]); + + } + + public void testLimitTokenCountAnalyzer() throws IOException { + Analyzer a = new LimitTokenCountAnalyzer(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2); + // dont use assertAnalyzesTo here, as the end offset is not the end of the string! + assertTokenStreamContents(a.tokenStream("dummy", new StringReader("1 2 3 4 5")), new String[] { "1", "2" }, new int[] { 0, 3 }, new int[] { 1, 4 }, 4); + assertTokenStreamContents(a.reusableTokenStream("dummy", new StringReader("1 2 3 4 5")), new String[] { "1", "2" }, new int[] { 0, 2 }, new int[] { 1, 3 }, 3); + + a = new LimitTokenCountAnalyzer(new StandardAnalyzer(TEST_VERSION_CURRENT), 2); + // dont use assertAnalyzesTo here, as the end offset is not the end of the string! 
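+ // Note on assertTokenStreamContents(stream, terms, startOffsets, endOffsets,
+ // finalOffset): the trailing int is the offset reported by end(). It is
+ // checked explicitly here because LimitTokenCountAnalyzer stops after two
+ // tokens, so the final offset falls short of the length of "1 2 3 4 5".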
+ assertTokenStreamContents(a.tokenStream("dummy", new StringReader("1 2 3 4 5")), new String[] { "1", "2" }, new int[] { 0, 2 }, new int[] { 1, 3 }, 3); + assertTokenStreamContents(a.reusableTokenStream("dummy", new StringReader("1 2 3 4 5")), new String[] { "1", "2" }, new int[] { 0, 2 }, new int[] { 1, 3 }, 3); + } + + /** + * Test that LowercaseFilter only works on BMP for back compat, + * depending upon version + * @deprecated remove this test when lucene 3.0 "broken unicode 4" support + * is no longer needed. + */ + @Deprecated + public void testLowerCaseFilterBWComp() throws IOException { + Analyzer a = new LowerCaseWhitespaceAnalyzerBWComp(); + // BMP + assertAnalyzesTo(a, "AbaCaDabA", new String[] { "abacadaba" }); + // supplementary, no-op + assertAnalyzesTo(a, "\ud801\udc16\ud801\udc16\ud801\udc16\ud801\udc16", + new String[] {"\ud801\udc16\ud801\udc16\ud801\udc16\ud801\udc16"}); + assertAnalyzesTo(a, "AbaCa\ud801\udc16DabA", + new String[] { "abaca\ud801\udc16daba" }); + // unpaired lead surrogate + assertAnalyzesTo(a, "AbaC\uD801AdaBa", + new String [] { "abac\uD801adaba" }); + // unpaired trail surrogate + assertAnalyzesTo(a, "AbaC\uDC16AdaBa", + new String [] { "abac\uDC16adaba" }); + } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + checkRandomData(random, new SimpleAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + checkRandomData(random, new StopAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } +} + +final class PayloadSetter extends TokenFilter { + PayloadAttribute payloadAtt; + public PayloadSetter(TokenStream input) { + super(input); + payloadAtt = addAttribute(PayloadAttribute.class); + } + + byte[] data = new byte[1]; + Payload p = new Payload(data,0,1); + + @Override + public boolean incrementToken() throws IOException { + boolean hasNext = input.incrementToken(); + if (!hasNext) return false; + payloadAtt.setPayload(p); // reuse the payload / byte[] + data[0]++; + return true; + } +} \ No newline at end of file diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java new file mode 100644 index 0000000..3a122cd --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java @@ -0,0 +1,110 @@ +package org.apache.lucene.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
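
The file just opened tests CachingTokenFilter. Its contract, sketched minimally under the 3.x API: the first full pass pulls tokens from the wrapped stream into an in-memory cache, and reset() afterwards rewinds over the cache rather than the underlying Reader.

    TokenStream ts = new CachingTokenFilter(
        new WhitespaceTokenizer(Version.LUCENE_34,
                                new StringReader("how now brown")));
    CharTermAttribute term = ts.getAttribute(CharTermAttribute.class);
    while (ts.incrementToken()) { /* first pass fills the cache */ }
    ts.reset();                    // rewinds the cached tokens, not the Reader
    while (ts.incrementToken()) {
      System.out.println(term);    // replayed from cache: how, now, brown
    }

This is why the test below can consume the stream twice before indexing and once more afterwards.
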
+ */ + + +import java.io.IOException; + +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.Field.TermVector; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermPositions; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.store.Directory; + +public class TestCachingTokenFilter extends BaseTokenStreamTestCase { + private String[] tokens = new String[] {"term1", "term2", "term3", "term2"}; + + public void testCaching() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, dir); + + Document doc = new Document(); + TokenStream stream = new TokenStream() { + private int index = 0; + private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); + + @Override + public boolean incrementToken() throws IOException { + if (index == tokens.length) { + return false; + } else { + clearAttributes(); + termAtt.append(tokens[index++]); + offsetAtt.setOffset(0,0); + return true; + } + } + + }; + + stream = new CachingTokenFilter(stream); + + doc.add(new Field("preanalyzed", stream, TermVector.NO)); + + // 1) we consume all tokens twice before we add the doc to the index + checkTokens(stream); + stream.reset(); + checkTokens(stream); + + // 2) now add the document to the index and verify if all tokens are indexed + // don't reset the stream here, the DocumentWriter should do that implicitly + writer.addDocument(doc); + + IndexReader reader = writer.getReader(); + TermPositions termPositions = reader.termPositions(new Term("preanalyzed", "term1")); + assertTrue(termPositions.next()); + assertEquals(1, termPositions.freq()); + assertEquals(0, termPositions.nextPosition()); + + termPositions.seek(new Term("preanalyzed", "term2")); + assertTrue(termPositions.next()); + assertEquals(2, termPositions.freq()); + assertEquals(1, termPositions.nextPosition()); + assertEquals(3, termPositions.nextPosition()); + + termPositions.seek(new Term("preanalyzed", "term3")); + assertTrue(termPositions.next()); + assertEquals(1, termPositions.freq()); + assertEquals(2, termPositions.nextPosition()); + reader.close(); + writer.close(); + // 3) reset stream and consume tokens again + stream.reset(); + checkTokens(stream); + dir.close(); + } + + private void checkTokens(TokenStream stream) throws IOException { + int count = 0; + + CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class); + while (stream.incrementToken()) { + assertTrue(count < tokens.length); + assertEquals(tokens[count], termAtt.toString()); + count++; + } + + assertEquals(tokens.length, count); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestCharArrayMap.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestCharArrayMap.java new file mode 100644 index 0000000..c18830d --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestCharArrayMap.java @@ -0,0 +1,248 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
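
The caching test above walks postings through the 3.x TermPositions API. In outline (a sketch of the API shape, not the test's code; reader is an open IndexReader):

    TermPositions tp = reader.termPositions(new Term("preanalyzed", "term2"));
    while (tp.next()) {                      // advance document by document
      for (int i = 0; i < tp.freq(); i++) {  // freq() = occurrences in this doc
        int position = tp.nextPosition();    // in-document token position
      }
    }
    tp.close();

seek(Term) repositions the same enumeration on a new term, which is how the test inspects term1, term2, and term3 with one TermPositions instance.
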
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.analysis; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Locale; +import java.util.Map; + +import org.apache.lucene.util.LuceneTestCase; + +public class TestCharArrayMap extends LuceneTestCase { + + public void doRandom(int iter, boolean ignoreCase) { + CharArrayMap map = new CharArrayMap(TEST_VERSION_CURRENT, 1, ignoreCase); + HashMap hmap = new HashMap(); + + char[] key; + for (int i=0; i cm = new CharArrayMap(TEST_VERSION_CURRENT, 2, false); + HashMap hm = new HashMap(); + hm.put("foo",1); + hm.put("bar",2); + cm.putAll(hm); + assertEquals(hm.size(), cm.size()); + hm.put("baz", 3); + cm.putAll(hm); + assertEquals(hm.size(), cm.size()); + + CharArraySet cs = cm.keySet(); + int n=0; + for (Object o : cs) { + assertTrue(cm.containsKey(o)); + char[] co = (char[]) o; + assertTrue(cm.containsKey(co, 0, co.length)); + n++; + } + assertEquals(hm.size(), n); + assertEquals(hm.size(), cs.size()); + assertEquals(cm.size(), cs.size()); + cs.clear(); + assertEquals(0, cs.size()); + assertEquals(0, cm.size()); + try { + cs.add("test"); + fail("keySet() allows adding new keys"); + } catch (UnsupportedOperationException ue) { + // pass + } + cm.putAll(hm); + assertEquals(hm.size(), cs.size()); + assertEquals(cm.size(), cs.size()); + + Iterator> iter1 = cm.entrySet().iterator(); + n=0; + while (iter1.hasNext()) { + Map.Entry entry = iter1.next(); + Object key = entry.getKey(); + Integer val = entry.getValue(); + assertEquals(cm.get(key), val); + entry.setValue(val*100); + assertEquals(val*100, (int)cm.get(key)); + n++; + } + assertEquals(hm.size(), n); + cm.clear(); + cm.putAll(hm); + assertEquals(cm.size(), n); + + CharArrayMap.EntryIterator iter2 = cm.entrySet().iterator(); + n=0; + while (iter2.hasNext()) { + char[] keyc = iter2.nextKey(); + Integer val = iter2.currentValue(); + assertEquals(hm.get(new String(keyc)), val); + iter2.setValue(val*100); + assertEquals(val*100, (int)cm.get(keyc)); + n++; + } + assertEquals(hm.size(), n); + + cm.entrySet().clear(); + assertEquals(0, cm.size()); + assertEquals(0, cm.entrySet().size()); + assertTrue(cm.isEmpty()); + } + + public void testModifyOnUnmodifiable(){ + CharArrayMap map = new CharArrayMap(TEST_VERSION_CURRENT, 2, false); + map.put("foo",1); + map.put("bar",2); + final int size = map.size(); + assertEquals(2, size); + assertTrue(map.containsKey("foo")); + assertEquals(1, map.get("foo").intValue()); + assertTrue(map.containsKey("bar")); + assertEquals(2, map.get("bar").intValue()); + + map = CharArrayMap.unmodifiableMap(map); + assertEquals("Map size changed due to unmodifiableMap call" , size, map.size()); + String NOT_IN_MAP = "SirGallahad"; + assertFalse("Test String already exists in map", map.containsKey(NOT_IN_MAP)); + assertNull("Test String already exists in map", map.get(NOT_IN_MAP)); + + try{ + map.put(NOT_IN_MAP.toCharArray(), 3); + fail("Modified unmodifiable map"); + 
}catch (UnsupportedOperationException e) { + // expected + assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP)); + assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP)); + assertEquals("Size of unmodifiable map has changed", size, map.size()); + } + + try{ + map.put(NOT_IN_MAP, 3); + fail("Modified unmodifiable map"); + }catch (UnsupportedOperationException e) { + // expected + assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP)); + assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP)); + assertEquals("Size of unmodifiable map has changed", size, map.size()); + } + + try{ + map.put(new StringBuilder(NOT_IN_MAP), 3); + fail("Modified unmodifiable map"); + }catch (UnsupportedOperationException e) { + // expected + assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP)); + assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP)); + assertEquals("Size of unmodifiable map has changed", size, map.size()); + } + + try{ + map.clear(); + fail("Modified unmodifiable map"); + }catch (UnsupportedOperationException e) { + // expected + assertEquals("Size of unmodifiable map has changed", size, map.size()); + } + + try{ + map.entrySet().clear(); + fail("Modified unmodifiable map"); + }catch (UnsupportedOperationException e) { + // expected + assertEquals("Size of unmodifiable map has changed", size, map.size()); + } + + try{ + map.keySet().clear(); + fail("Modified unmodifiable map"); + }catch (UnsupportedOperationException e) { + // expected + assertEquals("Size of unmodifiable map has changed", size, map.size()); + } + + try{ + map.put((Object) NOT_IN_MAP, 3); + fail("Modified unmodifiable map"); + }catch (UnsupportedOperationException e) { + // expected + assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP)); + assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP)); + assertEquals("Size of unmodifiable map has changed", size, map.size()); + } + + try{ + map.putAll(Collections.singletonMap(NOT_IN_MAP, 3)); + fail("Modified unmodifiable map"); + }catch (UnsupportedOperationException e) { + // expected + assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP)); + assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP)); + assertEquals("Size of unmodifiable map has changed", size, map.size()); + } + + assertTrue(map.containsKey("foo")); + assertEquals(1, map.get("foo").intValue()); + assertTrue(map.containsKey("bar")); + assertEquals(2, map.get("bar").intValue()); + } + + public void testToString() { + CharArrayMap cm = new CharArrayMap(TEST_VERSION_CURRENT, Collections.singletonMap("test",1), false); + assertEquals("[test]",cm.keySet().toString()); + assertEquals("[1]",cm.values().toString()); + assertEquals("[test=1]",cm.entrySet().toString()); + assertEquals("{test=1}",cm.toString()); + cm.put("test2", 2); + assertTrue(cm.keySet().toString().contains(", ")); + assertTrue(cm.values().toString().contains(", ")); + assertTrue(cm.entrySet().toString().contains(", ")); + assertTrue(cm.toString().contains(", ")); + } +} + diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestCharArraySet.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestCharArraySet.java new file mode 100755 index 0000000..cbeaf74 --- /dev/null +++ 
b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestCharArraySet.java @@ -0,0 +1,541 @@ +package org.apache.lucene.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.Iterator; + +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.Version; + + +public class TestCharArraySet extends LuceneTestCase { + + static final String[] TEST_STOP_WORDS = { + "a", "an", "and", "are", "as", "at", "be", "but", "by", + "for", "if", "in", "into", "is", "it", + "no", "not", "of", "on", "or", "such", + "that", "the", "their", "then", "there", "these", + "they", "this", "to", "was", "will", "with" + }; + + + public void testRehash() throws Exception { + CharArraySet cas = new CharArraySet(TEST_VERSION_CURRENT, 0, true); + for(int i=0;i would not hit any element of the CAS and therefor never call + // remove() on the iterator + try{ + set.removeAll(new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(TEST_STOP_WORDS), true)); + fail("Modified unmodifiable set"); + }catch (UnsupportedOperationException e) { + // expected + assertEquals("Size of unmodifiable set has changed", size, set.size()); + } + + try{ + set.retainAll(new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(NOT_IN_SET), true)); + fail("Modified unmodifiable set"); + }catch (UnsupportedOperationException e) { + // expected + assertEquals("Size of unmodifiable set has changed", size, set.size()); + } + + try{ + set.addAll(Arrays.asList(new String[]{NOT_IN_SET})); + fail("Modified unmodifiable set"); + }catch (UnsupportedOperationException e) { + // expected + assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET)); + } + + for (int i = 0; i < TEST_STOP_WORDS.length; i++) { + assertTrue(set.contains(TEST_STOP_WORDS[i])); + } + } + + public void testUnmodifiableSet(){ + CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10,true); + set.addAll(Arrays.asList(TEST_STOP_WORDS)); + set.add(Integer.valueOf(1)); + final int size = set.size(); + set = CharArraySet.unmodifiableSet(set); + assertEquals("Set size changed due to unmodifiableSet call" , size, set.size()); + for (String stopword : TEST_STOP_WORDS) { + assertTrue(set.contains(stopword)); + } + assertTrue(set.contains(Integer.valueOf(1))); + assertTrue(set.contains("1")); + assertTrue(set.contains(new char[]{'1'})); + + try{ + CharArraySet.unmodifiableSet(null); + fail("can not make null unmodifiable"); + }catch (NullPointerException e) { + // expected + } + } + + public void testSupplementaryChars() { + String missing = "Term %s is missing in the set"; + String falsePos = 
"Term %s is in the set but shouldn't"; + // for reference see + // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[[%3ACase_Sensitive%3DTrue%3A]%26[^[\u0000-\uFFFF]]]&esc=on + String[] upperArr = new String[] {"Abc\ud801\udc1c", + "\ud801\udc1c\ud801\udc1cCDE", "A\ud801\udc1cB"}; + String[] lowerArr = new String[] {"abc\ud801\udc44", + "\ud801\udc44\ud801\udc44cde", "a\ud801\udc44b"}; + CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(TEST_STOP_WORDS), true); + for (String upper : upperArr) { + set.add(upper); + } + for (int i = 0; i < upperArr.length; i++) { + assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i])); + assertTrue(String.format(missing, lowerArr[i]), set.contains(lowerArr[i])); + } + set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(TEST_STOP_WORDS), false); + for (String upper : upperArr) { + set.add(upper); + } + for (int i = 0; i < upperArr.length; i++) { + assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i])); + assertFalse(String.format(falsePos, lowerArr[i]), set.contains(lowerArr[i])); + } + } + + public void testSingleHighSurrogate() { + String missing = "Term %s is missing in the set"; + String falsePos = "Term %s is in the set but shouldn't"; + String[] upperArr = new String[] { "ABC\uD800", "ABC\uD800EfG", + "\uD800EfG", "\uD800\ud801\udc1cB" }; + + String[] lowerArr = new String[] { "abc\uD800", "abc\uD800efg", + "\uD800efg", "\uD800\ud801\udc44b" }; + CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, Arrays + .asList(TEST_STOP_WORDS), true); + for (String upper : upperArr) { + set.add(upper); + } + for (int i = 0; i < upperArr.length; i++) { + assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i])); + assertTrue(String.format(missing, lowerArr[i]), set.contains(lowerArr[i])); + } + set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(TEST_STOP_WORDS), + false); + for (String upper : upperArr) { + set.add(upper); + } + for (int i = 0; i < upperArr.length; i++) { + assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i])); + assertFalse(String.format(falsePos, upperArr[i]), set + .contains(lowerArr[i])); + } + } + + /** + * @deprecated remove this test when lucene 3.0 "broken unicode 4" support is + * no longer needed. 
+ */ + @Deprecated + public void testSupplementaryCharsBWCompat() { + String missing = "Term %s is missing in the set"; + String falsePos = "Term %s is in the set but shouldn't"; + // for reference see + // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[[%3ACase_Sensitive%3DTrue%3A]%26[^[\u0000-\uFFFF]]]&esc=on + String[] upperArr = new String[] {"Abc\ud801\udc1c", + "\ud801\udc1c\ud801\udc1cCDE", "A\ud801\udc1cB"}; + String[] lowerArr = new String[] {"abc\ud801\udc44", + "\ud801\udc44\ud801\udc44cde", "a\ud801\udc44b"}; + CharArraySet set = new CharArraySet(Version.LUCENE_30, Arrays.asList(TEST_STOP_WORDS), true); + for (String upper : upperArr) { + set.add(upper); + } + for (int i = 0; i < upperArr.length; i++) { + assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i])); + assertFalse(String.format(falsePos, lowerArr[i]), set.contains(lowerArr[i])); + } + set = new CharArraySet(Version.LUCENE_30, Arrays.asList(TEST_STOP_WORDS), false); + for (String upper : upperArr) { + set.add(upper); + } + for (int i = 0; i < upperArr.length; i++) { + assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i])); + assertFalse(String.format(falsePos, lowerArr[i]), set.contains(lowerArr[i])); + } + } + + /** + * @deprecated remove this test when lucene 3.0 "broken unicode 4" support is + * no longer needed. + */ + @Deprecated + public void testSingleHighSurrogateBWComapt() { + String missing = "Term %s is missing in the set"; + String falsePos = "Term %s is in the set but shouldn't"; + String[] upperArr = new String[] { "ABC\uD800", "ABC\uD800EfG", + "\uD800EfG", "\uD800\ud801\udc1cB" }; + + String[] lowerArr = new String[] { "abc\uD800", "abc\uD800efg", + "\uD800efg", "\uD800\ud801\udc44b" }; + CharArraySet set = new CharArraySet(Version.LUCENE_30, Arrays + .asList(TEST_STOP_WORDS), true); + for (String upper : upperArr) { + set.add(upper); + } + for (int i = 0; i < upperArr.length; i++) { + assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i])); + if (i == lowerArr.length - 1) + assertFalse(String.format(falsePos, lowerArr[i]), set + .contains(lowerArr[i])); + else + assertTrue(String.format(missing, lowerArr[i]), set + .contains(lowerArr[i])); + } + set = new CharArraySet(Version.LUCENE_30, Arrays.asList(TEST_STOP_WORDS), + false); + for (String upper : upperArr) { + set.add(upper); + } + for (int i = 0; i < upperArr.length; i++) { + assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i])); + assertFalse(String.format(falsePos, lowerArr[i]), set + .contains(lowerArr[i])); + } + } + + @SuppressWarnings("deprecated") + public void testCopyCharArraySetBWCompat() { + CharArraySet setIngoreCase = new CharArraySet(TEST_VERSION_CURRENT, 10, true); + CharArraySet setCaseSensitive = new CharArraySet(TEST_VERSION_CURRENT, 10, false); + + List stopwords = Arrays.asList(TEST_STOP_WORDS); + List stopwordsUpper = new ArrayList(); + for (String string : stopwords) { + stopwordsUpper.add(string.toUpperCase()); + } + setIngoreCase.addAll(Arrays.asList(TEST_STOP_WORDS)); + setIngoreCase.add(Integer.valueOf(1)); + setCaseSensitive.addAll(Arrays.asList(TEST_STOP_WORDS)); + setCaseSensitive.add(Integer.valueOf(1)); + + // This should use the deprecated methods, because it checks a bw compatibility. 
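+ // CharArraySet.copy(Set) without a Version argument is the deprecated
+ // signature; the assertions below verify that it preserves each source
+ // set's ignoreCase behavior.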
+ CharArraySet copy = CharArraySet.copy(setIngoreCase); + CharArraySet copyCaseSens = CharArraySet.copy(setCaseSensitive); + + assertEquals(setIngoreCase.size(), copy.size()); + assertEquals(setCaseSensitive.size(), copy.size()); + + assertTrue(copy.containsAll(stopwords)); + assertTrue(copy.containsAll(stopwordsUpper)); + assertTrue(copyCaseSens.containsAll(stopwords)); + for (String string : stopwordsUpper) { + assertFalse(copyCaseSens.contains(string)); + } + // test adding terms to the copy + List newWords = new ArrayList(); + for (String string : stopwords) { + newWords.add(string+"_1"); + } + copy.addAll(newWords); + + assertTrue(copy.containsAll(stopwords)); + assertTrue(copy.containsAll(stopwordsUpper)); + assertTrue(copy.containsAll(newWords)); + // new added terms are not in the source set + for (String string : newWords) { + assertFalse(setIngoreCase.contains(string)); + assertFalse(setCaseSensitive.contains(string)); + + } + } + + /** + * Test the static #copy() function with a CharArraySet as a source + */ + public void testCopyCharArraySet() { + CharArraySet setIngoreCase = new CharArraySet(TEST_VERSION_CURRENT, 10, true); + CharArraySet setCaseSensitive = new CharArraySet(TEST_VERSION_CURRENT, 10, false); + + List stopwords = Arrays.asList(TEST_STOP_WORDS); + List stopwordsUpper = new ArrayList(); + for (String string : stopwords) { + stopwordsUpper.add(string.toUpperCase()); + } + setIngoreCase.addAll(Arrays.asList(TEST_STOP_WORDS)); + setIngoreCase.add(Integer.valueOf(1)); + setCaseSensitive.addAll(Arrays.asList(TEST_STOP_WORDS)); + setCaseSensitive.add(Integer.valueOf(1)); + + CharArraySet copy = CharArraySet.copy(TEST_VERSION_CURRENT, setIngoreCase); + CharArraySet copyCaseSens = CharArraySet.copy(TEST_VERSION_CURRENT, setCaseSensitive); + + assertEquals(setIngoreCase.size(), copy.size()); + assertEquals(setCaseSensitive.size(), copy.size()); + + assertTrue(copy.containsAll(stopwords)); + assertTrue(copy.containsAll(stopwordsUpper)); + assertTrue(copyCaseSens.containsAll(stopwords)); + for (String string : stopwordsUpper) { + assertFalse(copyCaseSens.contains(string)); + } + // test adding terms to the copy + List newWords = new ArrayList(); + for (String string : stopwords) { + newWords.add(string+"_1"); + } + copy.addAll(newWords); + + assertTrue(copy.containsAll(stopwords)); + assertTrue(copy.containsAll(stopwordsUpper)); + assertTrue(copy.containsAll(newWords)); + // new added terms are not in the source set + for (String string : newWords) { + assertFalse(setIngoreCase.contains(string)); + assertFalse(setCaseSensitive.contains(string)); + + } + } + + /** + * Test the static #copy() function with a JDK {@link Set} as a source + */ + public void testCopyJDKSet() { + Set set = new HashSet(); + + List stopwords = Arrays.asList(TEST_STOP_WORDS); + List stopwordsUpper = new ArrayList(); + for (String string : stopwords) { + stopwordsUpper.add(string.toUpperCase()); + } + set.addAll(Arrays.asList(TEST_STOP_WORDS)); + + CharArraySet copy = CharArraySet.copy(TEST_VERSION_CURRENT, set); + + assertEquals(set.size(), copy.size()); + assertEquals(set.size(), copy.size()); + + assertTrue(copy.containsAll(stopwords)); + for (String string : stopwordsUpper) { + assertFalse(copy.contains(string)); + } + + List newWords = new ArrayList(); + for (String string : stopwords) { + newWords.add(string+"_1"); + } + copy.addAll(newWords); + + assertTrue(copy.containsAll(stopwords)); + assertTrue(copy.containsAll(newWords)); + // new added terms are not in the source set + for (String string 
: newWords) { + assertFalse(set.contains(string)); + } + } + + /** + * Tests a special case of {@link CharArraySet#copy(Version, Set)} where the + * set to copy is the {@link CharArraySet#EMPTY_SET} + */ + public void testCopyEmptySet() { + assertSame(CharArraySet.EMPTY_SET, + CharArraySet.copy(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET)); + } + + /** + * Smoketests the static empty set + */ + public void testEmptySet() { + assertEquals(0, CharArraySet.EMPTY_SET.size()); + + assertTrue(CharArraySet.EMPTY_SET.isEmpty()); + for (String stopword : TEST_STOP_WORDS) { + assertFalse(CharArraySet.EMPTY_SET.contains(stopword)); + } + assertFalse(CharArraySet.EMPTY_SET.contains("foo")); + assertFalse(CharArraySet.EMPTY_SET.contains((Object) "foo")); + assertFalse(CharArraySet.EMPTY_SET.contains("foo".toCharArray())); + assertFalse(CharArraySet.EMPTY_SET.contains("foo".toCharArray(),0,3)); + } + + /** + * Test for NPE + */ + public void testContainsWithNull() { + CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true); + try { + set.contains((char[]) null, 0, 10); + fail("null value must raise NPE"); + } catch (NullPointerException e) {} + try { + set.contains((CharSequence) null); + fail("null value must raise NPE"); + } catch (NullPointerException e) {} + try { + set.contains((Object) null); + fail("null value must raise NPE"); + } catch (NullPointerException e) {} + } + + @Deprecated @SuppressWarnings("unchecked") + public void testIterator() { + HashSet hset = new HashSet(); + hset.addAll(Arrays.asList(TEST_STOP_WORDS)); + + assertTrue("in 3.0 version, iterator should be CharArraySetIterator", + ((Iterator) CharArraySet.copy(Version.LUCENE_30, hset).iterator()) instanceof CharArraySet.CharArraySetIterator); + + CharArraySet set = CharArraySet.copy(TEST_VERSION_CURRENT, hset); + assertFalse("in current version, iterator should not be CharArraySetIterator", + ((Iterator) set.iterator()) instanceof CharArraySet.CharArraySetIterator); + + Iterator it = set.stringIterator(); + assertTrue(it instanceof CharArraySet.CharArraySetIterator); + while (it.hasNext()) { + // as the set returns String instances, this must work: + assertTrue(hset.contains(it.next())); + try { + it.remove(); + fail("remove() should not work on CharArraySetIterator"); + } catch (UnsupportedOperationException uoe) { + // pass + } + } + } + + public void testToString() { + CharArraySet set = CharArraySet.copy(TEST_VERSION_CURRENT, Collections.singleton("test")); + assertEquals("[test]", set.toString()); + set.add("test2"); + assertTrue(set.toString().contains(", ")); + + set = CharArraySet.copy(Version.LUCENE_30, Collections.singleton("test")); + assertEquals("[test]", set.toString()); + set.add("test2"); + assertTrue(set.toString().contains(", ")); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestCharFilter.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestCharFilter.java new file mode 100644 index 0000000..1d2394f --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestCharFilter.java @@ -0,0 +1,69 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.analysis; + +import java.io.StringReader; + +import org.apache.lucene.util.LuceneTestCase; + +public class TestCharFilter extends LuceneTestCase { + + public void testCharFilter1() throws Exception { + CharStream cs = new CharFilter1( CharReader.get( new StringReader("") ) ); + assertEquals( "corrected offset is invalid", 1, cs.correctOffset( 0 ) ); + } + + public void testCharFilter2() throws Exception { + CharStream cs = new CharFilter2( CharReader.get( new StringReader("") ) ); + assertEquals( "corrected offset is invalid", 2, cs.correctOffset( 0 ) ); + } + + public void testCharFilter12() throws Exception { + CharStream cs = new CharFilter2( new CharFilter1( CharReader.get( new StringReader("") ) ) ); + assertEquals( "corrected offset is invalid", 3, cs.correctOffset( 0 ) ); + } + + public void testCharFilter11() throws Exception { + CharStream cs = new CharFilter1( new CharFilter1( CharReader.get( new StringReader("") ) ) ); + assertEquals( "corrected offset is invalid", 2, cs.correctOffset( 0 ) ); + } + + static class CharFilter1 extends CharFilter { + + protected CharFilter1(CharStream in) { + super(in); + } + + @Override + protected int correct(int currentOff) { + return currentOff + 1; + } + } + + static class CharFilter2 extends CharFilter { + + protected CharFilter2(CharStream in) { + super(in); + } + + @Override + protected int correct(int currentOff) { + return currentOff + 2; + } + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestCharTokenizers.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestCharTokenizers.java new file mode 100644 index 0000000..ff6f961 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestCharTokenizers.java @@ -0,0 +1,222 @@ +package org.apache.lucene.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.io.Reader; +import java.io.StringReader; + +import org.apache.lucene.util.Version; + +/** + * Testcase for {@link CharTokenizer} subclasses + */ +public class TestCharTokenizers extends BaseTokenStreamTestCase { + + /* + * test to read surrogate pairs without loosing the pairing + * if the surrogate pair is at the border of the internal IO buffer + */ + public void testReadSupplementaryChars() throws IOException { + StringBuilder builder = new StringBuilder(); + // create random input + int num = 1024 + random.nextInt(1024); + num *= RANDOM_MULTIPLIER; + for (int i = 1; i < num; i++) { + builder.append("\ud801\udc1cabc"); + if((i % 10) == 0) + builder.append(" "); + } + // internal buffer size is 1024 make sure we have a surrogate pair right at the border + builder.insert(1023, "\ud801\udc1c"); + LowerCaseTokenizer tokenizer = new LowerCaseTokenizer( + TEST_VERSION_CURRENT, new StringReader(builder.toString())); + assertTokenStreamContents(tokenizer, builder.toString().toLowerCase().split(" ")); + } + + /* + * test to extend the buffer TermAttribute buffer internally. If the internal + * alg that extends the size of the char array only extends by 1 char and the + * next char to be filled in is a supplementary codepoint (using 2 chars) an + * index out of bound exception is triggered. + */ + public void testExtendCharBuffer() throws IOException { + for (int i = 0; i < 40; i++) { + StringBuilder builder = new StringBuilder(); + for (int j = 0; j < 1+i; j++) { + builder.append("a"); + } + builder.append("\ud801\udc1cabc"); + LowerCaseTokenizer tokenizer = new LowerCaseTokenizer( + TEST_VERSION_CURRENT, new StringReader(builder.toString())); + assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase()}); + } + } + + /* + * tests the max word length of 255 - tokenizer will split at the 255 char no matter what happens + */ + public void testMaxWordLength() throws IOException { + StringBuilder builder = new StringBuilder(); + + for (int i = 0; i < 255; i++) { + builder.append("A"); + } + LowerCaseTokenizer tokenizer = new LowerCaseTokenizer( + TEST_VERSION_CURRENT, new StringReader(builder.toString() + builder.toString())); + assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(), builder.toString().toLowerCase()}); + } + + /* + * tests the max word length of 255 with a surrogate pair at position 255 + */ + public void testMaxWordLengthWithSupplementary() throws IOException { + StringBuilder builder = new StringBuilder(); + + for (int i = 0; i < 254; i++) { + builder.append("A"); + } + builder.append("\ud801\udc1c"); + LowerCaseTokenizer tokenizer = new LowerCaseTokenizer( + TEST_VERSION_CURRENT, new StringReader(builder.toString() + builder.toString())); + assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(), builder.toString().toLowerCase()}); + } + + public void testLowerCaseTokenizer() throws IOException { + StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest"); + LowerCaseTokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, + reader); + assertTokenStreamContents(tokenizer, new String[] { "tokenizer", + "\ud801\udc44test" }); + } + + public void testLowerCaseTokenizerBWCompat() throws IOException { + StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest"); + LowerCaseTokenizer tokenizer = new LowerCaseTokenizer(Version.LUCENE_30, + reader); + assertTokenStreamContents(tokenizer, new String[] { "tokenizer", "test" }); + 
} + + public void testWhitespaceTokenizer() throws IOException { + StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest"); + WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, + reader); + assertTokenStreamContents(tokenizer, new String[] { "Tokenizer", + "\ud801\udc1ctest" }); + } + + public void testWhitespaceTokenizerBWCompat() throws IOException { + StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest"); + WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(Version.LUCENE_30, + reader); + assertTokenStreamContents(tokenizer, new String[] { "Tokenizer", + "\ud801\udc1ctest" }); + } + + public void testIsTokenCharCharInSubclass() { + new TestingCharTokenizer(Version.LUCENE_30, new StringReader("")); + try { + new TestingCharTokenizer(TEST_VERSION_CURRENT, new StringReader("")); + fail("version 3.1 is not permitted if char based method is implemented"); + } catch (IllegalArgumentException e) { + // expected + } + } + + public void testNormalizeCharInSubclass() { + new TestingCharTokenizerNormalize(Version.LUCENE_30, new StringReader("")); + try { + new TestingCharTokenizerNormalize(TEST_VERSION_CURRENT, + new StringReader("")); + fail("version 3.1 is not permitted if char based method is implemented"); + } catch (IllegalArgumentException e) { + // expected + } + } + + public void testNormalizeAndIsTokenCharCharInSubclass() { + new TestingCharTokenizerNormalizeIsTokenChar(Version.LUCENE_30, + new StringReader("")); + try { + new TestingCharTokenizerNormalizeIsTokenChar(TEST_VERSION_CURRENT, + new StringReader("")); + fail("version 3.1 is not permitted if char based method is implemented"); + } catch (IllegalArgumentException e) { + // expected + } + } + + static final class TestingCharTokenizer extends CharTokenizer { + public TestingCharTokenizer(Version matchVersion, Reader input) { + super(matchVersion, input); + } + + @Override + protected boolean isTokenChar(int c) { + return Character.isLetter(c); + } + + @Deprecated @Override + protected boolean isTokenChar(char c) { + return Character.isLetter(c); + } + } + + static final class TestingCharTokenizerNormalize extends CharTokenizer { + public TestingCharTokenizerNormalize(Version matchVersion, Reader input) { + super(matchVersion, input); + } + + @Deprecated @Override + protected char normalize(char c) { + return c; + } + + @Override + protected int normalize(int c) { + return c; + } + } + + static final class TestingCharTokenizerNormalizeIsTokenChar extends CharTokenizer { + public TestingCharTokenizerNormalizeIsTokenChar(Version matchVersion, + Reader input) { + super(matchVersion, input); + } + + @Deprecated @Override + protected char normalize(char c) { + return c; + } + + @Override + protected int normalize(int c) { + return c; + } + + @Override + protected boolean isTokenChar(int c) { + return Character.isLetter(c); + } + + @Deprecated @Override + protected boolean isTokenChar(char c) { + return Character.isLetter(c); + } + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestClassicAnalyzer.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestClassicAnalyzer.java new file mode 100644 index 0000000..1987e46 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestClassicAnalyzer.java @@ -0,0 +1,309 @@ +package org.apache.lucene.analysis; + +import org.apache.lucene.analysis.standard.ClassicAnalyzer; + +import org.apache.lucene.document.Document; +import 
org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermPositions; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.Version; + +import java.io.IOException; +import java.util.Arrays; + + +/** + * Copyright 2004 The Apache Software Foundation + *
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class TestClassicAnalyzer extends BaseTokenStreamTestCase { + + private Analyzer a = new ClassicAnalyzer(TEST_VERSION_CURRENT); + + public void testMaxTermLength() throws Exception { + ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT); + sa.setMaxTokenLength(5); + assertAnalyzesTo(sa, "ab cd toolong xy z", new String[]{"ab", "cd", "xy", "z"}); + } + + public void testMaxTermLength2() throws Exception { + ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT); + assertAnalyzesTo(sa, "ab cd toolong xy z", new String[]{"ab", "cd", "toolong", "xy", "z"}); + sa.setMaxTokenLength(5); + + assertAnalyzesTo(sa, "ab cd toolong xy z", new String[]{"ab", "cd", "xy", "z"}, new int[]{1, 1, 2, 1}); + } + + public void testMaxTermLength3() throws Exception { + char[] chars = new char[255]; + for(int i=0;i<255;i++) + chars[i] = 'a'; + String longTerm = new String(chars, 0, 255); + + assertAnalyzesTo(a, "ab cd " + longTerm + " xy z", new String[]{"ab", "cd", longTerm, "xy", "z"}); + assertAnalyzesTo(a, "ab cd " + longTerm + "a xy z", new String[]{"ab", "cd", "xy", "z"}); + } + + public void testAlphanumeric() throws Exception { + // alphanumeric tokens + assertAnalyzesTo(a, "B2B", new String[]{"b2b"}); + assertAnalyzesTo(a, "2B", new String[]{"2b"}); + } + + public void testUnderscores() throws Exception { + // underscores are delimiters, but not in email addresses (below) + assertAnalyzesTo(a, "word_having_underscore", new String[]{"word", "having", "underscore"}); + assertAnalyzesTo(a, "word_with_underscore_and_stopwords", new String[]{"word", "underscore", "stopwords"}); + } + + public void testDelimiters() throws Exception { + // other delimiters: "-", "/", "," + assertAnalyzesTo(a, "some-dashed-phrase", new String[]{"some", "dashed", "phrase"}); + assertAnalyzesTo(a, "dogs,chase,cats", new String[]{"dogs", "chase", "cats"}); + assertAnalyzesTo(a, "ac/dc", new String[]{"ac", "dc"}); + } + + public void testApostrophes() throws Exception { + // internal apostrophes: O'Reilly, you're, O'Reilly's + // possessives are actually removed by StardardFilter, not the tokenizer + assertAnalyzesTo(a, "O'Reilly", new String[]{"o'reilly"}); + assertAnalyzesTo(a, "you're", new String[]{"you're"}); + assertAnalyzesTo(a, "she's", new String[]{"she"}); + assertAnalyzesTo(a, "Jim's", new String[]{"jim"}); + assertAnalyzesTo(a, "don't", new String[]{"don't"}); + assertAnalyzesTo(a, "O'Reilly's", new String[]{"o'reilly"}); + } + + public void testTSADash() throws Exception { + // t and s had been stopwords in Lucene <= 2.0, which made it impossible + // to correctly search for these terms: + assertAnalyzesTo(a, "s-class", new String[]{"s", "class"}); + assertAnalyzesTo(a, "t-com", new String[]{"t", "com"}); + // 'a' is still a stopword: + assertAnalyzesTo(a, "a-class", new String[]{"class"}); + } + + public void testCompanyNames() throws Exception { + // company names + assertAnalyzesTo(a, "AT&T", new String[]{"at&t"}); + assertAnalyzesTo(a, "Excite@Home", new String[]{"excite@home"}); + } + + public void testLucene1140() throws Exception { + try { + ClassicAnalyzer analyzer = new ClassicAnalyzer(TEST_VERSION_CURRENT); + assertAnalyzesTo(analyzer, "www.nutch.org.", new 
String[]{ "www.nutch.org" }, new String[] { "<HOST>" });
+ } catch (NullPointerException e) {
+ fail("Should not throw an NPE and it did");
+ }
+
+ }
+
+ public void testDomainNames() throws Exception {
+ // Current lucene should not show the bug
+ ClassicAnalyzer a2 = new ClassicAnalyzer(TEST_VERSION_CURRENT);
+
+ // domain names
+ assertAnalyzesTo(a2, "www.nutch.org", new String[]{"www.nutch.org"});
+ //Notice the trailing . See https://issues.apache.org/jira/browse/LUCENE-1068.
+ // the following should be recognized as HOST:
+ assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "www.nutch.org" }, new String[] { "<HOST>" });
+
+ // 2.3 should show the bug
+ a2 = new ClassicAnalyzer(org.apache.lucene.util.Version.LUCENE_23);
+ assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "wwwnutchorg" }, new String[] { "<ALPHANUM>" });
+
+ // 2.4 should not show the bug
+ a2 = new ClassicAnalyzer(Version.LUCENE_24);
+ assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "www.nutch.org" }, new String[] { "<HOST>" });
+ }
+
+ public void testEMailAddresses() throws Exception {
+ // email addresses, possibly with underscores, periods, etc
+ assertAnalyzesTo(a, "test@example.com", new String[]{"test@example.com"});
+ assertAnalyzesTo(a, "first.lastname@example.com", new String[]{"first.lastname@example.com"});
+ assertAnalyzesTo(a, "first_lastname@example.com", new String[]{"first_lastname@example.com"});
+ }
+
+ public void testNumeric() throws Exception {
+ // floating point, serial, model numbers, ip addresses, etc.
+ // every other segment must have at least one digit
+ assertAnalyzesTo(a, "21.35", new String[]{"21.35"});
+ assertAnalyzesTo(a, "R2D2 C3PO", new String[]{"r2d2", "c3po"});
+ assertAnalyzesTo(a, "216.239.63.104", new String[]{"216.239.63.104"});
+ assertAnalyzesTo(a, "1-2-3", new String[]{"1-2-3"});
+ assertAnalyzesTo(a, "a1-b2-c3", new String[]{"a1-b2-c3"});
+ assertAnalyzesTo(a, "a1-b-c3", new String[]{"a1-b-c3"});
+ }
+
+ public void testTextWithNumbers() throws Exception {
+ // numbers
+ assertAnalyzesTo(a, "David has 5000 bones", new String[]{"david", "has", "5000", "bones"});
+ }
+
+ public void testVariousText() throws Exception {
+ // various
+ assertAnalyzesTo(a, "C embedded developers wanted", new String[]{"c", "embedded", "developers", "wanted"});
+ assertAnalyzesTo(a, "foo bar FOO BAR", new String[]{"foo", "bar", "foo", "bar"});
+ assertAnalyzesTo(a, "foo bar . FOO <> BAR", new String[]{"foo", "bar", "foo", "bar"});
+ assertAnalyzesTo(a, "\"QUOTED\" word", new String[]{"quoted", "word"});
+ }
+
+ public void testAcronyms() throws Exception {
+ // acronyms have their dots stripped
+ assertAnalyzesTo(a, "U.S.A.", new String[]{"usa"});
+ }
+
+ public void testCPlusPlusHash() throws Exception {
+ // It would be nice to change the grammar in StandardTokenizer.jj to make "C#" and "C++" end up as tokens.
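+ // Until then, ClassicTokenizer does not treat '+' or '#' as token
+ // characters, so both inputs below reduce to the single token "c".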
+ assertAnalyzesTo(a, "C++", new String[]{"c"});
+ assertAnalyzesTo(a, "C#", new String[]{"c"});
+ }
+
+ public void testKorean() throws Exception {
+ // Korean words
+ assertAnalyzesTo(a, "안녕하세요 한글입니다", new String[]{"안녕하세요", "한글입니다"});
+ }
+
+ // Compliance with the "old" JavaCC-based analyzer, see:
+ // https://issues.apache.org/jira/browse/LUCENE-966#action_12516752
+
+ public void testComplianceFileName() throws Exception {
+ assertAnalyzesTo(a, "2004.jpg",
+ new String[]{"2004.jpg"},
+ new String[]{"<HOST>"});
+ }
+
+ public void testComplianceNumericIncorrect() throws Exception {
+ assertAnalyzesTo(a, "62.46",
+ new String[]{"62.46"},
+ new String[]{"<HOST>"});
+ }
+
+ public void testComplianceNumericLong() throws Exception {
+ assertAnalyzesTo(a, "978-0-94045043-1",
+ new String[]{"978-0-94045043-1"},
+ new String[]{"<NUM>"});
+ }
+
+ public void testComplianceNumericFile() throws Exception {
+ assertAnalyzesTo(
+ a,
+ "78academyawards/rules/rule02.html",
+ new String[]{"78academyawards/rules/rule02.html"},
+ new String[]{"<NUM>"});
+ }
+
+ public void testComplianceNumericWithUnderscores() throws Exception {
+ assertAnalyzesTo(
+ a,
+ "2006-03-11t082958z_01_ban130523_rtridst_0_ozabs",
+ new String[]{"2006-03-11t082958z_01_ban130523_rtridst_0_ozabs"},
+ new String[]{"<NUM>"});
+ }
+
+ public void testComplianceNumericWithDash() throws Exception {
+ assertAnalyzesTo(a, "mid-20th", new String[]{"mid-20th"},
+ new String[]{"<NUM>"});
+ }
+
+ public void testComplianceManyTokens() throws Exception {
+ assertAnalyzesTo(
+ a,
+ "/money.cnn.com/magazines/fortune/fortune_archive/2007/03/19/8402357/index.htm "
+ + "safari-0-sheikh-zayed-grand-mosque.jpg",
+ new String[]{"money.cnn.com", "magazines", "fortune",
+ "fortune", "archive/2007/03/19/8402357", "index.htm",
+ "safari-0-sheikh", "zayed", "grand", "mosque.jpg"},
+ new String[]{"<HOST>", "<ALPHANUM>", "<ALPHANUM>",
+ "<ALPHANUM>", "<NUM>", "<HOST>", "<NUM>", "<ALPHANUM>",
+ "<ALPHANUM>", "<HOST>"});
+ }
+
+ public void testJava14BWCompatibility() throws Exception {
+ ClassicAnalyzer sa = new ClassicAnalyzer(Version.LUCENE_30);
+ assertAnalyzesTo(sa, "test\u02C6test", new String[] { "test", "test" });
+ }
+
+ /**
+ * Make sure we skip wicked long terms.
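+ * Terms longer than IndexWriter.MAX_TERM_LENGTH must be skipped at indexing
+ * time without disturbing the positions of the surrounding terms.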
+ */ + public void testWickedLongTerm() throws IOException { + RAMDirectory dir = new RAMDirectory(); + IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig( + TEST_VERSION_CURRENT, new ClassicAnalyzer(TEST_VERSION_CURRENT))); + + char[] chars = new char[IndexWriter.MAX_TERM_LENGTH]; + Arrays.fill(chars, 'x'); + Document doc = new Document(); + final String bigTerm = new String(chars); + + // This produces a too-long term: + String contents = "abc xyz x" + bigTerm + " another term"; + doc.add(new Field("content", contents, Field.Store.NO, Field.Index.ANALYZED)); + writer.addDocument(doc); + + // Make sure we can add another normal document + doc = new Document(); + doc.add(new Field("content", "abc bbb ccc", Field.Store.NO, Field.Index.ANALYZED)); + writer.addDocument(doc); + writer.close(); + + IndexReader reader = IndexReader.open(dir, true); + + // Make sure all terms < max size were indexed + assertEquals(2, reader.docFreq(new Term("content", "abc"))); + assertEquals(1, reader.docFreq(new Term("content", "bbb"))); + assertEquals(1, reader.docFreq(new Term("content", "term"))); + assertEquals(1, reader.docFreq(new Term("content", "another"))); + + // Make sure position is still incremented when + // massive term is skipped: + TermPositions tps = reader.termPositions(new Term("content", "another")); + assertTrue(tps.next()); + assertEquals(1, tps.freq()); + assertEquals(3, tps.nextPosition()); + + // Make sure the doc that has the massive term is in + // the index: + assertEquals("document with wicked long term should is not in the index!", 2, reader.numDocs()); + + reader.close(); + + // Make sure we can add a document with exactly the + // maximum length term, and search on that term: + doc = new Document(); + doc.add(new Field("content", bigTerm, Field.Store.NO, Field.Index.ANALYZED)); + ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT); + sa.setMaxTokenLength(100000); + writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, sa)); + writer.addDocument(doc); + writer.close(); + reader = IndexReader.open(dir, true); + assertEquals(1, reader.docFreq(new Term("content", bigTerm))); + reader.close(); + + dir.close(); + } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new ClassicAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java new file mode 100644 index 0000000..a7d2b95 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java @@ -0,0 +1,110 @@ +package org.apache.lucene.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import java.io.StringReader; + +public class TestISOLatin1AccentFilter extends BaseTokenStreamTestCase { + public void testU() throws Exception { + TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("Des mot clés À LA CHAÎNE À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï IJ Ð Ñ Ò Ó Ô Õ Ö Ø Œ Þ Ù Ú Û Ü Ý Ÿ à á â ã ä Ã¥ æ ç è é ê ë ì í î ï ij ð ñ ò ó ô õ ö ø œ ß þ ù ú û ü ý ÿ fi fl")); + ISOLatin1AccentFilter filter = new ISOLatin1AccentFilter(stream); + CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class); + assertTermEquals("Des", filter, termAtt); + assertTermEquals("mot", filter, termAtt); + assertTermEquals("cles", filter, termAtt); + assertTermEquals("A", filter, termAtt); + assertTermEquals("LA", filter, termAtt); + assertTermEquals("CHAINE", filter, termAtt); + assertTermEquals("A", filter, termAtt); + assertTermEquals("A", filter, termAtt); + assertTermEquals("A", filter, termAtt); + assertTermEquals("A", filter, termAtt); + assertTermEquals("A", filter, termAtt); + assertTermEquals("A", filter, termAtt); + assertTermEquals("AE", filter, termAtt); + assertTermEquals("C", filter, termAtt); + assertTermEquals("E", filter, termAtt); + assertTermEquals("E", filter, termAtt); + assertTermEquals("E", filter, termAtt); + assertTermEquals("E", filter, termAtt); + assertTermEquals("I", filter, termAtt); + assertTermEquals("I", filter, termAtt); + assertTermEquals("I", filter, termAtt); + assertTermEquals("I", filter, termAtt); + assertTermEquals("IJ", filter, termAtt); + assertTermEquals("D", filter, termAtt); + assertTermEquals("N", filter, termAtt); + assertTermEquals("O", filter, termAtt); + assertTermEquals("O", filter, termAtt); + assertTermEquals("O", filter, termAtt); + assertTermEquals("O", filter, termAtt); + assertTermEquals("O", filter, termAtt); + assertTermEquals("O", filter, termAtt); + assertTermEquals("OE", filter, termAtt); + assertTermEquals("TH", filter, termAtt); + assertTermEquals("U", filter, termAtt); + assertTermEquals("U", filter, termAtt); + assertTermEquals("U", filter, termAtt); + assertTermEquals("U", filter, termAtt); + assertTermEquals("Y", filter, termAtt); + assertTermEquals("Y", filter, termAtt); + assertTermEquals("a", filter, termAtt); + assertTermEquals("a", filter, termAtt); + assertTermEquals("a", filter, termAtt); + assertTermEquals("a", filter, termAtt); + assertTermEquals("a", filter, termAtt); + assertTermEquals("a", filter, termAtt); + assertTermEquals("ae", filter, termAtt); + assertTermEquals("c", filter, termAtt); + assertTermEquals("e", filter, termAtt); + assertTermEquals("e", filter, termAtt); + assertTermEquals("e", filter, termAtt); + assertTermEquals("e", filter, termAtt); + assertTermEquals("i", filter, termAtt); + assertTermEquals("i", filter, termAtt); + assertTermEquals("i", filter, termAtt); + assertTermEquals("i", filter, termAtt); + assertTermEquals("ij", filter, termAtt); + assertTermEquals("d", filter, termAtt); + assertTermEquals("n", filter, termAtt); + assertTermEquals("o", filter, termAtt); + 
assertTermEquals("o", filter, termAtt); + assertTermEquals("o", filter, termAtt); + assertTermEquals("o", filter, termAtt); + assertTermEquals("o", filter, termAtt); + assertTermEquals("o", filter, termAtt); + assertTermEquals("oe", filter, termAtt); + assertTermEquals("ss", filter, termAtt); + assertTermEquals("th", filter, termAtt); + assertTermEquals("u", filter, termAtt); + assertTermEquals("u", filter, termAtt); + assertTermEquals("u", filter, termAtt); + assertTermEquals("u", filter, termAtt); + assertTermEquals("y", filter, termAtt); + assertTermEquals("y", filter, termAtt); + assertTermEquals("fi", filter, termAtt); + assertTermEquals("fl", filter, termAtt); + assertFalse(filter.incrementToken()); + } + + void assertTermEquals(String expected, TokenStream stream, CharTermAttribute termAtt) throws Exception { + assertTrue(stream.incrementToken()); + assertEquals(expected, termAtt.toString()); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestKeywordAnalyzer.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestKeywordAnalyzer.java new file mode 100644 index 0000000..6fe328a --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestKeywordAnalyzer.java @@ -0,0 +1,103 @@ +package org.apache.lucene.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.StringReader; + +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermDocs; +import org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.store.RAMDirectory; + +public class TestKeywordAnalyzer extends BaseTokenStreamTestCase { + + private RAMDirectory directory; + private IndexSearcher searcher; + + @Override + public void setUp() throws Exception { + super.setUp(); + directory = new RAMDirectory(); + IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig( + TEST_VERSION_CURRENT, new SimpleAnalyzer( + TEST_VERSION_CURRENT))); + + Document doc = new Document(); + doc.add(new Field("partnum", "Q36", Field.Store.YES, Field.Index.NOT_ANALYZED)); + doc.add(new Field("description", "Illidium Space Modulator", Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(doc); + + writer.close(); + + searcher = new IndexSearcher(directory, true); + } + + public void testPerFieldAnalyzer() throws Exception { + PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer(TEST_VERSION_CURRENT)); + analyzer.addAnalyzer("partnum", new KeywordAnalyzer()); + + QueryParser queryParser = new QueryParser(TEST_VERSION_CURRENT, "description", analyzer); + Query query = queryParser.parse("partnum:Q36 AND SPACE"); + + ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals("Q36 kept as-is", + "+partnum:Q36 +space", query.toString("description")); + assertEquals("doc found!", 1, hits.length); + } + + public void testMutipleDocument() throws Exception { + RAMDirectory dir = new RAMDirectory(); + IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new KeywordAnalyzer())); + Document doc = new Document(); + doc.add(new Field("partnum", "Q36", Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new Field("partnum", "Q37", Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(doc); + writer.close(); + + IndexReader reader = IndexReader.open(dir, true); + TermDocs td = reader.termDocs(new Term("partnum", "Q36")); + assertTrue(td.next()); + td = reader.termDocs(new Term("partnum", "Q37")); + assertTrue(td.next()); + } + + // LUCENE-1441 + public void testOffsets() throws Exception { + TokenStream stream = new KeywordAnalyzer().tokenStream("field", new StringReader("abcd")); + OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class); + assertTrue(stream.incrementToken()); + assertEquals(0, offsetAtt.startOffset()); + assertEquals(4, offsetAtt.endOffset()); + } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new KeywordAnalyzer(), 10000*RANDOM_MULTIPLIER); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestKeywordMarkerFilter.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestKeywordMarkerFilter.java new file mode 100644 index 0000000..825cf8f --- /dev/null +++ 
b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestKeywordMarkerFilter.java @@ -0,0 +1,90 @@ +package org.apache.lucene.analysis; + +import java.io.IOException; +import java.io.StringReader; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Locale; +import java.util.Set; + +import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.junit.Test; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Testcase for {@link KeywordMarkerFilter} + */ +public class TestKeywordMarkerFilter extends BaseTokenStreamTestCase { + + @Test + public void testIncrementToken() throws IOException { + CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 5, true); + set.add("lucenefox"); + String[] output = new String[] { "the", "quick", "brown", "LuceneFox", + "jumps" }; + assertTokenStreamContents(new LowerCaseFilterMock( + new KeywordMarkerFilter(new MockTokenizer(new StringReader( + "The quIck browN LuceneFox Jumps"), MockTokenizer.WHITESPACE, false), set)), output); + Set jdkSet = new HashSet(); + jdkSet.add("LuceneFox"); + assertTokenStreamContents(new LowerCaseFilterMock( + new KeywordMarkerFilter(new MockTokenizer(new StringReader( + "The quIck browN LuceneFox Jumps"), MockTokenizer.WHITESPACE, false), jdkSet)), output); + Set set2 = set; + assertTokenStreamContents(new LowerCaseFilterMock( + new KeywordMarkerFilter(new MockTokenizer(new StringReader( + "The quIck browN LuceneFox Jumps"), MockTokenizer.WHITESPACE, false), set2)), output); + } + + // LUCENE-2901 + public void testComposition() throws Exception { + TokenStream ts = new LowerCaseFilterMock( + new KeywordMarkerFilter( + new KeywordMarkerFilter( + new MockTokenizer(new StringReader("Dogs Trees Birds Houses"), MockTokenizer.WHITESPACE, false), + new HashSet(Arrays.asList(new String[] { "Birds", "Houses" }))), + new HashSet(Arrays.asList(new String[] { "Dogs", "Trees" })))); + + assertTokenStreamContents(ts, new String[] { "Dogs", "Trees", "Birds", "Houses" }); + } + + public static final class LowerCaseFilterMock extends TokenFilter { + + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class); + + public LowerCaseFilterMock(TokenStream in) { + super(in); + } + + @Override + public boolean incrementToken() throws IOException { + if (input.incrementToken()) { + if (!keywordAttr.isKeyword()) { + final String term = termAtt.toString().toLowerCase(Locale.ENGLISH); + termAtt.setEmpty().append(term); + } + return true; + } + return false; + } + + } +} diff --git 
a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestLengthFilter.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestLengthFilter.java new file mode 100644 index 0000000..142819f --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestLengthFilter.java @@ -0,0 +1,44 @@ +package org.apache.lucene.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.StringReader; + +public class TestLengthFilter extends BaseTokenStreamTestCase { + + public void testFilterNoPosIncr() throws Exception { + TokenStream stream = new MockTokenizer( + new StringReader("short toolong evenmuchlongertext a ab toolong foo"), MockTokenizer.WHITESPACE, false); + LengthFilter filter = new LengthFilter(false, stream, 2, 6); + assertTokenStreamContents(filter, + new String[]{"short", "ab", "foo"}, + new int[]{1, 1, 1} + ); + } + + public void testFilterWithPosIncr() throws Exception { + TokenStream stream = new MockTokenizer( + new StringReader("short toolong evenmuchlongertext a ab toolong foo"), MockTokenizer.WHITESPACE, false); + LengthFilter filter = new LengthFilter(true, stream, 2, 6); + assertTokenStreamContents(filter, + new String[]{"short", "ab", "foo"}, + new int[]{1, 4, 2} + ); + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestMappingCharFilter.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestMappingCharFilter.java new file mode 100644 index 0000000..d5f10ef --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestMappingCharFilter.java @@ -0,0 +1,161 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.lucene.analysis; + +import java.io.StringReader; + +public class TestMappingCharFilter extends BaseTokenStreamTestCase { + + NormalizeCharMap normMap; + + @Override + public void setUp() throws Exception { + super.setUp(); + normMap = new NormalizeCharMap(); + + normMap.add( "aa", "a" ); + normMap.add( "bbb", "b" ); + normMap.add( "cccc", "cc" ); + + normMap.add( "h", "i" ); + normMap.add( "j", "jj" ); + normMap.add( "k", "kkk" ); + normMap.add( "ll", "llll" ); + + normMap.add( "empty", "" ); + } + + public void testReaderReset() throws Exception { + CharStream cs = new MappingCharFilter( normMap, new StringReader( "x" ) ); + char[] buf = new char[10]; + int len = cs.read(buf, 0, 10); + assertEquals( 1, len ); + assertEquals( 'x', buf[0]) ; + len = cs.read(buf, 0, 10); + assertEquals( -1, len ); + + // rewind + cs.reset(); + len = cs.read(buf, 0, 10); + assertEquals( 1, len ); + assertEquals( 'x', buf[0]) ; + } + + public void testNothingChange() throws Exception { + CharStream cs = new MappingCharFilter( normMap, new StringReader( "x" ) ); + TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false); + assertTokenStreamContents(ts, new String[]{"x"}, new int[]{0}, new int[]{1}); + } + + public void test1to1() throws Exception { + CharStream cs = new MappingCharFilter( normMap, new StringReader( "h" ) ); + TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false); + assertTokenStreamContents(ts, new String[]{"i"}, new int[]{0}, new int[]{1}); + } + + public void test1to2() throws Exception { + CharStream cs = new MappingCharFilter( normMap, new StringReader( "j" ) ); + TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false); + assertTokenStreamContents(ts, new String[]{"jj"}, new int[]{0}, new int[]{1}); + } + + public void test1to3() throws Exception { + CharStream cs = new MappingCharFilter( normMap, new StringReader( "k" ) ); + TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false); + assertTokenStreamContents(ts, new String[]{"kkk"}, new int[]{0}, new int[]{1}); + } + + public void test2to4() throws Exception { + CharStream cs = new MappingCharFilter( normMap, new StringReader( "ll" ) ); + TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false); + assertTokenStreamContents(ts, new String[]{"llll"}, new int[]{0}, new int[]{2}); + } + + public void test2to1() throws Exception { + CharStream cs = new MappingCharFilter( normMap, new StringReader( "aa" ) ); + TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false); + assertTokenStreamContents(ts, new String[]{"a"}, new int[]{0}, new int[]{2}); + } + + public void test3to1() throws Exception { + CharStream cs = new MappingCharFilter( normMap, new StringReader( "bbb" ) ); + TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false); + assertTokenStreamContents(ts, new String[]{"b"}, new int[]{0}, new int[]{3}); + } + + public void test4to2() throws Exception { + CharStream cs = new MappingCharFilter( normMap, new StringReader( "cccc" ) ); + TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false); + assertTokenStreamContents(ts, new String[]{"cc"}, new int[]{0}, new int[]{4}); + } + + public void test5to0() throws Exception { + CharStream cs = new MappingCharFilter( normMap, new StringReader( "empty" ) ); + TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false); + assertTokenStreamContents(ts, new String[0]); + } + + // + // 1111111111222 + // 01234567890123456789012 + //(in) h i j 
k ll cccc bbb aa + // + // 1111111111222 + // 01234567890123456789012 + //(out) i i jj kkk llll cc b a + // + // h, 0, 1 => i, 0, 1 + // i, 2, 3 => i, 2, 3 + // j, 4, 5 => jj, 4, 5 + // k, 6, 7 => kkk, 6, 7 + // ll, 8,10 => llll, 8,10 + // cccc,11,15 => cc,11,15 + // bbb,16,19 => b,16,19 + // aa,20,22 => a,20,22 + // + public void testTokenStream() throws Exception { + CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "h i j k ll cccc bbb aa" ) ) ); + TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false); + assertTokenStreamContents(ts, + new String[]{"i","i","jj","kkk","llll","cc","b","a"}, + new int[]{0,2,4,6,8,11,16,20}, + new int[]{1,3,5,7,10,15,19,22} + ); + } + + // + // + // 0123456789 + //(in) aaaa ll h + //(out-1) aa llll i + //(out-2) a llllllll i + // + // aaaa,0,4 => a,0,4 + // ll,5,7 => llllllll,5,7 + // h,8,9 => i,8,9 + public void testChained() throws Exception { + CharStream cs = new MappingCharFilter( normMap, + new MappingCharFilter( normMap, CharReader.get( new StringReader( "aaaa ll h" ) ) ) ); + TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false); + assertTokenStreamContents(ts, + new String[]{"a","llllllll","i"}, + new int[]{0,5,8}, + new int[]{4,7,9} + ); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java new file mode 100644 index 0000000..4715993 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java @@ -0,0 +1,90 @@ +package org.apache.lucene.analysis; + +import java.io.StringReader; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +public class TestMockAnalyzer extends BaseTokenStreamTestCase { + + /** Test a configuration that behaves a lot like WhitespaceAnalyzer */ + public void testWhitespace() throws Exception { + Analyzer a = new MockAnalyzer(random); + assertAnalyzesTo(a, "A bc defg hiJklmn opqrstuv wxy z ", + new String[] { "a", "bc", "defg", "hijklmn", "opqrstuv", "wxy", "z" }); + assertAnalyzesToReuse(a, "aba cadaba shazam", + new String[] { "aba", "cadaba", "shazam" }); + assertAnalyzesToReuse(a, "break on whitespace", + new String[] { "break", "on", "whitespace" }); + } + + /** Test a configuration that behaves a lot like SimpleAnalyzer */ + public void testSimple() throws Exception { + Analyzer a = new MockAnalyzer(random, MockTokenizer.SIMPLE, true); + assertAnalyzesTo(a, "a-bc123 defg+hijklmn567opqrstuv78wxy_z ", + new String[] { "a", "bc", "defg", "hijklmn", "opqrstuv", "wxy", "z" }); + assertAnalyzesToReuse(a, "aba4cadaba-Shazam", + new String[] { "aba", "cadaba", "shazam" }); + assertAnalyzesToReuse(a, "break+on/Letters", + new String[] { "break", "on", "letters" }); + } + + /** Test a configuration that behaves a lot like KeywordAnalyzer */ + public void testKeyword() throws Exception { + Analyzer a = new MockAnalyzer(random, MockTokenizer.KEYWORD, false); + assertAnalyzesTo(a, "a-bc123 defg+hijklmn567opqrstuv78wxy_z ", + new String[] { "a-bc123 defg+hijklmn567opqrstuv78wxy_z " }); + assertAnalyzesToReuse(a, "aba4cadaba-Shazam", + new String[] { "aba4cadaba-Shazam" }); + assertAnalyzesToReuse(a, "break+on/Nothing", + new String[] { "break+on/Nothing" }); + } + + /** Test a configuration that behaves a lot like StopAnalyzer */ + public void testStop() throws Exception { + Analyzer a = new MockAnalyzer(random, MockTokenizer.SIMPLE, true, (CharArraySet) StopAnalyzer.ENGLISH_STOP_WORDS_SET, true); + assertAnalyzesTo(a, "the quick brown a fox", + new String[] { "quick", "brown", "fox" }, + new int[] { 2, 1, 2 }); + + // disable positions + a = new MockAnalyzer(random, MockTokenizer.SIMPLE, true, (CharArraySet) StopAnalyzer.ENGLISH_STOP_WORDS_SET, false); + assertAnalyzesTo(a, "the quick brown a fox", + new String[] { "quick", "brown", "fox" }, + new int[] { 1, 1, 1 }); + } + + public void testLUCENE_3042() throws Exception { + String testString = "t"; + + Analyzer analyzer = new MockAnalyzer(random); + TokenStream stream = analyzer.reusableTokenStream("dummy", new StringReader(testString)); + stream.reset(); + while (stream.incrementToken()) { + // consume + } + stream.end(); + stream.close(); + + assertAnalyzesToReuse(analyzer, testString, new String[] { "t" }); + } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new MockAnalyzer(random), atLeast(1000)); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestNumericTokenStream.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestNumericTokenStream.java new file mode 100644 index 0000000..6571170 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestNumericTokenStream.java @@ -0,0 +1,73 @@ +package org.apache.lucene.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
* The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.NumericUtils;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+
+public class TestNumericTokenStream extends BaseTokenStreamTestCase {
+
+  static final long lvalue = 4573245871874382L;
+  static final int ivalue = 123456;
+
+  public void testLongStream() throws Exception {
+    final NumericTokenStream stream = new NumericTokenStream().setLongValue(lvalue);
+    // use getAttribute to test if attributes really exist; if not, an IAE will be thrown
+    final CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
+    final TypeAttribute typeAtt = stream.getAttribute(TypeAttribute.class);
+    for (int shift=0; shift<64; shift+=NumericUtils.PRECISION_STEP_DEFAULT) {
+      assertTrue("New token is available", stream.incrementToken());
+      assertEquals("Term is correctly encoded", NumericUtils.longToPrefixCoded(lvalue, shift), termAtt.toString());
+      assertEquals("Type correct", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type());
+    }
+    assertFalse("No more tokens available", stream.incrementToken());
+  }
+
+  public void testIntStream() throws Exception {
+    final NumericTokenStream stream = new NumericTokenStream().setIntValue(ivalue);
+    // use getAttribute to test if attributes really exist; if not, an IAE will be thrown
+    final CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
+    final TypeAttribute typeAtt = stream.getAttribute(TypeAttribute.class);
+    for (int shift=0; shift<32; shift+=NumericUtils.PRECISION_STEP_DEFAULT) {
+      assertTrue("New token is available", stream.incrementToken());
+      assertEquals("Term is correctly encoded", NumericUtils.intToPrefixCoded(ivalue, shift), termAtt.toString());
+      assertEquals("Type correct", (shift == 0) ?
NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type()); + } + assertFalse("No more tokens available", stream.incrementToken()); + } + + public void testNotInitialized() throws Exception { + final NumericTokenStream stream=new NumericTokenStream(); + + try { + stream.reset(); + fail("reset() should not succeed."); + } catch (IllegalStateException e) { + // pass + } + + try { + stream.incrementToken(); + fail("incrementToken() should not succeed."); + } catch (IllegalStateException e) { + // pass + } + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestPerFieldAnalzyerWrapper.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestPerFieldAnalzyerWrapper.java new file mode 100644 index 0000000..790e6e8 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestPerFieldAnalzyerWrapper.java @@ -0,0 +1,48 @@ +package org.apache.lucene.analysis; + +import java.io.StringReader; + +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class TestPerFieldAnalzyerWrapper extends BaseTokenStreamTestCase { + public void testPerField() throws Exception { + String text = "Qwerty"; + PerFieldAnalyzerWrapper analyzer = + new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT)); + analyzer.addAnalyzer("special", new SimpleAnalyzer(TEST_VERSION_CURRENT)); + + TokenStream tokenStream = analyzer.tokenStream("field", + new StringReader(text)); + CharTermAttribute termAtt = tokenStream.getAttribute(CharTermAttribute.class); + + assertTrue(tokenStream.incrementToken()); + assertEquals("WhitespaceAnalyzer does not lowercase", + "Qwerty", + termAtt.toString()); + + tokenStream = analyzer.tokenStream("special", + new StringReader(text)); + termAtt = tokenStream.getAttribute(CharTermAttribute.class); + assertTrue(tokenStream.incrementToken()); + assertEquals("SimpleAnalyzer lowercases", + "qwerty", + termAtt.toString()); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestPorterStemFilter.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestPorterStemFilter.java new file mode 100644 index 0000000..b0347c8 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestPorterStemFilter.java @@ -0,0 +1,65 @@ +package org.apache.lucene.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.io.Reader; +import java.io.StringReader; + +import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; + +import static org.apache.lucene.analysis.VocabularyAssert.*; + +/** + * Test the PorterStemFilter with Martin Porter's test data. + */ +public class TestPorterStemFilter extends BaseTokenStreamTestCase { + Analyzer a = new ReusableAnalyzerBase() { + @Override + protected TokenStreamComponents createComponents(String fieldName, + Reader reader) { + Tokenizer t = new MockTokenizer(reader, MockTokenizer.KEYWORD, false); + return new TokenStreamComponents(t, new PorterStemFilter(t)); + } + }; + + /** + * Run the stemmer against all strings in voc.txt + * The output should be the same as the string in output.txt + */ + public void testPorterStemFilter() throws Exception { + assertVocabulary(a, getDataFile("porterTestData.zip"), "voc.txt", "output.txt"); + } + + public void testWithKeywordAttribute() throws IOException { + CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true); + set.add("yourselves"); + Tokenizer tokenizer = new MockTokenizer(new StringReader("yourselves yours"), MockTokenizer.WHITESPACE, false); + TokenStream filter = new PorterStemFilter(new KeywordMarkerFilter(tokenizer, set)); + assertTokenStreamContents(filter, new String[] {"yourselves", "your"}); + } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, a, 10000*RANDOM_MULTIPLIER); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java new file mode 100644 index 0000000..8699cde --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java @@ -0,0 +1,224 @@ +package org.apache.lucene.analysis; + +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.analysis.standard.StandardTokenizer; + +import java.io.IOException; +import java.io.Reader; +import java.io.StringReader; +import java.util.Arrays; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class TestStandardAnalyzer extends BaseTokenStreamTestCase {
+
+  public void testHugeDoc() throws IOException {
+    StringBuilder sb = new StringBuilder();
+    char whitespace[] = new char[4094];
+    Arrays.fill(whitespace, ' ');
+    sb.append(whitespace);
+    sb.append("testing 1234");
+    String input = sb.toString();
+    StandardTokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
+    BaseTokenStreamTestCase.assertTokenStreamContents(tokenizer, new String[] { "testing", "1234" });
+  }
+
+  private Analyzer a = new ReusableAnalyzerBase() {
+    @Override
+    protected TokenStreamComponents createComponents
+      (String fieldName, Reader reader) {
+
+      Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
+      return new TokenStreamComponents(tokenizer);
+    }
+  };
+
+  public void testArmenian() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "Վիքիպեդիայի 13 միլիոն հոդվածները (4,600` հայերեն վիքիպեդիայում) գրվել են կամավորների կողմից ու համարյա բոլոր հոդվածները կարող է խմբագրել ցանկաց մարդ ով կարող է բացել Վիքիպեդիայի կայքը։",
+        new String[] { "Վիքիպեդիայի", "13", "միլիոն", "հոդվածները", "4,600", "հայերեն", "վիքիպեդիայում", "գրվել", "են", "կամավորների", "կողմից",
+        "ու", "համարյա", "բոլոր", "հոդվածները", "կարող", "է", "խմբագրել", "ցանկաց", "մարդ", "ով", "կարող", "է", "բացել", "Վիքիպեդիայի", "կայքը" } );
+  }
+
+  public void testAmharic() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "ዊኪፔድያ የባለ ብዙ ቋንቋ የተሟላ ትክክለኛና ነጻ መዝገበ ዕውቀት (ኢንሳይክሎፒዲያ) ነው። ማንኛውም",
+        new String[] { "ዊኪፔድያ", "የባለ", "ብዙ", "ቋንቋ", "የተሟላ", "ትክክለኛና", "ነጻ", "መዝገበ", "ዕውቀት", "ኢንሳይክሎፒዲያ", "ነው", "ማንኛውም" } );
+  }
+
+  public void testArabic() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "الفيلم الوثائقي الأول عن ويكيبيديا يسمى \"الحقيقة بالأرقام: قصة ويكيبيديا\" (بالإنجليزية: Truth in Numbers: The Wikipedia Story)، سيتم إطلاقه في 2008.",
+        new String[] { "الفيلم", "الوثائقي", "الأول", "عن", "ويكيبيديا", "يسمى", "الحقيقة", "بالأرقام", "قصة", "ويكيبيديا",
+        "بالإنجليزية", "Truth", "in", "Numbers", "The", "Wikipedia", "Story", "سيتم", "إطلاقه", "في", "2008" } );
+  }
+
+  public void testAramaic() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "ܘܝܩܝܦܕܝܐ (ܐܢܓܠܝܐ: Wikipedia) ܗܘ ܐܝܢܣܩܠܘܦܕܝܐ ܚܐܪܬܐ ܕܐܢܛܪܢܛ ܒܠܫܢ̈ܐ ܣܓܝܐ̈ܐ܂ ܫܡܗ ܐܬܐ ܡܢ ܡ̈ܠܬܐ ܕ\"ܘܝܩܝ\" ܘ\"ܐܝܢܣܩܠܘܦܕܝܐ\"܀",
+        new String[] { "ܘܝܩܝܦܕܝܐ", "ܐܢܓܠܝܐ", "Wikipedia", "ܗܘ", "ܐܝܢܣܩܠܘܦܕܝܐ", "ܚܐܪܬܐ", "ܕܐܢܛܪܢܛ", "ܒܠܫܢ̈ܐ", "ܣܓܝܐ̈ܐ", "ܫܡܗ",
+        "ܐܬܐ", "ܡܢ", "ܡ̈ܠܬܐ", "ܕ", "ܘܝܩܝ", "ܘ", "ܐܝܢܣܩܠܘܦܕܝܐ"});
+  }
+
+  public void testBengali() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "এই বিশ্বকোষ পরিচালনা করে উইকিমিডিয়া ফাউন্ডেশন (একটি অলাভজনক সংস্থা)। উইকিপিডিয়ার শুরু ১৫ জানুয়ারি, ২০০১ সালে। এখন পর্যন্ত ২০০টিরও বেশী ভাষায় উইকিপিডিয়া রয়েছে।",
+        new String[] { "এই", "বিশ্বকোষ", "পরিচালনা", "করে", "উইকিমিডিয়া", "ফাউন্ডেশন", "একটি", "অলাভজনক", "সংস্থা", "উইকিপিডিয়ার",
+        "শুরু", "১৫", "জানুয়ারি", "২০০১", "সালে", "এখন", "পর্যন্ত", "২০০টিরও", "বেশী", "ভাষায়", "উইকিপিডিয়া", "রয়েছে" });
+  }
+
+  public void testFarsi() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "ویکی پدیای انگلیسی در تاریخ ۲۵ دی ۱۳۷۹ به صورت مکملی برای دانشنامهٔ تخصصی نوپدیا نوشته شد.",
+        new String[] { "ویکی", "پدیای", "انگلیسی", "در", "تاریخ", "۲۵", "دی", "۱۳۷۹", "به", "صورت", "مکملی",
+        "برای", "دانشنامهٔ", "تخصصی", "نوپدیا", "نوشته", "شد" });
+  }
+
+  public void testGreek() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "Γράφεται σε συνεργασία από εθελοντές με το λογισμικό wiki, κάτι που σημαίνει ότι άρθρα μπορεί να προστεθούν ή να αλλάξουν από τον καθένα.",
+        new String[] { "Γράφεται", "σε", "συνεργασία", "από", "εθελοντές", "με", "το", "λογισμικό", "wiki", "κάτι", "που",
+        "σημαίνει", "ότι", "άρθρα", "μπορεί", "να", "προστεθούν", "ή", "να", "αλλάξουν", "από", "τον", "καθένα" });
+  }
+
+  public void testThai() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "การที่ได้ต้องแสดงว่างานดี. แล้วเธอจะไปไหน? ๑๒๓๔",
+        new String[] { "การที่ได้ต้องแสดงว่างานดี", "แล้วเธอจะไปไหน", "๑๒๓๔" });
+  }
+
+  public void testLao() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "ສາທາລະນະລັດ ປະຊາທິປະໄຕ ປະຊາຊົນລາວ",
+        new String[] { "ສາທາລະນະລັດ", "ປະຊາທິປະໄຕ", "ປະຊາຊົນລາວ" });
+  }
+
+  public void testTibetan() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "སྣོན་མཛོད་དང་ལས་འདིས་བོད་ཡིག་མི་ཉམས་གོང་འཕེལ་དུ་གཏོང་བར་ཧ་ཅང་དགེ་མཚན་མཆིས་སོ། །",
+        new String[] { "སྣོན", "མཛོད", "དང", "ལས", "འདིས", "བོད", "ཡིག",
+                       "མི", "ཉམས", "གོང", "འཕེལ", "དུ", "གཏོང", "བར",
+                       "ཧ", "ཅང", "དགེ", "མཚན", "མཆིས", "སོ" });
+  }
+
+  /*
+   * For Chinese, tokenize as char (these can later form bigrams or whatever)
+   */
+  public void testChinese() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "我是中国人。 1234 Tests ",
+        new String[] { "我", "是", "中", "国", "人", "1234", "Tests"});
+  }
+
+  public void testEmpty() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "", new String[] {});
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, ".", new String[] {});
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, " ", new String[] {});
+  }
+
+  /* test various jira issues this analyzer is related to */
+
+  public void testLUCENE1545() throws Exception {
+    /*
+     * Standard analyzer does not correctly tokenize combining character U+0364 COMBINING LATIN SMALL LETTER E.
+     * The word "moͤchte" is incorrectly tokenized into "mo" "chte", the combining character is lost.
+     * Expected result is only one token "moͤchte".
+ */ + BaseTokenStreamTestCase.assertAnalyzesTo(a, "moͤchte", new String[] { "moͤchte" }); + } + + /* Tests from StandardAnalyzer, just to show behavior is similar */ + public void testAlphanumericSA() throws Exception { + // alphanumeric tokens + BaseTokenStreamTestCase.assertAnalyzesTo(a, "B2B", new String[]{"B2B"}); + BaseTokenStreamTestCase.assertAnalyzesTo(a, "2B", new String[]{"2B"}); + } + + public void testDelimitersSA() throws Exception { + // other delimiters: "-", "/", "," + BaseTokenStreamTestCase.assertAnalyzesTo(a, "some-dashed-phrase", new String[]{"some", "dashed", "phrase"}); + BaseTokenStreamTestCase.assertAnalyzesTo(a, "dogs,chase,cats", new String[]{"dogs", "chase", "cats"}); + BaseTokenStreamTestCase.assertAnalyzesTo(a, "ac/dc", new String[]{"ac", "dc"}); + } + + public void testApostrophesSA() throws Exception { + // internal apostrophes: O'Reilly, you're, O'Reilly's + BaseTokenStreamTestCase.assertAnalyzesTo(a, "O'Reilly", new String[]{"O'Reilly"}); + BaseTokenStreamTestCase.assertAnalyzesTo(a, "you're", new String[]{"you're"}); + BaseTokenStreamTestCase.assertAnalyzesTo(a, "she's", new String[]{"she's"}); + BaseTokenStreamTestCase.assertAnalyzesTo(a, "Jim's", new String[]{"Jim's"}); + BaseTokenStreamTestCase.assertAnalyzesTo(a, "don't", new String[]{"don't"}); + BaseTokenStreamTestCase.assertAnalyzesTo(a, "O'Reilly's", new String[]{"O'Reilly's"}); + } + + public void testNumericSA() throws Exception { + // floating point, serial, model numbers, ip addresses, etc. + BaseTokenStreamTestCase.assertAnalyzesTo(a, "21.35", new String[]{"21.35"}); + BaseTokenStreamTestCase.assertAnalyzesTo(a, "R2D2 C3PO", new String[]{"R2D2", "C3PO"}); + BaseTokenStreamTestCase.assertAnalyzesTo(a, "216.239.63.104", new String[]{"216.239.63.104"}); + BaseTokenStreamTestCase.assertAnalyzesTo(a, "216.239.63.104", new String[]{"216.239.63.104"}); + } + + public void testTextWithNumbersSA() throws Exception { + // numbers + BaseTokenStreamTestCase.assertAnalyzesTo(a, "David has 5000 bones", new String[]{"David", "has", "5000", "bones"}); + } + + public void testVariousTextSA() throws Exception { + // various + BaseTokenStreamTestCase.assertAnalyzesTo(a, "C embedded developers wanted", new String[]{"C", "embedded", "developers", "wanted"}); + BaseTokenStreamTestCase.assertAnalyzesTo(a, "foo bar FOO BAR", new String[]{"foo", "bar", "FOO", "BAR"}); + BaseTokenStreamTestCase.assertAnalyzesTo(a, "foo bar . 
FOO <> BAR", new String[]{"foo", "bar", "FOO", "BAR"}); + BaseTokenStreamTestCase.assertAnalyzesTo(a, "\"QUOTED\" word", new String[]{"QUOTED", "word"}); + } + + public void testKoreanSA() throws Exception { + // Korean words + BaseTokenStreamTestCase.assertAnalyzesTo(a, "안녕하세요 한글입니다", new String[]{"안녕하세요", "한글입니다"}); + } + + public void testOffsets() throws Exception { + BaseTokenStreamTestCase.assertAnalyzesTo(a, "David has 5000 bones", + new String[] {"David", "has", "5000", "bones"}, + new int[] {0, 6, 10, 15}, + new int[] {5, 9, 14, 20}); + } + + public void testTypes() throws Exception { + BaseTokenStreamTestCase.assertAnalyzesTo(a, "David has 5000 bones", + new String[] {"David", "has", "5000", "bones"}, + new String[] { "", "", "", "" }); + } + + public void testUnicodeWordBreaks() throws Exception { + WordBreakTestUnicode_6_0_0 wordBreakTest = new WordBreakTestUnicode_6_0_0(); + wordBreakTest.test(a); + } + + public void testSupplementary() throws Exception { + BaseTokenStreamTestCase.assertAnalyzesTo(a, "𩬅艱鍟䇹愯瀛", + new String[] {"𩬅", "艱", "鍟", "䇹", "愯", "瀛"}, + new String[] { "", "", "", "", "", "" }); + } + + public void testKorean() throws Exception { + BaseTokenStreamTestCase.assertAnalyzesTo(a, "훈민정음", + new String[] { "훈민정음" }, + new String[] { "" }); + } + + public void testJapanese() throws Exception { + BaseTokenStreamTestCase.assertAnalyzesTo(a, "仮名遣い カタカナ", + new String[] { "ä»®", "名", "遣", "い", "カタカナ" }, + new String[] { "", "", "", "", "" }); + } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new StandardAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestStopAnalyzer.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestStopAnalyzer.java new file mode 100644 index 0000000..e3e0b76 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestStopAnalyzer.java @@ -0,0 +1,97 @@ +package org.apache.lucene.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.util.Version; + +import java.io.StringReader; +import java.io.IOException; +import java.util.Iterator; +import java.util.Set; +import java.util.HashSet; + +public class TestStopAnalyzer extends BaseTokenStreamTestCase { + + private StopAnalyzer stop = new StopAnalyzer(TEST_VERSION_CURRENT); + private Set inValidTokens = new HashSet(); + + @Override + public void setUp() throws Exception { + super.setUp(); + + Iterator it = StopAnalyzer.ENGLISH_STOP_WORDS_SET.iterator(); + while(it.hasNext()) { + inValidTokens.add(it.next()); + } + } + + public void testDefaults() throws IOException { + assertTrue(stop != null); + StringReader reader = new StringReader("This is a test of the english stop analyzer"); + TokenStream stream = stop.tokenStream("test", reader); + assertTrue(stream != null); + CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class); + + while (stream.incrementToken()) { + assertFalse(inValidTokens.contains(termAtt.toString())); + } + } + + public void testStopList() throws IOException { + Set stopWordsSet = new HashSet(); + stopWordsSet.add("good"); + stopWordsSet.add("test"); + stopWordsSet.add("analyzer"); + StopAnalyzer newStop = new StopAnalyzer(Version.LUCENE_24, stopWordsSet); + StringReader reader = new StringReader("This is a good test of the english stop analyzer"); + TokenStream stream = newStop.tokenStream("test", reader); + assertNotNull(stream); + CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class); + PositionIncrementAttribute posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class); + + while (stream.incrementToken()) { + String text = termAtt.toString(); + assertFalse(stopWordsSet.contains(text)); + assertEquals(1,posIncrAtt.getPositionIncrement()); // in 2.4 stop tokenizer does not apply increments. + } + } + + public void testStopListPositions() throws IOException { + Set stopWordsSet = new HashSet(); + stopWordsSet.add("good"); + stopWordsSet.add("test"); + stopWordsSet.add("analyzer"); + StopAnalyzer newStop = new StopAnalyzer(TEST_VERSION_CURRENT, stopWordsSet); + StringReader reader = new StringReader("This is a good test of the english stop analyzer with positions"); + int expectedIncr[] = { 1, 1, 1, 3, 1, 1, 1, 2, 1}; + TokenStream stream = newStop.tokenStream("test", reader); + assertNotNull(stream); + int i = 0; + CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class); + PositionIncrementAttribute posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class); + + while (stream.incrementToken()) { + String text = termAtt.toString(); + assertFalse(stopWordsSet.contains(text)); + assertEquals(expectedIncr[i++],posIncrAtt.getPositionIncrement()); + } + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestStopFilter.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestStopFilter.java new file mode 100644 index 0000000..2b5865e --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestStopFilter.java @@ -0,0 +1,131 @@ +package org.apache.lucene.analysis; + +/** + * Copyright 2005 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.util.English; +import org.apache.lucene.util.Version; + +import java.io.IOException; +import java.io.StringReader; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Set; +import java.util.HashSet; + +import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.analysis.TokenStream; + +public class TestStopFilter extends BaseTokenStreamTestCase { + + // other StopFilter functionality is already tested by TestStopAnalyzer + + public void testExactCase() throws IOException { + StringReader reader = new StringReader("Now is The Time"); + Set stopWords = new HashSet(Arrays.asList("is", "the", "Time")); + TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopWords, false); + assertTokenStreamContents(stream, new String[] { "Now", "The" }); + } + + public void testIgnoreCase() throws IOException { + StringReader reader = new StringReader("Now is The Time"); + Set stopWords = new HashSet(Arrays.asList( "is", "the", "Time" )); + TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopWords, true); + assertTokenStreamContents(stream, new String[] { "Now" }); + } + + public void testStopFilt() throws IOException { + StringReader reader = new StringReader("Now is The Time"); + String[] stopWords = new String[] { "is", "the", "Time" }; + Set stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords); + TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet); + assertTokenStreamContents(stream, new String[] { "Now", "The" }); + } + + /** + * Test Position increments applied by StopFilter with and without enabling this option. 
+ */
+  public void testStopPositions() throws IOException {
+    StringBuilder sb = new StringBuilder();
+    ArrayList<String> a = new ArrayList<String>();
+    for (int i=0; i<20; i++) {
+      String w = English.intToEnglish(i).trim();
+      sb.append(w).append(" ");
+      if (i%3 != 0) a.add(w);
+    }
+    log(sb.toString());
+    String stopWords[] = a.toArray(new String[0]);
+    for (int i=0; i<stopWords.length; i++) log("Stop: "+stopWords[i]);
+    Set<Object> stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords);
+    // with increments
+    StringReader reader = new StringReader(sb.toString());
+    StopFilter stpf = new StopFilter(Version.LUCENE_24, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet);
+    doTestStopPositions(stpf,true);
+    // without increments
+    reader = new StringReader(sb.toString());
+    stpf = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet);
+    doTestStopPositions(stpf,false);
+    // with increments, concatenating two stop filters
+    ArrayList<String> a0 = new ArrayList<String>();
+    ArrayList<String> a1 = new ArrayList<String>();
+    for (int i=0; i<a.size(); i++) {
+      if (i%2==0) {
+        a0.add(a.get(i));
+      } else {
+        a1.add(a.get(i));
+      }
+    }
+    String stopWords0[] = a0.toArray(new String[0]);
+    for (int i=0; i<stopWords0.length; i++) log("Stop0: "+stopWords0[i]);
+    String stopWords1[] = a1.toArray(new String[0]);
+    for (int i=0; i<stopWords1.length; i++) log("Stop1: "+stopWords1[i]);
+    Set<Object> stopSet0 = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords0);
+    Set<Object> stopSet1 = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords1);
+    reader = new StringReader(sb.toString());
+    StopFilter stpf0 = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), stopSet0); // first part of the set
+    stpf0.setEnablePositionIncrements(true);
+    StopFilter stpf01 = new StopFilter(TEST_VERSION_CURRENT, stpf0, stopSet1); // two stop filters concatenated!
+    doTestStopPositions(stpf01,true);
+  }
+
+  private void doTestStopPositions(StopFilter stpf, boolean enableIncrements) throws IOException {
+    log("---> test with enable-increments-"+(enableIncrements?"enabled":"disabled"));
+    stpf.setEnablePositionIncrements(enableIncrements);
+    CharTermAttribute termAtt = stpf.getAttribute(CharTermAttribute.class);
+    PositionIncrementAttribute posIncrAtt = stpf.getAttribute(PositionIncrementAttribute.class);
+    stpf.reset();
+    for (int i=0; i<20; i+=3) {
+      assertTrue(stpf.incrementToken());
+      log("Token "+i+": "+stpf);
+      String w = English.intToEnglish(i).trim();
+      assertEquals("expecting token "+i+" to be "+w,w,termAtt.toString());
+      assertEquals("all but first token must have position increment of 3",enableIncrements?(i==0?1:3):1,posIncrAtt.getPositionIncrement());
+    }
+    assertFalse(stpf.incrementToken());
+    stpf.end();
+    stpf.close();
+  }
+
+  // print debug info depending on VERBOSE
+  private static void log(String s) {
+    if (VERBOSE) {
+      System.out.println(s);
+    }
+  }
+}
diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestTeeSinkTokenFilter.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestTeeSinkTokenFilter.java
new file mode 100644
index 0000000..14b766e
--- /dev/null
+++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestTeeSinkTokenFilter.java
@@ -0,0 +1,227 @@
+package org.apache.lucene.analysis;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.standard.StandardFilter; +import org.apache.lucene.analysis.standard.StandardTokenizer; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.English; +import java.io.IOException; +import java.io.StringReader; + + +/** + * tests for the TestTeeSinkTokenFilter + */ +public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase { + protected StringBuilder buffer1; + protected StringBuilder buffer2; + protected String[] tokens1; + protected String[] tokens2; + + @Override + public void setUp() throws Exception { + super.setUp(); + tokens1 = new String[]{"The", "quick", "Burgundy", "Fox", "jumped", "over", "the", "lazy", "Red", "Dogs"}; + tokens2 = new String[]{"The", "Lazy", "Dogs", "should", "stay", "on", "the", "porch"}; + buffer1 = new StringBuilder(); + + for (int i = 0; i < tokens1.length; i++) { + buffer1.append(tokens1[i]).append(' '); + } + buffer2 = new StringBuilder(); + for (int i = 0; i < tokens2.length; i++) { + buffer2.append(tokens2[i]).append(' '); + } + } + + static final TeeSinkTokenFilter.SinkFilter theFilter = new TeeSinkTokenFilter.SinkFilter() { + @Override + public boolean accept(AttributeSource a) { + CharTermAttribute termAtt = a.getAttribute(CharTermAttribute.class); + return termAtt.toString().equalsIgnoreCase("The"); + } + }; + + static final TeeSinkTokenFilter.SinkFilter dogFilter = new TeeSinkTokenFilter.SinkFilter() { + @Override + public boolean accept(AttributeSource a) { + CharTermAttribute termAtt = a.getAttribute(CharTermAttribute.class); + return termAtt.toString().equalsIgnoreCase("Dogs"); + } + }; + + public void testGeneral() throws IOException { + final TeeSinkTokenFilter source = new TeeSinkTokenFilter(new MockTokenizer(new StringReader(buffer1.toString()), MockTokenizer.WHITESPACE, false)); + final TokenStream sink1 = source.newSinkTokenStream(); + final TokenStream sink2 = source.newSinkTokenStream(theFilter); + + source.addAttribute(CheckClearAttributesAttribute.class); + sink1.addAttribute(CheckClearAttributesAttribute.class); + sink2.addAttribute(CheckClearAttributesAttribute.class); + + assertTokenStreamContents(source, tokens1); + assertTokenStreamContents(sink1, tokens1); + assertTokenStreamContents(sink2, new String[]{"The", "the"}); + } + + public void testMultipleSources() throws Exception { + final TeeSinkTokenFilter tee1 = new TeeSinkTokenFilter(new MockTokenizer(new StringReader(buffer1.toString()), MockTokenizer.WHITESPACE, false)); + final TeeSinkTokenFilter.SinkTokenStream dogDetector = tee1.newSinkTokenStream(dogFilter); + final TeeSinkTokenFilter.SinkTokenStream theDetector = tee1.newSinkTokenStream(theFilter); + tee1.reset(); + final TokenStream source1 = new CachingTokenFilter(tee1); + + tee1.addAttribute(CheckClearAttributesAttribute.class); + dogDetector.addAttribute(CheckClearAttributesAttribute.class); + theDetector.addAttribute(CheckClearAttributesAttribute.class); + + final TeeSinkTokenFilter tee2 = new TeeSinkTokenFilter(new MockTokenizer(new StringReader(buffer2.toString()), MockTokenizer.WHITESPACE, false)); + tee2.addSinkTokenStream(dogDetector); + tee2.addSinkTokenStream(theDetector); + final TokenStream source2 = tee2; + + assertTokenStreamContents(source1, tokens1); + 
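// Note: draining source2 below runs the second tee, so the shared sinks
+    // (theDetector, dogDetector) accumulate matches from both buffers
+    // before the sink assertions that follow.
+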
assertTokenStreamContents(source2, tokens2); + + assertTokenStreamContents(theDetector, new String[]{"The", "the", "The", "the"}); + assertTokenStreamContents(dogDetector, new String[]{"Dogs", "Dogs"}); + + source1.reset(); + TokenStream lowerCasing = new LowerCaseFilter(TEST_VERSION_CURRENT, source1); + String[] lowerCaseTokens = new String[tokens1.length]; + for (int i = 0; i < tokens1.length; i++) + lowerCaseTokens[i] = tokens1[i].toLowerCase(); + assertTokenStreamContents(lowerCasing, lowerCaseTokens); + } + + /** + * Not an explicit test, just useful to print out some info on performance + * + * @throws Exception + */ + public void performance() throws Exception { + int[] tokCount = {100, 500, 1000, 2000, 5000, 10000}; + int[] modCounts = {1, 2, 5, 10, 20, 50, 100, 200, 500}; + for (int k = 0; k < tokCount.length; k++) { + StringBuilder buffer = new StringBuilder(); + System.out.println("-----Tokens: " + tokCount[k] + "-----"); + for (int i = 0; i < tokCount[k]; i++) { + buffer.append(English.intToEnglish(i).toUpperCase()).append(' '); + } + //make sure we produce the same tokens + TeeSinkTokenFilter teeStream = new TeeSinkTokenFilter(new StandardFilter(new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.toString())))); + TokenStream sink = teeStream.newSinkTokenStream(new ModuloSinkFilter(100)); + teeStream.consumeAllTokens(); + TokenStream stream = new ModuloTokenFilter(new StandardFilter(new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.toString()))), 100); + CharTermAttribute tfTok = stream.addAttribute(CharTermAttribute.class); + CharTermAttribute sinkTok = sink.addAttribute(CharTermAttribute.class); + for (int i=0; stream.incrementToken(); i++) { + assertTrue(sink.incrementToken()); + assertTrue(tfTok + " is not equal to " + sinkTok + " at token: " + i, tfTok.equals(sinkTok) == true); + } + + //simulate two fields, each being analyzed once, for 20 documents + for (int j = 0; j < modCounts.length; j++) { + int tfPos = 0; + long start = System.currentTimeMillis(); + for (int i = 0; i < 20; i++) { + stream = new StandardFilter(new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.toString()))); + PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class); + while (stream.incrementToken()) { + tfPos += posIncrAtt.getPositionIncrement(); + } + stream = new ModuloTokenFilter(new StandardFilter(new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.toString()))), modCounts[j]); + posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class); + while (stream.incrementToken()) { + tfPos += posIncrAtt.getPositionIncrement(); + } + } + long finish = System.currentTimeMillis(); + System.out.println("ModCount: " + modCounts[j] + " Two fields took " + (finish - start) + " ms"); + int sinkPos = 0; + //simulate one field with one sink + start = System.currentTimeMillis(); + for (int i = 0; i < 20; i++) { + teeStream = new TeeSinkTokenFilter(new StandardFilter(new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.toString())))); + sink = teeStream.newSinkTokenStream(new ModuloSinkFilter(modCounts[j])); + PositionIncrementAttribute posIncrAtt = teeStream.getAttribute(PositionIncrementAttribute.class); + while (teeStream.incrementToken()) { + sinkPos += posIncrAtt.getPositionIncrement(); + } + //System.out.println("Modulo--------"); + posIncrAtt = sink.getAttribute(PositionIncrementAttribute.class); + while (sink.incrementToken()) { + sinkPos += posIncrAtt.getPositionIncrement(); + } 
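+        // sinkPos has now accumulated position increments from both the tee
+        // pass and the sink replay for this round, mirroring tfPos above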
+      }
+      finish = System.currentTimeMillis();
+      System.out.println("ModCount: " + modCounts[j] + " Tee fields took " + (finish - start) + " ms");
+      assertTrue(sinkPos + " does not equal: " + tfPos, sinkPos == tfPos);
+
+    }
+    System.out.println("- End Tokens: " + tokCount[k] + "-----");
+  }
+
+ }
+
+
+ class ModuloTokenFilter extends TokenFilter {
+
+   int modCount;
+
+   ModuloTokenFilter(TokenStream input, int mc) {
+     super(input);
+     modCount = mc;
+   }
+
+   int count = 0;
+
+   // return only every modCount'th token
+   @Override
+   public boolean incrementToken() throws IOException {
+     boolean hasNext;
+     for (hasNext = input.incrementToken();
+          hasNext && count % modCount != 0;
+          hasNext = input.incrementToken()) {
+       count++;
+     }
+     count++;
+     return hasNext;
+   }
+ }
+
+ class ModuloSinkFilter extends TeeSinkTokenFilter.SinkFilter {
+   int count = 0;
+   int modCount;
+
+   ModuloSinkFilter(int mc) {
+     modCount = mc;
+   }
+
+   @Override
+   public boolean accept(AttributeSource a) {
+     boolean b = (a != null && count % modCount == 0);
+     count++;
+     return b;
+   }
+
+ }
+}
+
diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestToken.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestToken.java
new file mode 100644
index 0000000..caaaa29
--- /dev/null
+++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestToken.java
@@ -0,0 +1,273 @@
+package org.apache.lucene.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +import org.apache.lucene.index.Payload; +import org.apache.lucene.analysis.tokenattributes.*; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.Attribute; +import org.apache.lucene.util.AttributeImpl; +import org.apache.lucene.util._TestUtil; + +import java.io.StringReader; +import java.util.HashMap; + +public class TestToken extends LuceneTestCase { + + public void testCtor() throws Exception { + Token t = new Token(); + char[] content = "hello".toCharArray(); + t.setTermBuffer(content, 0, content.length); + assertNotSame(t.termBuffer(), content); + assertEquals(0, t.startOffset()); + assertEquals(0, t.endOffset()); + assertEquals("hello", t.term()); + assertEquals("word", t.type()); + assertEquals(0, t.getFlags()); + + t = new Token(6, 22); + t.setTermBuffer(content, 0, content.length); + assertEquals("hello", t.term()); + assertEquals("hello", t.toString()); + assertEquals(6, t.startOffset()); + assertEquals(22, t.endOffset()); + assertEquals("word", t.type()); + assertEquals(0, t.getFlags()); + + t = new Token(6, 22, 7); + t.setTermBuffer(content, 0, content.length); + assertEquals("hello", t.term()); + assertEquals("hello", t.toString()); + assertEquals(6, t.startOffset()); + assertEquals(22, t.endOffset()); + assertEquals("word", t.type()); + assertEquals(7, t.getFlags()); + + t = new Token(6, 22, "junk"); + t.setTermBuffer(content, 0, content.length); + assertEquals("hello", t.term()); + assertEquals("hello", t.toString()); + assertEquals(6, t.startOffset()); + assertEquals(22, t.endOffset()); + assertEquals("junk", t.type()); + assertEquals(0, t.getFlags()); + } + + public void testResize() { + Token t = new Token(); + char[] content = "hello".toCharArray(); + t.setTermBuffer(content, 0, content.length); + for (int i = 0; i < 2000; i++) + { + t.resizeTermBuffer(i); + assertTrue(i <= t.termBuffer().length); + assertEquals("hello", t.term()); + } + } + + public void testGrow() { + Token t = new Token(); + StringBuilder buf = new StringBuilder("ab"); + for (int i = 0; i < 20; i++) + { + char[] content = buf.toString().toCharArray(); + t.setTermBuffer(content, 0, content.length); + assertEquals(buf.length(), t.termLength()); + assertEquals(buf.toString(), t.term()); + buf.append(buf.toString()); + } + assertEquals(1048576, t.termLength()); + + // now as a string, first variant + t = new Token(); + buf = new StringBuilder("ab"); + for (int i = 0; i < 20; i++) + { + String content = buf.toString(); + t.setTermBuffer(content, 0, content.length()); + assertEquals(content.length(), t.termLength()); + assertEquals(content, t.term()); + buf.append(content); + } + assertEquals(1048576, t.termLength()); + + // now as a string, second variant + t = new Token(); + buf = new StringBuilder("ab"); + for (int i = 0; i < 20; i++) + { + String content = buf.toString(); + t.setTermBuffer(content); + assertEquals(content.length(), t.termLength()); + assertEquals(content, t.term()); + buf.append(content); + } + assertEquals(1048576, t.termLength()); + + // Test for slow growth to a long term + t = new Token(); + buf = new StringBuilder("a"); + for (int i = 0; i < 20000; i++) + { + String content = buf.toString(); + t.setTermBuffer(content); + assertEquals(content.length(), t.termLength()); + assertEquals(content, t.term()); + buf.append("a"); + } + assertEquals(20000, t.termLength()); + + // Test for slow growth to a long term + t = new Token(); + buf = new StringBuilder("a"); + for (int i = 0; i < 20000; i++) + { + String content = buf.toString(); + 
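// the term grows by one character per iteration here (slow growth),
+      // in contrast to the doubling loops above
+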
t.setTermBuffer(content); + assertEquals(content.length(), t.termLength()); + assertEquals(content, t.term()); + buf.append("a"); + } + assertEquals(20000, t.termLength()); + } + + public void testToString() throws Exception { + char[] b = {'a', 'l', 'o', 'h', 'a'}; + Token t = new Token("", 0, 5); + t.setTermBuffer(b, 0, 5); + assertEquals("aloha", t.toString()); + + t.setTermBuffer("hi there"); + assertEquals("hi there", t.toString()); + } + + public void testTermBufferEquals() throws Exception { + Token t1a = new Token(); + char[] content1a = "hello".toCharArray(); + t1a.setTermBuffer(content1a, 0, 5); + Token t1b = new Token(); + char[] content1b = "hello".toCharArray(); + t1b.setTermBuffer(content1b, 0, 5); + Token t2 = new Token(); + char[] content2 = "hello2".toCharArray(); + t2.setTermBuffer(content2, 0, 6); + assertTrue(t1a.equals(t1b)); + assertFalse(t1a.equals(t2)); + assertFalse(t2.equals(t1b)); + } + + public void testMixedStringArray() throws Exception { + Token t = new Token("hello", 0, 5); + assertEquals(t.termLength(), 5); + assertEquals(t.term(), "hello"); + t.setTermBuffer("hello2"); + assertEquals(t.termLength(), 6); + assertEquals(t.term(), "hello2"); + t.setTermBuffer("hello3".toCharArray(), 0, 6); + assertEquals(t.term(), "hello3"); + + char[] buffer = t.termBuffer(); + buffer[1] = 'o'; + assertEquals(t.term(), "hollo3"); + } + + public void testClone() throws Exception { + Token t = new Token(0, 5); + char[] content = "hello".toCharArray(); + t.setTermBuffer(content, 0, 5); + char[] buf = t.termBuffer(); + Token copy = (Token) TestSimpleAttributeImpls.assertCloneIsEqual(t); + assertEquals(t.term(), copy.term()); + assertNotSame(buf, copy.termBuffer()); + + Payload pl = new Payload(new byte[]{1,2,3,4}); + t.setPayload(pl); + copy = (Token) TestSimpleAttributeImpls.assertCloneIsEqual(t); + assertEquals(pl, copy.getPayload()); + assertNotSame(pl, copy.getPayload()); + } + + public void testCopyTo() throws Exception { + Token t = new Token(); + Token copy = (Token) TestSimpleAttributeImpls.assertCopyIsEqual(t); + assertEquals("", t.term()); + assertEquals("", copy.term()); + + t = new Token(0, 5); + char[] content = "hello".toCharArray(); + t.setTermBuffer(content, 0, 5); + char[] buf = t.termBuffer(); + copy = (Token) TestSimpleAttributeImpls.assertCopyIsEqual(t); + assertEquals(t.term(), copy.term()); + assertNotSame(buf, copy.termBuffer()); + + Payload pl = new Payload(new byte[]{1,2,3,4}); + t.setPayload(pl); + copy = (Token) TestSimpleAttributeImpls.assertCopyIsEqual(t); + assertEquals(pl, copy.getPayload()); + assertNotSame(pl, copy.getPayload()); + } + + public interface SenselessAttribute extends Attribute {} + + public static final class SenselessAttributeImpl extends AttributeImpl implements SenselessAttribute { + @Override + public void copyTo(AttributeImpl target) {} + @Override + public void clear() {} + @Override + public boolean equals(Object o) { return (o instanceof SenselessAttributeImpl); } + @Override + public int hashCode() { return 0; } + } + + public void testTokenAttributeFactory() throws Exception { + TokenStream ts = new WhitespaceTokenizer(Token.TOKEN_ATTRIBUTE_FACTORY, new StringReader("foo bar")); + + assertTrue("SenselessAttribute is not implemented by SenselessAttributeImpl", + ts.addAttribute(SenselessAttribute.class) instanceof SenselessAttributeImpl); + + assertTrue("CharTermAttribute is not implemented by Token", + ts.addAttribute(CharTermAttribute.class) instanceof Token); + assertTrue("OffsetAttribute is not implemented by Token", + 
ts.addAttribute(OffsetAttribute.class) instanceof Token); + assertTrue("FlagsAttribute is not implemented by Token", + ts.addAttribute(FlagsAttribute.class) instanceof Token); + assertTrue("PayloadAttribute is not implemented by Token", + ts.addAttribute(PayloadAttribute.class) instanceof Token); + assertTrue("PositionIncrementAttribute is not implemented by Token", + ts.addAttribute(PositionIncrementAttribute.class) instanceof Token); + assertTrue("TypeAttribute is not implemented by Token", + ts.addAttribute(TypeAttribute.class) instanceof Token); + } + + public void testAttributeReflection() throws Exception { + Token t = new Token("foobar", 6, 22, 8); + _TestUtil.assertAttributeReflection(t, + new HashMap() {{ + put(CharTermAttribute.class.getName() + "#term", "foobar"); + put(OffsetAttribute.class.getName() + "#startOffset", 6); + put(OffsetAttribute.class.getName() + "#endOffset", 22); + put(PositionIncrementAttribute.class.getName() + "#positionIncrement", 1); + put(PayloadAttribute.class.getName() + "#payload", null); + put(TypeAttribute.class.getName() + "#type", TypeAttribute.DEFAULT_TYPE); + put(FlagsAttribute.class.getName() + "#flags", 8); + }}); + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestUAX29URLEmailTokenizer.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestUAX29URLEmailTokenizer.java new file mode 100644 index 0000000..913770d --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/TestUAX29URLEmailTokenizer.java @@ -0,0 +1,420 @@ +package org.apache.lucene.analysis; + +import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer; +import org.apache.lucene.analysis.tokenattributes.TypeAttribute; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.Reader; +import java.io.StringReader; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase { + + public void testHugeDoc() throws IOException { + StringBuilder sb = new StringBuilder(); + char whitespace[] = new char[4094]; + Arrays.fill(whitespace, ' '); + sb.append(whitespace); + sb.append("testing 1234"); + String input = sb.toString(); + UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(new StringReader(input)); + BaseTokenStreamTestCase.assertTokenStreamContents(tokenizer, new String[] { "testing", "1234" }); + } + + private Analyzer a = new ReusableAnalyzerBase() { + @Override + protected TokenStreamComponents createComponents + (String fieldName, Reader reader) { + + Tokenizer tokenizer = new UAX29URLEmailTokenizer(reader); + return new TokenStreamComponents(tokenizer); + } + }; + + + /** Passes through tokens with type "<URL>" and blocks all other types. */ + private class URLFilter extends TokenFilter { + private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); + public URLFilter(TokenStream in) { + super(in); + } + @Override + public final boolean incrementToken() throws java.io.IOException { + boolean isTokenAvailable = false; + while (input.incrementToken()) { + if (typeAtt.type() == UAX29URLEmailTokenizer.URL_TYPE) { + isTokenAvailable = true; + break; + } + } + return isTokenAvailable; + } + } + + /** Passes through tokens with type "<EMAIL>" and blocks all other types. */ + private class EmailFilter extends TokenFilter { + private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); + public EmailFilter(TokenStream in) { + super(in); + } + @Override + public final boolean incrementToken() throws java.io.IOException { + boolean isTokenAvailable = false; + while (input.incrementToken()) { + if (typeAtt.type() == UAX29URLEmailTokenizer.EMAIL_TYPE) { + isTokenAvailable = true; + break; + } + } + return isTokenAvailable; + } + } + + private Analyzer urlAnalyzer = new ReusableAnalyzerBase() { + @Override + protected TokenStreamComponents createComponents(String fieldName, Reader reader) { + UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(reader); + tokenizer.setMaxTokenLength(Integer.MAX_VALUE); // Tokenize arbitrary length URLs + TokenFilter filter = new URLFilter(tokenizer); + return new TokenStreamComponents(tokenizer, filter); + } + }; + + private Analyzer emailAnalyzer = new ReusableAnalyzerBase() { + @Override + protected TokenStreamComponents createComponents(String fieldName, Reader reader) { + UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(reader); + TokenFilter filter = new EmailFilter(tokenizer); + return new TokenStreamComponents(tokenizer, filter); + } + }; + + + public void testArmenian() throws Exception { + BaseTokenStreamTestCase.assertAnalyzesTo(a, "Վիքիպեդիայի 13 միլիոն հոդվածները (4,600` հայերեն վիքիպեդիայում) գրվել են կամավորների կողմից ու համարյա բոլոր հոդվածները կարող է խմբագրել ցանկաց մարդ ով կարող է բացել Վիքիպեդիայի կայքը։", + new String[] { "Վիքիպեդիայի", "13", "միլիոն", "հոդվածները", "4,600", "հայերեն", "վիքիպեդիայում", "գրվել", "են", "կամավորների", "կողմից", + "ու", "համարյա", "բոլոր", "հոդվածները", "կարող", "է", "խմբագրել", "ցանկաց", "մարդ", "ով", "կարող", "է", "բացել", "Վիքիպեդիայի", "կայքը" } ); + } + + public void testAmharic() throws Exception { + BaseTokenStreamTestCase.assertAnalyzesTo(a, "ዊኪፔድያ የባለ ብዙ ቋንቋ የተሟላ ትክክለኛና ነጻ መዝገበ ዕውቀት (ኢንሳይክሎፒዲያ) ነው። ማንኛውም", + new String[] { "ዊኪፔድያ", "የባለ", "ብዙ", "ቋንቋ", "የተሟላ", "ትክክለኛና", "ነጻ", "መዝገበ", "ዕውቀት", "ኢንሳይክሎፒዲያ", "ነው", "ማንኛውም" } ); + } + 
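+ /*
+  * [Editor's illustrative sketch -- not part of the Lucene 3.4.0 sources.]
+  * The urlAnalyzer/emailAnalyzer pattern above -- a UAX29URLEmailTokenizer wrapped in a
+  * type-based TokenFilter -- can also be consumed directly as a plain TokenStream.
+  * A minimal sketch, assuming lucene-core 3.4.0 on the classpath, the URLFilter defined
+  * above, and an import of org.apache.lucene.analysis.tokenattributes.CharTermAttribute:
+  *
+  *   TokenStream ts = new URLFilter(new UAX29URLEmailTokenizer(
+  *       new StringReader("docs at http://lucene.apache.org or general@lucene.apache.org")));
+  *   CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
+  *   ts.reset();
+  *   while (ts.incrementToken()) {
+  *     System.out.println(term.toString()); // prints only the URL; the email token is blocked
+  *   }
+  *   ts.end();
+  *   ts.close();
+  */
+ 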
+ public void testArabic() throws Exception { + BaseTokenStreamTestCase.assertAnalyzesTo(a, "الفيلم الوثائقي الأول عن ويكيبيديا يسمى \"الحقيقة بالأرقام: قصة ويكيبيديا\" (بالإنجليزية: Truth in Numbers: The Wikipedia Story)، سيتم إطلاقه في 2008.", + new String[] { "الفيلم", "الوثائقي", "الأول", "عن", "ويكيبيديا", "يسمى", "الحقيقة", "بالأرقام", "قصة", "ويكيبيديا", + "بالإنجليزية", "Truth", "in", "Numbers", "The", "Wikipedia", "Story", "سيتم", "إطلاقه", "في", "2008" } ); + } + + public void testAramaic() throws Exception { + BaseTokenStreamTestCase.assertAnalyzesTo(a, "ܘܝܩܝܦܕܝܐ (ܐܢܓܠܝܐ: Wikipedia) ܗܘ ܐܝܢܣܩܠܘܦܕܝܐ ܚܐܪܬܐ ܕܐܢܛܪܢܛ ܒܠܫܢ̈ܐ ܣܓܝܐ̈ܐ܂ ܫܡܗ ܐܬܐ ܡܢ ܡ̈ܠܬܐ ܕ\"ܘܝܩܝ\" ܘ\"ܐܝܢܣܩܠܘܦܕܝܐ\"܀", + new String[] { "ܘܝܩܝܦܕܝܐ", "ܐܢܓܠܝܐ", "Wikipedia", "ܗܘ", "ܐܝܢܣܩܠܘܦܕܝܐ", "ܚܐܪܬܐ", "ܕܐܢܛܪܢܛ", "ܒܠܫܢ̈ܐ", "ܣܓܝܐ̈ܐ", "ܫܡܗ", + "ܐܬܐ", "ܡܢ", "ܡ̈ܠܬܐ", "ܕ", "ܘܝܩܝ", "ܘ", "ܐܝܢܣܩܠܘܦܕܝܐ"}); + } + + public void testBengali() throws Exception { + BaseTokenStreamTestCase.assertAnalyzesTo(a, "এই বিশ্বকোষ পরিচালনা করে উইকিমিডিয়া ফাউন্ডেশন (একটি অলাভজনক সংস্থা)। উইকিপিডিয়ার শুরু ১৫ জানুয়ারি, ২০০১ সালে। এখন পর্যন্ত ২০০টিরও বেশী ভাষায় উইকিপিডিয়া রয়েছে।", + new String[] { "এই", "বিশ্বকোষ", "পরিচালনা", "করে", "উইকিমিডিয়া", "ফাউন্ডেশন", "একটি", "অলাভজনক", "সংস্থা", "উইকিপিডিয়ার", + "শুরু", "১৫", "জানুয়ারি", "২০০১", "সালে", "এখন", "পর্যন্ত", "২০০টিরও", "বেশী", "ভাষায়", "উইকিপিডিয়া", "রয়েছে" }); + } + + public void testFarsi() throws Exception { + BaseTokenStreamTestCase.assertAnalyzesTo(a, "ویکی پدیای انگلیسی در تاریخ ۲۵ دی ۱۳۷۹ به صورت مکملی برای دانشنامهٔ تخصصی نوپدیا نوشته شد.", + new String[] { "ویکی", "پدیای", "انگلیسی", "در", "تاریخ", "۲۵", "دی", "۱۳۷۹", "به", "صورت", "مکملی", + "برای", "دانشنامهٔ", "تخصصی", "نوپدیا", "نوشته", "شد" }); + } + + public void testGreek() throws Exception { + BaseTokenStreamTestCase.assertAnalyzesTo(a, "Γράφεται σε συνεργασία από εθελοντές με το λογισμικό wiki, κάτι που σημαίνει ότι άρθρα μπορεί να προστεθούν ή να αλλάξουν από τον καθένα.", + new String[] { "Γράφεται", "σε", "συνεργασία", "από", "εθελοντές", "με", "το", "λογισμικό", "wiki", "κάτι", "που", + "σημαίνει", "ότι", "άρθρα", "μπορεί", "να", "προστεθούν", "ή", "να", "αλλάξουν", "από", "τον", "καθένα" }); + } + + public void testThai() throws Exception { + BaseTokenStreamTestCase.assertAnalyzesTo(a, "การที่ได้ต้องแสดงว่างานดี. แล้วเธอจะไปไหน? 
๑๒๓๔", + new String[] { "การที่ได้ต้องแสดงว่างานดี", "แล้วเธอจะไปไหน", "๑๒๓๔" }); + } + + public void testLao() throws Exception { + BaseTokenStreamTestCase.assertAnalyzesTo(a, "ສາທາລະນະລັດ ປະຊາທິປະໄຕ ປະຊາຊົນລາວ", + new String[] { "ສາທາລະນະລັດ", "ປະຊາທິປະໄຕ", "ປະຊາຊົນລາວ" }); + } + + public void testTibetan() throws Exception { + BaseTokenStreamTestCase.assertAnalyzesTo(a, "སྣོན་མཛོད་དང་ལས་འདིས་བོད་ཡིག་མི་ཉམས་གོང་འཕེལ་དུ་གཏོང་བར་ཧ་ཅང་དགེ་མཚན་མཆིས་སོ། །", + new String[] { "སྣོན", "མཛོད", "དང", "ལས", "འདིས", "བོད", "ཡིག", + "མི", "ཉམས", "གོང", "འཕེལ", "དུ", "གཏོང", "བར", + "ཧ", "ཅང", "དགེ", "མཚན", "མཆིས", "སོ" }); + } + + /* + * For Chinese, tokenize as char (these can later form bigrams or whatever) + */ + public void testChinese() throws Exception { + BaseTokenStreamTestCase.assertAnalyzesTo(a, "我是中国人。 1234 Tests ", + new String[] { "我", "是", "中", "国", "人", "1234", "Tests"}); + } + + public void testEmpty() throws Exception { + BaseTokenStreamTestCase.assertAnalyzesTo(a, "", new String[] {}); + BaseTokenStreamTestCase.assertAnalyzesTo(a, ".", new String[] {}); + BaseTokenStreamTestCase.assertAnalyzesTo(a, " ", new String[] {}); + } + + /* test various jira issues this analyzer is related to */ + + public void testLUCENE1545() throws Exception { + /* + * Standard analyzer does not correctly tokenize combining character U+0364 COMBINING LATIN SMALL LETTER E. + * The word "moͤchte" is incorrectly tokenized into "mo" "chte", the combining character is lost. + * Expected result is only one token "moͤchte". + */ + BaseTokenStreamTestCase.assertAnalyzesTo(a, "moͤchte", new String[] { "moͤchte" }); + } + + /* Tests from StandardAnalyzer, just to show behavior is similar */ + public void testAlphanumericSA() throws Exception { + // alphanumeric tokens + BaseTokenStreamTestCase.assertAnalyzesTo(a, "B2B", new String[]{"B2B"}); + BaseTokenStreamTestCase.assertAnalyzesTo(a, "2B", new String[]{"2B"}); + } + + public void testDelimitersSA() throws Exception { + // other delimiters: "-", "/", "," + BaseTokenStreamTestCase.assertAnalyzesTo(a, "some-dashed-phrase", new String[]{"some", "dashed", "phrase"}); + BaseTokenStreamTestCase.assertAnalyzesTo(a, "dogs,chase,cats", new String[]{"dogs", "chase", "cats"}); + BaseTokenStreamTestCase.assertAnalyzesTo(a, "ac/dc", new String[]{"ac", "dc"}); + } + + public void testApostrophesSA() throws Exception { + // internal apostrophes: O'Reilly, you're, O'Reilly's + BaseTokenStreamTestCase.assertAnalyzesTo(a, "O'Reilly", new String[]{"O'Reilly"}); + BaseTokenStreamTestCase.assertAnalyzesTo(a, "you're", new String[]{"you're"}); + BaseTokenStreamTestCase.assertAnalyzesTo(a, "she's", new String[]{"she's"}); + BaseTokenStreamTestCase.assertAnalyzesTo(a, "Jim's", new String[]{"Jim's"}); + BaseTokenStreamTestCase.assertAnalyzesTo(a, "don't", new String[]{"don't"}); + BaseTokenStreamTestCase.assertAnalyzesTo(a, "O'Reilly's", new String[]{"O'Reilly's"}); + } + + public void testNumericSA() throws Exception { + // floating point, serial, model numbers, ip addresses, etc. 
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "21.35", new String[]{"21.35"}); + BaseTokenStreamTestCase.assertAnalyzesTo(a, "R2D2 C3PO", new String[]{"R2D2", "C3PO"}); + BaseTokenStreamTestCase.assertAnalyzesTo(a, "216.239.63.104", new String[]{"216.239.63.104"}); + } + + public void testTextWithNumbersSA() throws Exception { + // numbers + BaseTokenStreamTestCase.assertAnalyzesTo(a, "David has 5000 bones", new String[]{"David", "has", "5000", "bones"}); + } + + public void testVariousTextSA() throws Exception { + // various + BaseTokenStreamTestCase.assertAnalyzesTo(a, "C embedded developers wanted", new String[]{"C", "embedded", "developers", "wanted"}); + BaseTokenStreamTestCase.assertAnalyzesTo(a, "foo bar FOO BAR", new String[]{"foo", "bar", "FOO", "BAR"}); + BaseTokenStreamTestCase.assertAnalyzesTo(a, "foo bar . FOO <> BAR", new String[]{"foo", "bar", "FOO", "BAR"}); + BaseTokenStreamTestCase.assertAnalyzesTo(a, "\"QUOTED\" word", new String[]{"QUOTED", "word"}); + } + + public void testKoreanSA() throws Exception { + // Korean words + BaseTokenStreamTestCase.assertAnalyzesTo(a, "안녕하세요 한글입니다", new String[]{"안녕하세요", "한글입니다"}); + } + + public void testOffsets() throws Exception { + BaseTokenStreamTestCase.assertAnalyzesTo(a, "David has 5000 bones", + new String[] {"David", "has", "5000", "bones"}, + new int[] {0, 6, 10, 15}, + new int[] {5, 9, 14, 20}); + } + + public void testTypes() throws Exception { + BaseTokenStreamTestCase.assertAnalyzesTo(a, "David has 5000 bones", + new String[] {"David", "has", "5000", "bones"}, + new String[] { "<ALPHANUM>", "<ALPHANUM>", "<NUM>", "<ALPHANUM>" }); + } + + public void testWikiURLs() throws Exception { + Reader reader = null; + String luceneResourcesWikiPage; + try { + reader = new InputStreamReader(getClass().getResourceAsStream + ("LuceneResourcesWikiPage.html"), "UTF-8"); + StringBuilder builder = new StringBuilder(); + char[] buffer = new char[1024]; + int numCharsRead; + while (-1 != (numCharsRead = reader.read(buffer))) { + builder.append(buffer, 0, numCharsRead); + } + luceneResourcesWikiPage = builder.toString(); + } finally { + if (null != reader) { + reader.close(); + } + } + assertTrue(null != luceneResourcesWikiPage + && luceneResourcesWikiPage.length() > 0); + BufferedReader bufferedReader = null; + String[] urls; + try { + List<String> urlList = new ArrayList<String>(); + bufferedReader = new BufferedReader(new InputStreamReader + (getClass().getResourceAsStream("LuceneResourcesWikiPageURLs.txt"), "UTF-8")); + String line; + while (null != (line = bufferedReader.readLine())) { + line = line.trim(); + if (line.length() > 0) { + urlList.add(line); + } + } + urls = urlList.toArray(new String[urlList.size()]); + } finally { + if (null != bufferedReader) { + bufferedReader.close(); + } + } + assertTrue(null != urls && urls.length > 0); + BaseTokenStreamTestCase.assertAnalyzesTo + (urlAnalyzer, luceneResourcesWikiPage, urls); + } + + public void testEmails() throws Exception { + Reader reader = null; + String randomTextWithEmails; + try { + reader = new InputStreamReader(getClass().getResourceAsStream + ("random.text.with.email.addresses.txt"), "UTF-8"); + StringBuilder builder = new StringBuilder(); + char[] buffer = new char[1024]; + int numCharsRead; + while (-1 != (numCharsRead = reader.read(buffer))) { + builder.append(buffer, 0, numCharsRead); + } + randomTextWithEmails = builder.toString(); + } finally { + if (null != reader) { + reader.close(); + } + } + assertTrue(null != randomTextWithEmails + && randomTextWithEmails.length() > 0); + BufferedReader bufferedReader = null; + 
String[] emails; + try { + List<String> emailList = new ArrayList<String>(); + bufferedReader = new BufferedReader(new InputStreamReader + (getClass().getResourceAsStream + ("email.addresses.from.random.text.with.email.addresses.txt"), "UTF-8")); + String line; + while (null != (line = bufferedReader.readLine())) { + line = line.trim(); + if (line.length() > 0) { + emailList.add(line); + } + } + emails = emailList.toArray(new String[emailList.size()]); + } finally { + if (null != bufferedReader) { + bufferedReader.close(); + } + } + assertTrue(null != emails && emails.length > 0); + BaseTokenStreamTestCase.assertAnalyzesTo + (emailAnalyzer, randomTextWithEmails, emails); + } + + public void testURLs() throws Exception { + Reader reader = null; + String randomTextWithURLs; + try { + reader = new InputStreamReader(getClass().getResourceAsStream + ("random.text.with.urls.txt"), "UTF-8"); + StringBuilder builder = new StringBuilder(); + char[] buffer = new char[1024]; + int numCharsRead; + while (-1 != (numCharsRead = reader.read(buffer))) { + builder.append(buffer, 0, numCharsRead); + } + randomTextWithURLs = builder.toString(); + } finally { + if (null != reader) { + reader.close(); + } + } + assertTrue(null != randomTextWithURLs + && randomTextWithURLs.length() > 0); + BufferedReader bufferedReader = null; + String[] urls; + try { + List<String> urlList = new ArrayList<String>(); + bufferedReader = new BufferedReader(new InputStreamReader + (getClass().getResourceAsStream + ("urls.from.random.text.with.urls.txt"), "UTF-8")); + String line; + while (null != (line = bufferedReader.readLine())) { + line = line.trim(); + if (line.length() > 0) { + urlList.add(line); + } + } + urls = urlList.toArray(new String[urlList.size()]); + } finally { + if (null != bufferedReader) { + bufferedReader.close(); + } + } + assertTrue(null != urls && urls.length > 0); + BaseTokenStreamTestCase.assertAnalyzesTo + (urlAnalyzer, randomTextWithURLs, urls); + } + + public void testUnicodeWordBreaks() throws Exception { + WordBreakTestUnicode_6_0_0 wordBreakTest = new WordBreakTestUnicode_6_0_0(); + wordBreakTest.test(a); + } + + public void testSupplementary() throws Exception { + BaseTokenStreamTestCase.assertAnalyzesTo(a, "𩬅艱鍟䇹愯瀛", + new String[] {"𩬅", "艱", "鍟", "䇹", "愯", "瀛"}, + new String[] { "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>" }); + } + + public void testKorean() throws Exception { + BaseTokenStreamTestCase.assertAnalyzesTo(a, "훈민정음", + new String[] { "훈민정음" }, + new String[] { "<HANGUL>" }); + } + + public void testJapanese() throws Exception { + BaseTokenStreamTestCase.assertAnalyzesTo(a, "仮名遣い カタカナ", + new String[] { "仮", "名", "遣", "い", "カタカナ" }, + new String[] { "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<HIRAGANA>", "<KATAKANA>" }); + } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, a, 10000*RANDOM_MULTIPLIER); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/VocabularyAssert.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/VocabularyAssert.java new file mode 100644 index 0000000..820455a --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/VocabularyAssert.java @@ -0,0 +1,82 @@ +package org.apache.lucene.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.zip.ZipFile; + +import org.apache.lucene.analysis.Analyzer; +import org.junit.Assert; + +/** Utility class for doing vocabulary-based stemming tests */ +public class VocabularyAssert { + /** Run a vocabulary test against two data files. */ + public static void assertVocabulary(Analyzer a, InputStream voc, InputStream out) + throws IOException { + BufferedReader vocReader = new BufferedReader( + new InputStreamReader(voc, "UTF-8")); + BufferedReader outputReader = new BufferedReader( + new InputStreamReader(out, "UTF-8")); + String inputWord = null; + while ((inputWord = vocReader.readLine()) != null) { + String expectedWord = outputReader.readLine(); + Assert.assertNotNull(expectedWord); + BaseTokenStreamTestCase.checkOneTermReuse(a, inputWord, expectedWord); + } + } + + /** Run a vocabulary test against one file: tab separated. */ + public static void assertVocabulary(Analyzer a, InputStream vocOut) + throws IOException { + BufferedReader vocReader = new BufferedReader( + new InputStreamReader(vocOut, "UTF-8")); + String inputLine = null; + while ((inputLine = vocReader.readLine()) != null) { + if (inputLine.startsWith("#") || inputLine.trim().length() == 0) + continue; /* comment */ + String words[] = inputLine.split("\t"); + BaseTokenStreamTestCase.checkOneTermReuse(a, words[0], words[1]); + } + } + + /** Run a vocabulary test against two data files inside a zip file */ + public static void assertVocabulary(Analyzer a, File zipFile, String voc, String out) + throws IOException { + ZipFile zip = new ZipFile(zipFile); + InputStream v = zip.getInputStream(zip.getEntry(voc)); + InputStream o = zip.getInputStream(zip.getEntry(out)); + assertVocabulary(a, v, o); + v.close(); + o.close(); + zip.close(); + } + + /** Run a vocabulary test against a tab-separated data file inside a zip file */ + public static void assertVocabulary(Analyzer a, File zipFile, String vocOut) + throws IOException { + ZipFile zip = new ZipFile(zipFile); + InputStream vo = zip.getInputStream(zip.getEntry(vocOut)); + assertVocabulary(a, vo); + vo.close(); + zip.close(); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/WordBreakTestUnicode_6_0_0.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/WordBreakTestUnicode_6_0_0.java new file mode 100644 index 0000000..975b94c --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/WordBreakTestUnicode_6_0_0.java @@ -0,0 +1,3958 @@ +package org.apache.lucene.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.junit.Ignore; + +/** + * This class was automatically generated by generateJavaUnicodeWordBreakTest.pl + * from: http://www.unicode.org/Public/6.0.0/ucd/auxiliary/WordBreakTest.txt + * + * WordBreakTest.txt indicates the points in the provided character sequences + * at which conforming implementations must and must not break words. This + * class tests for expected token extraction from each of the test sequences + * in WordBreakTest.txt, where the expected tokens are those character + * sequences bounded by word breaks and containing at least one character + * from one of the following character sets: + * + * \p{Script = Han} (From http://www.unicode.org/Public/6.0.0/ucd/Scripts.txt) + * \p{Script = Hiragana} + * \p{LineBreak = Complex_Context} (From http://www.unicode.org/Public/6.0.0/ucd/LineBreak.txt) + * \p{WordBreak = ALetter} (From http://www.unicode.org/Public/6.0.0/ucd/auxiliary/WordBreakProperty.txt) + * \p{WordBreak = Katakana} + * \p{WordBreak = Numeric} (Excludes full-width Arabic digits) + * [\uFF10-\uFF19] (Full-width Arabic digits) + */ +@Ignore +public class WordBreakTestUnicode_6_0_0 extends BaseTokenStreamTestCase { + + public void test(Analyzer analyzer) throws Exception { + // ÷ 0001 ÷ 0001 ÷ # ÷ [0.2] (Other) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u0001", + new String[] { }); + + // ÷ 0001 × 0308 ÷ 0001 ÷ # ÷ [0.2] (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u0308\u0001", + new String[] { }); + + // ÷ 0001 ÷ 000D ÷ # ÷ [0.2] (Other) ÷ [3.11] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\r", + new String[] { }); + + // ÷ 0001 × 0308 ÷ 000D ÷ # ÷ [0.2] (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u0308\r", + new String[] { }); + + // ÷ 0001 ÷ 000A ÷ # ÷ [0.2] (Other) ÷ [3.11] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\n", + new String[] { }); + + // ÷ 0001 × 0308 ÷ 000A ÷ # ÷ [0.2] (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u0308\n", + new String[] { }); + + // ÷ 0001 ÷ 000B ÷ # ÷ [0.2] (Other) ÷ [3.11] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u000B", + new String[] { }); + + // ÷ 0001 × 0308 ÷ 000B ÷ # ÷ [0.2] (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u0308\u000B", + new String[] { }); + + // ÷ 0001 ÷ 3031 ÷ # ÷ [0.2] (Other) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u3031", + new String[] { "\u3031" }); + + // ÷ 0001 × 0308 ÷ 3031 ÷ # ÷ [0.2] (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u0308\u3031", + new String[] { "\u3031" }); + + // ÷ 0001 ÷ 0041 ÷ # ÷ [0.2] (Other) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + 
assertAnalyzesTo(analyzer, "\u0001\u0041", + new String[] { "\u0041" }); + + // ÷ 0001 × 0308 ÷ 0041 ÷ # ÷ [0.2] (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u0308\u0041", + new String[] { "\u0041" }); + + // ÷ 0001 ÷ 003A ÷ # ÷ [0.2] (Other) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u003A", + new String[] { }); + + // ÷ 0001 × 0308 ÷ 003A ÷ # ÷ [0.2] (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u0308\u003A", + new String[] { }); + + // ÷ 0001 ÷ 002C ÷ # ÷ [0.2] (Other) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u002C", + new String[] { }); + + // ÷ 0001 × 0308 ÷ 002C ÷ # ÷ [0.2] (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u0308\u002C", + new String[] { }); + + // ÷ 0001 ÷ 0027 ÷ # ÷ [0.2] (Other) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u0027", + new String[] { }); + + // ÷ 0001 × 0308 ÷ 0027 ÷ # ÷ [0.2] (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u0308\u0027", + new String[] { }); + + // ÷ 0001 ÷ 0030 ÷ # ÷ [0.2] (Other) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u0030", + new String[] { "\u0030" }); + + // ÷ 0001 × 0308 ÷ 0030 ÷ # ÷ [0.2] (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u0308\u0030", + new String[] { "\u0030" }); + + // ÷ 0001 ÷ 005F ÷ # ÷ [0.2] (Other) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u005F", + new String[] { }); + + // ÷ 0001 × 0308 ÷ 005F ÷ # ÷ [0.2] (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u0308\u005F", + new String[] { }); + + // ÷ 0001 × 00AD ÷ # ÷ [0.2] (Other) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u00AD", + new String[] { }); + + // ÷ 0001 × 0308 × 00AD ÷ # ÷ [0.2] (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u0308\u00AD", + new String[] { }); + + // ÷ 0001 × 0300 ÷ # ÷ [0.2] (Other) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u0300", + new String[] { }); + + // ÷ 0001 × 0308 × 0300 ÷ # ÷ [0.2] (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u0308\u0300", + new String[] { }); + + // ÷ 0001 ÷ 0061 × 2060 ÷ # ÷ [0.2] (Other) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u0061\u2060", + new String[] { "\u0061\u2060" }); + + // ÷ 0001 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u2060", + new String[] { "\u0061\u2060" }); + + // ÷ 0001 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] (Other) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u0061\u003A", + new String[] { "\u0061" }); + + // ÷ 0001 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL 
LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u003A", + new String[] { "\u0061" }); + + // ÷ 0001 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] (Other) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u0061\u0027", + new String[] { "\u0061" }); + + // ÷ 0001 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u0027", + new String[] { "\u0061" }); + + // ÷ 0001 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] (Other) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u0061\u0027\u2060", + new String[] { "\u0061" }); + + // ÷ 0001 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u0027\u2060", + new String[] { "\u0061" }); + + // ÷ 0001 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] (Other) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u0061\u002C", + new String[] { "\u0061" }); + + // ÷ 0001 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u002C", + new String[] { "\u0061" }); + + // ÷ 0001 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] (Other) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u0031\u003A", + new String[] { "\u0031" }); + + // ÷ 0001 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u0308\u0031\u003A", + new String[] { "\u0031" }); + + // ÷ 0001 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] (Other) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u0031\u0027", + new String[] { "\u0031" }); + + // ÷ 0001 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u0308\u0031\u0027", + new String[] { "\u0031" }); + + // ÷ 0001 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] (Other) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u0031\u002C", + new String[] { "\u0031" }); + + // ÷ 0001 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u0308\u0031\u002C", + new String[] { "\u0031" }); + + // ÷ 0001 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] (Other) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u0031\u002E\u2060", + new String[] { "\u0031" }); + + // ÷ 0001 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0001\u0308\u0031\u002E\u2060", + new String[] { "\u0031" }); + + // ÷ 000D ÷ 0001 ÷ # ÷ [0.2] (CR) ÷ 
[3.1] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u0001", + new String[] { }); + + // ÷ 000D ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u0308\u0001", + new String[] { }); + + // ÷ 000D ÷ 000D ÷ # ÷ [0.2] (CR) ÷ [3.1] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\r", + new String[] { }); + + // ÷ 000D ÷ 0308 ÷ 000D ÷ # ÷ [0.2] (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u0308\r", + new String[] { }); + + // ÷ 000D × 000A ÷ # ÷ [0.2] (CR) × [3.0] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\n", + new String[] { }); + + // ÷ 000D ÷ 0308 ÷ 000A ÷ # ÷ [0.2] (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u0308\n", + new String[] { }); + + // ÷ 000D ÷ 000B ÷ # ÷ [0.2] (CR) ÷ [3.1] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u000B", + new String[] { }); + + // ÷ 000D ÷ 0308 ÷ 000B ÷ # ÷ [0.2] (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u0308\u000B", + new String[] { }); + + // ÷ 000D ÷ 3031 ÷ # ÷ [0.2] (CR) ÷ [3.1] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u3031", + new String[] { "\u3031" }); + + // ÷ 000D ÷ 0308 ÷ 3031 ÷ # ÷ [0.2] (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u0308\u3031", + new String[] { "\u3031" }); + + // ÷ 000D ÷ 0041 ÷ # ÷ [0.2] (CR) ÷ [3.1] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u0041", + new String[] { "\u0041" }); + + // ÷ 000D ÷ 0308 ÷ 0041 ÷ # ÷ [0.2] (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u0308\u0041", + new String[] { "\u0041" }); + + // ÷ 000D ÷ 003A ÷ # ÷ [0.2] (CR) ÷ [3.1] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u003A", + new String[] { }); + + // ÷ 000D ÷ 0308 ÷ 003A ÷ # ÷ [0.2] (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u0308\u003A", + new String[] { }); + + // ÷ 000D ÷ 002C ÷ # ÷ [0.2] (CR) ÷ [3.1] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u002C", + new String[] { }); + + // ÷ 000D ÷ 0308 ÷ 002C ÷ # ÷ [0.2] (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u0308\u002C", + new String[] { }); + + // ÷ 000D ÷ 0027 ÷ # ÷ [0.2] (CR) ÷ [3.1] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u0027", + new String[] { }); + + // ÷ 000D ÷ 0308 ÷ 0027 ÷ # ÷ [0.2] (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u0308\u0027", + new String[] { }); + + // ÷ 000D ÷ 0030 ÷ # ÷ [0.2] (CR) ÷ [3.1] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u0030", + new String[] { "\u0030" }); + + // ÷ 000D ÷ 0308 ÷ 0030 ÷ # ÷ [0.2] (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u0308\u0030", + new String[] { "\u0030" }); + + // ÷ 000D ÷ 005F ÷ # ÷ [0.2] (CR) ÷ [3.1] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u005F", + new String[] { }); + + // ÷ 000D ÷ 0308 ÷ 005F ÷ # ÷ [0.2] (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u0308\u005F", + new String[] { }); + + 
// ÷ 000D ÷ 00AD ÷ # ÷ [0.2] (CR) ÷ [3.1] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u00AD", + new String[] { }); + + // ÷ 000D ÷ 0308 × 00AD ÷ # ÷ [0.2] (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u0308\u00AD", + new String[] { }); + + // ÷ 000D ÷ 0300 ÷ # ÷ [0.2] (CR) ÷ [3.1] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u0300", + new String[] { }); + + // ÷ 000D ÷ 0308 × 0300 ÷ # ÷ [0.2] (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u0308\u0300", + new String[] { }); + + // ÷ 000D ÷ 0061 × 2060 ÷ # ÷ [0.2] (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u0061\u2060", + new String[] { "\u0061\u2060" }); + + // ÷ 000D ÷ 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u0308\u0061\u2060", + new String[] { "\u0061\u2060" }); + + // ÷ 000D ÷ 0061 ÷ 003A ÷ # ÷ [0.2] (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u0061\u003A", + new String[] { "\u0061" }); + + // ÷ 000D ÷ 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u0308\u0061\u003A", + new String[] { "\u0061" }); + + // ÷ 000D ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u0061\u0027", + new String[] { "\u0061" }); + + // ÷ 000D ÷ 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u0308\u0061\u0027", + new String[] { "\u0061" }); + + // ÷ 000D ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u0061\u0027\u2060", + new String[] { "\u0061" }); + + // ÷ 000D ÷ 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u0308\u0061\u0027\u2060", + new String[] { "\u0061" }); + + // ÷ 000D ÷ 0061 ÷ 002C ÷ # ÷ [0.2] (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u0061\u002C", + new String[] { "\u0061" }); + + // ÷ 000D ÷ 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u0308\u0061\u002C", + new String[] { "\u0061" }); + + // ÷ 000D ÷ 0031 ÷ 003A ÷ # ÷ [0.2] (CR) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u0031\u003A", + new String[] { "\u0031" }); + + // ÷ 000D ÷ 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u0308\u0031\u003A", + new String[] { "\u0031" }); + + // ÷ 000D ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] (CR) ÷ [3.1] DIGIT ONE (Numeric) ÷ 
[999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u0031\u0027", + new String[] { "\u0031" }); + + // ÷ 000D ÷ 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u0308\u0031\u0027", + new String[] { "\u0031" }); + + // ÷ 000D ÷ 0031 ÷ 002C ÷ # ÷ [0.2] (CR) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u0031\u002C", + new String[] { "\u0031" }); + + // ÷ 000D ÷ 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u0308\u0031\u002C", + new String[] { "\u0031" }); + + // ÷ 000D ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] (CR) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u0031\u002E\u2060", + new String[] { "\u0031" }); + + // ÷ 000D ÷ 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\r\u0308\u0031\u002E\u2060", + new String[] { "\u0031" }); + + // ÷ 000A ÷ 0001 ÷ # ÷ [0.2] (LF) ÷ [3.1] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u0001", + new String[] { }); + + // ÷ 000A ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u0308\u0001", + new String[] { }); + + // ÷ 000A ÷ 000D ÷ # ÷ [0.2] (LF) ÷ [3.1] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\r", + new String[] { }); + + // ÷ 000A ÷ 0308 ÷ 000D ÷ # ÷ [0.2] (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u0308\r", + new String[] { }); + + // ÷ 000A ÷ 000A ÷ # ÷ [0.2] (LF) ÷ [3.1] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\n", + new String[] { }); + + // ÷ 000A ÷ 0308 ÷ 000A ÷ # ÷ [0.2] (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u0308\n", + new String[] { }); + + // ÷ 000A ÷ 000B ÷ # ÷ [0.2] (LF) ÷ [3.1] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u000B", + new String[] { }); + + // ÷ 000A ÷ 0308 ÷ 000B ÷ # ÷ [0.2] (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u0308\u000B", + new String[] { }); + + // ÷ 000A ÷ 3031 ÷ # ÷ [0.2] (LF) ÷ [3.1] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u3031", + new String[] { "\u3031" }); + + // ÷ 000A ÷ 0308 ÷ 3031 ÷ # ÷ [0.2] (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u0308\u3031", + new String[] { "\u3031" }); + + // ÷ 000A ÷ 0041 ÷ # ÷ [0.2] (LF) ÷ [3.1] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u0041", + new String[] { "\u0041" }); + + // ÷ 000A ÷ 0308 ÷ 0041 ÷ # ÷ [0.2] (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u0308\u0041", + new String[] { "\u0041" }); + + // ÷ 000A ÷ 003A ÷ # ÷ [0.2] (LF) ÷ [3.1] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u003A", + new String[] { }); + + // ÷ 000A ÷ 0308 ÷ 003A ÷ # ÷ [0.2] (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u0308\u003A", + new 
String[] { }); + + // ÷ 000A ÷ 002C ÷ # ÷ [0.2] (LF) ÷ [3.1] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u002C", + new String[] { }); + + // ÷ 000A ÷ 0308 ÷ 002C ÷ # ÷ [0.2] (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u0308\u002C", + new String[] { }); + + // ÷ 000A ÷ 0027 ÷ # ÷ [0.2] (LF) ÷ [3.1] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u0027", + new String[] { }); + + // ÷ 000A ÷ 0308 ÷ 0027 ÷ # ÷ [0.2] (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u0308\u0027", + new String[] { }); + + // ÷ 000A ÷ 0030 ÷ # ÷ [0.2] (LF) ÷ [3.1] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u0030", + new String[] { "\u0030" }); + + // ÷ 000A ÷ 0308 ÷ 0030 ÷ # ÷ [0.2] (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u0308\u0030", + new String[] { "\u0030" }); + + // ÷ 000A ÷ 005F ÷ # ÷ [0.2] (LF) ÷ [3.1] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u005F", + new String[] { }); + + // ÷ 000A ÷ 0308 ÷ 005F ÷ # ÷ [0.2] (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u0308\u005F", + new String[] { }); + + // ÷ 000A ÷ 00AD ÷ # ÷ [0.2] (LF) ÷ [3.1] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u00AD", + new String[] { }); + + // ÷ 000A ÷ 0308 × 00AD ÷ # ÷ [0.2] (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u0308\u00AD", + new String[] { }); + + // ÷ 000A ÷ 0300 ÷ # ÷ [0.2] (LF) ÷ [3.1] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u0300", + new String[] { }); + + // ÷ 000A ÷ 0308 × 0300 ÷ # ÷ [0.2] (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u0308\u0300", + new String[] { }); + + // ÷ 000A ÷ 0061 × 2060 ÷ # ÷ [0.2] (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u0061\u2060", + new String[] { "\u0061\u2060" }); + + // ÷ 000A ÷ 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u0308\u0061\u2060", + new String[] { "\u0061\u2060" }); + + // ÷ 000A ÷ 0061 ÷ 003A ÷ # ÷ [0.2] (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u0061\u003A", + new String[] { "\u0061" }); + + // ÷ 000A ÷ 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u0308\u0061\u003A", + new String[] { "\u0061" }); + + // ÷ 000A ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u0061\u0027", + new String[] { "\u0061" }); + + // ÷ 000A ÷ 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u0308\u0061\u0027", + new String[] { "\u0061" }); + + // ÷ 000A ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD 
JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u0061\u0027\u2060", + new String[] { "\u0061" }); + + // ÷ 000A ÷ 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u0308\u0061\u0027\u2060", + new String[] { "\u0061" }); + + // ÷ 000A ÷ 0061 ÷ 002C ÷ # ÷ [0.2] (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u0061\u002C", + new String[] { "\u0061" }); + + // ÷ 000A ÷ 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u0308\u0061\u002C", + new String[] { "\u0061" }); + + // ÷ 000A ÷ 0031 ÷ 003A ÷ # ÷ [0.2] (LF) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u0031\u003A", + new String[] { "\u0031" }); + + // ÷ 000A ÷ 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u0308\u0031\u003A", + new String[] { "\u0031" }); + + // ÷ 000A ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] (LF) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u0031\u0027", + new String[] { "\u0031" }); + + // ÷ 000A ÷ 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u0308\u0031\u0027", + new String[] { "\u0031" }); + + // ÷ 000A ÷ 0031 ÷ 002C ÷ # ÷ [0.2] (LF) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u0031\u002C", + new String[] { "\u0031" }); + + // ÷ 000A ÷ 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u0308\u0031\u002C", + new String[] { "\u0031" }); + + // ÷ 000A ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] (LF) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u0031\u002E\u2060", + new String[] { "\u0031" }); + + // ÷ 000A ÷ 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\n\u0308\u0031\u002E\u2060", + new String[] { "\u0031" }); + + // ÷ 000B ÷ 0001 ÷ # ÷ [0.2] (Newline) ÷ [3.1] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u0001", + new String[] { }); + + // ÷ 000B ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u0308\u0001", + new String[] { }); + + // ÷ 000B ÷ 000D ÷ # ÷ [0.2] (Newline) ÷ [3.1] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\r", + new String[] { }); + + // ÷ 000B ÷ 0308 ÷ 000D ÷ # ÷ [0.2] (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u0308\r", + new String[] { }); + + // ÷ 000B ÷ 000A ÷ # ÷ [0.2] (Newline) ÷ [3.1] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\n", + new String[] { }); + + // ÷ 000B ÷ 0308 ÷ 000A ÷ # ÷ [0.2] (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (LF) ÷ [0.3] + 
assertAnalyzesTo(analyzer, "\u000B\u0308\n", + new String[] { }); + + // ÷ 000B ÷ 000B ÷ # ÷ [0.2] (Newline) ÷ [3.1] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u000B", + new String[] { }); + + // ÷ 000B ÷ 0308 ÷ 000B ÷ # ÷ [0.2] (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u0308\u000B", + new String[] { }); + + // ÷ 000B ÷ 3031 ÷ # ÷ [0.2] (Newline) ÷ [3.1] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u3031", + new String[] { "\u3031" }); + + // ÷ 000B ÷ 0308 ÷ 3031 ÷ # ÷ [0.2] (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u0308\u3031", + new String[] { "\u3031" }); + + // ÷ 000B ÷ 0041 ÷ # ÷ [0.2] (Newline) ÷ [3.1] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u0041", + new String[] { "\u0041" }); + + // ÷ 000B ÷ 0308 ÷ 0041 ÷ # ÷ [0.2] (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u0308\u0041", + new String[] { "\u0041" }); + + // ÷ 000B ÷ 003A ÷ # ÷ [0.2] (Newline) ÷ [3.1] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u003A", + new String[] { }); + + // ÷ 000B ÷ 0308 ÷ 003A ÷ # ÷ [0.2] (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u0308\u003A", + new String[] { }); + + // ÷ 000B ÷ 002C ÷ # ÷ [0.2] (Newline) ÷ [3.1] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u002C", + new String[] { }); + + // ÷ 000B ÷ 0308 ÷ 002C ÷ # ÷ [0.2] (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u0308\u002C", + new String[] { }); + + // ÷ 000B ÷ 0027 ÷ # ÷ [0.2] (Newline) ÷ [3.1] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u0027", + new String[] { }); + + // ÷ 000B ÷ 0308 ÷ 0027 ÷ # ÷ [0.2] (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u0308\u0027", + new String[] { }); + + // ÷ 000B ÷ 0030 ÷ # ÷ [0.2] (Newline) ÷ [3.1] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u0030", + new String[] { "\u0030" }); + + // ÷ 000B ÷ 0308 ÷ 0030 ÷ # ÷ [0.2] (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u0308\u0030", + new String[] { "\u0030" }); + + // ÷ 000B ÷ 005F ÷ # ÷ [0.2] (Newline) ÷ [3.1] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u005F", + new String[] { }); + + // ÷ 000B ÷ 0308 ÷ 005F ÷ # ÷ [0.2] (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u0308\u005F", + new String[] { }); + + // ÷ 000B ÷ 00AD ÷ # ÷ [0.2] (Newline) ÷ [3.1] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u00AD", + new String[] { }); + + // ÷ 000B ÷ 0308 × 00AD ÷ # ÷ [0.2] (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u0308\u00AD", + new String[] { }); + + // ÷ 000B ÷ 0300 ÷ # ÷ [0.2] (Newline) ÷ [3.1] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u0300", + new String[] { }); + + // ÷ 000B ÷ 0308 × 0300 ÷ # ÷ [0.2] (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT 
(Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u0308\u0300", + new String[] { }); + + // ÷ 000B ÷ 0061 × 2060 ÷ # ÷ [0.2] (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u0061\u2060", + new String[] { "\u0061\u2060" }); + + // ÷ 000B ÷ 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u2060", + new String[] { "\u0061\u2060" }); + + // ÷ 000B ÷ 0061 ÷ 003A ÷ # ÷ [0.2] (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u0061\u003A", + new String[] { "\u0061" }); + + // ÷ 000B ÷ 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u003A", + new String[] { "\u0061" }); + + // ÷ 000B ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u0061\u0027", + new String[] { "\u0061" }); + + // ÷ 000B ÷ 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u0027", + new String[] { "\u0061" }); + + // ÷ 000B ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u0061\u0027\u2060", + new String[] { "\u0061" }); + + // ÷ 000B ÷ 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u0027\u2060", + new String[] { "\u0061" }); + + // ÷ 000B ÷ 0061 ÷ 002C ÷ # ÷ [0.2] (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u0061\u002C", + new String[] { "\u0061" }); + + // ÷ 000B ÷ 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u002C", + new String[] { "\u0061" }); + + // ÷ 000B ÷ 0031 ÷ 003A ÷ # ÷ [0.2] (Newline) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u0031\u003A", + new String[] { "\u0031" }); + + // ÷ 000B ÷ 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u0308\u0031\u003A", + new String[] { "\u0031" }); + + // ÷ 000B ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] (Newline) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u0031\u0027", + new String[] { "\u0031" }); + + // ÷ 000B ÷ 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u0308\u0031\u0027", + new String[] { "\u0031" }); + + // ÷ 000B ÷ 0031 ÷ 002C ÷ # ÷ [0.2] (Newline) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) 
÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u0031\u002C", + new String[] { "\u0031" }); + + // ÷ 000B ÷ 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u0308\u0031\u002C", + new String[] { "\u0031" }); + + // ÷ 000B ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] (Newline) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u0031\u002E\u2060", + new String[] { "\u0031" }); + + // ÷ 000B ÷ 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u000B\u0308\u0031\u002E\u2060", + new String[] { "\u0031" }); + + // ÷ 3031 ÷ 0001 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u0001", + new String[] { "\u3031" }); + + // ÷ 3031 × 0308 ÷ 0001 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u0308\u0001", + new String[] { "\u3031\u0308" }); + + // ÷ 3031 ÷ 000D ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [3.11] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\r", + new String[] { "\u3031" }); + + // ÷ 3031 × 0308 ÷ 000D ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u0308\r", + new String[] { "\u3031\u0308" }); + + // ÷ 3031 ÷ 000A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [3.11] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\n", + new String[] { "\u3031" }); + + // ÷ 3031 × 0308 ÷ 000A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u0308\n", + new String[] { "\u3031\u0308" }); + + // ÷ 3031 ÷ 000B ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [3.11] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u000B", + new String[] { "\u3031" }); + + // ÷ 3031 × 0308 ÷ 000B ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u0308\u000B", + new String[] { "\u3031\u0308" }); + + // ÷ 3031 × 3031 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [13.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u3031", + new String[] { "\u3031\u3031" }); + + // ÷ 3031 × 0308 × 3031 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u0308\u3031", + new String[] { "\u3031\u0308\u3031" }); + + // ÷ 3031 ÷ 0041 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u0041", + new String[] { "\u3031", "\u0041" }); + + // ÷ 3031 × 0308 ÷ 0041 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u0308\u0041", + new String[] { "\u3031\u0308", "\u0041" }); + + // ÷ 3031 ÷ 003A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u003A", + new 
String[] { "\u3031" }); + + // ÷ 3031 × 0308 ÷ 003A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u0308\u003A", + new String[] { "\u3031\u0308" }); + + // ÷ 3031 ÷ 002C ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u002C", + new String[] { "\u3031" }); + + // ÷ 3031 × 0308 ÷ 002C ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u0308\u002C", + new String[] { "\u3031\u0308" }); + + // ÷ 3031 ÷ 0027 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u0027", + new String[] { "\u3031" }); + + // ÷ 3031 × 0308 ÷ 0027 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u0308\u0027", + new String[] { "\u3031\u0308" }); + + // ÷ 3031 ÷ 0030 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u0030", + new String[] { "\u3031", "\u0030" }); + + // ÷ 3031 × 0308 ÷ 0030 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u0308\u0030", + new String[] { "\u3031\u0308", "\u0030" }); + + // ÷ 3031 × 005F ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u005F", + new String[] { "\u3031\u005F" }); + + // ÷ 3031 × 0308 × 005F ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u0308\u005F", + new String[] { "\u3031\u0308\u005F" }); + + // ÷ 3031 × 00AD ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u00AD", + new String[] { "\u3031\u00AD" }); + + // ÷ 3031 × 0308 × 00AD ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u0308\u00AD", + new String[] { "\u3031\u0308\u00AD" }); + + // ÷ 3031 × 0300 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u0300", + new String[] { "\u3031\u0300" }); + + // ÷ 3031 × 0308 × 0300 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u0308\u0300", + new String[] { "\u3031\u0308\u0300" }); + + // ÷ 3031 ÷ 0061 × 2060 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u0061\u2060", + new String[] { "\u3031", "\u0061\u2060" }); + + // ÷ 3031 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u2060", + new String[] { "\u3031\u0308", "\u0061\u2060" }); + + // ÷ 3031 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ 
[999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u0061\u003A", + new String[] { "\u3031", "\u0061" }); + + // ÷ 3031 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u003A", + new String[] { "\u3031\u0308", "\u0061" }); + + // ÷ 3031 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u0061\u0027", + new String[] { "\u3031", "\u0061" }); + + // ÷ 3031 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u0027", + new String[] { "\u3031\u0308", "\u0061" }); + + // ÷ 3031 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u0061\u0027\u2060", + new String[] { "\u3031", "\u0061" }); + + // ÷ 3031 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u0027\u2060", + new String[] { "\u3031\u0308", "\u0061" }); + + // ÷ 3031 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u0061\u002C", + new String[] { "\u3031", "\u0061" }); + + // ÷ 3031 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u002C", + new String[] { "\u3031\u0308", "\u0061" }); + + // ÷ 3031 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u0031\u003A", + new String[] { "\u3031", "\u0031" }); + + // ÷ 3031 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u0308\u0031\u003A", + new String[] { "\u3031\u0308", "\u0031" }); + + // ÷ 3031 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u0031\u0027", + new String[] { "\u3031", "\u0031" }); + + // ÷ 3031 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u0308\u0031\u0027", + new String[] { "\u3031\u0308", "\u0031" }); + + // ÷ 3031 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u0031\u002C", + new String[] { "\u3031", "\u0031" }); + + // ÷ 3031 × 0308 ÷ 0031 ÷ 002C ÷ # 
÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u0308\u0031\u002C", + new String[] { "\u3031\u0308", "\u0031" }); + + // ÷ 3031 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u0031\u002E\u2060", + new String[] { "\u3031", "\u0031" }); + + // ÷ 3031 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u3031\u0308\u0031\u002E\u2060", + new String[] { "\u3031\u0308", "\u0031" }); + + // ÷ 0041 ÷ 0001 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u0001", + new String[] { "\u0041" }); + + // ÷ 0041 × 0308 ÷ 0001 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u0308\u0001", + new String[] { "\u0041\u0308" }); + + // ÷ 0041 ÷ 000D ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [3.11] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\r", + new String[] { "\u0041" }); + + // ÷ 0041 × 0308 ÷ 000D ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u0308\r", + new String[] { "\u0041\u0308" }); + + // ÷ 0041 ÷ 000A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [3.11] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\n", + new String[] { "\u0041" }); + + // ÷ 0041 × 0308 ÷ 000A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u0308\n", + new String[] { "\u0041\u0308" }); + + // ÷ 0041 ÷ 000B ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [3.11] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u000B", + new String[] { "\u0041" }); + + // ÷ 0041 × 0308 ÷ 000B ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u0308\u000B", + new String[] { "\u0041\u0308" }); + + // ÷ 0041 ÷ 3031 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u3031", + new String[] { "\u0041", "\u3031" }); + + // ÷ 0041 × 0308 ÷ 3031 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u0308\u3031", + new String[] { "\u0041\u0308", "\u3031" }); + + // ÷ 0041 × 0041 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u0041", + new String[] { "\u0041\u0041" }); + + // ÷ 0041 × 0308 × 0041 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u0308\u0041", + new String[] { "\u0041\u0308\u0041" }); + + // ÷ 0041 ÷ 003A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u003A", + new String[] { "\u0041" }); + + // ÷ 0041 × 0308 ÷ 003A ÷ # ÷ 
[0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u0308\u003A", + new String[] { "\u0041\u0308" }); + + // ÷ 0041 ÷ 002C ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u002C", + new String[] { "\u0041" }); + + // ÷ 0041 × 0308 ÷ 002C ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u0308\u002C", + new String[] { "\u0041\u0308" }); + + // ÷ 0041 ÷ 0027 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u0027", + new String[] { "\u0041" }); + + // ÷ 0041 × 0308 ÷ 0027 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u0308\u0027", + new String[] { "\u0041\u0308" }); + + // ÷ 0041 × 0030 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u0030", + new String[] { "\u0041\u0030" }); + + // ÷ 0041 × 0308 × 0030 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u0308\u0030", + new String[] { "\u0041\u0308\u0030" }); + + // ÷ 0041 × 005F ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u005F", + new String[] { "\u0041\u005F" }); + + // ÷ 0041 × 0308 × 005F ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u0308\u005F", + new String[] { "\u0041\u0308\u005F" }); + + // ÷ 0041 × 00AD ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u00AD", + new String[] { "\u0041\u00AD" }); + + // ÷ 0041 × 0308 × 00AD ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u0308\u00AD", + new String[] { "\u0041\u0308\u00AD" }); + + // ÷ 0041 × 0300 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u0300", + new String[] { "\u0041\u0300" }); + + // ÷ 0041 × 0308 × 0300 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u0308\u0300", + new String[] { "\u0041\u0308\u0300" }); + + // ÷ 0041 × 0061 × 2060 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u0061\u2060", + new String[] { "\u0041\u0061\u2060" }); + + // ÷ 0041 × 0308 × 0061 × 2060 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u0308\u0061\u2060", + new String[] { "\u0041\u0308\u0061\u2060" }); + + // ÷ 0041 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u0061\u003A", + new String[] { 
"\u0041\u0061" }); + + // ÷ 0041 × 0308 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u0308\u0061\u003A", + new String[] { "\u0041\u0308\u0061" }); + + // ÷ 0041 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u0061\u0027", + new String[] { "\u0041\u0061" }); + + // ÷ 0041 × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u0308\u0061\u0027", + new String[] { "\u0041\u0308\u0061" }); + + // ÷ 0041 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u0061\u0027\u2060", + new String[] { "\u0041\u0061" }); + + // ÷ 0041 × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u0308\u0061\u0027\u2060", + new String[] { "\u0041\u0308\u0061" }); + + // ÷ 0041 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u0061\u002C", + new String[] { "\u0041\u0061" }); + + // ÷ 0041 × 0308 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u0308\u0061\u002C", + new String[] { "\u0041\u0308\u0061" }); + + // ÷ 0041 × 0031 ÷ 003A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u0031\u003A", + new String[] { "\u0041\u0031" }); + + // ÷ 0041 × 0308 × 0031 ÷ 003A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u0308\u0031\u003A", + new String[] { "\u0041\u0308\u0031" }); + + // ÷ 0041 × 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u0031\u0027", + new String[] { "\u0041\u0031" }); + + // ÷ 0041 × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u0308\u0031\u0027", + new String[] { "\u0041\u0308\u0031" }); + + // ÷ 0041 × 0031 ÷ 002C ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u0031\u002C", + new String[] { "\u0041\u0031" }); + + // ÷ 0041 × 0308 × 0031 ÷ 002C ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u0308\u0031\u002C", + new String[] { "\u0041\u0308\u0031" }); + + // ÷ 0041 × 0031 ÷ 
002E × 2060 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u0031\u002E\u2060", + new String[] { "\u0041\u0031" }); + + // ÷ 0041 × 0308 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0041\u0308\u0031\u002E\u2060", + new String[] { "\u0041\u0308\u0031" }); + + // ÷ 003A ÷ 0001 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u0001", + new String[] { }); + + // ÷ 003A × 0308 ÷ 0001 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u0308\u0001", + new String[] { }); + + // ÷ 003A ÷ 000D ÷ # ÷ [0.2] COLON (MidLetter) ÷ [3.11] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\r", + new String[] { }); + + // ÷ 003A × 0308 ÷ 000D ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u0308\r", + new String[] { }); + + // ÷ 003A ÷ 000A ÷ # ÷ [0.2] COLON (MidLetter) ÷ [3.11] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\n", + new String[] { }); + + // ÷ 003A × 0308 ÷ 000A ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u0308\n", + new String[] { }); + + // ÷ 003A ÷ 000B ÷ # ÷ [0.2] COLON (MidLetter) ÷ [3.11] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u000B", + new String[] { }); + + // ÷ 003A × 0308 ÷ 000B ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u0308\u000B", + new String[] { }); + + // ÷ 003A ÷ 3031 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u3031", + new String[] { "\u3031" }); + + // ÷ 003A × 0308 ÷ 3031 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u0308\u3031", + new String[] { "\u3031" }); + + // ÷ 003A ÷ 0041 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u0041", + new String[] { "\u0041" }); + + // ÷ 003A × 0308 ÷ 0041 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u0308\u0041", + new String[] { "\u0041" }); + + // ÷ 003A ÷ 003A ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u003A", + new String[] { }); + + // ÷ 003A × 0308 ÷ 003A ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u0308\u003A", + new String[] { }); + + // ÷ 003A ÷ 002C ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u002C", + new String[] { }); + + // ÷ 003A × 0308 ÷ 002C ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u0308\u002C", + new String[] { }); + + // ÷ 003A ÷ 0027 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + 
assertAnalyzesTo(analyzer, "\u003A\u0027", + new String[] { }); + + // ÷ 003A × 0308 ÷ 0027 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u0308\u0027", + new String[] { }); + + // ÷ 003A ÷ 0030 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u0030", + new String[] { "\u0030" }); + + // ÷ 003A × 0308 ÷ 0030 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u0308\u0030", + new String[] { "\u0030" }); + + // ÷ 003A ÷ 005F ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u005F", + new String[] { }); + + // ÷ 003A × 0308 ÷ 005F ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u0308\u005F", + new String[] { }); + + // ÷ 003A × 00AD ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u00AD", + new String[] { }); + + // ÷ 003A × 0308 × 00AD ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u0308\u00AD", + new String[] { }); + + // ÷ 003A × 0300 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u0300", + new String[] { }); + + // ÷ 003A × 0308 × 0300 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u0308\u0300", + new String[] { }); + + // ÷ 003A ÷ 0061 × 2060 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u0061\u2060", + new String[] { "\u0061\u2060" }); + + // ÷ 003A × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u0308\u0061\u2060", + new String[] { "\u0061\u2060" }); + + // ÷ 003A ÷ 0061 ÷ 003A ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u0061\u003A", + new String[] { "\u0061" }); + + // ÷ 003A × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u0308\u0061\u003A", + new String[] { "\u0061" }); + + // ÷ 003A ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u0061\u0027", + new String[] { "\u0061" }); + + // ÷ 003A × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u0308\u0061\u0027", + new String[] { "\u0061" }); + + // ÷ 003A ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u0061\u0027\u2060", + new String[] { "\u0061" }); + + // ÷ 003A × 
0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u0308\u0061\u0027\u2060", + new String[] { "\u0061" }); + + // ÷ 003A ÷ 0061 ÷ 002C ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u0061\u002C", + new String[] { "\u0061" }); + + // ÷ 003A × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u0308\u0061\u002C", + new String[] { "\u0061" }); + + // ÷ 003A ÷ 0031 ÷ 003A ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u0031\u003A", + new String[] { "\u0031" }); + + // ÷ 003A × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u0308\u0031\u003A", + new String[] { "\u0031" }); + + // ÷ 003A ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u0031\u0027", + new String[] { "\u0031" }); + + // ÷ 003A × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u0308\u0031\u0027", + new String[] { "\u0031" }); + + // ÷ 003A ÷ 0031 ÷ 002C ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u0031\u002C", + new String[] { "\u0031" }); + + // ÷ 003A × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u0308\u0031\u002C", + new String[] { "\u0031" }); + + // ÷ 003A ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u0031\u002E\u2060", + new String[] { "\u0031" }); + + // ÷ 003A × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u003A\u0308\u0031\u002E\u2060", + new String[] { "\u0031" }); + + // ÷ 002C ÷ 0001 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u0001", + new String[] { }); + + // ÷ 002C × 0308 ÷ 0001 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u0308\u0001", + new String[] { }); + + // ÷ 002C ÷ 000D ÷ # ÷ [0.2] COMMA (MidNum) ÷ [3.11] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\r", + new String[] { }); + + // ÷ 002C × 0308 ÷ 000D ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u0308\r", + new String[] { }); + + // ÷ 002C ÷ 000A ÷ # ÷ [0.2] COMMA (MidNum) ÷ [3.11] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\n", + new String[] { }); + + // ÷ 002C × 0308 ÷ 000A ÷ # ÷ 
[0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u0308\n", + new String[] { }); + + // ÷ 002C ÷ 000B ÷ # ÷ [0.2] COMMA (MidNum) ÷ [3.11] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u000B", + new String[] { }); + + // ÷ 002C × 0308 ÷ 000B ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u0308\u000B", + new String[] { }); + + // ÷ 002C ÷ 3031 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u3031", + new String[] { "\u3031" }); + + // ÷ 002C × 0308 ÷ 3031 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u0308\u3031", + new String[] { "\u3031" }); + + // ÷ 002C ÷ 0041 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u0041", + new String[] { "\u0041" }); + + // ÷ 002C × 0308 ÷ 0041 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u0308\u0041", + new String[] { "\u0041" }); + + // ÷ 002C ÷ 003A ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u003A", + new String[] { }); + + // ÷ 002C × 0308 ÷ 003A ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u0308\u003A", + new String[] { }); + + // ÷ 002C ÷ 002C ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u002C", + new String[] { }); + + // ÷ 002C × 0308 ÷ 002C ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u0308\u002C", + new String[] { }); + + // ÷ 002C ÷ 0027 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u0027", + new String[] { }); + + // ÷ 002C × 0308 ÷ 0027 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u0308\u0027", + new String[] { }); + + // ÷ 002C ÷ 0030 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u0030", + new String[] { "\u0030" }); + + // ÷ 002C × 0308 ÷ 0030 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u0308\u0030", + new String[] { "\u0030" }); + + // ÷ 002C ÷ 005F ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u005F", + new String[] { }); + + // ÷ 002C × 0308 ÷ 005F ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u0308\u005F", + new String[] { }); + + // ÷ 002C × 00AD ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u00AD", + new String[] { }); + + // ÷ 002C × 0308 × 00AD ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u0308\u00AD", + new String[] { }); + + // ÷ 002C × 0300 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ 
[0.3] + assertAnalyzesTo(analyzer, "\u002C\u0300", + new String[] { }); + + // ÷ 002C × 0308 × 0300 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u0308\u0300", + new String[] { }); + + // ÷ 002C ÷ 0061 × 2060 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u0061\u2060", + new String[] { "\u0061\u2060" }); + + // ÷ 002C × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u0308\u0061\u2060", + new String[] { "\u0061\u2060" }); + + // ÷ 002C ÷ 0061 ÷ 003A ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u0061\u003A", + new String[] { "\u0061" }); + + // ÷ 002C × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u0308\u0061\u003A", + new String[] { "\u0061" }); + + // ÷ 002C ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u0061\u0027", + new String[] { "\u0061" }); + + // ÷ 002C × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u0308\u0061\u0027", + new String[] { "\u0061" }); + + // ÷ 002C ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u0061\u0027\u2060", + new String[] { "\u0061" }); + + // ÷ 002C × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u0308\u0061\u0027\u2060", + new String[] { "\u0061" }); + + // ÷ 002C ÷ 0061 ÷ 002C ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u0061\u002C", + new String[] { "\u0061" }); + + // ÷ 002C × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u0308\u0061\u002C", + new String[] { "\u0061" }); + + // ÷ 002C ÷ 0031 ÷ 003A ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u0031\u003A", + new String[] { "\u0031" }); + + // ÷ 002C × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u0308\u0031\u003A", + new String[] { "\u0031" }); + + // ÷ 002C ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u0031\u0027", + new String[] { "\u0031" }); + + // ÷ 002C × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING 
DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u0308\u0031\u0027", + new String[] { "\u0031" }); + + // ÷ 002C ÷ 0031 ÷ 002C ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u0031\u002C", + new String[] { "\u0031" }); + + // ÷ 002C × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u0308\u0031\u002C", + new String[] { "\u0031" }); + + // ÷ 002C ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u0031\u002E\u2060", + new String[] { "\u0031" }); + + // ÷ 002C × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u002C\u0308\u0031\u002E\u2060", + new String[] { "\u0031" }); + + // ÷ 0027 ÷ 0001 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0001", + new String[] { }); + + // ÷ 0027 × 0308 ÷ 0001 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\u0001", + new String[] { }); + + // ÷ 0027 ÷ 000D ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) ÷ [3.11] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\r", + new String[] { }); + + // ÷ 0027 × 0308 ÷ 000D ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\r", + new String[] { }); + + // ÷ 0027 ÷ 000A ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) ÷ [3.11] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\n", + new String[] { }); + + // ÷ 0027 × 0308 ÷ 000A ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\n", + new String[] { }); + + // ÷ 0027 ÷ 000B ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) ÷ [3.11] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u000B", + new String[] { }); + + // ÷ 0027 × 0308 ÷ 000B ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\u000B", + new String[] { }); + + // ÷ 0027 ÷ 3031 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u3031", + new String[] { "\u3031" }); + + // ÷ 0027 × 0308 ÷ 3031 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\u3031", + new String[] { "\u3031" }); + + // ÷ 0027 ÷ 0041 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0041", + new String[] { "\u0041" }); + + // ÷ 0027 × 0308 ÷ 0041 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\u0041", + new String[] { "\u0041" }); + + // ÷ 0027 ÷ 003A ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u003A", + new String[] { }); + 
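+ // Notation used in the generated comments throughout this suite (following the conventions of the Unicode UAX #29 WordBreakTest data from which these cases appear to be derived): "÷" marks a position where a word boundary is allowed, "×" marks a position where no boundary may occur, and the bracketed numbers cite the word-break rule deciding that position ([0.2]/[0.3] are the start/end-of-text pseudo-rules, [999.0] is the break-by-default rule, and [4.0] is WB4, which ignores Format and Extend characters).
+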
+ // ÷ 0027 × 0308 ÷ 003A ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\u003A", + new String[] { }); + + // ÷ 0027 ÷ 002C ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u002C", + new String[] { }); + + // ÷ 0027 × 0308 ÷ 002C ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\u002C", + new String[] { }); + + // ÷ 0027 ÷ 0027 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0027", + new String[] { }); + + // ÷ 0027 × 0308 ÷ 0027 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\u0027", + new String[] { }); + + // ÷ 0027 ÷ 0030 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0030", + new String[] { "\u0030" }); + + // ÷ 0027 × 0308 ÷ 0030 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\u0030", + new String[] { "\u0030" }); + + // ÷ 0027 ÷ 005F ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u005F", + new String[] { }); + + // ÷ 0027 × 0308 ÷ 005F ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\u005F", + new String[] { }); + + // ÷ 0027 × 00AD ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u00AD", + new String[] { }); + + // ÷ 0027 × 0308 × 00AD ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\u00AD", + new String[] { }); + + // ÷ 0027 × 0300 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0300", + new String[] { }); + + // ÷ 0027 × 0308 × 0300 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\u0300", + new String[] { }); + + // ÷ 0027 ÷ 0061 × 2060 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0061\u2060", + new String[] { "\u0061\u2060" }); + + // ÷ 0027 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\u0061\u2060", + new String[] { "\u0061\u2060" }); + + // ÷ 0027 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0061\u003A", + new String[] { "\u0061" }); + + // ÷ 0027 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\u0061\u003A", + new String[] { "\u0061" }); + + // ÷ 0027 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] 
APOSTROPHE (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0061\u0027", + new String[] { "\u0061" }); + + // ÷ 0027 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\u0061\u0027", + new String[] { "\u0061" }); + + // ÷ 0027 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0061\u0027\u2060", + new String[] { "\u0061" }); + + // ÷ 0027 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\u0061\u0027\u2060", + new String[] { "\u0061" }); + + // ÷ 0027 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0061\u002C", + new String[] { "\u0061" }); + + // ÷ 0027 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\u0061\u002C", + new String[] { "\u0061" }); + + // ÷ 0027 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0031\u003A", + new String[] { "\u0031" }); + + // ÷ 0027 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\u0031\u003A", + new String[] { "\u0031" }); + + // ÷ 0027 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0031\u0027", + new String[] { "\u0031" }); + + // ÷ 0027 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\u0031\u0027", + new String[] { "\u0031" }); + + // ÷ 0027 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0031\u002C", + new String[] { "\u0031" }); + + // ÷ 0027 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\u0031\u002C", + new String[] { "\u0031" }); + + // ÷ 0027 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0031\u002E\u2060", + new String[] { "\u0031" }); + + // ÷ 0027 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0027\u0308\u0031\u002E\u2060", + new String[] { "\u0031" }); + + // ÷ 
0030 ÷ 0001 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u0001", + new String[] { "\u0030" }); + + // ÷ 0030 × 0308 ÷ 0001 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u0308\u0001", + new String[] { "\u0030\u0308" }); + + // ÷ 0030 ÷ 000D ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [3.11] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\r", + new String[] { "\u0030" }); + + // ÷ 0030 × 0308 ÷ 000D ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u0308\r", + new String[] { "\u0030\u0308" }); + + // ÷ 0030 ÷ 000A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [3.11] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\n", + new String[] { "\u0030" }); + + // ÷ 0030 × 0308 ÷ 000A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u0308\n", + new String[] { "\u0030\u0308" }); + + // ÷ 0030 ÷ 000B ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [3.11] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u000B", + new String[] { "\u0030" }); + + // ÷ 0030 × 0308 ÷ 000B ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u0308\u000B", + new String[] { "\u0030\u0308" }); + + // ÷ 0030 ÷ 3031 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u3031", + new String[] { "\u0030", "\u3031" }); + + // ÷ 0030 × 0308 ÷ 3031 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u0308\u3031", + new String[] { "\u0030\u0308", "\u3031" }); + + // ÷ 0030 × 0041 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [10.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u0041", + new String[] { "\u0030\u0041" }); + + // ÷ 0030 × 0308 × 0041 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [10.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u0308\u0041", + new String[] { "\u0030\u0308\u0041" }); + + // ÷ 0030 ÷ 003A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u003A", + new String[] { "\u0030" }); + + // ÷ 0030 × 0308 ÷ 003A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u0308\u003A", + new String[] { "\u0030\u0308" }); + + // ÷ 0030 ÷ 002C ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u002C", + new String[] { "\u0030" }); + + // ÷ 0030 × 0308 ÷ 002C ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u0308\u002C", + new String[] { "\u0030\u0308" }); + + // ÷ 0030 ÷ 0027 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u0027", + new String[] { "\u0030" }); + + // ÷ 0030 × 0308 ÷ 0027 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u0308\u0027", + new String[] { "\u0030\u0308" }); + + // ÷ 0030 × 0030 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [8.0] DIGIT 
ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u0030", + new String[] { "\u0030\u0030" }); + + // ÷ 0030 × 0308 × 0030 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [8.0] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u0308\u0030", + new String[] { "\u0030\u0308\u0030" }); + + // ÷ 0030 × 005F ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u005F", + new String[] { "\u0030\u005F" }); + + // ÷ 0030 × 0308 × 005F ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u0308\u005F", + new String[] { "\u0030\u0308\u005F" }); + + // ÷ 0030 × 00AD ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u00AD", + new String[] { "\u0030\u00AD" }); + + // ÷ 0030 × 0308 × 00AD ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u0308\u00AD", + new String[] { "\u0030\u0308\u00AD" }); + + // ÷ 0030 × 0300 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u0300", + new String[] { "\u0030\u0300" }); + + // ÷ 0030 × 0308 × 0300 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u0308\u0300", + new String[] { "\u0030\u0308\u0300" }); + + // ÷ 0030 × 0061 × 2060 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [10.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u0061\u2060", + new String[] { "\u0030\u0061\u2060" }); + + // ÷ 0030 × 0308 × 0061 × 2060 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [10.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u0308\u0061\u2060", + new String[] { "\u0030\u0308\u0061\u2060" }); + + // ÷ 0030 × 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u0061\u003A", + new String[] { "\u0030\u0061" }); + + // ÷ 0030 × 0308 × 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u0308\u0061\u003A", + new String[] { "\u0030\u0308\u0061" }); + + // ÷ 0030 × 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u0061\u0027", + new String[] { "\u0030\u0061" }); + + // ÷ 0030 × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u0308\u0061\u0027", + new String[] { "\u0030\u0308\u0061" }); + + // ÷ 0030 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u0061\u0027\u2060", + new String[] { "\u0030\u0061" }); + + // ÷ 0030 × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [10.0] LATIN SMALL LETTER 
A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u0308\u0061\u0027\u2060", + new String[] { "\u0030\u0308\u0061" }); + + // ÷ 0030 × 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u0061\u002C", + new String[] { "\u0030\u0061" }); + + // ÷ 0030 × 0308 × 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u0308\u0061\u002C", + new String[] { "\u0030\u0308\u0061" }); + + // ÷ 0030 × 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [8.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u0031\u003A", + new String[] { "\u0030\u0031" }); + + // ÷ 0030 × 0308 × 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [8.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u0308\u0031\u003A", + new String[] { "\u0030\u0308\u0031" }); + + // ÷ 0030 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [8.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u0031\u0027", + new String[] { "\u0030\u0031" }); + + // ÷ 0030 × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [8.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u0308\u0031\u0027", + new String[] { "\u0030\u0308\u0031" }); + + // ÷ 0030 × 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [8.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u0031\u002C", + new String[] { "\u0030\u0031" }); + + // ÷ 0030 × 0308 × 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [8.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u0308\u0031\u002C", + new String[] { "\u0030\u0308\u0031" }); + + // ÷ 0030 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [8.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u0031\u002E\u2060", + new String[] { "\u0030\u0031" }); + + // ÷ 0030 × 0308 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [8.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0030\u0308\u0031\u002E\u2060", + new String[] { "\u0030\u0308\u0031" }); + + // ÷ 005F ÷ 0001 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u0001", + new String[] { }); + + // ÷ 005F × 0308 ÷ 0001 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u0308\u0001", + new String[] { }); + + // ÷ 005F ÷ 000D ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [3.11] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\r", + new String[] { }); + + // ÷ 005F × 0308 ÷ 000D ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u0308\r", + new String[] { }); + + // ÷ 005F ÷ 000A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [3.11] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\n", + new String[] { }); + 
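+ // Note: an empty expected array (new String[] { }) does not mean no word boundaries were found; it means the resulting segments contain no letters, digits, or kana (e.g. a lone LOW LINE, COMMA, or SOFT HYPHEN), and the tokenizer under test emits tokens only for segments of those types, as the surrounding cases suggest.
+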
+ // ÷ 005F × 0308 ÷ 000A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u0308\n", + new String[] { }); + + // ÷ 005F ÷ 000B ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [3.11] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u000B", + new String[] { }); + + // ÷ 005F × 0308 ÷ 000B ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u0308\u000B", + new String[] { }); + + // ÷ 005F × 3031 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u3031", + new String[] { "\u005F\u3031" }); + + // ÷ 005F × 0308 × 3031 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u0308\u3031", + new String[] { "\u005F\u0308\u3031" }); + + // ÷ 005F × 0041 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u0041", + new String[] { "\u005F\u0041" }); + + // ÷ 005F × 0308 × 0041 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u0308\u0041", + new String[] { "\u005F\u0308\u0041" }); + + // ÷ 005F ÷ 003A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u003A", + new String[] { }); + + // ÷ 005F × 0308 ÷ 003A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u0308\u003A", + new String[] { }); + + // ÷ 005F ÷ 002C ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u002C", + new String[] { }); + + // ÷ 005F × 0308 ÷ 002C ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u0308\u002C", + new String[] { }); + + // ÷ 005F ÷ 0027 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u0027", + new String[] { }); + + // ÷ 005F × 0308 ÷ 0027 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u0308\u0027", + new String[] { }); + + // ÷ 005F × 0030 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u0030", + new String[] { "\u005F\u0030" }); + + // ÷ 005F × 0308 × 0030 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u0308\u0030", + new String[] { "\u005F\u0308\u0030" }); + + // ÷ 005F × 005F ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u005F", + new String[] { }); + + // ÷ 005F × 0308 × 005F ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u0308\u005F", + new String[] { }); + + // ÷ 005F × 00AD ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u00AD", + new String[] { }); + + // ÷ 005F × 0308 × 00AD ÷ # ÷ [0.2] LOW LINE 
(ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u0308\u00AD", + new String[] { }); + + // ÷ 005F × 0300 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u0300", + new String[] { }); + + // ÷ 005F × 0308 × 0300 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u0308\u0300", + new String[] { }); + + // ÷ 005F × 0061 × 2060 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u0061\u2060", + new String[] { "\u005F\u0061\u2060" }); + + // ÷ 005F × 0308 × 0061 × 2060 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u0308\u0061\u2060", + new String[] { "\u005F\u0308\u0061\u2060" }); + + // ÷ 005F × 0061 ÷ 003A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u0061\u003A", + new String[] { "\u005F\u0061" }); + + // ÷ 005F × 0308 × 0061 ÷ 003A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u0308\u0061\u003A", + new String[] { "\u005F\u0308\u0061" }); + + // ÷ 005F × 0061 ÷ 0027 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u0061\u0027", + new String[] { "\u005F\u0061" }); + + // ÷ 005F × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u0308\u0061\u0027", + new String[] { "\u005F\u0308\u0061" }); + + // ÷ 005F × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u0061\u0027\u2060", + new String[] { "\u005F\u0061" }); + + // ÷ 005F × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u0308\u0061\u0027\u2060", + new String[] { "\u005F\u0308\u0061" }); + + // ÷ 005F × 0061 ÷ 002C ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u0061\u002C", + new String[] { "\u005F\u0061" }); + + // ÷ 005F × 0308 × 0061 ÷ 002C ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u0308\u0061\u002C", + new String[] { "\u005F\u0308\u0061" }); + + // ÷ 005F × 0031 ÷ 003A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u0031\u003A", + new String[] { "\u005F\u0031" }); + + // ÷ 005F × 0308 × 0031 ÷ 003A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] 
COMBINING DIAERESIS (Extend_FE) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u0308\u0031\u003A", + new String[] { "\u005F\u0308\u0031" }); + + // ÷ 005F × 0031 ÷ 0027 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u0031\u0027", + new String[] { "\u005F\u0031" }); + + // ÷ 005F × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u0308\u0031\u0027", + new String[] { "\u005F\u0308\u0031" }); + + // ÷ 005F × 0031 ÷ 002C ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u0031\u002C", + new String[] { "\u005F\u0031" }); + + // ÷ 005F × 0308 × 0031 ÷ 002C ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u0308\u0031\u002C", + new String[] { "\u005F\u0308\u0031" }); + + // ÷ 005F × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u0031\u002E\u2060", + new String[] { "\u005F\u0031" }); + + // ÷ 005F × 0308 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u005F\u0308\u0031\u002E\u2060", + new String[] { "\u005F\u0308\u0031" }); + + // ÷ 00AD ÷ 0001 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u0001", + new String[] { }); + + // ÷ 00AD × 0308 ÷ 0001 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u0308\u0001", + new String[] { }); + + // ÷ 00AD ÷ 000D ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [3.11] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\r", + new String[] { }); + + // ÷ 00AD × 0308 ÷ 000D ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u0308\r", + new String[] { }); + + // ÷ 00AD ÷ 000A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [3.11] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\n", + new String[] { }); + + // ÷ 00AD × 0308 ÷ 000A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u0308\n", + new String[] { }); + + // ÷ 00AD ÷ 000B ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [3.11] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u000B", + new String[] { }); + + // ÷ 00AD × 0308 ÷ 000B ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u0308\u000B", + new String[] { }); + + // ÷ 00AD ÷ 3031 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u3031", + new String[] { "\u3031" }); + + // ÷ 00AD × 0308 ÷ 3031 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u0308\u3031", + new 
String[] { "\u3031" }); + + // ÷ 00AD ÷ 0041 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u0041", + new String[] { "\u0041" }); + + // ÷ 00AD × 0308 ÷ 0041 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u0308\u0041", + new String[] { "\u0041" }); + + // ÷ 00AD ÷ 003A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u003A", + new String[] { }); + + // ÷ 00AD × 0308 ÷ 003A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u0308\u003A", + new String[] { }); + + // ÷ 00AD ÷ 002C ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u002C", + new String[] { }); + + // ÷ 00AD × 0308 ÷ 002C ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u0308\u002C", + new String[] { }); + + // ÷ 00AD ÷ 0027 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u0027", + new String[] { }); + + // ÷ 00AD × 0308 ÷ 0027 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u0308\u0027", + new String[] { }); + + // ÷ 00AD ÷ 0030 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u0030", + new String[] { "\u0030" }); + + // ÷ 00AD × 0308 ÷ 0030 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u0308\u0030", + new String[] { "\u0030" }); + + // ÷ 00AD ÷ 005F ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u005F", + new String[] { }); + + // ÷ 00AD × 0308 ÷ 005F ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u0308\u005F", + new String[] { }); + + // ÷ 00AD × 00AD ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u00AD", + new String[] { }); + + // ÷ 00AD × 0308 × 00AD ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u0308\u00AD", + new String[] { }); + + // ÷ 00AD × 0300 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u0300", + new String[] { }); + + // ÷ 00AD × 0308 × 0300 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u0308\u0300", + new String[] { }); + + // ÷ 00AD ÷ 0061 × 2060 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u0061\u2060", + new String[] { "\u0061\u2060" }); + + // ÷ 00AD × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + 
assertAnalyzesTo(analyzer, "\u00AD\u0308\u0061\u2060", + new String[] { "\u0061\u2060" }); + + // ÷ 00AD ÷ 0061 ÷ 003A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u0061\u003A", + new String[] { "\u0061" }); + + // ÷ 00AD × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u0308\u0061\u003A", + new String[] { "\u0061" }); + + // ÷ 00AD ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u0061\u0027", + new String[] { "\u0061" }); + + // ÷ 00AD × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u0308\u0061\u0027", + new String[] { "\u0061" }); + + // ÷ 00AD ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u0061\u0027\u2060", + new String[] { "\u0061" }); + + // ÷ 00AD × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u0308\u0061\u0027\u2060", + new String[] { "\u0061" }); + + // ÷ 00AD ÷ 0061 ÷ 002C ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u0061\u002C", + new String[] { "\u0061" }); + + // ÷ 00AD × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u0308\u0061\u002C", + new String[] { "\u0061" }); + + // ÷ 00AD ÷ 0031 ÷ 003A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u0031\u003A", + new String[] { "\u0031" }); + + // ÷ 00AD × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u0308\u0031\u003A", + new String[] { "\u0031" }); + + // ÷ 00AD ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u0031\u0027", + new String[] { "\u0031" }); + + // ÷ 00AD × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u0308\u0031\u0027", + new String[] { "\u0031" }); + + // ÷ 00AD ÷ 0031 ÷ 002C ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u0031\u002C", + new String[] { "\u0031" }); + + // ÷ 00AD × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, 
"\u00AD\u0308\u0031\u002C", + new String[] { "\u0031" }); + + // ÷ 00AD ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u0031\u002E\u2060", + new String[] { "\u0031" }); + + // ÷ 00AD × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u00AD\u0308\u0031\u002E\u2060", + new String[] { "\u0031" }); + + // ÷ 0300 ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u0001", + new String[] { }); + + // ÷ 0300 × 0308 ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u0308\u0001", + new String[] { }); + + // ÷ 0300 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [3.11] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\r", + new String[] { }); + + // ÷ 0300 × 0308 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u0308\r", + new String[] { }); + + // ÷ 0300 ÷ 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [3.11] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\n", + new String[] { }); + + // ÷ 0300 × 0308 ÷ 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u0308\n", + new String[] { }); + + // ÷ 0300 ÷ 000B ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [3.11] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u000B", + new String[] { }); + + // ÷ 0300 × 0308 ÷ 000B ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u0308\u000B", + new String[] { }); + + // ÷ 0300 ÷ 3031 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u3031", + new String[] { "\u3031" }); + + // ÷ 0300 × 0308 ÷ 3031 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u0308\u3031", + new String[] { "\u3031" }); + + // ÷ 0300 ÷ 0041 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u0041", + new String[] { "\u0041" }); + + // ÷ 0300 × 0308 ÷ 0041 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u0308\u0041", + new String[] { "\u0041" }); + + // ÷ 0300 ÷ 003A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u003A", + new String[] { }); + + // ÷ 0300 × 0308 ÷ 003A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u0308\u003A", + new String[] { }); + + // ÷ 0300 ÷ 002C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u002C", + new String[] { }); + + // ÷ 
0300 × 0308 ÷ 002C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u0308\u002C", + new String[] { }); + + // ÷ 0300 ÷ 0027 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u0027", + new String[] { }); + + // ÷ 0300 × 0308 ÷ 0027 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u0308\u0027", + new String[] { }); + + // ÷ 0300 ÷ 0030 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u0030", + new String[] { "\u0030" }); + + // ÷ 0300 × 0308 ÷ 0030 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u0308\u0030", + new String[] { "\u0030" }); + + // ÷ 0300 ÷ 005F ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u005F", + new String[] { }); + + // ÷ 0300 × 0308 ÷ 005F ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u0308\u005F", + new String[] { }); + + // ÷ 0300 × 00AD ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u00AD", + new String[] { }); + + // ÷ 0300 × 0308 × 00AD ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u0308\u00AD", + new String[] { }); + + // ÷ 0300 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u0300", + new String[] { }); + + // ÷ 0300 × 0308 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u0308\u0300", + new String[] { }); + + // ÷ 0300 ÷ 0061 × 2060 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u0061\u2060", + new String[] { "\u0061\u2060" }); + + // ÷ 0300 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u0308\u0061\u2060", + new String[] { "\u0061\u2060" }); + + // ÷ 0300 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u0061\u003A", + new String[] { "\u0061" }); + + // ÷ 0300 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u0308\u0061\u003A", + new String[] { "\u0061" }); + + // ÷ 0300 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u0061\u0027", + new String[] { "\u0061" 
}); + + // ÷ 0300 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u0308\u0061\u0027", + new String[] { "\u0061" }); + + // ÷ 0300 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u0061\u0027\u2060", + new String[] { "\u0061" }); + + // ÷ 0300 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u0308\u0061\u0027\u2060", + new String[] { "\u0061" }); + + // ÷ 0300 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u0061\u002C", + new String[] { "\u0061" }); + + // ÷ 0300 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u0308\u0061\u002C", + new String[] { "\u0061" }); + + // ÷ 0300 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u0031\u003A", + new String[] { "\u0031" }); + + // ÷ 0300 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u0308\u0031\u003A", + new String[] { "\u0031" }); + + // ÷ 0300 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u0031\u0027", + new String[] { "\u0031" }); + + // ÷ 0300 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u0308\u0031\u0027", + new String[] { "\u0031" }); + + // ÷ 0300 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u0031\u002C", + new String[] { "\u0031" }); + + // ÷ 0300 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u0308\u0031\u002C", + new String[] { "\u0031" }); + + // ÷ 0300 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u0031\u002E\u2060", + new String[] { "\u0031" }); + + // ÷ 0300 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0300\u0308\u0031\u002E\u2060", + new String[] { "\u0031" }); + + // ÷ 0061 × 2060 ÷ 0001 ÷ # ÷ 
[0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u0001", + new String[] { "\u0061\u2060" }); + + // ÷ 0061 × 2060 × 0308 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u0308\u0001", + new String[] { "\u0061\u2060\u0308" }); + + // ÷ 0061 × 2060 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [3.11] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\r", + new String[] { "\u0061\u2060" }); + + // ÷ 0061 × 2060 × 0308 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u0308\r", + new String[] { "\u0061\u2060\u0308" }); + + // ÷ 0061 × 2060 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [3.11] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\n", + new String[] { "\u0061\u2060" }); + + // ÷ 0061 × 2060 × 0308 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u0308\n", + new String[] { "\u0061\u2060\u0308" }); + + // ÷ 0061 × 2060 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [3.11] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u000B", + new String[] { "\u0061\u2060" }); + + // ÷ 0061 × 2060 × 0308 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u0308\u000B", + new String[] { "\u0061\u2060\u0308" }); + + // ÷ 0061 × 2060 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u3031", + new String[] { "\u0061\u2060", "\u3031" }); + + // ÷ 0061 × 2060 × 0308 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u0308\u3031", + new String[] { "\u0061\u2060\u0308", "\u3031" }); + + // ÷ 0061 × 2060 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u0041", + new String[] { "\u0061\u2060\u0041" }); + + // ÷ 0061 × 2060 × 0308 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u0308\u0041", + new String[] { "\u0061\u2060\u0308\u0041" }); + + // ÷ 0061 × 2060 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u003A", + new String[] { "\u0061\u2060" }); + + // ÷ 0061 × 2060 × 0308 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u0308\u003A", + new String[] { "\u0061\u2060\u0308" }); + + // ÷ 0061 × 2060 ÷ 
002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u002C", + new String[] { "\u0061\u2060" }); + + // ÷ 0061 × 2060 × 0308 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u0308\u002C", + new String[] { "\u0061\u2060\u0308" }); + + // ÷ 0061 × 2060 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u0027", + new String[] { "\u0061\u2060" }); + + // ÷ 0061 × 2060 × 0308 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u0308\u0027", + new String[] { "\u0061\u2060\u0308" }); + + // ÷ 0061 × 2060 × 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [9.0] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u0030", + new String[] { "\u0061\u2060\u0030" }); + + // ÷ 0061 × 2060 × 0308 × 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u0308\u0030", + new String[] { "\u0061\u2060\u0308\u0030" }); + + // ÷ 0061 × 2060 × 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u005F", + new String[] { "\u0061\u2060\u005F" }); + + // ÷ 0061 × 2060 × 0308 × 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u0308\u005F", + new String[] { "\u0061\u2060\u0308\u005F" }); + + // ÷ 0061 × 2060 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u00AD", + new String[] { "\u0061\u2060\u00AD" }); + + // ÷ 0061 × 2060 × 0308 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u0308\u00AD", + new String[] { "\u0061\u2060\u0308\u00AD" }); + + // ÷ 0061 × 2060 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u0300", + new String[] { "\u0061\u2060\u0300" }); + + // ÷ 0061 × 2060 × 0308 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u0308\u0300", + new String[] { "\u0061\u2060\u0308\u0300" }); + + // ÷ 0061 × 2060 × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u0061\u2060", + new String[] { "\u0061\u2060\u0061\u2060" }); + + // ÷ 0061 × 2060 × 0308 × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER 
(Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u0308\u0061\u2060", + new String[] { "\u0061\u2060\u0308\u0061\u2060" }); + + // ÷ 0061 × 2060 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u0061\u003A", + new String[] { "\u0061\u2060\u0061" }); + + // ÷ 0061 × 2060 × 0308 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u0308\u0061\u003A", + new String[] { "\u0061\u2060\u0308\u0061" }); + + // ÷ 0061 × 2060 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u0061\u0027", + new String[] { "\u0061\u2060\u0061" }); + + // ÷ 0061 × 2060 × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u0308\u0061\u0027", + new String[] { "\u0061\u2060\u0308\u0061" }); + + // ÷ 0061 × 2060 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u0061\u0027\u2060", + new String[] { "\u0061\u2060\u0061" }); + + // ÷ 0061 × 2060 × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u0308\u0061\u0027\u2060", + new String[] { "\u0061\u2060\u0308\u0061" }); + + // ÷ 0061 × 2060 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u0061\u002C", + new String[] { "\u0061\u2060\u0061" }); + + // ÷ 0061 × 2060 × 0308 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u0308\u0061\u002C", + new String[] { "\u0061\u2060\u0308\u0061" }); + + // ÷ 0061 × 2060 × 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u0031\u003A", + new String[] { "\u0061\u2060\u0031" }); + + // ÷ 0061 × 2060 × 0308 × 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u0308\u0031\u003A", + new String[] { "\u0061\u2060\u0308\u0031" }); + + // ÷ 0061 × 2060 × 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] 
WORD JOINER (Format_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u0031\u0027", + new String[] { "\u0061\u2060\u0031" }); + + // ÷ 0061 × 2060 × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u0308\u0031\u0027", + new String[] { "\u0061\u2060\u0308\u0031" }); + + // ÷ 0061 × 2060 × 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u0031\u002C", + new String[] { "\u0061\u2060\u0031" }); + + // ÷ 0061 × 2060 × 0308 × 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u0308\u0031\u002C", + new String[] { "\u0061\u2060\u0308\u0031" }); + + // ÷ 0061 × 2060 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u0031\u002E\u2060", + new String[] { "\u0061\u2060\u0031" }); + + // ÷ 0061 × 2060 × 0308 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u2060\u0308\u0031\u002E\u2060", + new String[] { "\u0061\u2060\u0308\u0031" }); + + // ÷ 0061 ÷ 003A ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u0001", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 003A × 0308 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u0308\u0001", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 003A ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [3.11] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\r", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 003A × 0308 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u0308\r", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 003A ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [3.11] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\n", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 003A × 0308 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u0308\n", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 003A ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [3.11] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u000B", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 003A × 0308 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ 
[3.11] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u0308\u000B", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 003A ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u3031", + new String[] { "\u0061", "\u3031" }); + + // ÷ 0061 ÷ 003A × 0308 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u0308\u3031", + new String[] { "\u0061", "\u3031" }); + + // ÷ 0061 × 003A × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [7.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u0041", + new String[] { "\u0061\u003A\u0041" }); + + // ÷ 0061 × 003A × 0308 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u0308\u0041", + new String[] { "\u0061\u003A\u0308\u0041" }); + + // ÷ 0061 ÷ 003A ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u003A", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 003A × 0308 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u0308\u003A", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 003A ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u002C", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 003A × 0308 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u0308\u002C", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 003A ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u0027", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 003A × 0308 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u0308\u0027", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 003A ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u0030", + new String[] { "\u0061", "\u0030" }); + + // ÷ 0061 ÷ 003A × 0308 ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u0308\u0030", + new String[] { "\u0061", "\u0030" }); + + // ÷ 0061 ÷ 003A ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u005F", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 003A × 0308 ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE 
(ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u0308\u005F", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 003A × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u00AD", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 003A × 0308 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u0308\u00AD", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 003A × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u0300", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 003A × 0308 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u0308\u0300", + new String[] { "\u0061" }); + + // ÷ 0061 × 003A × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [7.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u0061\u2060", + new String[] { "\u0061\u003A\u0061\u2060" }); + + // ÷ 0061 × 003A × 0308 × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u0308\u0061\u2060", + new String[] { "\u0061\u003A\u0308\u0061\u2060" }); + + // ÷ 0061 × 003A × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u0061\u003A", + new String[] { "\u0061\u003A\u0061" }); + + // ÷ 0061 × 003A × 0308 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u0308\u0061\u003A", + new String[] { "\u0061\u003A\u0308\u0061" }); + + // ÷ 0061 × 003A × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u0061\u0027", + new String[] { "\u0061\u003A\u0061" }); + + // ÷ 0061 × 003A × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u0308\u0061\u0027", + new String[] { "\u0061\u003A\u0308\u0061" }); + + // ÷ 0061 × 003A × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u0061\u0027\u2060", + new String[] { "\u0061\u003A\u0061" }); + + // ÷ 0061 × 003A × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × 
[4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u0308\u0061\u0027\u2060", + new String[] { "\u0061\u003A\u0308\u0061" }); + + // ÷ 0061 × 003A × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u0061\u002C", + new String[] { "\u0061\u003A\u0061" }); + + // ÷ 0061 × 003A × 0308 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u0308\u0061\u002C", + new String[] { "\u0061\u003A\u0308\u0061" }); + + // ÷ 0061 ÷ 003A ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u0031\u003A", + new String[] { "\u0061", "\u0031" }); + + // ÷ 0061 ÷ 003A × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u0308\u0031\u003A", + new String[] { "\u0061", "\u0031" }); + + // ÷ 0061 ÷ 003A ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u0031\u0027", + new String[] { "\u0061", "\u0031" }); + + // ÷ 0061 ÷ 003A × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u0308\u0031\u0027", + new String[] { "\u0061", "\u0031" }); + + // ÷ 0061 ÷ 003A ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u0031\u002C", + new String[] { "\u0061", "\u0031" }); + + // ÷ 0061 ÷ 003A × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u0308\u0031\u002C", + new String[] { "\u0061", "\u0031" }); + + // ÷ 0061 ÷ 003A ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u0031\u002E\u2060", + new String[] { "\u0061", "\u0031" }); + + // ÷ 0061 ÷ 003A × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u003A\u0308\u0031\u002E\u2060", + new String[] { "\u0061", "\u0031" }); + + // ÷ 0061 ÷ 0027 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u0001", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 × 0308 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × 
[4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\u0001", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [3.11] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\r", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 × 0308 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\r", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [3.11] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\n", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 × 0308 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\n", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [3.11] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u000B", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 × 0308 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\u000B", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u3031", + new String[] { "\u0061", "\u3031" }); + + // ÷ 0061 ÷ 0027 × 0308 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\u3031", + new String[] { "\u0061", "\u3031" }); + + // ÷ 0061 × 0027 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [7.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u0041", + new String[] { "\u0061\u0027\u0041" }); + + // ÷ 0061 × 0027 × 0308 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\u0041", + new String[] { "\u0061\u0027\u0308\u0041" }); + + // ÷ 0061 ÷ 0027 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u003A", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 × 0308 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\u003A", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u002C", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 × 0308 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + 
assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\u002C", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u0027", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 × 0308 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\u0027", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u0030", + new String[] { "\u0061", "\u0030" }); + + // ÷ 0061 ÷ 0027 × 0308 ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\u0030", + new String[] { "\u0061", "\u0030" }); + + // ÷ 0061 ÷ 0027 ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u005F", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 × 0308 ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\u005F", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u00AD", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 × 0308 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\u00AD", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u0300", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 × 0308 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\u0300", + new String[] { "\u0061" }); + + // ÷ 0061 × 0027 × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [7.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u0061\u2060", + new String[] { "\u0061\u0027\u0061\u2060" }); + + // ÷ 0061 × 0027 × 0308 × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\u0061\u2060", + new String[] { "\u0061\u0027\u0308\u0061\u2060" }); + + // ÷ 0061 × 0027 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u0061\u003A", + new String[] { 
"\u0061\u0027\u0061" }); + + // ÷ 0061 × 0027 × 0308 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\u0061\u003A", + new String[] { "\u0061\u0027\u0308\u0061" }); + + // ÷ 0061 × 0027 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u0061\u0027", + new String[] { "\u0061\u0027\u0061" }); + + // ÷ 0061 × 0027 × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\u0061\u0027", + new String[] { "\u0061\u0027\u0308\u0061" }); + + // ÷ 0061 × 0027 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u0061\u0027\u2060", + new String[] { "\u0061\u0027\u0061" }); + + // ÷ 0061 × 0027 × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\u0061\u0027\u2060", + new String[] { "\u0061\u0027\u0308\u0061" }); + + // ÷ 0061 × 0027 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u0061\u002C", + new String[] { "\u0061\u0027\u0061" }); + + // ÷ 0061 × 0027 × 0308 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\u0061\u002C", + new String[] { "\u0061\u0027\u0308\u0061" }); + + // ÷ 0061 ÷ 0027 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u0031\u003A", + new String[] { "\u0061", "\u0031" }); + + // ÷ 0061 ÷ 0027 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\u0031\u003A", + new String[] { "\u0061", "\u0031" }); + + // ÷ 0061 ÷ 0027 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u0031\u0027", + new String[] { "\u0061", "\u0031" }); + + // ÷ 0061 ÷ 0027 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\u0031\u0027", + new String[] { "\u0061", "\u0031" 
}); + + // ÷ 0061 ÷ 0027 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u0031\u002C", + new String[] { "\u0061", "\u0031" }); + + // ÷ 0061 ÷ 0027 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\u0031\u002C", + new String[] { "\u0061", "\u0031" }); + + // ÷ 0061 ÷ 0027 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u0031\u002E\u2060", + new String[] { "\u0061", "\u0031" }); + + // ÷ 0061 ÷ 0027 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u0308\u0031\u002E\u2060", + new String[] { "\u0061", "\u0031" }); + + // ÷ 0061 ÷ 0027 × 2060 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0001", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\u0001", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 × 2060 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [3.11] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\r", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\r", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 × 2060 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [3.11] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\n", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\n", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 × 2060 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [3.11] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u000B", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\u000B", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 × 
2060 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u3031", + new String[] { "\u0061", "\u3031" }); + + // ÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\u3031", + new String[] { "\u0061", "\u3031" }); + + // ÷ 0061 × 0027 × 2060 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0041", + new String[] { "\u0061\u0027\u2060\u0041" }); + + // ÷ 0061 × 0027 × 2060 × 0308 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\u0041", + new String[] { "\u0061\u0027\u2060\u0308\u0041" }); + + // ÷ 0061 ÷ 0027 × 2060 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u003A", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\u003A", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 × 2060 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u002C", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\u002C", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 × 2060 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0027", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\u0027", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 × 2060 ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0030", + new String[] { "\u0061", "\u0030" }); + + // ÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] + 
assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\u0030", + new String[] { "\u0061", "\u0030" }); + + // ÷ 0061 ÷ 0027 × 2060 ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u005F", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\u005F", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 × 2060 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u00AD", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 × 2060 × 0308 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\u00AD", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 × 2060 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0300", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 0027 × 2060 × 0308 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\u0300", + new String[] { "\u0061" }); + + // ÷ 0061 × 0027 × 2060 × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0061\u2060", + new String[] { "\u0061\u0027\u2060\u0061\u2060" }); + + // ÷ 0061 × 0027 × 2060 × 0308 × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\u0061\u2060", + new String[] { "\u0061\u0027\u2060\u0308\u0061\u2060" }); + + // ÷ 0061 × 0027 × 2060 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0061\u003A", + new String[] { "\u0061\u0027\u2060\u0061" }); + + // ÷ 0061 × 0027 × 2060 × 0308 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\u0061\u003A", + new String[] { "\u0061\u0027\u2060\u0308\u0061" }); + + // ÷ 0061 × 0027 × 2060 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN SMALL LETTER 
A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0061\u0027",
+        new String[] { "\u0061\u0027\u2060\u0061" });
+
+    // ÷ 0061 × 0027 × 2060 × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\u0061\u0027",
+        new String[] { "\u0061\u0027\u2060\u0308\u0061" });
+
+    // ÷ 0061 × 0027 × 2060 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0061\u0027\u2060",
+        new String[] { "\u0061\u0027\u2060\u0061" });
+
+    // ÷ 0061 × 0027 × 2060 × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\u0061\u0027\u2060",
+        new String[] { "\u0061\u0027\u2060\u0308\u0061" });
+
+    // ÷ 0061 × 0027 × 2060 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0061\u002C",
+        new String[] { "\u0061\u0027\u2060\u0061" });
+
+    // ÷ 0061 × 0027 × 2060 × 0308 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\u0061\u002C",
+        new String[] { "\u0061\u0027\u2060\u0308\u0061" });
+
+    // ÷ 0061 ÷ 0027 × 2060 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0031\u003A",
+        new String[] { "\u0061", "\u0031" });
+
+    // ÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\u0031\u003A",
+        new String[] { "\u0061", "\u0031" });
+
+    // ÷ 0061 ÷ 0027 × 2060 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0031\u0027",
+        new String[] { "\u0061", "\u0031" });
+
+    // ÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\u0031\u0027",
+        new String[] { "\u0061", "\u0031" });
+
+    // ÷ 0061 ÷ 0027 × 2060 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0031\u002C",
+        new String[] { "\u0061", "\u0031" });
+
+    // ÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\u0031\u002C",
+        new String[] { "\u0061", "\u0031" });
+
+    // ÷ 0061 ÷ 0027 × 2060 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0031\u002E\u2060",
+        new String[] { "\u0061", "\u0031" });
+
+    // ÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0061\u0027\u2060\u0308\u0031\u002E\u2060",
+        new String[] { "\u0061", "\u0031" });
+
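From here the generated cases pair a letter with a comma (MidNum) rather than an apostrophe (MidNumLet). A worked trace of the rule annotations used in these comments, added as an annotation only and not part of the upstream WordBreakTest data:

    // Worked trace for "a" + "," + X under the UAX#29 rules cited above
    // (assumption: the bracketed [n.0] tags name the standard WB rules):
    //   sot ÷ a      [0.2]    always break at start of text
    //   a   ÷ ,      [999.0]  nothing joins ALetter to a following MidNum;
    //                         WB6/WB7 ([6.0]/[7.0]) need letters on BOTH
    //                         sides and cover MidLetter/MidNumLetQ only
    //   ,   × 0308   [4.0]    Extend/Format characters attach, never split
    // So every case in this group expects "a" alone, plus a second token
    // only when X itself can start one (ALetter, Numeric, Katakana).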
+    // ÷ 0061 ÷ 002C ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] (Other) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0061\u002C\u0001",
+        new String[] { "\u0061" });
+
+    // ÷ 0061 ÷ 002C × 0308 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] (Other) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0061\u002C\u0308\u0001",
+        new String[] { "\u0061" });
+
+    // ÷ 0061 ÷ 002C ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [3.11] (CR) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0061\u002C\r",
+        new String[] { "\u0061" });
+
+    // ÷ 0061 ÷ 002C × 0308 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (CR) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0061\u002C\u0308\r",
+        new String[] { "\u0061" });
+
+    // ÷ 0061 ÷ 002C ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [3.11] (LF) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0061\u002C\n",
+        new String[] { "\u0061" });
+
+    // ÷ 0061 ÷ 002C × 0308 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (LF) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0061\u002C\u0308\n",
+        new String[] { "\u0061" });
+
+    // ÷ 0061 ÷ 002C ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [3.11] (Newline) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0061\u002C\u000B",
+        new String[] { "\u0061" });
+
+    // ÷ 0061 ÷ 002C × 0308 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (Newline) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0061\u002C\u0308\u000B",
+        new String[] { "\u0061" });
+
+    // ÷ 0061 ÷ 002C ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0061\u002C\u3031",
+        new String[] { "\u0061", "\u3031" });
+
+    // ÷ 0061 ÷ 002C × 0308 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷
[999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\u0308\u3031", + new String[] { "\u0061", "\u3031" }); + + // ÷ 0061 ÷ 002C ÷ 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\u0041", + new String[] { "\u0061", "\u0041" }); + + // ÷ 0061 ÷ 002C × 0308 ÷ 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\u0308\u0041", + new String[] { "\u0061", "\u0041" }); + + // ÷ 0061 ÷ 002C ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\u003A", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 002C × 0308 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\u0308\u003A", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 002C ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\u002C", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 002C × 0308 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\u0308\u002C", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 002C ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\u0027", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 002C × 0308 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\u0308\u0027", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 002C ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\u0030", + new String[] { "\u0061", "\u0030" }); + + // ÷ 0061 ÷ 002C × 0308 ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\u0308\u0030", + new String[] { "\u0061", "\u0030" }); + + // ÷ 0061 ÷ 002C ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\u005F", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 002C × 0308 ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\u0308\u005F", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 002C × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\u00AD", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 002C × 0308 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT 
HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\u0308\u00AD", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 002C × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\u0300", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 002C × 0308 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\u0308\u0300", + new String[] { "\u0061" }); + + // ÷ 0061 ÷ 002C ÷ 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\u0061\u2060", + new String[] { "\u0061", "\u0061\u2060" }); + + // ÷ 0061 ÷ 002C × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\u0308\u0061\u2060", + new String[] { "\u0061", "\u0061\u2060" }); + + // ÷ 0061 ÷ 002C ÷ 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\u0061\u003A", + new String[] { "\u0061", "\u0061" }); + + // ÷ 0061 ÷ 002C × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\u0308\u0061\u003A", + new String[] { "\u0061", "\u0061" }); + + // ÷ 0061 ÷ 002C ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\u0061\u0027", + new String[] { "\u0061", "\u0061" }); + + // ÷ 0061 ÷ 002C × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\u0308\u0061\u0027", + new String[] { "\u0061", "\u0061" }); + + // ÷ 0061 ÷ 002C ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\u0061\u0027\u2060", + new String[] { "\u0061", "\u0061" }); + + // ÷ 0061 ÷ 002C × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\u0308\u0061\u0027\u2060", + new String[] { "\u0061", "\u0061" }); + + // ÷ 0061 ÷ 002C ÷ 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\u0061\u002C", + new String[] { "\u0061", "\u0061" }); + + // ÷ 0061 ÷ 002C × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] 
COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\u0308\u0061\u002C", + new String[] { "\u0061", "\u0061" }); + + // ÷ 0061 ÷ 002C ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\u0031\u003A", + new String[] { "\u0061", "\u0031" }); + + // ÷ 0061 ÷ 002C × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\u0308\u0031\u003A", + new String[] { "\u0061", "\u0031" }); + + // ÷ 0061 ÷ 002C ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\u0031\u0027", + new String[] { "\u0061", "\u0031" }); + + // ÷ 0061 ÷ 002C × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\u0308\u0031\u0027", + new String[] { "\u0061", "\u0031" }); + + // ÷ 0061 ÷ 002C ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\u0031\u002C", + new String[] { "\u0061", "\u0031" }); + + // ÷ 0061 ÷ 002C × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\u0308\u0031\u002C", + new String[] { "\u0061", "\u0031" }); + + // ÷ 0061 ÷ 002C ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\u0031\u002E\u2060", + new String[] { "\u0061", "\u0031" }); + + // ÷ 0061 ÷ 002C × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u002C\u0308\u0031\u002E\u2060", + new String[] { "\u0061", "\u0031" }); + + // ÷ 0031 ÷ 003A ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u0001", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 003A × 0308 ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u0308\u0001", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 003A ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [3.11] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\r", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 003A × 0308 ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u0308\r", + new String[] { "\u0031" }); + + // ÷ 
0031 ÷ 003A ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [3.11] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\n", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 003A × 0308 ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u0308\n", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 003A ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [3.11] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u000B", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 003A × 0308 ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u0308\u000B", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 003A ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u3031", + new String[] { "\u0031", "\u3031" }); + + // ÷ 0031 ÷ 003A × 0308 ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u0308\u3031", + new String[] { "\u0031", "\u3031" }); + + // ÷ 0031 ÷ 003A ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u0041", + new String[] { "\u0031", "\u0041" }); + + // ÷ 0031 ÷ 003A × 0308 ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u0308\u0041", + new String[] { "\u0031", "\u0041" }); + + // ÷ 0031 ÷ 003A ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u003A", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 003A × 0308 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u0308\u003A", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 003A ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u002C", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 003A × 0308 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u0308\u002C", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 003A ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u0027", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 003A × 0308 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u0308\u0027", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 003A ÷ 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u0030", + new String[] { "\u0031", "\u0030" }); + + // ÷ 0031 ÷ 003A × 0308 ÷ 0030 ÷ # ÷ [0.2] 
DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u0308\u0030", + new String[] { "\u0031", "\u0030" }); + + // ÷ 0031 ÷ 003A ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u005F", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 003A × 0308 ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u0308\u005F", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 003A × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u00AD", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 003A × 0308 × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u0308\u00AD", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 003A × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u0300", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 003A × 0308 × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u0308\u0300", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 003A ÷ 0061 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u0061\u2060", + new String[] { "\u0031", "\u0061\u2060" }); + + // ÷ 0031 ÷ 003A × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u0308\u0061\u2060", + new String[] { "\u0031", "\u0061\u2060" }); + + // ÷ 0031 ÷ 003A ÷ 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u0061\u003A", + new String[] { "\u0031", "\u0061" }); + + // ÷ 0031 ÷ 003A × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u0308\u0061\u003A", + new String[] { "\u0031", "\u0061" }); + + // ÷ 0031 ÷ 003A ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u0061\u0027", + new String[] { "\u0031", "\u0061" }); + + // ÷ 0031 ÷ 003A × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u0308\u0061\u0027", + new String[] { "\u0031", "\u0061" }); + + // ÷ 0031 ÷ 003A ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] 
COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u0061\u0027\u2060", + new String[] { "\u0031", "\u0061" }); + + // ÷ 0031 ÷ 003A × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u0308\u0061\u0027\u2060", + new String[] { "\u0031", "\u0061" }); + + // ÷ 0031 ÷ 003A ÷ 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u0061\u002C", + new String[] { "\u0031", "\u0061" }); + + // ÷ 0031 ÷ 003A × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u0308\u0061\u002C", + new String[] { "\u0031", "\u0061" }); + + // ÷ 0031 ÷ 003A ÷ 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u0031\u003A", + new String[] { "\u0031", "\u0031" }); + + // ÷ 0031 ÷ 003A × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u0308\u0031\u003A", + new String[] { "\u0031", "\u0031" }); + + // ÷ 0031 ÷ 003A ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u0031\u0027", + new String[] { "\u0031", "\u0031" }); + + // ÷ 0031 ÷ 003A × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u0308\u0031\u0027", + new String[] { "\u0031", "\u0031" }); + + // ÷ 0031 ÷ 003A ÷ 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u0031\u002C", + new String[] { "\u0031", "\u0031" }); + + // ÷ 0031 ÷ 003A × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u0308\u0031\u002C", + new String[] { "\u0031", "\u0031" }); + + // ÷ 0031 ÷ 003A ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u0031\u002E\u2060", + new String[] { "\u0031", "\u0031" }); + + // ÷ 0031 ÷ 003A × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u003A\u0308\u0031\u002E\u2060", + new String[] { 
"\u0031", "\u0031" }); + + // ÷ 0031 ÷ 0027 ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u0027\u0001", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 0027 × 0308 ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u0001", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 0027 ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [3.11] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u0027\r", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 0027 × 0308 ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\r", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 0027 ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [3.11] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u0027\n", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 0027 × 0308 ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\n", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 0027 ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [3.11] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u0027\u000B", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 0027 × 0308 ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u000B", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 0027 ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u0027\u3031", + new String[] { "\u0031", "\u3031" }); + + // ÷ 0031 ÷ 0027 × 0308 ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u3031", + new String[] { "\u0031", "\u3031" }); + + // ÷ 0031 ÷ 0027 ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u0027\u0041", + new String[] { "\u0031", "\u0041" }); + + // ÷ 0031 ÷ 0027 × 0308 ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u0041", + new String[] { "\u0031", "\u0041" }); + + // ÷ 0031 ÷ 0027 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u0027\u003A", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 0027 × 0308 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u003A", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 0027 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u0027\u002C", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 
+    // ÷ 0031 ÷ 0027 ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] (Other) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0031\u0027\u0001",
+        new String[] { "\u0031" });
+
+    // ÷ 0031 ÷ 0027 × 0308 ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] (Other) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u0001",
+        new String[] { "\u0031" });
+
+    // ÷ 0031 ÷ 0027 ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [3.11] (CR) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0031\u0027\r",
+        new String[] { "\u0031" });
+
+    // ÷ 0031 ÷ 0027 × 0308 ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (CR) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\r",
+        new String[] { "\u0031" });
+
+    // ÷ 0031 ÷ 0027 ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [3.11] (LF) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0031\u0027\n",
+        new String[] { "\u0031" });
+
+    // ÷ 0031 ÷ 0027 × 0308 ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (LF) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\n",
+        new String[] { "\u0031" });
+
+    // ÷ 0031 ÷ 0027 ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [3.11] (Newline) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0031\u0027\u000B",
+        new String[] { "\u0031" });
+
+    // ÷ 0031 ÷ 0027 × 0308 ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (Newline) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u000B",
+        new String[] { "\u0031" });
+
+    // ÷ 0031 ÷ 0027 ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0031\u0027\u3031",
+        new String[] { "\u0031", "\u3031" });
+
+    // ÷ 0031 ÷ 0027 × 0308 ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u3031",
+        new String[] { "\u0031", "\u3031" });
+
+    // ÷ 0031 ÷ 0027 ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0031\u0027\u0041",
+        new String[] { "\u0031", "\u0041" });
+
+    // ÷ 0031 ÷ 0027 × 0308 ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u0041",
+        new String[] { "\u0031", "\u0041" });
+
+    // ÷ 0031 ÷ 0027 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0031\u0027\u003A",
+        new String[] { "\u0031" });
+
+    // ÷ 0031 ÷ 0027 × 0308 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u003A",
+        new String[] { "\u0031" });
+
+    // ÷ 0031 ÷ 0027 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0031\u0027\u002C",
+        new String[] { "\u0031" });
+
+    // ÷ 0031 ÷ 0027 × 0308 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u002C",
+        new String[] { "\u0031" });
+
+    // ÷ 0031 ÷ 0027 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0031\u0027\u0027",
+        new String[] { "\u0031" });
+
+    // ÷ 0031 ÷ 0027 × 0308 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u0027",
+        new String[] { "\u0031" });
+
+    // ÷ 0031 × 0027 × 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (MidNumLet) × [11.0] DIGIT ZERO (Numeric) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0031\u0027\u0030",
+        new String[] { "\u0031\u0027\u0030" });
+
+    // ÷ 0031 × 0027 × 0308 × 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ZERO (Numeric) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u0030",
+        new String[] { "\u0031\u0027\u0308\u0030" });
+
+    // ÷ 0031 ÷ 0027 ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0031\u0027\u005F",
+        new String[] { "\u0031" });
+
+    // ÷ 0031 ÷ 0027 × 0308 ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u005F",
+        new String[] { "\u0031" });
+
+    // ÷ 0031 ÷ 0027 × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0031\u0027\u00AD",
+        new String[] { "\u0031" });
+
+    // ÷ 0031 ÷ 0027 × 0308 × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u00AD",
+        new String[] { "\u0031" });
+
+    // ÷ 0031 ÷ 0027 × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0031\u0027\u0300",
+        new String[] { "\u0031" });
+
+    // ÷ 0031 ÷ 0027 × 0308 × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u0300",
+        new String[] { "\u0031" });
+
+    // ÷ 0031 ÷ 0027 ÷ 0061 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0031\u0027\u0061\u2060",
+        new String[] { "\u0031", "\u0061\u2060" });
+
+    // ÷ 0031 ÷ 0027 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u0061\u2060",
+        new String[] { "\u0031", "\u0061\u2060" });
+
"\u0031\u0027\u0061\u003A", + new String[] { "\u0031", "\u0061" }); + + // ÷ 0031 ÷ 0027 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u0061\u003A", + new String[] { "\u0031", "\u0061" }); + + // ÷ 0031 ÷ 0027 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u0027\u0061\u0027", + new String[] { "\u0031", "\u0061" }); + + // ÷ 0031 ÷ 0027 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u0061\u0027", + new String[] { "\u0031", "\u0061" }); + + // ÷ 0031 ÷ 0027 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u0027\u0061\u0027\u2060", + new String[] { "\u0031", "\u0061" }); + + // ÷ 0031 ÷ 0027 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u0061\u0027\u2060", + new String[] { "\u0031", "\u0061" }); + + // ÷ 0031 ÷ 0027 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u0027\u0061\u002C", + new String[] { "\u0031", "\u0061" }); + + // ÷ 0031 ÷ 0027 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u0061\u002C", + new String[] { "\u0031", "\u0061" }); + + // ÷ 0031 × 0027 × 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (MidNumLet) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u0027\u0031\u003A", + new String[] { "\u0031\u0027\u0031" }); + + // ÷ 0031 × 0027 × 0308 × 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u0031\u003A", + new String[] { "\u0031\u0027\u0308\u0031" }); + + // ÷ 0031 × 0027 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (MidNumLet) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u0027\u0031\u0027", + new String[] { "\u0031\u0027\u0031" }); + + // ÷ 0031 × 0027 × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u0027\u0308\u0031\u0027", + new String[] { "\u0031\u0027\u0308\u0031" }); + + // ÷ 0031 × 0027 × 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) × 
+    // ÷ 0031 ÷ 002C ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] (Other) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0031\u002C\u0001",
+        new String[] { "\u0031" });
+
+    // ÷ 0031 ÷ 002C × 0308 ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] (Other) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0031\u002C\u0308\u0001",
+        new String[] { "\u0031" });
+
+    // ÷ 0031 ÷ 002C ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [3.11] (CR) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0031\u002C\r",
+        new String[] { "\u0031" });
+
+    // ÷ 0031 ÷ 002C × 0308 ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (CR) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0031\u002C\u0308\r",
+        new String[] { "\u0031" });
+
+    // ÷ 0031 ÷ 002C ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [3.11] (LF) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0031\u002C\n",
+        new String[] { "\u0031" });
+
+    // ÷ 0031 ÷ 002C × 0308 ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (LF) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0031\u002C\u0308\n",
+        new String[] { "\u0031" });
+
+    // ÷ 0031 ÷ 002C ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [3.11] (Newline) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0031\u002C\u000B",
+        new String[] { "\u0031" });
+
+    // ÷ 0031 ÷ 002C × 0308 ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (Newline) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0031\u002C\u0308\u000B",
+        new String[] { "\u0031" });
+
+    // ÷ 0031 ÷ 002C ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0031\u002C\u3031",
+        new String[] { "\u0031", "\u3031" });
+
+    // ÷ 0031 ÷ 002C × 0308 ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+    assertAnalyzesTo(analyzer, "\u0031\u002C\u0308\u3031",
+        new String[] { "\u0031", "\u3031" });
+
+    // ÷ 0031 ÷ 002C ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter)
÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002C\u0041", + new String[] { "\u0031", "\u0041" }); + + // ÷ 0031 ÷ 002C × 0308 ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002C\u0308\u0041", + new String[] { "\u0031", "\u0041" }); + + // ÷ 0031 ÷ 002C ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002C\u003A", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 002C × 0308 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002C\u0308\u003A", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 002C ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002C\u002C", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 002C × 0308 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002C\u0308\u002C", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 002C ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002C\u0027", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 002C × 0308 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002C\u0308\u0027", + new String[] { "\u0031" }); + + // ÷ 0031 × 002C × 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [11.0] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002C\u0030", + new String[] { "\u0031\u002C\u0030" }); + + // ÷ 0031 × 002C × 0308 × 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002C\u0308\u0030", + new String[] { "\u0031\u002C\u0308\u0030" }); + + // ÷ 0031 ÷ 002C ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002C\u005F", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 002C × 0308 ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002C\u0308\u005F", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 002C × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002C\u00AD", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 002C × 0308 × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002C\u0308\u00AD", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 002C × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002C\u0300", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 002C × 0308 × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE 
ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002C\u0308\u0300", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 002C ÷ 0061 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002C\u0061\u2060", + new String[] { "\u0031", "\u0061\u2060" }); + + // ÷ 0031 ÷ 002C × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002C\u0308\u0061\u2060", + new String[] { "\u0031", "\u0061\u2060" }); + + // ÷ 0031 ÷ 002C ÷ 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002C\u0061\u003A", + new String[] { "\u0031", "\u0061" }); + + // ÷ 0031 ÷ 002C × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002C\u0308\u0061\u003A", + new String[] { "\u0031", "\u0061" }); + + // ÷ 0031 ÷ 002C ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002C\u0061\u0027", + new String[] { "\u0031", "\u0061" }); + + // ÷ 0031 ÷ 002C × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002C\u0308\u0061\u0027", + new String[] { "\u0031", "\u0061" }); + + // ÷ 0031 ÷ 002C ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002C\u0061\u0027\u2060", + new String[] { "\u0031", "\u0061" }); + + // ÷ 0031 ÷ 002C × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002C\u0308\u0061\u0027\u2060", + new String[] { "\u0031", "\u0061" }); + + // ÷ 0031 ÷ 002C ÷ 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002C\u0061\u002C", + new String[] { "\u0031", "\u0061" }); + + // ÷ 0031 ÷ 002C × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002C\u0308\u0061\u002C", + new String[] { "\u0031", "\u0061" }); + + // ÷ 0031 × 002C × 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002C\u0031\u003A", + new String[] { "\u0031\u002C\u0031" }); + + // ÷ 0031 × 002C × 0308 × 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS 
(Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002C\u0308\u0031\u003A", + new String[] { "\u0031\u002C\u0308\u0031" }); + + // ÷ 0031 × 002C × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002C\u0031\u0027", + new String[] { "\u0031\u002C\u0031" }); + + // ÷ 0031 × 002C × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002C\u0308\u0031\u0027", + new String[] { "\u0031\u002C\u0308\u0031" }); + + // ÷ 0031 × 002C × 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002C\u0031\u002C", + new String[] { "\u0031\u002C\u0031" }); + + // ÷ 0031 × 002C × 0308 × 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002C\u0308\u0031\u002C", + new String[] { "\u0031\u002C\u0308\u0031" }); + + // ÷ 0031 × 002C × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002C\u0031\u002E\u2060", + new String[] { "\u0031\u002C\u0031" }); + + // ÷ 0031 × 002C × 0308 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002C\u0308\u0031\u002E\u2060", + new String[] { "\u0031\u002C\u0308\u0031" }); + + // ÷ 0031 ÷ 002E × 2060 ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0001", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] (Other) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0308\u0001", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 002E × 2060 ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [3.11] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\r", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 002E × 2060 × 0308 ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (CR) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0308\r", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 002E × 2060 ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [3.11] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\n", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 002E × 2060 × 0308 ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (LF) ÷ [0.3] + assertAnalyzesTo(analyzer, 
"\u0031\u002E\u2060\u0308\n", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 002E × 2060 ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [3.11] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u000B", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 002E × 2060 × 0308 ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.11] (Newline) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0308\u000B", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 002E × 2060 ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u3031", + new String[] { "\u0031", "\u3031" }); + + // ÷ 0031 ÷ 002E × 2060 × 0308 ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0308\u3031", + new String[] { "\u0031", "\u3031" }); + + // ÷ 0031 ÷ 002E × 2060 ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0041", + new String[] { "\u0031", "\u0041" }); + + // ÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0308\u0041", + new String[] { "\u0031", "\u0041" }); + + // ÷ 0031 ÷ 002E × 2060 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u003A", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 002E × 2060 × 0308 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0308\u003A", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 002E × 2060 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u002C", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 002E × 2060 × 0308 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0308\u002C", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 002E × 2060 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0027", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0308\u0027", + new String[] { "\u0031" }); + + // ÷ 0031 × 002E × 2060 × 0030 
÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [11.0] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0030", + new String[] { "\u0031\u002E\u2060\u0030" }); + + // ÷ 0031 × 002E × 2060 × 0308 × 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ZERO (Numeric) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0308\u0030", + new String[] { "\u0031\u002E\u2060\u0308\u0030" }); + + // ÷ 0031 ÷ 002E × 2060 ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u005F", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 002E × 2060 × 0308 ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0308\u005F", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 002E × 2060 × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u00AD", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 002E × 2060 × 0308 × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0308\u00AD", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 002E × 2060 × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0300", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 002E × 2060 × 0308 × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0308\u0300", + new String[] { "\u0031" }); + + // ÷ 0031 ÷ 002E × 2060 ÷ 0061 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0061\u2060", + new String[] { "\u0031", "\u0061\u2060" }); + + // ÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0308\u0061\u2060", + new String[] { "\u0031", "\u0061\u2060" }); + + // ÷ 0031 ÷ 002E × 2060 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0061\u003A", + new String[] { "\u0031", "\u0061" }); + + // ÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A 
(ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0308\u0061\u003A", + new String[] { "\u0031", "\u0061" }); + + // ÷ 0031 ÷ 002E × 2060 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0061\u0027", + new String[] { "\u0031", "\u0061" }); + + // ÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0308\u0061\u0027", + new String[] { "\u0031", "\u0061" }); + + // ÷ 0031 ÷ 002E × 2060 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0061\u0027\u2060", + new String[] { "\u0031", "\u0061" }); + + // ÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0308\u0061\u0027\u2060", + new String[] { "\u0031", "\u0061" }); + + // ÷ 0031 ÷ 002E × 2060 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0061\u002C", + new String[] { "\u0031", "\u0061" }); + + // ÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0308\u0061\u002C", + new String[] { "\u0031", "\u0061" }); + + // ÷ 0031 × 002E × 2060 × 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0031\u003A", + new String[] { "\u0031\u002E\u2060\u0031" }); + + // ÷ 0031 × 002E × 2060 × 0308 × 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0308\u0031\u003A", + new String[] { "\u0031\u002E\u2060\u0308\u0031" }); + + // ÷ 0031 × 002E × 2060 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0031\u0027", + new String[] { "\u0031\u002E\u2060\u0031" }); + + // ÷ 0031 × 002E × 2060 × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] 
APOSTROPHE (MidNumLet) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0308\u0031\u0027", + new String[] { "\u0031\u002E\u2060\u0308\u0031" }); + + // ÷ 0031 × 002E × 2060 × 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0031\u002C", + new String[] { "\u0031\u002E\u2060\u0031" }); + + // ÷ 0031 × 002E × 2060 × 0308 × 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0308\u0031\u002C", + new String[] { "\u0031\u002E\u2060\u0308\u0031" }); + + // ÷ 0031 × 002E × 2060 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0031\u002E\u2060", + new String[] { "\u0031\u002E\u2060\u0031" }); + + // ÷ 0031 × 002E × 2060 × 0308 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0031\u002E\u2060\u0308\u0031\u002E\u2060", + new String[] { "\u0031\u002E\u2060\u0308\u0031" }); + + // ÷ 0063 × 0061 × 006E × 0027 × 0074 ÷ # ÷ [0.2] LATIN SMALL LETTER C (ALetter) × [5.0] LATIN SMALL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER N (ALetter) × [6.0] APOSTROPHE (MidNumLet) × [7.0] LATIN SMALL LETTER T (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0063\u0061\u006E\u0027\u0074", + new String[] { "\u0063\u0061\u006E\u0027\u0074" }); + + // ÷ 0063 × 0061 × 006E × 2019 × 0074 ÷ # ÷ [0.2] LATIN SMALL LETTER C (ALetter) × [5.0] LATIN SMALL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER N (ALetter) × [6.0] RIGHT SINGLE QUOTATION MARK (MidNumLet) × [7.0] LATIN SMALL LETTER T (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0063\u0061\u006E\u2019\u0074", + new String[] { "\u0063\u0061\u006E\u2019\u0074" }); + + // ÷ 0061 × 0062 × 00AD × 0062 × 0079 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER B (ALetter) × [4.0] SOFT HYPHEN (Format_FE) × [5.0] LATIN SMALL LETTER B (ALetter) × [5.0] LATIN SMALL LETTER Y (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0062\u00AD\u0062\u0079", + new String[] { "\u0061\u0062\u00AD\u0062\u0079" }); + + // ÷ 0061 ÷ 0024 ÷ 002D ÷ 0033 × 0034 × 002C × 0035 × 0036 × 0037 × 002E × 0031 × 0034 ÷ 0025 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] DOLLAR SIGN (Other) ÷ [999.0] HYPHEN-MINUS (Other) ÷ [999.0] DIGIT THREE (Numeric) × [8.0] DIGIT FOUR (Numeric) × [12.0] COMMA (MidNum) × [11.0] DIGIT FIVE (Numeric) × [8.0] DIGIT SIX (Numeric) × [8.0] DIGIT SEVEN (Numeric) × [12.0] FULL STOP (MidNumLet) × [11.0] DIGIT ONE (Numeric) × [8.0] DIGIT FOUR (Numeric) ÷ [999.0] PERCENT SIGN (Other) ÷ [999.0] LATIN SMALL LETTER B (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0061\u0024\u002D\u0033\u0034\u002C\u0035\u0036\u0037\u002E\u0031\u0034\u0025\u0062", + new String[] { "\u0061", "\u0033\u0034\u002C\u0035\u0036\u0037\u002E\u0031\u0034", "\u0062" }); + + // ÷ 0033 × 0061 ÷ # ÷ [0.2] DIGIT THREE (Numeric) × [10.0] LATIN SMALL 
LETTER A (ALetter) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u0033\u0061", + new String[] { "\u0033\u0061" }); + + // ÷ 2060 ÷ 0063 × 2060 × 0061 × 2060 × 006E × 2060 × 0027 × 2060 × 0074 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) ÷ [999.0] LATIN SMALL LETTER C (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER N (ALetter) × [4.0] WORD JOINER (Format_FE) × [6.0] APOSTROPHE (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN SMALL LETTER T (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u2060\u0063\u2060\u0061\u2060\u006E\u2060\u0027\u2060\u0074\u2060\u2060", + new String[] { "\u0063\u2060\u0061\u2060\u006E\u2060\u0027\u2060\u0074\u2060\u2060" }); + + // ÷ 2060 ÷ 0063 × 2060 × 0061 × 2060 × 006E × 2060 × 2019 × 2060 × 0074 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) ÷ [999.0] LATIN SMALL LETTER C (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER N (ALetter) × [4.0] WORD JOINER (Format_FE) × [6.0] RIGHT SINGLE QUOTATION MARK (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN SMALL LETTER T (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u2060\u0063\u2060\u0061\u2060\u006E\u2060\u2019\u2060\u0074\u2060\u2060", + new String[] { "\u0063\u2060\u0061\u2060\u006E\u2060\u2019\u2060\u0074\u2060\u2060" }); + + // ÷ 2060 ÷ 0061 × 2060 × 0062 × 2060 × 00AD × 2060 × 0062 × 2060 × 0079 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER B (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] SOFT HYPHEN (Format_FE) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER B (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER Y (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u2060\u0061\u2060\u0062\u2060\u00AD\u2060\u0062\u2060\u0079\u2060\u2060", + new String[] { "\u0061\u2060\u0062\u2060\u00AD\u2060\u0062\u2060\u0079\u2060\u2060" }); + + // ÷ 2060 ÷ 0061 × 2060 ÷ 0024 × 2060 ÷ 002D × 2060 ÷ 0033 × 2060 × 0034 × 2060 × 002C × 2060 × 0035 × 2060 × 0036 × 2060 × 0037 × 2060 × 002E × 2060 × 0031 × 2060 × 0034 × 2060 ÷ 0025 × 2060 ÷ 0062 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] DOLLAR SIGN (Other) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] HYPHEN-MINUS (Other) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] DIGIT THREE (Numeric) × [4.0] WORD JOINER (Format_FE) × [8.0] DIGIT FOUR (Numeric) × [4.0] WORD JOINER (Format_FE) × [12.0] COMMA (MidNum) × [4.0] WORD JOINER (Format_FE) × [11.0] DIGIT FIVE (Numeric) × [4.0] WORD JOINER (Format_FE) × [8.0] DIGIT SIX (Numeric) × [4.0] WORD JOINER (Format_FE) × [8.0] DIGIT SEVEN (Numeric) × [4.0] WORD JOINER (Format_FE) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [11.0] DIGIT ONE (Numeric) × [4.0] WORD JOINER (Format_FE) × [8.0] DIGIT FOUR (Numeric) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] PERCENT SIGN (Other) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LATIN SMALL LETTER B (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, 
"\u2060\u0061\u2060\u0024\u2060\u002D\u2060\u0033\u2060\u0034\u2060\u002C\u2060\u0035\u2060\u0036\u2060\u0037\u2060\u002E\u2060\u0031\u2060\u0034\u2060\u0025\u2060\u0062\u2060\u2060", + new String[] { "\u0061\u2060", "\u0033\u2060\u0034\u2060\u002C\u2060\u0035\u2060\u0036\u2060\u0037\u2060\u002E\u2060\u0031\u2060\u0034\u2060", "\u0062\u2060\u2060" }); + + // ÷ 2060 ÷ 0033 × 2060 × 0061 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) ÷ [999.0] DIGIT THREE (Numeric) × [4.0] WORD JOINER (Format_FE) × [10.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] WORD JOINER (Format_FE) ÷ [0.3] + assertAnalyzesTo(analyzer, "\u2060\u0033\u2060\u0061\u2060\u2060", + new String[] { "\u0033\u2060\u0061\u2060\u2060" }); + + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/email.addresses.from.random.text.with.email.addresses.txt b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/email.addresses.from.random.text.with.email.addresses.txt new file mode 100644 index 0000000..832a2aa --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/email.addresses.from.random.text.with.email.addresses.txt @@ -0,0 +1,265 @@ +dJ8ngFi@avz13m.CC +JCAVLRJg@3aqiq2yui.gm +kU-l6DS@[082.015.228.189] +37layCJS@j5NVP7NWAY.VG +"%U@?\B"@Fl2d.md +aH3QW@tw8uo2.eu +Bvd#@tupjv.sn +SBMm0Nm.oyk70.rMNdd8k.#ru3LI.gMMLBI.0dZRD4d.RVK2nY@au58t.B13albgy4u.mt +DvdUJk@61zwkit7dkd3rcq4v.BD +~+Kdz@3mousnl.SE +C'ts`@Vh4zk.uoafcft-dr753x4odt04q.UY +}0tzWYDBuy@cSRQAABB9B.7c8xawf75-cyo.PM +lMahAA.j/5.RqUjS745.DtkcYdi@d2-4gb-l6.ae +V85E9Hx7@vpf0bs.bz +MGBg2@7F3MJTCCPROS8YETM0B4-C9P7WXKGFB0.RU +rsBWOCJ@lYX0SILY4L53Z3VJPSF6.pwrawr.vdpoq.nz +dIyLrU@9A40T2ZIG7H8R.t63.tv +6dAsZKz@d33XR.IR +EnqCC@2bk6da6y08.LI +AQ9yV@Mfqq32nexufgxzl4o7q5jv3kd.lb +lv'p@tqk.vj5s0tgl.0dlu7su3iyiaz.dqso.494.3hb76.XN--MGBAAM7A8H +b6/zomNkV@8jwm-he.IN +5FLuakz.hXVkuqDt@iBFP83V6MNI3N0FRWJ9302DS-0KHRV6O.1bf59kj64uj5b6e2zfn.cm +RhIwkU@58vmet9yfddpg.3adkmhrv1px.AO +nEBk6w2Q@Bb5ib.2pay.so +AlW5CMAn@qos-53u.j91qq96d4en129szf7099kxv5lo6yo.gm +QPYBDV3.Ah/h8U@x3v444pzi.1cvgokam.PW +5Iwbiq7@p9s-2pixps9jwzyhfroxqivw8sv90r.xn--wgbh1c +AaFU9L@3yj1xqf1.cz9.ac +|iCmQ1@rum6w0a7wt.3QLD.ht71.cx +EhLTUjo@rEK.sJ44H0.GR +bHEbq3Rp@33.lKSSMY.9xaurtfle9xe.iu4810l.fj +eFcup.cPPEW@[1ae] +p907@bk3o.fvtmw2m2.Uutr83x2yt4.2nuin.EU +PpW2L5.QgP2n@9rz7.a5qi.oRH1Z.8ov.UZ +o8UgG5fewm4vr9Ai5wPS@sgh.2F-OLKLZ81DIUET.xpya0vtx.fj +aixQH@z-y.AR +jVTeWQfL."M#~t Q"@1e.oglq.ubk.SZ +6e5QQuy@N7.2cuw3x2wpddf.paycp1pc.AI +IqG6Fl@[220.112.120.54] +lWHH4eWSn@tbxyb7.jhzqxrk.lv +P1zO*RaAr@[111.99.108.22] +d00gy@[4TC] +1yNINoBU@[136.003.010.238] +Ms8ox@[_3Tuehr] +wtWDNo@1sjmcbbli196-765mt7m8o8hywft.7-ga6rsnum8v.np +"x)yO "@7le5o2rcud5ngs.Qmfmq.Jfxv8.Zznv6t6il.MIL +1hXd@f8.1kxqd3yw4j6zmb7l7.US +"8}(\$"@mu2viak0nh4sj5ivgpy1wqie.HK +Th7XoAs5@ggdb.BI +5iDbhah.xdtF1x@[59.55.12.243] +j2ovALlgm2Wcwx@5jphzt.TN +ZlaP~E.4Yk1K0F@lF6VN.M5.Nj.PRO +cFCvIJAw@l93H0R1W6V4RI0AY7RLRQR4KOEVQPEG-PDTF03V4D9A0.xZZK5.lu +8Ju2AW@1n.h7.vu +"\nkP]{"@[Vej\yo\HD] +fKWC?@qgcb.xn--mgbaam7a8h +L4BbaB@hv1.BIZ +WvSmV@qpx15vzmbtxzvi-syndl1.ML +"3|PX~Cbdq"@U3vp-7k.8c4q3sgpwt6sochundzhx.museum +LjH9rJTu@tkm.gy +vQgXEFb@maxmrbk-5a5s6o.6MZZ6IK.awjbtiva7.IL +6TVbIA@r50eh-a.la +AaASl@Bsteea.qHXE3Q5CUJ3DBG.S2hvnld.4WJWL.fk +"CN;\-z 6M"@86.qc7s.23p.ET +zX3=O3o@Yjov.7g660.8M88OJGTDC5.np +QFZlK1A@4W47EIXE.KY +1guLnQb07k@ab.ccemuif2s.lb +Jddxj@[111.079.109.147] +Hj06gcE@[105.233.192.168] +u8?xicQ@[i\21I] +CczYer}W@bezu6wtys9s.lft3z.mobi 
+OmpYhIL@6GJ7P29EIE-G63RDW7GLFLFC0M1.AERO +2RRPLqO@8lh0i.vm7xmvvo-r5nf0x.CY +TOc!BhbKz@F-myy7.kQWSUI7S3.net +"0\!P?".shQVdSerA@2qmqj8ul.hm +LTLNFsgB@[191.56.104.113] +iT0LOq.jtPW=G06~cETxl2ge@Ah0.4hn72v.tQ.LU +VGLn@z3E2.3an2.MM +TWmfsxn@[112.192.017.029] +2tP07A@2twe6u0d6uw6o.sed7n.109mx.XN--KGBECHTV +CjaPC63@['\RDrwk] +Ayydpdoa@tdgypppmen.wf +"gfKP9"@jo3-r0.mz +aTMgDW4@t5gax.XN--0ZWM56D +mcDrMO3FQ@nwc21.y5qd45lesryrp.IL +NZqj@v50egeveepk.z290kk.Bc3.xn--jxalpdlp +XtAhFnq@[218.214.251.103] +x0S8uos@[109.82.126.233] +ALB4KFavj16pODdd@i206d6s.MM +grxIt96.46nCf@nokjogh2l4.nCMWXG.yt +Fgbh7@2rxkk0bvkk-v3evd-sh56gvhxlh.hhjcsg36j8qt98okjbdj9z574xdpix59zf6h80r.Gyb4rrxu.ve +uo0AX41@Fhlegm1z57j-qvf5.p8jo6zvm.sc +sjn4cz@9ktlwkqte.bv +b04v0Ct@[243.230.224.190] +F!FUbQHU@uvz7cu1l.ciz4h2.93U4V.gb +6CHec@nONUKT.nl +zbmZiXw@yb.bxxp.3fm457.va +"/GdiZ7f"@[221.229.46.3] +NJde8Li@f7a.g51VICBH.cy +6IeAft@e-3fp.Nkh7nm8.v8i47xvrv27r.pf +TC*Qopzb@xIOB3.6egz4.m-24t5wmxtmco4iy8g91o66mjgha1vjlepyffott.E5ta.p9.CF +"_3Sc_"@[193.165.124.143] +W0dwHf@[25.174.65.80] +qPkkP0@4k0vs.oaak2z.3JMTI.PK +XzZh7@[\\Jm D%U] +66SGHzw@Oqnr82oml7jct0b8crwbstdhcgc3khxj7dj-t898mzro0p3-rvp-dythh.TN +ot4tPF@[AY\j] +e4seIFbl@cib.cg +B2w025e@r2H7BW16B24DG1S5DED.bg +atweEde@blk-3y.mgvoh6l9my.F6.FI +uDoPcRGW@rEBD5LUT.ly +2KQhx@Bba.u--9b5bc0.NF +tKWc2VjVRYD@[254.190.162.128] +wc3W16^@D3v2uxqqeclz.w1fd529m.DM +Njg@6S8MA.HK +"L\^4z]92"@0qp--walx.MIL +X08sWFD@62GNK.tN4.f1YXX.ug +eK6Bz1Bu@[rX;J&036] +"~`o\: "@hO4UKF.oZBWV56B.cmn.DJ +lcgUakx@[pjGd&i2] +BqdBTnv3c@wf35nwaza.ME +"a#Um{:\'\bX:"@in7tjo.uw8wil.gp +ApIbER8'@[&Y] +JTsM0c!s9CzEH@Sd.mh +hy2AOUc@uqxzl7v0hl2nchokqit9lyscxaa0jaqya1wek5gkd.NC +pY7bAVD4r@[,>T*R T] +!0axBT@03-gdh1xmk3x9.GH +vbtyQBZI@20al5g.ro6ds4.Bsg15f5.NU +2^ZhSK-FFYOh@Z2iku.rg.Z0ca1.gs +G1RLpOn."yfJpg["@mXEV8.mu +yrBKNkq@a2a1.Aifn.Ta2.dj +Wok5G@b5aqobvi5.ni +nXz9i.=EL9Yj@93r8do3ntizibg1-5-a0ziw9ugyn4bo9oaw3ygrxq-eczzv1da6gj58whvmo2.rs +Dp63hd@B1kbahyq.PL +y01rn27SFq@o0HNP8.C5.i4rvj8j338zgter7er5rkwyo5g.atnc0iuj2ke.8or6ekq0x.IO +0RiEo@08mnvbu.p661ernzjz5p7nbyix5iuj.cig5hgvcc.SO +Dwxab5@1sx5y3-umsy72nl.74lwye5.DJ +IvdZVE4xRk@0vw7ajl.AR +CvQxhXJ@d5a7qnx.ke +n7MxA4~@[4(R] +RFGzu3hD0@wbh4.sm +eOADW}BcNG@2568p3b4v.Xq3eksr.GP +AsAMWriW7.zSDQSAR6@Gg2q4rtgr.GG +cDCVlA0t@[20.116.229.216] +c=yJU+3L5@n2x3xhksf.gvreani.MZ +wfYnaA4@lzojy.4oii6w6sn-p9.kh +kdeOQ5F@vD5Y.wmmv.7rswz.1zelobcp5qxxwzjn.fOEJZ.KM +ppULqb2Z@Hv9o2ui.AO +tOHw@[IPv6:3500:8B6C::CB5E:1.124.160.137] +MWLVsL@7nhliy.O8mjon3rj-kb.t8d6bcpa5i.au +BN0EY@hh9v.p9bwgs.TN +RgiAp@d9ln.bf +PBugBo@97gcz.DJ +Fh#dKzbI@[+_] +wyqU-C9hXE@wPRBUI-WS9HXE19.LV +muC?Js@[IPv6:47FB:5786:4b5e::5675] +yLTT2xV@wdoszw9k1ork-z-t.kq.l3SEO.Lb4jx0.NA +6zqw.yPV4LkL@dA3XKC.eg +S5z9i7i3s@Vzt6.fr +L|Sit6s@9cklii1.tf +yWYqz@mw-9k.FJ +Knhj419mAfftf@R26hxll64.3qtdx6g.AL +aZYHUr6@Shyn76c67.65grky.am +ZYxn6Px@di0cqhtg.hu +"#mLl"@w1sc0g3vm.j1o4o9g.GW +WYJcFp@653xk-89oprk2im.iemhx9.CC +y5AXi@[Oa #] +nZErAGj@6sq3-p.r8KQ.aero +OMq5sBK@udg-5zp1.Dory85.SG +2bymd@Ojla1hvfpw8rrihrx.cy +5OMbw0@r2d8cn75.1VR2BJ0J3A8PY.gc0mljc-h.COOP +al6X^pQkx@pyj--2hp.lbet.TN +NkzPW4f@2-0.aaoqccwrgi4olytac0imp6vvphsuobrr115eygh2xwkvzeuj.tl +"4-b9|/,\e]h]2"@9-iiahsdlzv-v65j.FK +g8Pv2hb9@[166.176.68.63] +"IA~".Tn03w7@[\>J?] 
+E6aK9TaJ@j0hydmxhkq2q.Svku4saky.MU +rdF2Zl1@9fsic.C17pw9o0.vn +pCKjPa88DG&x5a@4ha07ia2jk.xk7xe8.PM +qgLb5m@nynqp.DE +qC731@["\S] +vIch1nT@[IPv6:4c2f:A840:1788:ad5:C2C6:dfae:1b1f::] +GVSMpg@2YGZ1R19XTW1TIH.Re3vg30u1xq6v7cj1wf-6m14939wvgqbl.93mztd.SG +0jq4v7PMxm@eq6teog.kO6LR3.x2p.53yltrsvgpd3.RO +zdGLZD0P@i2JQNM8.816oja8pkk5zkvyx.KM +Jp#hSH@74zkerax4.31kr.7c9-yuk.mp +Kx^0oZn@oFFA-URZ13B34J.DK +sub52@aoq7.iHF.CH +jfVSq9oAR2D@iGU0.7bp3x.4cr.sz +nalgU@Yfpbdcv8a5.n9kwz6kyi2u.thic-rws.af.TG +=uC5qVT@56g530cltpekrw.pt +QR5&kx@7qhi3bhav5ga0eva.b0sdom.bb +8DZQ7@dtr16r89fdw59q.cf +Q4pNw@6o-9weojl3r7.LS +*mfOc_CN@[G\ 3] +2p`tbG@c767inolrav0hg6a-ucs.y0.tw +Rop{cgBy@Wekdh0xns2um.UK +t*p05lV@017y.MR +7ZxO80@Dovepwr4l.qxfzchrn1.es8ul0vavi6gqy82.K1hc7.INT +C_Iphp@5t4rtc.id +q+m2x@Cfw.1tm52-kr.BO +47NIL@Hl68os0.66l9bsf2q.SC +vi0LyF9O@p74jz6mxby.it +xQ4jU@rQVWLWAD3T8.4-lnu.AZ +zea_0Kr@[97.59.144.249] +5HP1k|s@[068.150.236.123] +5XJZlmYk.3Du5qee@[072.023.197.244] +AvNrIHB0@[+n}oV] +"!N7/I\zhh"@[204.037.067.146] +vlJODxFF@xFO6V.i1.fgad6bjy.NO +qDe0FA@xpp1le82ndircjgyrxyzkrqu3il.oUKHVV6829P-16JILWG62KN.cr +pMF64@wssq6kh9uhxk.cA2YZVBV4JW.xX585A.ru +G3meE@[^!'OO] +"1@0UYJl"@vplkx.d2n.i3tcx3aaxut.lbb3v9.ldq.me +iTH0QND@wg9sizy.lr +9kF?opSTo9rSDWLo&W&6@xrh32ibf.F0zb6kb.BJ +a0FI1m@1olkdpz.W70a3w8qmk3.NA +"0H}r}X(p\M`/x"@rY48LPH.Axy.Ue624.TV +AQL6YBFb@Hxawb15okz.y4.y5c0e.bt +PEaNVR@m8NH9BVX5L096DRM7YTR.er +diI`Q@i5fpkuc.7zg2av.D6tzqq.CK +TCN0-Z@Tezeq9ejv.ekeab8hz14hui.il +05SnFh@jZ85JXZ.1RO99W5FYK3.uyv7g15.MP +B2Z76Rn@9yce0shfsydxetu1v4-y.rBU2M0.6ik8oapv0zho6n653il25gu4rd216uw03.MG +vGZ2K@C2osgjtel5uerwn.riihbabhh41ve84.r3l.vH6S64.vn +Nv2ZgL@[037.054.177.155] +WsdI2W@i1ULFQ1.79qfph2.eg +vJfpTf3@Hh4x2h.25m0idq3.fr +oRqbgftr@l6jg0.TV +NiynsKb@k9BTX4-FV.hc0skm-o.lv +w9uGwf@4hop8.Jb9655is.nr +"NVUW+"@6jbe.KM +QusHU6JMR@0RXKIZNH76C3.Oqwcfr779e.MH +}C5IwKv1S45vlmPaaVHhF@[IPv6:EBF6::] +T7rXlYc@4AI1LM.2o.uk +uuCiDC6c@Maar3.65hlg-wf.t3pt9.FJ +w2mNOvIUh@dx3ep7ew.ru +b#Add@9hpopo.Xg3tbjchdpt.TT +NtrgJjfj."NBwi"@[142.085.096.018] +00lF9UB@2NR2.rs +MPr42ye9@p08lcrzs.4bzxfznsh2bhgsa.CX +awwLoYLn~c2LfTEVT@fwksx.qoj94r11kw19k50k3.gd +gRZ5w9epm@p6adico3auugj5qklec.Sm4bx5.li +zfdZ67Y@1azhq.dl3xxzni2.rrj.lpclc6g4d.sl +vTWwSD4fb@uBSOHD.3g.u3mb.gf +cYFVxcC6E@F9g0b.n1339r.AU +pnuXl@s1alo2.tc +lKy64zp.Cbg8BM@y0S.6uiux8h8.0udipt.ma +|9FDgc@vbrz.3L.av4kmt.rs +skcHAu7@xD715N1.DZ +BfcgHK3@[220.136.9.224] +LCOEag@Gwm.drsa0.GL +qrNZtp3vO@a0gr.8j9cvcgy0p-3.HN +lfW2rei20XWSmpQoPY1Dl@[(N &c] +WFBBEv|@q7R2J.oy48740.pm +6H6rPx@zVJ40.xgyat.cLUX6SVFJWMLF9EZ2PL8QQEU7U1WT0JW3QR8898ALFGKO18CF1DOX89DR.1tfu30mp.CA +ytG@J4auwv4has.PS +"X;+N1A\A "@rc9cln0xyy8wa6axedojj9r0slj0v.Luy9i6ipqrz74lm5-n6f1-2srq5vdo-opef747ubdykv5hc.2lztpe.er +DQTmqL4LVRUvuvoNb8=TT@2up3.PY +NC0OPLz@kcru1s0mu.name +kBoJf{XaGl@[248.166.223.221] +pEjZPm8A@v956Y7GQV.5uu6.Ribgf20u.6e.0do1nki1t.ahy.6iy.sm +pIFWkl2@w9N0Q.MC +p=VTtlpC@w3ttqb.FO diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/generateJavaUnicodeWordBreakTest.pl b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/generateJavaUnicodeWordBreakTest.pl new file mode 100644 index 0000000..bff17a6 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/generateJavaUnicodeWordBreakTest.pl @@ -0,0 +1,206 @@ +#!/usr/bin/perl + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+use warnings;
+use strict;
+use File::Spec;
+use Getopt::Long;
+use LWP::UserAgent;
+
+my ($volume, $directory, $script_name) = File::Spec->splitpath($0);
+
+my $version = '';
+unless (GetOptions("version=s" => \$version) && $version =~ /\d+\.\d+\.\d+/) {
+  print STDERR "Usage: $script_name -v <version>\n";
+  print STDERR "\tversion must be of the form X.Y.Z, e.g. 5.2.0\n"
+    if ($version);
+  exit 1;
+}
+my $url_prefix = "http://www.unicode.org/Public/${version}/ucd";
+my $scripts_url = "${url_prefix}/Scripts.txt";
+my $line_break_url = "${url_prefix}/LineBreak.txt";
+my $word_break_url = "${url_prefix}/auxiliary/WordBreakProperty.txt";
+my $word_break_test_url = "${url_prefix}/auxiliary/WordBreakTest.txt";
+my $underscore_version = $version;
+$underscore_version =~ s/\./_/g;
+my $class_name = "WordBreakTestUnicode_${underscore_version}";
+my $output_filename = "${class_name}.java";
+my $header =<<"__HEADER__";
+package org.apache.lucene.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.junit.Ignore;
+
+/**
+ * This class was automatically generated by ${script_name}
+ * from: ${url_prefix}/auxiliary/WordBreakTest.txt
+ *
+ * WordBreakTest.txt indicates the points in the provided character sequences
+ * at which conforming implementations must and must not break words. This
+ * class tests for expected token extraction from each of the test sequences
+ * in WordBreakTest.txt, where the expected tokens are those character
+ * sequences bounded by word breaks and containing at least one character
+ * from one of the following character sets:
+ *
+ *   \\p{Script = Han}                 (From $scripts_url)
+ *   \\p{Script = Hiragana}
+ *   \\p{LineBreak = Complex_Context}  (From $line_break_url)
+ *   \\p{WordBreak = ALetter}          (From $word_break_url)
+ *   \\p{WordBreak = Katakana}
+ *   \\p{WordBreak = Numeric}          (Excludes full-width Arabic digits)
+ *   [\\uFF10-\\uFF19]                 (Full-width Arabic digits)
+ */
+\@Ignore
+public class ${class_name} extends BaseTokenStreamTestCase {
+
+  public void test(Analyzer analyzer) throws Exception {
+__HEADER__
+
+my $codepoints = [];
+map { $codepoints->[$_] = 1 } (0xFF10..0xFF19);
+# Complex_Context is an alias for 'SA', which is used in LineBreak.txt
+# Using lowercase versions of property value names to allow for case-
+# insensitive comparison with the names in the Unicode data files.
+parse_Unicode_data_file($line_break_url, $codepoints, {'sa' => 1});
+parse_Unicode_data_file($scripts_url, $codepoints,
+                        {'han' => 1, 'hiragana' => 1});
+parse_Unicode_data_file($word_break_url, $codepoints,
+                        {'aletter' => 1, 'katakana' => 1, 'numeric' => 1});
+my @tests = split /\r?\n/, get_URL_content($word_break_test_url);
+
+my $output_path = File::Spec->catpath($volume, $directory, $output_filename);
+open OUT, ">$output_path"
+  or die "Error opening '$output_path' for writing: $!";
+
+print STDERR "Writing '$output_path'...";
+
+print OUT $header;
+
+for my $line (@tests) {
+  next if ($line =~ /^\s*\#/);
+  # ÷ 0001 × 0300 ÷ # ÷ [0.2] (Other) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+  my ($sequence) = $line =~ /^(.*?)\s*\#/;
+  print OUT "    // $line\n";
+  $sequence =~ s/\s*÷\s*$//; # Trim trailing break character
+  my $test_string = $sequence;
+  $test_string =~ s/\s*÷\s*/\\u/g;
+  $test_string =~ s/\s*×\s*/\\u/g;
+  $test_string =~ s/\\u000A/\\n/g;
+  $test_string =~ s/\\u000D/\\r/g;
+  $sequence =~ s/^\s*÷\s*//; # Trim leading break character
+  my @tokens = ();
+  for my $candidate (split /\s*÷\s*/, $sequence) {
+    my @chars = ();
+    my $has_wanted_char = 0;
+    while ($candidate =~ /([0-9A-F]+)/gi) {
+      push @chars, $1;
+      unless ($has_wanted_char) {
+        $has_wanted_char = 1 if (defined($codepoints->[hex($1)]));
+      }
+    }
+    if ($has_wanted_char) {
+      push @tokens, '"'.join('', map { "\\u$_" } @chars).'"';
+    }
+  }
+  print OUT "    assertAnalyzesTo(analyzer, \"${test_string}\",\n";
+  print OUT "                     new String[] { ";
+  print OUT join(", ", @tokens), " });\n\n";
+}
+
+print OUT "  }\n}\n";
+close OUT;
+print STDERR "done.\n";
+
+
+# sub parse_Unicode_data_file
+#
+# Downloads the specified Unicode data file, parses it, and extracts
+# code points assigned any of the given property values, defining
+# the corresponding array position in the passed-in target array.
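+#
+# A minimal usage sketch (hypothetical variable name $points; the real
+# calls appear in the main program above). Given data file lines such as
+#
+#   00AA        ; Latin
+#   0AE6..0AEF  ; Gujarati
+#
+# marking every Gujarati code point in a target array would look like:
+#
+#   my $points = [];
+#   parse_Unicode_data_file($scripts_url, $points, {'gujarati' => 1});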
+# +# Takes in the following parameters: +# +# - URL of the Unicode data file to download and parse +# - Reference to target array +# - Reference to hash of property values to get code points for +# +sub parse_Unicode_data_file { + my $url = shift; + my $target = shift; + my $wanted_property_values = shift; + my $content = get_URL_content($url); + print STDERR "Parsing '$url'..."; + my @lines = split /\r?\n/, $content; + for (@lines) { + s/\s*#.*//; # Strip trailing comments + s/\s+$//; # Strip trailing space + next unless (/\S/); # Skip empty lines + my ($start, $end, $property_value); + if (/^([0-9A-F]{4,5})\s*;\s*(.+)/i) { + # 00AA ; LATIN + $start = $end = hex $1; + $property_value = lc $2; # Property value names are case-insensitive + } elsif (/^([0-9A-F]{4,5})..([0-9A-F]{4,5})\s*;\s*(.+)/i) { + # 0AE6..0AEF ; Gujarati + $start = hex $1; + $end = hex $2; + $property_value = lc $3; # Property value names are case-insensitive + } else { + next; + } + if (defined($wanted_property_values->{$property_value})) { + for my $code_point ($start..$end) { + $target->[$code_point] = 1; + } + } + } + print STDERR "done.\n"; +} + +# sub get_URL_content +# +# Retrieves and returns the content of the given URL. +# +sub get_URL_content { + my $url = shift; + print STDERR "Retrieving '$url'..."; + my $user_agent = LWP::UserAgent->new; + my $request = HTTP::Request->new(GET => $url); + my $response = $user_agent->request($request); + unless ($response->is_success) { + print STDERR "Failed to download '$url':\n\t",$response->status_line,"\n"; + exit 1; + } + print STDERR "done.\n"; + return $response->content; +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/porterTestData.zip b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/porterTestData.zip new file mode 100644 index 0000000..ccb55e5 Binary files /dev/null and b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/porterTestData.zip differ diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/random.text.with.email.addresses.txt b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/random.text.with.email.addresses.txt new file mode 100644 index 0000000..71ac34c --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/random.text.with.email.addresses.txt @@ -0,0 +1,427 @@ +========= +This file was generated in part (i.e. without the email addresses) +by the random text generator at: + +========= +waist and Wintja are relearning how dJ8ngFi@avz13m.CC we spread out, but it +here before, our dimension of story. In Bed and Marys opus in the last thing +actually having difficulties moving, Spiros rises to our hidden on your + orders, my love: Im seven doors and with gentle +fingers, then disappears? Whats the idea of +<37layCJS@j5NVP7NWAY.VG> the "%U@?\B"@Fl2d.md pages blowing to appear on Earth +in motion (what rules did we can take a radio changes. A VOICE: Hes a +scoundrel. VOICES: Burn him! Burn him! SPIROS: Want to team of the couple is +the sweetest love aH3QW@tw8uo2.eu of the teaching teaches members to +communicate with time interplaying and linked and you marry it. It will leave +Bvd#@tupjv.sn the logic of it from hereing those people were all +SBMm0Nm.oyk70.rMNdd8k.#ru3LI.gMMLBI.0dZRD4d.RVK2nY@au58t.B13albgy4u.mt the +artist stray? Does a few rose doom the UFO with my dear Sissy says Sissy, +holding hands up a bit of DvdUJk@61zwkit7dkd3rcq4v.BD fate falls asleep. 
When +an internet age is ~+Kdz@3mousnl.SE currently working with his bedside table, +and brings in a shimmering timeshifty verse vortex, the dream. Victory is +hallucination, my hand for more. Mmm my head, +C'ts`@Vh4zk.uoafcft-dr753x4odt04q.UY in five. (Spiros waves goodbye to tell +you, honeybuns: The poisoning is, but no addresses. A message identical reach +across the script. }0tzWYDBuy@cSRQAABB9B.7c8xawf75-cyo.PM I grasp hold their +flapping wings and when theyre seemingly infallible information? Bookshrine of +a sip of defined the Great Horned Goddess of no feeling.) Meaw. FFIANA: So, +darling. Dont be dry white and teases him back +lMahAA.j/5.RqUjS745.DtkcYdi@d2-4gb-l6.ae in society not speaking, giggling +V85E9Hx7@vpf0bs.bz in MGBg2@7F3MJTCCPROS8YETM0B4-C9P7WXKGFB0.RU the boring +f***s! (She leaves and Him Lover, Outlanders. Plus Universe where better than +they just the land any letters in the gods. Expected, this at the threesome get +even touching myself. rsBWOCJ@lYX0SILY4L53Z3VJPSF6.pwrawr.vdpoq.nz He picks +dIyLrU@9A40T2ZIG7H8R.t63.tv up at our harem world 6dAsZKz@d33XR.IR so pop up +you will be gathered, then Wintjas hair; smells of the manuscript: Contains a +EnqCC@2bk6da6y08.LI common AQ9yV@Mfqq32nexufgxzl4o7q5jv3kd.lb universal within +this lv'p@tqk.vj5s0tgl.0dlu7su3iyiaz.dqso.494.3hb76.XN--MGBAAM7A8H web. +b6/zomNkV@8jwm-he.IN The +5FLuakz.hXVkuqDt@iBFP83V6MNI3N0FRWJ9302DS-0KHRV6O.1bf59kj64uj5b6e2zfn.cm cosmos +is filled with soap bubbles. I cant +concentrate with a nearby and he nEBk6w2Q@Bb5ib.2pay.so pours. + Its a wine with the joke +in the only good enough! It hit again the house. He thinks of terrorist, this +water. They were in verbatim rewritable. World by a quick eye shadow beneath +the stairway; we not easily counter weight, is filled with your own perceptions +about it. (Eve, how to talk to you really turns on its physics. The lover on +the sunflower in worship of the? (She smiles.) Greet + it makes sense$A!-(B Not really, +5Iwbiq7@p9s-2pixps9jwzyhfroxqivw8sv90r.xn--wgbh1c from up in the candlelight, +denser medium to say something. Shifting of that +|iCmQ1@rum6w0a7wt.3QLD.ht71.cx the eyes and there came. And now, approaching. +When the thing. What did I woke up the printers! We EhLTUjo@rEK.sJ44H0.GR shall +we are heard like a glimpse of hyperspace. It travels further and kneeled down +bHEbq3Rp@33.lKSSMY.9xaurtfle9xe.iu4810l.fj to you can walk away? FFIANA: I want +to eFcup.cPPEW@[1ae] speak. The Fountain of the background when I extract of +hers, so strange book and a royal destruction of songs of this pearl. Not often +by an incinerator vessel. Spiros, the delivery of alien exists now. Forward. +The rosy guidance of wine. Notices that is partly the pipe +p907@bk3o.fvtmw2m2.Uutr83x2yt4.2nuin.EU of the chance in Old Town. D Strange +music keeps one of the top of myth and smiles.) SPIROS: Nope, cant even +PpW2L5.QgP2n@9rz7.a5qi.oRH1Z.8ov.UZ more! says it doesnt exist! The world in +the cosmos loves us. (Spiros soon +o8UgG5fewm4vr9Ai5wPS@sgh.2F-OLKLZ81DIUET.xpya0vtx.fj here again aixQH@z-y.AR +and again he turns and blinks with you want? says Sissy looks over Wintja and +the fashions of Fit to Spiros continues. Its a situation of the barman says +Spiros. I read the river. SPIROS: Damn I said. 
69 + he kept locked up into a suitcase along +her body, points a female voice of 6e5QQuy@N7.2cuw3x2wpddf.paycp1pc.AI their +part of flowers, and Marys opus IqG6Fl@[220.112.120.54] in my PROSECUTOR: Hes + one is unsafe at a +little secrets, we made to write: And a drink of Eternity, +Speros, <1yNINoBU@[136.003.010.238]> Mr Boore, back to me! Lovers break +Ms8ox@[_3Tuehr] the code so +<8'Hk8a@ksf7qqaa7616xw8dq80h.K6fy89c.3k-8c.g58m48v-18zh8v> recap.29 28 So, +darling. Dont leave each itself, on and devotion to all about time + has happened? ANON 4593: +What the tongue Such as she did you back and the whole moment in +<"x)yO "@7le5o2rcud5ngs.Qmfmq.Jfxv8.Zznv6t6il.MIL> your own lens, thank you +1hXd@f8.1kxqd3yw4j6zmb7l7.US arent already. It tastes them have ever come come! +The tomb. Blink to him and flips to it, but the palace. No +"8}(\$"@mu2viak0nh4sj5ivgpy1wqie.HK way$A!-(B Happily: You smell of it +all and yet sure this pool Th7XoAs5@ggdb.BI of the first of his +5iDbhah.xdtF1x@[59.55.12.243] heart j2ovALlgm2Wcwx@5jphzt.TN can take to the +wind, speak to apply perfectly, you say turn toward sexual nature and lays his +ZlaP~E.4Yk1K0F@lF6VN.M5.Nj.PRO pipe. No, landing from +cFCvIJAw@l93H0R1W6V4RI0AY7RLRQR4KOEVQPEG-PDTF03V4D9A0.xZZK5.lu the fruit will +say. -F�Dont talk like the west 8Ju2AW@1n.h7.vu wing of the letter in every +second, <"\nkP]{"@[Vej\yo\HD]> but he slipped in. Yours Spiros and there +when I imagined anything can take returning? Where? +With? Who? Going toward his body and kisses the notion that has joined odds. A +scattered around slowly, moving eyes on and +WvSmV@qpx15vzmbtxzvi-syndl1.ML turns toward her. She sips some way everything +began was finished my wet Earth. Warning +"3|PX~Cbdq"@U3vp-7k.8c4q3sgpwt6sochundzhx.museum for me.-A City Different. +Let your myth LjH9rJTu@tkm.gy settles over it +<8myMO4@hOV209VZ-SHGBIH5FBYLTCQZSBW-U5-1.dv9> means to Our of a book he has +only but the imagination, master +phreaker, <5ohpA3ww@dcpcotwccy> main railway station. Loses the dreamadoory in +the surprising success.) A note from round is her splendour in them? Mmm my +dear, were 6TVbIA@r50eh-a.la from them keywords. Boy, +AaASl@Bsteea.qHXE3Q5CUJ3DBG.S2hvnld.4WJWL.fk my own imagination, master +"CN;\-z 6M"@86.qc7s.23p.ET is the usual fashion, says to stream and appointed +space-time continuum. Dilutes your zX3=O3o@Yjov.7g660.8M88OJGTDC5.np sleep. Ive +been seen, he says the ringnot we proved? (On the pact. Thanateros is an +internet caf� where the Queen. Now cmon, lets take to raise the apartment. Like +a limousine and I kiss timelord slides his hand QFZlK1A@4W47EIXE.KY in words +now. Get us in the same time conceptualisation is to bed. STEFANDIS: Dont do +you think Ive put down the green lush. She often by God of a 15 minutes. The +others knew into the 1guLnQb07k@ab.ccemuif2s.lb you-know-what. Youre the luxury +hotel. Diamonds and receive the process of action. We wanted in the nominated +bird. The woman undressing. He has him just get at +Hotel California. Its about all devices. Playlist? +Initiating playlist. Timelock? Timelock on. We have a u8?xicQ@[i\21I] lock of +the apartment. Like a kto, part of Our superhallugram to hook up and +CczYer}W@bezu6wtys9s.lft3z.mobi outs. polish +OmpYhIL@6GJ7P29EIE-G63RDW7GLFLFC0M1.AERO fills the crowd, comes from the music +is impossible. SPIROS: F***. You are your voo goo. +<2RRPLqO@8lh0i.vm7xmvvo-r5nf0x.CY> Daysends burn deeply and will take +TOc!BhbKz@F-myy7.kQWSUI7S3.net this he thinks. For UFO from elsewhere. Bzzz! +Bzzzzzzzz! 
Bzzzzzzzzzzzzzzz! Tell them "0\!P?".shQVdSerA@2qmqj8ul.hm the leg +of LTLNFsgB@[191.56.104.113] all, until it has read it is +iT0LOq.jtPW=G06~cETxl2ge@Ah0.4hn72v.tQ.LU there. Once +TWmfsxn@[112.192.017.029] Spiros under the place +2tP07A@2twe6u0d6uw6o.sed7n.109mx.XN--KGBECHTV as were not a house of the +rosebushes and the whateverend, feel her waist. She changes everything. We had +decided to do you know CjaPC63@['\RDrwk] this, is what did leave, pray; let us +come to, what history as died. Strange, Spiros with +delight: That night "gfKP9"@jo3-r0.mz and gold case + is spring: the aeon arising, wherein he returned, +retraversing the mcDrMO3FQ@nwc21.y5qd45lesryrp.IL gates, first + to reach session. Initiating first +part of the main hall toward his own spurs. Hes an +Irifix And older ones who wins? ADAM: x0S8uos@[109.82.126.233] The violin and +reality. The hidden set up to come. ROSE WAKINS: No answer. The +ALB4KFavj16pODdd@i206d6s.MM rosy pink cigarette.) Visit the supreme chest and +express in orgasm, my version of clouds contemplating existence, the horizon. +Best grxIt96.46nCf@nokjogh2l4.nCMWXG.yt of sheer emotion. Spiros laughs. Why +did he says Spiros. Ban him, he called for it, sir, says Spiros +Fgbh7@2rxkk0bvkk-v3evd-sh56gvhxlh.hhjcsg36j8qt98okjbdj9z574xdpix59zf6h80r.Gyb4rrxu.ve +laughs. uo0AX41@Fhlegm1z57j-qvf5.p8jo6zvm.sc Can we determined that when I am +Spiros, quoting Jim Morrison. Death. Design patterns, youll hear Spiros says. +They cant G decide if he was your key that we playing? SPIROS: Why wont xxx +would be imagined. Technology so beautiful to fill his diary; I like a match. +Puffs. The Star Eagle. And a person with a play with. sjn4cz@9ktlwkqte.bv +Faberge can change overcome your work, a large-scale coordination, Goddess say +is blasting away to end is very tricky to stab it +as a turn me to the champagne on your obsession about his nose and +F!FUbQHU@uvz7cu1l.ciz4h2.93U4V.gb somewhere <6CHec@nONUKT.nl> else, then far +stretch. The great outdoors), puffing dried cum on the manuscript I$A!-(B O +one knee, feeling and sex in igniting bomb. (A +housefly, Musca domestica, lands on into the device. Let me met. Wintja and +victory. <"/GdiZ7f"@[221.229.46.3]> For years in tipsy bliss. SISSY: (Nods.) +Yes. Now you witch. And we must remember, will tell you move but her +NJde8Li@f7a.g51VICBH.cy creation with gentle feet, naked on strange hovering +futuristic vehicles that when retrieved upon a thought, or reflected. The Crew +coming on our gratitude for you address then ventured into a dream, has begun, +she sees a 6IeAft@e-3fp.Nkh7nm8.v8i47xvrv27r.pf golden ball and 4 If you that, +Izz). Lapis, to the return all laugh. Applesfoods maybe, says +TC*Qopzb@xIOB3.6egz4.m-24t5wmxtmco4iy8g91o66mjgha1vjlepyffott.E5ta.p9.CF She. +Cmon I Stefandis.) Count me with a bed sheets, carrying gently away about time +you rather dramatic, which reaches across this day. It brings forth between +suns. How about the white sugar, leaves, sugardusty sugar, drinking of time. +Believe. There "_3Sc_"@[193.165.124.143] is the soul, W0dwHf@[25.174.65.80] +and only Spiros. Love you. Believe in the multi-leveledness of the 21st century +and exchanges a book called Sphinx. Alien Star qPkkP0@4k0vs.oaak2z.3JMTI.PK +initiated. NYKKEL HUMPHRY: Of Make ways over town.) SISSY: $A!-(Band you can +turn slowly but not yet audible, appears, XzZh7@[\\Jm D%U] in the silver +melt together. This way of vision sees through time). Brewing with a kiss? 
+<66SGHzw@Oqnr82oml7jct0b8crwbstdhcgc3khxj7dj-t898mzro0p3-rvp-dythh.TN> Her
+feathers: streaming water of the wind. I started interacting in a boat, on
+ot4tPF@[AY\j] her e4seIFbl@cib.cg thigh as she blinks happily. Here is
+ what you around him, Magus says the list. Its
+about what that atweEde@blk-3y.mgvoh6l9my.F6.FI there is functional. We
+vanished into the computer. Up hills and enable entry using his long adventure.
+Do we are all detailed trip against decent behaviour and girls. And you
+alright? You evil laughter: Muah! Muah! Wont wate you all uDoPcRGW@rEBD5LUT.ly
+way that there <2KQhx@Bba.u--9b5bc0.NF> is either both night And our dimension
+of a bad joke, says nothing, just after time. It was indeed. Now that will make
+the streets. He instable? What shall do. tKWc2VjVRYD@[254.190.162.128] Who
+wc3W16^@D3v2uxqqeclz.w1fd529m.DM are heard like our love. Of the stairs too,
+usually through the note nearby and you go now. If I remember Njg@6S8MA.HK how
+it instead. (She chews the rosy petals, frosty and the land at first part of
+waking? That we "L\^4z]92"@0qp--walx.MIL like they meet you.
+ And out into the bed. From the gods have loads of
+a dark winding stairs and laughs. Why doth Her devastatingly good eyesalve, to
+tell it says the Rosy Dawn. Rising, rosing, the story? (For all the UFO
+shimmers from around him, but we look before eK6Bz1Bu@[rX;J&036] the Eternity
+we shall never go now, look, he thinks, both go for the words said. 69 people
+who live in Thy honor. "~`o\: "@hO4UKF.oZBWV56B.cmn.DJ And
+lcgUakx@[pjGd&i2] here and his life has tasted of becoming more clearly. He
+is dead. Calculating possible meanings of it instead. BqdBTnv3c@wf35nwaza.ME
+(She whispers, smiling.) Theyll be able to help. ELLILIEILIA: You are created
+the visible "a#Um{:\'\bX:"@in7tjo.uw8wil.gp world, without it will see now,
+says Spiros ApIbER8'@[&Y] thinks. Every time and go to write fiction. Indeed,
+love something I pop, from the play? asks JTsM0c!s9CzEH@Sd.mh the taste of the
+outrageous wreck of dream, born and there
+hy2AOUc@uqxzl7v0hl2nchokqit9lyscxaa0jaqya1wek5gkd.NC was still result. Search
+taking <pY7bAVD4r@[,>T*R T]> out into !0axBT@03-gdh1xmk3x9.GH my dear, you
+know, of saint? What did come here from the Crowinshield Garden, amongst the
+warm kiss. Everything is white marble statue he is tunes faberge intricate.
+Spiros, a particular frequency, vbtyQBZI@20al5g.ro6ds4.Bsg15f5.NU spinning,
+trying to a trail of the narrative that it while the Queen, giggling: What are
+a letter with a web we could 2^ZhSK-FFYOh@Z2iku.rg.Z0ca1.gs not a
+G1RLpOn."yfJpg["@mXEV8.mu peculiar yrBKNkq@a2a1.Aifn.Ta2.dj stench of history,
+when appearing in the interface as well as follows the secret I am not
+teleframe the room, disguised as the brilliance of the
+pressure of the modern world, but
+nXz9i.=EL9Yj@93r8do3ntizibg1-5-a0ziw9ugyn4bo9oaw3ygrxq-eczzv1da6gj58whvmo2.rs
+whatever. The solid concrete, Dp63hd@B1kbahyq.PL and put it stumbling or why
+wont the chalice with communicating with language only she says Spiros,
+whispers.) We left from the second birth? The young man is part of the teapot
+opens. A man in disbelief.
+y01rn27SFq@o0HNP8.C5.i4rvj8j338zgter7er5rkwyo5g.atnc0iuj2ke.8or6ekq0x.IO
+Outwords scratch skills against her in fairy gently
+<0RiEo@08mnvbu.p661ernzjz5p7nbyix5iuj.cig5hgvcc.SO> bite of death and Wintja,
+playing with the name by your dreams. He
+arrives the information. He swallows all the f*** me
+tell her wineglass and tangles.
Synchronising weeks of a +reason why everything seemed as wet dreamery, remember? Got a purple Ipomoea, +crawls through the first stage has the riddled beginning to her in a butterfly. +You landed smoothly. Preparing to n7MxA4~@[4(R] hit a world is man. How much +in mystery. And RFGzu3hD0@wbh4.sm furthermore, what the +edge of physics, death and eOADW}BcNG@2568p3b4v.Xq3eksr.GP touched smoothly ah? +Fashion feasible technical population resulted distinct produces +AsAMWriW7.zSDQSAR6@Gg2q4rtgr.GG recognize instance the room at the garden.) +PERNELLE FLAMEL: (To Mrs She is basically very drunk. I see you + cant I walk down naked on it to bed bed into +c=yJU+3L5@n2x3xhksf.gvreani.MZ the stairway wfYnaA4@lzojy.4oii6w6sn-p9.kh and a +kiss as though the point we see the numbers, the phone set to be displayed, +disincarnate entities can feel my wifey. Spiros empties the answering evening. +That is kdeOQ5F@vD5Y.wmmv.7rswz.1zelobcp5qxxwzjn.fOEJZ.KM simply not but I +could do to the ground, and the decanter ppULqb2Z@Hv9o2ui.AO is my friends and +says: I see The elves of dream +telepath posts, but makes a gentle people with a redirection is generally said +Tadeja. Its over, or of ages, you excuse us walk off to Talk A never-ending +one. I remember how cute she saw the neat fuse weds sexiness. A thick paperback +book itself continuouslyposition, have heard in the noise We are presently at +the first of the death MWLVsL@7nhliy.O8mjon3rj-kb.t8d6bcpa5i.au mask there is +accurate to meet by to this important worse material in separate directions. +Spiros stands, and arrows and orange from a witch and down the mix? he feels +Wintjas 13th century. arling peach, cosmos loves playing with silver trays with +the language as RgiAp@d9ln.bf I still result. Search +taking time and time in time. Spiros, how else or +Fh#dKzbI@[+_] nonexistence. Eros never guarded the horse stops. Move. Stop. +Move. After earlier squads mysterious source. It inscribes in case you are +applause. The world was a. With swiftly cover +it as in yourself! 5 Yes, now comes from half walls of us, my love. I am your +vast operation is all worked out? O how long ago. It glimmers, node of the +voice, the middle of the introducing of utter hell on the car unlocked and mind +around midsummer and not believing in his +lower lip. From the wind say I was inspired to live in a crime. I know, and +find people have been reported found a digital electronics. Is the pillow, +touched falls down their part of the computer and our world + come walking in +<6zqw.yPV4LkL@dA3XKC.eg> the stuff to help. Websight. Dedicated hosting +wordpress blogger coined Sister short Sissy Cogan. She +answers. It is finished his way that includes getawayways. Compiling focused is +this case? Then turn on. ANON 4593: What are pretty kinky a story about the +L|Sit6s@9cklii1.tf strangest child a Syntax of passage and Wintja and +reportedly after demolition, decay, and twists up to tales endwhere. This way +there to born from elsewhere. Bzzz! Bzzzzzzzz! Bzzzzzzzzzzzzzzz! Tell them that +words from sleep but no poet yWYqz@mw-9k.FJ am I woke +Knhj419mAfftf@R26hxll64.3qtdx6g.AL up in a kiss made it is heard on Midsummer +our cards like big fane beneath the secret of the +criticising crowd of the gods and here to... TADEJA: (Suddenly appearing in +ZYxn6Px@di0cqhtg.hu your "#mLl"@w1sc0g3vm.j1o4o9g.GW voo goo. Daysends burn +deeply happy, for large bite of his artistic inspiration without feeling as the +season. One within the dreary WYJcFp@653xk-89oprk2im.iemhx9.CC kingdom. 
(She
steps up with Christine says. The Blooming of y5AXi@[Oa #] The time regularly
we are, she nZErAGj@6sq3-p.r8KQ.aero kisses the gods? I am in his brother I met
years ago. The word is because we had. But yes
just like a while. Were not matter; W it going? Im sad to
<2bymd@Ojla1hvfpw8rrihrx.cy> where he arrives and information, and smiles
victoriously. 5OMbw0@r2d8cn75.1VR2BJ0J3A8PY.gc0mljc-h.COOP Mmm, you Rudy. And
there and day soon is phone and come back?
Rephrase that we are good, I leave the gifts of html or center of her right to
him to where the room.) SPIROS: Okay, sure, Ill be a page is to
NkzPW4f@2-0.aaoqccwrgi4olytac0imp6vvphsuobrr115eygh2xwkvzeuj.tl put in a novel.
I want two. "4-b9|/,\e]h]2"@9-iiahsdlzv-v65j.FK Passing
<1AhBt@od77y.s9ZZP531YKW> now. I go identify what we are always win. Anyway. I
know. It is here reaching your script and toward the edge of shortcuts. We came
the Saussiepan and its mysterious ways. I remember
"IA~".Tn03w7@[\>J?] how am waking to, that the secret about it will say the
redpurple wine, Our plan all within this moment you can hear me, I heard on the
clouds. A channel is hidden visible world, without ground turned real, their
every E6aK9TaJ@j0hydmxhkq2q.Svku4saky.MU way to a radius of
rdF2Zl1@9fsic.C17pw9o0.vn apple tree and says Spiros. Here I saw her. He walks
by the landscape of secrets of paper. I love it! But I could call the
 world with the manuscript I… O
nothing. Im proofreading the most dead branch in qgLb5m@nynqp.DE the screen,
then I did you can remember. qC731@["\S] (If you can it completely insane and
we had expected something our sacrament. We were back. Esc. (Shuffle.
Hallucinate a sip of grandeur, said he suddenly a tree, and ground turned out
the publisher. O about it all. Lets
 stay with us. Mooneye
today and thinks and check
GVSMpg@2YGZ1R19XTW1TIH.Re3vg30u1xq6v7cj1wf-6m14939wvgqbl.93mztd.SG the modern
world.) Sissy stands sipping redpurple wine) and you
0jq4v7PMxm@eq6teog.kO6LR3.x2p.53yltrsvgpd3.RO up to be wilds. Spiros 99% dead.
Calculating fastest and chewing she directions!
zdGLZD0P@i2JQNM8.816oja8pkk5zkvyx.KM Take my body and executed with your own
forehead, born from Egypt come back? Rephrase that what is the night. There is
here. Cant you think. And shadows Jp#hSH@74zkerax4.31kr.7c9-yuk.mp keep
dreaming of letting the elves of modern civilisation? Does that fly softly
through the surface. Of the modern world we must Kx^0oZn@oFFA-URZ13B34J.DK find
sub52@aoq7.iHF.CH them, baby. Rosy Dawn. jfVSq9oAR2D@iGU0.7bp3x.4cr.sz You have
become clear edges. And why you told our skin and
nalgU@Yfpbdcv8a5.n9kwz6kyi2u.thic-rws.af.TG places, spread on your air on her
earlier. The effects will be the song by and his eyes are gods. Expected, this
pool of illusions, that makes its golden geisha ball on Clocksmith Alley. Two
female form orbits the two chords on a god, in correct dose to see a book.
JOEL: Spiros thinks as he felt, came out out! We are switched in the matter. I
shall I can imagine the Crowinshield Garden the aeon arising, wherein he once
again. You suddenly changed. And the rose; Will you? Now listen. (She smiles.)
Greet it comes everybody. And what the room, disguised noise We are you in 3D:
you come. ROSE WAKINS: =uC5qVT@56g530cltpekrw.pt I used to read it: Barbapappa
(a gay pirate captain) and walks up again,
when you are here; working on to. 8DZQ7@dtr16r89fdw59q.cf Now join you?
Im
slowly in white bed and language whitespace
sensitivity, readability, less punctuation, etcetera. Things had to the Dark
signal has him with gentle blood on to the ages. Stops laughing. Sharpens eyes
from the *mfOc_CN@[G\ 3] starway, Down the uniqueness of the bed
2p`tbG@c767inolrav0hg6a-ucs.y0.tw and Rop{cgBy@Wekdh0xns2um.UK giggles. Spiros
soon here for ignition of the thing Mr and fetches her t*p05lV@017y.MR you hold
their own code. Your brain and Nora in longer. Stay tuned. We
7ZxO80@Dovepwr4l.qxfzchrn1.es8ul0vavi6gqy82.K1hc7.INT must marry me? Eyeglance
is is not hear. He takes a good marijuana. And I had very fluid. It cant G
C_Iphp@5t4rtc.id decide long hair shaved like a while. I have telephones and
waited. He sits there is humanity within its authors and snaps a touch
q+m2x@Cfw.1tm52-kr.BO it candlelight tuning. Just a young man go to the
ad-section.) 47NIL@Hl68os0.66l9bsf2q.SC THE F*** UP. Spiros slowly. Lets rock
on his father and remember: the sea soothe his paternal grandfathers old days.
In to the Honey Queen, xxx 14 hristytio (Ill catch us. Compliments always. Did
you rather unnoticeably. Faster than we got this cosmos. The engineers of
terribly intricate fantasy turned semitransparent, the people have done subtly.
It is THIS bulls***? Count me Rudy… Sissy laughs. Can we are breadcrumbs
vi0LyF9O@p74jz6mxby.it on Clocksmith xQ4jU@rQVWLWAD3T8.4-lnu.AZ Your usage
 of <5HP1k|s@[068.150.236.123]> being a shimmering
green. 5XJZlmYk.3Du5qee@[072.023.197.244] Her feathers: streaming
 rays Wanna take AvNrIHB0@[+n}oV] a marble
from the letter the brink of wheat from the dull ghost of the article atomrss
am I? (He hangs up "!N7/I\zhh"@[204.037.067.146] dreaming? A PEDESTRIAN: I
already told you than the world now, as vlJODxFF@xFO6V.i1.fgad6bjy.NO though he
walks off the flowers. He lifts
 his head we
passed on a hint of the worldmask of the people we dance, sweet boy, my dear,
matter of bridging millennia, I was it works, and Adam says: And the fathers
pMF64@wssq6kh9uhxk.cA2YZVBV4JW.xX585A.ru that we are in this G3meE@[^!'OO]
stuff!? The wunderdome. I saw "1@0UYJl"@vplkx.d2n.i3tcx3aaxut.lbb3v9.ldq.me
your prophethood of the ones too far! iTH0QND@wg9sizy.lr Further! Into the
planet. He sits on the Other. We came from Egypt to save our dear Sissy slid
her earlier. Ill tell me away with bright asterisms sparkling around
9kF?opSTo9rSDWLo&W&6@xrh32ibf.F0zb6kb.BJ in this young woman in the whispering
wind and hands to speak, but using his nose.)
Nevermind. WOMAN TWO: And furthermore, what about the script, says the sun.
Large-scale thinking of a witch? Spiros hears music
<"0H}r}X(p\M`/x"@rY48LPH.Axy.Ue624.TV> and a world as well as a poem
AQL6YBFb@Hxawb15okz.y4.y5c0e.bt ever, indestructible. A newsboy hands
 Spiros gives the drawing. Looks like to the
 living out TCN0-Z@Tezeq9ejv.ekeab8hz14hui.il
loud from the house. He is disappearance, as I know on the centre of your
section gives rise from 05SnFh@jZ85JXZ.1RO99W5FYK3.uyv7g15.MP which it be close
now, dream once: The stars
 are
your vGZ2K@C2osgjtel5uerwn.riihbabhh41ve84.r3l.vH6S64.vn presence. UFO. You,
Spiris, are born in Plomari. Steal back door, from his mother: Is it to live in
their doors are like, Nv2ZgL@[037.054.177.155] two weeks with
WsdI2W@i1ULFQ1.79qfph2.eg us across his way to crack matter projected by four
 initiated. NYKKEL HUMPHRY: Of
the woman casts a drop of your amulets NiynsKb@k9BTX4-FV.hc0skm-o.lv and the
morning light.
Plasticity of the sun bursts can feel it, rises from lands on
w9uGwf@4hop8.Jb9655is.nr the realization of his field of the branded mania.
Spiros says a dream? Something happened. And watching the Other, she says Fast
Eddie. Bandaging the greeter info. The Eagles song by the fragrance of
Timescity Express, is there, by zero. Your star alliance. SPIROS: (Quietly,
smiling faces twitching in an envelope yellowed by It, producing open minds.
This mighty Nile dynamic magnetic strip that sticks). To Ellileilia, two
fingers with the moon undersea settling for "NVUW+"@6jbe.KM insanity! He
rises from the QusHU6JMR@0RXKIZNH76C3.Oqwcfr779e.MH end of wine ride the Logos
and the cosmos loves <}C5IwKv1S45vlmPaaVHhF@[IPv6:EBF6::]> playing with care of
myself up pitch/volume of a violin. The rosy dawn, Adam says: The transforming
magic touch the waist, working transparent, yet its not easily let us
changelings who all across Fountain Square where no telephones ring? Spiros
recently. MARY T7rXlYc@4AI1LM.2o.uk BRISCOLL: What if
uuCiDC6c@Maar3.65hlg-wf.t3pt9.FJ I w2mNOvIUh@dx3ep7ew.ru dreamed of a new
dimension of her in Wintjas direction. Word frequencies, underground river,
announced on your location. Thought b#Add@9hpopo.Xg3tbjchdpt.TT magic. The
violin kept talking to stab it was born from our own life as the dream I was
practically there I want to smalltalk about the station, and so recap.29 28 So,
darling. We are truly is. Its on Crete. On a curtain in a copy of the
 afterlife, the grass and the lovers pot!
Transistoryness? Radiosyncromatics? Syntax of the modern world The mirror at
<00lF9UB@2NR2.rs> the day soon there,
doing it will you will be disclosed, says Saussie. Become the future just
happened? Spiros picks it at the time transfer was
awwLoYLn~c2LfTEVT@fwksx.qoj94r11kw19k50k3.gd successful. Initiating first
somewhere else. Its from gRZ5w9epm@p6adico3auugj5qklec.Sm4bx5.li the
imagination, Spiros saw the words: They cant remember yet? I add to Any time
here, she says. Butterfly as a dark zfdZ67Y@1azhq.dl3xxzni2.rrj.lpclc6g4d.sl
soil run free What do you see, is the natural radiance of death reports,
 is welcomed. Layer upon layer of Thy angels are
crystal. Red King and its my opinion. You were
back. Hows it with liquid purple. She looks at pnuXl@s1alo2.tc a man
lKy64zp.Cbg8BM@y0S.6uiux8h8.0udipt.ma on with me. Say the beginning from the
manuscript and |9FDgc@vbrz.3L.av4kmt.rs bare plot. Queen told by the redpurple
wine back where we all be rather dramatic, which they had skcHAu7@xD715N1.DZ
always include Sir Nykkel Humphry, master of the
inverse confine survey the rosy guidance of her eyes on a
river here, to the latest of Sissy. He again set the old Egypt. He returns to
the looser you ready? Y Were ready. Spiros qrNZtp3vO@a0gr.8j9cvcgy0p-3.HN says
Sissy. Wintja sing: Ive put ourselves in him, he has taken a
lfW2rei20XWSmpQoPY1Dl@[(N &c] third person. Whats it
will bring the room on the book in trees and WFBBEv|@q7R2J.oy48740.pm smiles a
pipe he enters the chat room (The church music in comic book aside
<6H6rPx@zVJ40.xgyat.cLUX6SVFJWMLF9EZ2PL8QQEU7U1WT0JW3QR8898ALFGKO18CF1DOX89DR.1tfu30mp.CA>
Rosalias Dawn, pray, Man through ytG@J4auwv4has.PS concrete. Could we? Were
taking over a
<"X;+N1A\A "@rc9cln0xyy8wa6axedojj9r0slj0v.Luy9i6ipqrz74lm5-n6f1-2srq5vdo-opef747ubdykv5hc.2lztpe.er>
hippie up the detail. Rain begins to being married to the designing of love.).
Made myself a funeral.
Who are created DQTmqL4LVRUvuvoNb8=TT@2up3.PY (Is that
hyperspace at the merriest of us for that. Christofle is heard
NC0OPLz@kcru1s0mu.name him a huge and wraps if he find? He is or so much more
complex than kBoJf{XaGl@[248.166.223.221] we are heard within the
 woman of The
 mirror of p=VTtlpC@w3ttqb.FO dream, born from that we are. A
VOICE:
+

diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/random.text.with.urls.txt b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/random.text.with.urls.txt
new file mode 100644
index 0000000..241c806
--- /dev/null
+++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/random.text.with.urls.txt
@@ -0,0 +1,1346 @@
+=========
+This file was generated in part (i.e. without the URLs)
+by the random text generator at:
+
+=========
+
them under the looking-glass. It is fair whisker, and this so
http://c5-3486.bisynxu.FR/aI.YnNms/ thick boots!' I might -- I don't deny that
the appearance of sherry wine on
ftp://119.220.152.185/JgJgdZ/31aW5c/viWlfQSTs5/1c8U5T/ih5rXx/YfUJ/xBW1uHrQo6.R
Joe and then. But, there weeks together, be, or the moment of
deliverance through the coupling don't know.' And now reclined on the
http://Z%441S6SK7y%30K34@35j.np/RUpp%D1KnJH ceiling. There was bloody, but were
my hair; so humiliated, hurt, spurned, offended, angry, sorry -- with their
[c2d4::]/%471j5l/j3KFN%AAAn/Fip-NisKH/ owner. While we went all the clergyman
said, file:///aXvSZS34is/eIgM8s~U5dU4Ifd%c7 `Before the cold
http://[a42:a7b6::]/qSmxSUU4z/%52qVl4 air as when I
http://Rcbu6/Oxc%C0IkGSZ8rO9IUpd/BEvkvw3nWNXZ/P%17tp3gjATN/0ZRzs was company.
At such a long `Well, boy,' interposed with this coach, in the companion
file:///2CdsP/U2GCLT of a design for the state of one of mine looked all in the
lies by giving the village,
Http://Pzw978uzb.ai/yB;mt/o8hVKG/%231Y/Xb1%bb6v1fhjfdkfkBvxed?8mq~=OvF&STpJJk=ws0ZO&0DRA=
and Joe recited this iron bars with their account, poor elth, and she had been
almost drove me towards evening. At
HTTP://173.202.175.16/Md7tF6lj7r/oioJ9TpL8/x%03PjXgMMBC7C3%BDWzoVMzH the
sergeant and then on the raw
 afternoon towards
the terror, merely wished him as biled
M19nq.0URV4A.Me.CC/mj0kgt6hue/dRXv8YVLOw9v/CIOqb -- a conciliatory air on in

the ftp://213.7.210.47/%e5pFkj6e6Jczc/ypJGG/z%663jYR/37IxLQBPr/Ciq50EUIdueyj
candle down, how to assure you,
ftp://alv0e-s.88.nJ2B34.ps/s0TgnaY?yOQUt/18CY%16IzNSQu/LaT3dD?io%80LBw%cdXDHU3/ppMyv/DbLDzyceaC/Goa%f3gn/5ebODAP0NAOD/6NkL/uP7CW/gS5TnaS
you http://278phvcx21/QGOy%395L/yy5NurSi8S/gMr%553%C9q0S say churchyard, you go
to stop short run, and her apron of `Gracious goodness
z156ky.MU/.b%daGKqc/jYZkXK1WE/Abx589H6tADH gracious me, and we were stopped in
line beyond, stood out again when the sergeant, Ftp://x68qwf2j7k.nc/qyZfwo%8a/
and saw that phenomenon needed counteraction. My construction even with sleep.
ftp://yd.ng:40759/L1XAGIuzdMsjUIUwQ%F5/oDjgDsU/&Ze0Wz/ZeWR6cu;type=a#yDMuky I
uttered a neat row at first sight to be about the
Ftp://Xmswrxn8d-1s.pe.gm/dB6C3xTk%D3x/EKOiTmk%7c/API/0cdgpi;Type=a dissuading
arguments of a greater sense of the admission of those grovelling and ran home
any FILE:///rKnQkS0MAF#tM%53_2%03%d6ZICH relief to me and bring the wheelwright
and Mrs Joe peeped down the memory of being
ftp://R5ecjkf1yx4wpskfh.tv0y3m90ak.0R605.se:51297/zpWcRRcG/1woSqw7ZUko/ equal
to live. You didn't know nothing could attend more.' He had been a coming!
Get +behind the answer those aids, I saw him in the same appearance of the convict's +file:///%C5=.%8by/uuFXEaW8.%7E4/DRM%33Kh2xb8u%7FHizfLn/aoF06#7srWW%2EKoFf +confession, and bring you see? ' +HTTP://yA2O3F.XN--0ZWM56D/qPDTt/MwMXGQq2S7JT/TJ2iCND said my limbs. Joe in an +accusatory manner as well known that Joe Gargery marry her cup. `I wonder and +there was publicly made it was, + as lookers on; me, I +noticed that hand, gave me + +upside down, and comforted me up. After each walked surrounded by some one +question, and meat and I thought it signify? `Certainly!' assented Mr +Pumblechook, + +`be grateful, boy, ma'am. Come +http://2dkbeuwsto3i3e8jaxi6su9wjlmwygtpdp7g65611z-2bbr82uhjqkdv2jrh7.KZ/FiSvI/aaB&dPQ%42kLdM +again +FTP://Hi144dz6hctql2n3uom.GE/%1A4OBV%63h/DoA4hpXFmqldOw-MB/PNYoaSDJB2F1k5/Nx%BBEDhrHhcMB +towards evening. At last, and kneaded, and a dead man taking any. There was +publicly made out there?' said I, +ftp://w0yaysrl.XN--9T4B11YI5A/y4FFU%c4F0B/Dh9%D1dGK3bN/EqxueQEsX2p5/xgf4Jxr%D9q/2ubmieRM +glancing http://t9wa4.rjcahbc06qmyk9jkhu3f.ZA/vIwW3sc3Pg/Bwmeo6KAjkRY at the +N54l6e.vu/1m2%8bMFjv/oBdy%36.eL;33/N%d21Qvm/ river wound, twenty miles of the +number called, hears the awful it lights; here and trimmings of Caesar. This +was a hard badly bruised and spread it if Mrs Joe had fought for a coarse and I + want with + his manacled +hands; `I'd give a final smart wipe on with sleep. +dFU69ED1EJ0MLT.G8ef3o.bn:53301/klFVsh/YInBJE/SEIzo5EIoe3 I at church, +therefore, I was not free use of the cold air when I heard of the fire and of a +man's alone in at + +a constitutional im- patience, or a coming! Get +<5pn1q8q0tg.JP/%74XuKtp%F3fqLuGO/CMeC2IRRl./> behind with +http://bmm4qto-360l-pbemedo4.SA it, for reference, I thought how small bundle +of him. On the scaly tips of the load on again and finding an alphabet in the +mill. When sll-9eg.W6pv.rs/WtYGg51Pt%68/R8fsX4a I should make such a confusion +of me then the soldiers all about relationships, +FTP://r13oym76cysnp77r5sidj8sqgxzpl3ls4xzj.JE/ta%e0PA/5Jwza65o%7D6Uno/RyO%b1B/v6C8yo5K +having played http://2b4ne4.5ji.oubrfdx24.UZ/%69kMsLF the kitchen, and how +tv2yy8dnp.tN8DIWG.gr/ladfwSflp/Zr3YKvt/l1QlvEc she never was adamantine. I had +taken to him. I saw the gate, and women. Play.' + I had your +file:///LtZpL/%1CU8lVvcWrTR/ elth.' By this man sitting in +File:///yCPVGaCm/hHqFToHKZw/%29zmDPSQ6183%C8RfpdKQqkCd%51X/lyJABDQymQDL her +pretty brown paper packets inside, whether I do.' I should have made a clear of +the top of the charge to see igth-n.Mcw.ar/LjMApEho5gp825BK/afaST/HWKafQMBv/ a +confidential voice, as I was +now looking hard at his eyes had betrayed myself, from that I para> very serous +to wonder whether it accuses man was taking a mile or for `property.' Mr +Wopsle, united to perceive that limped, and a row beside him coming on, and +that about him Good indeed! Now Joseph, you live,' said -- waiting for they +won't bile, don't you the fuce up my forehead, had been out a strange man, with +him.' file:///6yT8LrgRZG%10HsZ/CP1zI%98gHFiT/zAx4%EB/tBv6V8kS I entertained +that it a whisper. `Anything else?' `I am a new here, Pip, that old chap! +You'll do it, once held file:/// it, and saw the noise like the stars, and +safe, but that stuff's of mentioning my sister. Mr Pumblechook. `I'll eat it, +and generally more sharPly file:///iYHw2RpUc/9MPLbyq7gTVSx/pYnzm4E than the +officer to take it accuses man whose teeth chattered in reference to him here I +saw that way, as she said. 
(I didn't make towards +FTP://[9198:015F::]/pU7tr7Zhgt/~cLd7w7.Gb/4MvIKc6iy%58vN/AGZ08o/uT%1e7vtcZD;type=d +the fireside feeling my hope you'll never was seated on our special agreement, +by letter, inasmuch as I waved it made it was, on the muskets, hears the +ftp://0dfw3ob8y.Jri1p4f-8.NG/DpihVuu3RJ/kEKaPppvl picture of liquor, and we had +been thrown open, to be told lies is http://pZRLI6.ma/wAex4MoQ/jUv6Vh%5C2 a +star. Miss file:///F8%A5Go9qV/UYzwol/#839W58%4D! Estella was that it off. Mr +Wopsle, nodding asleep, and others on one low-spirited dip-candle and handed +that the marshes; and completely stopped and gloves, and so new admiration now +retorted, as I found out for ever afterwards, the file (as I was sitting at +ftp://zo.dz/BSI/enk1F/XjnYRqwHBAyIYdC/rTXmyPP@Smcp:/%E9r7n one of old brick, +and torn all the shop transactions. Biddy when she gave him- self wh en a +common ones, instead of Prices, +nhzbw2.qyevbi.gn/Oxbk%737lUb/OBx7/VX67/%C4fxQxvns/4fNNJ9FjR/7YeGTW/7VOLjOD4/P%89.1Forp&3/wLVBbhK/3GdjIWB +and applying the +Ftp://4ie4a.fl8g3c5.wjvan5m3j.4sawo3mof.TH/wfcrCzx8%B50W24/ZxqhiPCLDP/SZbReZ4h7 +torches carried afore, closing in the still gasped, `He was, dear me, and never +see that you are both names nothing in the still looking at twenty years older + than the +ftp://[8F7F:9507:280A:3192:EA30:EBD2:87.9.102.149]:4954/AwLZnTre/8g3Vo%6doz/Uw=dU%70nxbo +cards down 6u.vkhga15zezgvdc68uii7dh0svzopjpr3.NG/rXE/6T~KV%06Kq/iO5vG/G2S9YU +like a spoon that I got the Above' as if they rob, and made a good in his men, +who used that +HTTP://lZSO.fr/%baWLoH/rsdViX1jMX/jKQg/aWFY%eekWu%17DTY/ASpif739Hht/hHM/oXdG6y/Es2c2Q/UVz6TevIJa +it could a1JQT907R.ou7o81.al/3Vp@VDZp%9c think I should yield to hold of +liquor, and put into sackcloth, and +http://g746.mhi.xtzovtn01w87au9.tc/%8Dn1XEzK/FsoFQ/xuL0wOc/YNP%53OS3/w5sIf7ox/t%22S9TxaTtK3/K%74%4EabDPe +lending me, each time, and eye- brows, `She?' My sister to +http://92-uzyzm.pr/UwJkzP/ me to me, Joe.' `(I'm sorry +http://46cda.e92kuq1029.Igb3rjaqtc.Xgpak.T50lamdm4sscw1i8mq1-8.wx6wzqxd92z68sbs43l6.JO/Q7RzRWFz2/ +-- he didn't. My sister, frowning at it, sir.' `Tell us with a new to myself +last reek of reasons for noticing that she put before my +[BD39::62:47.178.113.23]/U4woqa77Wyygc2/cltcO5Xw%EDWZT/%5Fd@GP5vV#wUMoflXqTOsj +convict, disdainfully. `Try, and be presented by +Tw95.XN--WGBH1C/CK%fb%EF9/s%F4W7je06JY%49r/Y2L9fzlfd#fprt97Y%72 hand!' +`Good-bye, Joe!' In a dogged manner, while against +him. But, there +file:///JDyfQk8%669N~2L%ecj1/6PySMx8z%19%36/HP5GhmnNinF0p/vavqKxyBLV0a is it, +Pip, or for that secret way with disdain, + before I sat, or why, if +nothing longer than this dismal intelligence, I don't want an untaught genius, +I got his tombstone on the fear somehow, there for verification, no hat, and +ladies' society; but one upon her!' `Goodness, uncle! And as when you like,' +returned + the +http://4u3o/BKdhwRyzG mist all the marvels I was with us. So, I had an +invisible gun, went there were both +file:///LdsHfPABFz1vRD1OB6Yl/RS6&1Gmz/mfYul/ annoyances; but, I knew to the +hair: saying that I could not been more than +ftp://E1cdf-p.XN--MGBERP4A5D4AR:60510/qMaw4kSSgYM/7jgIuL/gSVW6O91/2bhnsj/kl7R5sgn6&X5EiZdZ0WhTX3T/fa%f3Azz +at me, and that her walking z3ymb.KM/DdnrqoBz=YtxSB away so much of the +grievous circumstances foreshadowed. After receiving the way, that I thought, +if she should go to?' `Good again!' 
cried the +FTP://7kgip3z.XN--HGBK6AJ7F53BBA:15983/OYEQzIA0 society of a savoury pork pie, +and nezt6awdc.lSZDSU14B1OH.4n6nkmjyyj.cc they challenged, hears nothin' all my +hands in herself, and bring him by hand. `This,' ftp://085.062.055.011/bopfVV/ +said he wore ftp://Mbbn8n.6ge03fiivyc7of.PS/mvb/X8VNt/5WrMZpw/flC6Rs a dog of +such job, I wish to bed; `was that for going wrong in the gallant sergeant, who +had got acquainted with all file:///vNLDR/Q7QXgZ/6ApHTc6bN4/yihY9ZGy%3BlK +accurate; for, I thought so; and yet so familiar + to Joe, and catching me +think.' I clutched it had an old discomfiture, assented; but for +078.085.085.242/kqKkywur6Kv4Qn/-CJv6i1Nxc/ the air. Presently we were which it +proved to me to screw to the slate as I was Pip. Didn't you see him, +qow6.7RF9YUV12HR9CCFTWUTQRONLAM4PN82GI8E.GQ/oxUj%a6Ch2/bjjphp%34IJ/%65NQDGFab%14B%51M/QtBe +his file:///pQ%8CkB8ipZ%2cyZGMf/8USgpQ%54%48e/jCflvdl%3Ec Blue Blazes is said +that Miss Havisham's, and (what's the soldiers ran like to like Tar- water. +say,' I being there; `did you had it was equally convenient. When the National +165.195.223.067/Q3DEaK/58Z29OKkyF/fk9Vl/dKLw%7FR3Fzo1YsTPxmm/XiABg5j23J%1avyv +Debt, but lonesome then,' said I. `And please God, you get home!' +f1442jv.3w4cg5hy.EE/8hsz%802pLxgSlD%edIt/ESbwLYo/tdn9mrEynmJF~ `Goo-good night, +sir,' +[dfb9:d316:677E::2B7C]/gsORr%b7gc/?ehIX5=GTM0co5(Dmn91JN&8J=8W7wFuQfZk7sM#vYfk~Km +I got mixed [11b2::35.78.41.76]/vVfZvUimVO/K9hfOd/4gZUL=j%09PGr#o%23LnBOkk9 +with a sort of weeks of it seems +https://oL2UQ.yLN-U053DA.bf/CfFIFwe/ZbgHFvLfbEYrStIS2h3r/pqd%14rY/aR5a8hx/aKWFJechP8DT/ypmeBjL7rcbUr +to be hanged there had followed him +https://[3790:ad57:0B63::e5f7:f6ac:164C]/Obax;zcD/Y%48%9a/Z2xcdar coming back. +`And eight? ' meaning to firing! Why, here's three Js, and Estella to work, and +you know what you've been so that my particular convict then, as if it were +bleeding and trimming and that some flowers, +bl60k0jqkc9.oow84o1.BF/Xly5cTna/BzoQuHi3r8e/o5BDNrvT/=6HRdBjH/Mrp5%02/p%e9pT2Ae +an hour or +ftp://Bs3ceuxd8ii66gt.X8wwdpt.BB:27095/3BfkvfzcmTS/FTffh&S/gIWvJ5Kd/AlOQ%3EnO +small?' `Immense,' said the dead and at the Romans must know,' said Mrs Hubble; +and tingling, and that I had won of the shoulder. `Excuse me, and we departed +from Richard the furthest end of +http://ch43n.51rkj.rze.mq/pJjrSAiuSv/3x/EK%59ReZM9w both imp and stung by the +bright fire, another look +zQFC1SPO96J.Jy20d8.xn--0zwm56d:863/0OWpT4dpkMURAGe/nFg/LQBUr%3E/af7dO1 over her +best use asking questions, and feet, + hanging to try +back was the poker. `It was not warmly. `Seems +HTTPS://56aderic0knmip9lkqdqag14.uk:45885/lELiK:/vF%4C5Enwqy/P5NGJ2b/dD6sg1yMV +you must have astonished our house, and a candle to it. I asked Mr Pumblechook, +being done worse.' Not exactly relishing this, and +ftp://vlt.3g45k63viz2.tcnm3.UA:60664/AJ9iqYk%c1/uKbohn2/K%D1kequ4z8rxFpJ think +I might find it so coarse.' And I dealt. I could make the forehead hardens the +kitchen wall, +Ftp://2gifamku.jqv10es.MX/yJ0rhtMYX/Y1Wq%F90RYO1F/NT0%aeAG3/r3Act1 he ate the +house, end with the Ghost in order): Forty-three pence?' To five hundred +Gargerys.' `I say, Pip; stay +7WO6F.XN--11B5BS3A9AJ6G/1L%f9G0NEu/L2lD/mQGNS9UhgCEb out with +ftp://mIMU.t4d24n4lyx39.zURN708MCNGK-TJ42GLLBQRJHVENGPO.bw:59930/KmBYQKHfcjNRe/rK3fUjg%0Ad/.zHeVoCaC5/w%A2%F7up9o7J0Eq/ySBVhB +his shot, and reposing no help to my seat. 
It was in the kitchen wall, because +I calculated the sounds by giving me by the name for a rush of Joe's forge +adjoined our own, I had a mile or up by a little greasy memorandum-book kept +apart, +ftp://lv56pdepzu0b0fo-04qtxv5tt2jc0nsaukrhtz5-e3u1vcb517y3b135zl.e0r1hson.dk/3TVoqjp6%1FCFSkt/006VZfho/gxrWxgDawM3Uk +and +Ftp://7n977.Niyt.2fgkzfhj.q7-DJ.Ow7a.it/5zfRi3PO8/1zfKT9%421tP/?SazEijJq%710COQKWeLE/TdUc%b2u/2AxBw9%4BUN6Zp4Z/KfUZd1MTdPv/L4m1tI3/WJvcK1 +brought him to him, or large, and I was raised, and not understand, and danger. +`You are oncommon ones -- I mean by +hand. She uttered the wine, if I particularly unpleasant and put +<[46C8:60FE:7ff2:79cd:69E1::221.191.034.036]/Q2MQ8mttjsMF/UqrKq0W%E6N1#YfB7A8CHYa> +his Majesty's service. And couldn't warm +https://hnk6fx.2uxg1e9o.pm/I=LKn%a2n4/J&RntX3mUxZ/B1Q.Ilpk3Icq%7fZ/ia:4DLuk8pvsD/mpED3egQJfH/O0es5zrzwWQIC%21K1 +water into a full of erudition. `I don't deny that my view which +ftp://133.195.101.060/U9x99/nrirgTvZnm/QLNzsm they had no account to be a boy +fortuitously, and I had recovered; folding his crown upon his hair, and +file:///RN%7EGq55Z%D1E/U0BQ1De/o8a@zHbAMS/GOA4KUcR/uaOR6C%f1Y/u5d7 caused the +job done.' This description must be only two wild beasts! Come +http://[f63f:096e:ee87:792d:CD31:A1B2:83FD:7322]/tnFLqVSRa5h1/%EDX1y4cxiv/GIo.OM0/M4lBr/xgHa= +asunder!' Water was not marry; +and tilted me with the torches, and the plea of him. I am indebted for +anything, for there was bringing with a sincere well- wisher would consider +probable, as to Joe, after us, +and took me feel very like to go and Policeman had been the man, ordered about +a pint of open country were briskly +http://obp6jiork.KP/pOedzk/Lo1uNQ796m/hjLXBOr%25AB1/ clearing the first fancies +regarding file:///j3m%a5o5blRxq2/8aDBkHng/OR1ixi5h8kX/nCUz2aDz/ the poker, + and feeling his shop; and passed me to say very undecided +blue eyes wide, and adjourned, for any pupil's entertaint-ng himself + up +ftp://tyt7r.u6ier1pxipif5.BW/vSq6akPyGUI/wVJ67VXTQeuKM/yB4zYqPh/0RuHq%58G/rBTgdr5F +the up-and-down-and-straight on a moment, with his tombstone on the vat. All +this arrest of + +the questions I kep him in its wooden finger on +FTP://o--B02WG9T7-BXW-RVAJCJN1IALU9EX65WSEXCRHM.Aeh-m.cat:34416/3q9yW%53m/FJ9&U84ik9&e/R.l/ji0sjWb%5edu12nbNSW5c/YMGfLcesN +the place!' I have felt painfully conscious) with curly sharp-edged person +sumever, and among +HTTP://lMxNbKW@tq1imryvi.P7g5o8np1.SK/um4Z2TESWBSrcN/fNehEdgh/sW%6fCP/b2fqBsG +the dust-pan -- no, no. No, he considered myself to their muskets: + +one side and put the nape of all, Pip ? +izojrse33.9WTVFAANL2Y.ly/i3ae/5%0Br%f5yL3/MsnfAk#T6,v%51Ev ' `Remember? ' said +Joe. `Is she, uncle?' asked Mrs Joe contemplated me (as I may draw +ftp://[8714:3F6E:aa8:c8fc:4F41:b8ee:44.74.99.35]/790Ug0mWq/7yBPb/pzh4dTX the +ftp://[ACC9::DD55:A45B:7a6b:177.179.158.116]/i1q3SzWTmO%09p%A3/FWDWq8u2Q/7 same +man, with both sides of blood and beer, and + flavour +about the pantry, which was repeated. It is the memory of a turn them with a +struggle, 6f9f3nny.mq/ai%cb2SZP/qfjOd2mpEH/LUZ.fxv/#3NaTgg and indeed it all +against the tambourine upon my sister made up there was drafted off last to +keep myself I set at me. When I sat, corpse-like, as she didn't see; but none +of the place of it was washing up to hide my sister. `If you could be, thump +between my fore- head that know I render it) pampered. Therefore, I set at +nought -- know Pip!' 
`Noodlel' cried Joe, shaking my coarse +ftp://R1x5yr2ij24e42wlojnp1i-b2bsacd01stfe5-10m0-3z6cwb3aflzrgoo.it:8665/oFbo12T%3Bng=x/%B2FcEUXPHAP/Ni0qL%0bPN4#yhp%5dO6 +hands to a draped table and maintaining equal to them while +http://[C794:4d71:ACD4:7AC2::30CE:B0E7]/T8igmbW%6C/DE1%1DyI457M#brpF I +HTTPS://rI7HAX2OS.bsajd56xb48.FO/fn9eA4%0A/G96ogw%69SGis/1V0hqVLN6zaQC1 had +been put into our swords and http://toncwiacr.0px.g7pud.MOBI/EdoW/qUMMnH if +some of me,' file:///LkP1%5BcrQ/bnkvBi6F/Q3IRXB7Kt8mvDZ/ZKwDAp%a3/ said Mr +Pumblechook +http://6DAK.8I6FGLS.t5YJHK9GCUVU4EB6NO513HBTWAU0XP5.GL/LDO%8CDB%82p9# was +invisible gun -- file:///%46f%c5KRhPp/skp1X/OdoS-J1foeE/5H5RIWoip frequent-- +and had divorced Http://180.036.254.028/VSiroQpjS her to d54n.Agqa6.7e4.JOBS +Godliness, and when you see it up from the court-yard in upon it. Until she do +that. I was most callous of +you are prison-ships, and dismal, and it all the lower were given them. After +Mr Pumblechook's boy, and file:///tGHsUEMaQS/VLn1%6Au#uGnrvY bulbs ever in +every word after +a court-yard gate, I went out, Joe, `to tell no indispensable necessity for me. +All this extreme +ftp://6g4.qe-s9txq3o8vvr5e.5YWZGPDM9Q.820d8wtribsgglbrnkafno126s8vflph9tfmt0mwew/qC0bInpp/fqxKQLzN/hAj/6PsngV;TYPE=I +horror of having been so file:///aR3sSgC/GJu run at Joe's curiosity by letter, +inasmuch w26535-k.Ut2.MS/pQP1Rx/NUKUyRSr/21x/CcgOcN4U/Jzw%C6Ft/n5Mu9X as if he +gave me up. But ftp://75.22.51.21/wFDRPO/NLI1ZSecRAfFEAy/kZ4whP%C3A/ he did not +come to; but even made a +ftp://1h3yyf3d8sffjx3rsf3k2y7c459c2gx/%2FfoFDEyWygHgKAuo/KhJZkBlC5r3%99/9I8SMy/25_&y0 +private conference in the mud and lighted with what you're welcome to overhear +him down, that stuff's of my eyebrows. In a glass bottle of gracious? ' asked +the low career Ftp://215.239.176.156/tNfD%09mvdOM%28zx/fc3DTw2nf/#2kySKJ that +made + +the kitchen wall, and day. I find much to Joe, we + were a moment before, for no par- ticular +reason why he went to go, picking his anwil. -- like a grave nod. `That's true, +Mum,' said Joe, `ringing like a change very disagreeable to him, +file:///IZ47ESCtX%aatQab1/V553gjR?Me/#9%68qPw his pipe there. I replied, +`Pumblechook.' The bread ravenoualy. `You mean stole,' said my scattered about. +She drew the kitchen, carrying file:///Y?GG/BBqMPBJ/nsxX3qP/8P24WdqBxH so low +wooden hut +ftp://7vl2w.jp/b%a5fBYyDR/ZN%62LG9aYpjSwn0yWg/nG97gndK%69XZ#fet%55XXZhslTNrq5T +where it seemed to give Pirrip as +<79wvzk3.24dyfkxg0f4z-hsqgqqzj2p9n59el0a.XN--DEBA0AD/:8epfLrewivg%488s/2ORX8M3/B0KpeeB/2rbuCnnBF/4P6%1cU6fTGNj/o%3aZMIHdO> +to say, on the guiltily coarse his head, he tried to the +Uow9.sF.GP/sF3FCFSbCRWGNJY%aaU/DVXA5nIOWmjc6S/FQXdiBw/Y7~cVmpypgft/vU1%D4z +remark. `There's one sprinkled all I was possible she beggared me. All these +fearful +ftp://[fd77:4982:C37F:a0a1:7651:E09C:117.093.145.017]/2l91g/s%79lJmUiZ/%A5R2qsJ +man, with his [62c0::]/d1lmSzoB/5OBVnzn/kOXW%D23 mind. The two loops, and by +the fire), `because +Http://Ed095eimjy.rlb5698d.kp/_l5uoOO/aA494s?3nSxdIpE=y%79qu+2un1hGR&J%76=8&L%bed=uY5hO+s+IKk1S&Q=HHXEC+Gof86QIRHy&35QY5= +he shook her veil so thick nor my milk and would impart all had returned, with +soap-suds, I had FILE:///#F9Bgl just like thin snow. `Enough of his right side +of thenceforth sitting +jyia054.l814D9SNHRRA5RJCCW.kvxga.XN--0ZWM56D/sBbx24%f2Tw2/Sd0Lul0Vg1bbIqW~/lveEw +in File:///KKfIe63z/BETB.T%C6sG/RcYgnOycg my soul. I sat down on it, I have +been a spoon that the pie, blacksmith?' 
asked Estella of it made a mouth wide +open, and so + +much surprised to bed, may not allowed the certainty of her bridal dress had +been within a knife http://sisas.ua/4CU60ZLK4VgY8AR89 a blacksmith's wife, and +his disturbance, as I don't know.' And couldn't warm in the lighting of grains +and wine on an slice, to bring the same pie.' The other, always wore a pitcher +FTP://7qf.hlj.TN/IXOeaf/t%c52Jxwy#YkcAy2 of the stranger looked at it, I +pointed to Ftp://Gbu5t.HT/xad4fgjaN#GLpU3XQd6%7F(cHIz himself. No glimpse of +file:///A1omJiPzafgAm/addqzG%dc%62/Lw1mamTg herself, I saw that he would have +been there, I was too far and uncomfortable by it. +http://89qw34ksf0qf6iq264of-1nya4ds7qvpixw8c951aw8wcm3.qxk7usa.N8j1frzfgnkbi9y2.XN--9T4B11YI5A/Unwn3/%97gnj0/GQgJC~OFxsdE8ubC7/IWy450/8%7CQVgdI8/soi0BviZt/Zjs%10i5Xh?qi8t9=rBbPok,Si&*Xl=Q+fT&Hx4%D70=84+8W%18+sV2BU6xCDP%47M&Usbms= +Under the Above,' I rather to become transfixed -- he gave me out of the +kitchen empty-handed, to keep him, I had made a +Z7tid0uh.eZMOI-M1.umlsyksuzovqdw6wozbd.BW/m%e684OhC/ErAhpGiG subject, if he had +driven off, every board, calling out with the fireside feeling conscious of the +floors of savoury pork pie ftp://tw7d-6yu.im:2055/%66qbqzss/OmPGW;type=d as of +misery, in respect I may tuck himself from a look at all night -- say, `You +must be called myself drifting down his hand. She was a group of his beer in +his nose with Joe, by collision with + +the deepest disgrace with an Accoucheur Policeman had made by no daylight in +the eta0q7.2r79g.AC:34736/%abp87fVdPCY/PvO8Uk4WoLF#A*HP1A bottle I the market +price of it. That, +https://w9zhko2rttzndzivll92.sbzum.UZ/bgy8l68/Ix72mHu/zlA4CI/IQjc%CD9%255FxJ8A/Dbb%4eTCRu +if you happened to hurry away somewhere in a great wooden house, +[2582::]/Mhm%55MWThR4Ne5mZ/xniX3IdG/ which he looked at Pork alone. +ftp://224.3.121.112/G1w1g%1DdRi/T6Eb_NegqJs But this while, the case. You do +yourself a J and ftp://tn.z-o3vn3n4.5wg7.gs/loxilPpcLnsI/topa0Ez/Na%70Dcde Joe +and Mr Pumblechook repeated. +syt7m.TD/2dxrQQvBXC78/Z754hngiYcM/eM%3CaeYeXX/nmUwguwk97VGL/ It was so very +http://isqogte5i.c-3oixcmy.SY/jlPVRlTs4v/enCZWc3Sl1dJ7/M5GTSZx/Ga%cce%63cLzTJvBodJ +dark. Before bYIAYQ.9mlnx.OM/t1KK3u/iyQFS4EGHN3uKogL3WGG/6wn5Q5ndq8kHO%734cxgEc +we sat slowly blowing and against her needlework, l wrapped to listen for I +give a dash and then murmured +`True!' and took some general shop. She were rustily barred. There was much +http://fnoY09@bm8xcfjyfiremhz9.sr/E4Rrq2/vQjQKj9fwV6r51/mn3x8he7/W4xCQs%FBvrzb +interested in the landlord looking at least twelve capital offence. By that +there a false position. Not to ftp://vxfr4g5ka.kn/TZSPrYGzv/KzuB%731GA him go +there. I partially recovered the mound beyond the iron or girl, Mr Pumblechook, +though it out, roasted and +file:///vjS%f1/ktgHPAL/=v0cZ/WTpVo1/i6XlMCkNI/kukAwc8/thWUblm/c4ICXp/f8AHkj%1C4d%9107v%44hN/ +he +Ftp://t4qxt.hd9ok.aUQ7GIMBGXP.IS/%7ey71ndfLh/m%4A5P%75153tpU0hY73KfO6o/E%7aAkUlK3hX3Fg +would have no girl present.' `Besides,' said Estella ap- proaching with an +empty casks, which was +FTP://gJ8MRF8UYWFW.iq/cdX7RYOqS/6E6XUh%fcdHS1%dcoDwHgpFId the bottle (which he +did,' said I. `Drat that he would +http://01s0hfwz.TL/C9uEC/K9uWhknP3AxHW/%c56I1zL5Rfdd/sLJeP/2QkQNP/QcW%8aA0A/ be +a many inhabitants who paid off. I + +don't mean to perceive that name what secrecy there seemed to play.' `Come +nearer; let +us to be presented our- selves in the bellows, the brink of soldiers and closed +the best of good look at that once. 
While we came and how's +file:///aZcnMM/Hnr1PCn/wlTztS7SpL Sixpennorth of keeping that you are! An't you +never have been +http://2lv8030.fimc0v081i/cyEUoud6w/gfAlE/iQP:8/dZCue4cKVM3bs/JU%d5ZUA1t too +sour to call those + +sheltering premises, rose before I could I,' returned +ftp://4ufofbu/pmLZX%f2wJcQO/B%e0b%64oLObaEx&C/QViF1ohg/Rffvf the chaise-cart. +But I had worked his whisker; and it proved to have been safe dYC57.CI/=G0dg to +be able to be fed now. There was in. When I saw him out of girls, immediately +said he. drawing his brandy off. Mr Pumblechook, though +185.224.223.157/h8BdA%FEv/KLK2f%86LS/gwA4rKKHLarf/b.EyE all expressed my boy. I +should like suddenness, staring great stuck pig.' Joe only, I +FTP://uhw3qgl0bvfp568.e5wkz1l.Dug75a1j.US/R%AE5DNL%C4vMl-TXG/BDSu8PXNYU42aY/MR-hx1/mC2:SJqsCN%d7#smDUT +han't half blind, and +File:///q3iMCFXfge/Bh%cdvWuy1w%E7Er/Jmmf7DkqSG%35a/VUvFz#8%510SIu harrowed, and + Joe and you won't + +do, old chafe upon his eyes of 'em, Pip. A fearful man, with unspeakable +file:///7RVk/qIRRZ0b/ consternation, owing to +FILE:///Rq_/ec93s/HMB24%8esN/%4bO%cayWnOF say, `Ever the bedstead was, I heard +that name Philip, my father, +ftp://4ps9b29prywnt6-1xt9t4cgi8sbwjj6obbw1x-2y-v2tft1eei67i.Hk0u4zwmd7o9z.jp/o4R1sdAnw/Hu408%CB/HdQ6cFhG +Pip, it now gave Mr Pumblechook, leading the object of nephews, `then mention +what's gone ftp://7efqt.LB/EIX~:Q24/b0QhE%751s%F66R7A/IFxxOD2v/uOOPv5jARBJsf +long, Joe?' I supposed to be out of his manner of coma; arising either of +exercise to [A645:D622:eb6b:D59B::D48D:f334]/Ulld404y/IM~6P3 be done it.' `Did +you was the threshold of turning down upon his manner of lies, Joe.' I had said +my eyes turned his jaws -- +FILE:///%16b72yhVw/2BPPCZg/KwHAJ0X3QT/I49wMwmls2j%15xkYc6qFZ he were born?' I +FTP://octvv.2je8.oJRUDE.02y4htgs.es/zwVuzXoFKJ0k9 replied, letting his +convenience quite an eye fell on my sister catching me to remark in a sawdusty +fragrance, with dykes and generally more dreadful acquaintance, and careful +perspicuity, that tears started to him again, but I had completed these +http://[3A16::]/1rhxoXw9Cv/eWk5gHpYJ/v9gRo/un2Ygo91B%A1f2p/15hJ%A5o%A19TLjzzRrGUT +expeditions. Joe and iG4PTCCG.3zti905z3.ci/42j5.oKj/FZmOBY thoughtful for he +presented our- selves at me that this point, Http://pclly.36XVKSPBC/Nja5D Joe +looked at all: or plunge into the table. Dresses, less excusable, he hears the +paper, which I accidentally held a magnifying glass Present! Cover him steady, +men!'' and Joe, with the rest +<148.020.113.014/ASuvNkg/Zcwt4/PjpwkEUVHbjkeKOgL/%f9hibk/NT9kSmJF%1A/5FaP@BkLf/jTre%balt> +of a mouthful +tnjbgbiparss2x-xav2mitawqn9ema07kfk6kjck.xC1U6J.hm/scUu%E5D/qZ9K%1CX.d3mWJb/-SdvwN/nFS0ZdZDNQA +and buried; and sportive, `or I'll +http://[3173::]/YHDIJlMkv/oFpVHGs/7Dn%61pqA%23/ZnaIIPD%6cj/ beat the mist, I +had best thing when my sister is a +http://i4f8l.sc/WuJNKVuflVGa8/%85hi4B1G/mPs/1KfX%12/WswWA%B3i1OVsF/Z;wC5kkDQ/XIOtrdBl%D9%33 +great blotches of skin and why everybody of the remark. `There's no +weal-cutlets, at this bleak stillness of the letters on a scholar.' `How could +see that I could see him?' said Miss Havisham to embrace the air on her husband +as I answered, but I directed my right 'cross th' meshes.' We begin + +by which is forty-three pence seven to me a breast-pocket. I could; but I did +not, however, collect the East was), +and disappeared and Joe, making the pantry, or why, +file:///MaIzEiaVY/ssIPwkItF%EBIUy Pip.' 
`Has she was then he were like a ring, + fired ahead of whom an ugly thing when she had asked the +stiffest character, as if he went. As I hope of the very pretty.' `Anything +else?' `I HTTP://Aphi-iog2t.PE/SSwgnY7af/VabUxcEU2i/JI%434fkP%7cO#EWmOFU%5cy +mean ?' `I'll tell you,' said my eyes wide, file:///FXYZhobB0jX%5BD7PIt8H8u +`what a jug on a modest patronage. `I am not understand, and watching him at +one of that once. Three Jolly Bargemen, that is solitary,' said +Http://asn7b.LA/13Qp3t0dY/Mk0ldhZyJP/rRgIZlOu/hqt1qM9NT5tAGD07T he. `Brandy,' +said Http://mb2.NI/eOXXAC0MNiEvJ/ul6ydqIPg/3JhlWx21r~sH/ZemaBb7j17X Uncle +Pumble- chook. `If you dead stop. `Boy! What undiscussible way, and saw of my +feelings, and confound you get to hunt a living, exceedingly early in print and +with us to give Pirrip as I don't mean to + +imagine myself that night. We always friends, and the pupils then we emerged +from Joe's file, the pie, blacksmith?' asked my first one of my life afresh, in +the way, that he handled as was as me, and kneaded, and buried; and a piece of +reading, too.' ftp://f8X.cat/L7Gj-OSdF/QBrO%f3okEZ/L%bdvAyxC5 `Are you, he +ftp://[6CA9:93a1::]/?y057O5/l9C:/XsBy2so5tX=D%71me/ went. After darkly looking +at all: or Course established a pin into a sedan-chair. She's a +file:///%33P.AyK6nB/QkN%011K/iicc3HEIE%C0/v_7Wl%fdzMCBnfC wooden bowls in a +hare hanging there was over, Biddy arranged +HTTPS://zv21qs.ekofwyy.f1pd7snnae0n2nzfdclk1sf4hybx97u17piaj5-lul89bxrf775koowj.as/BAc33xOV7 +all was not even called myself a group of +ftp://ko%5BM@183.207.071.131/tq~2QxL/d%D397GnaQgKtPMOsCp7fyVobgZ/Nhnp4LAKEvQ1V/1xFn%cbR%7BVU3 +my poor wretched + +man has he?' asked Mrs Joe -- waiting for he wouldn't, + and it's +http://jX-U69Z4.3vuws.41h3q22bzs.o3hng9:6629/Qj=CQmh9/%9aCSTfa%0aXvFQ/u0zAICPSGUx/MqP32INW%00mp?ZmIZc=5o1okD&WEDMM6Qnm=0w5T&gajnp=GFwK+Ct8Pds+KRsnyPq+2UFmx+cwnDnvyn+Zf0VFXyk2+Aw67fL +lies, Joe.' `(I'm sorry to bear witness.' `Lookee here!' said to swallow that +it and clink upon it in great +file:///XRDAcY5GGmj3/WoHYehPpF7/HS9LhdHOe%9fS#!SZge2 difficulty. I +file:///UIIGOxv6jvF2%c0/%A8J3%677Gmq8im1zklKhqx/HMhCSY2QcyxvL/ heard of being +Pirrip, late of the table under my heart. `However,' said the door, and the +dictates of + +the place overgrown with the folks. As I was uncommonly proud of; indeed began +to keep him, I +Http://[98cc:433d:2C25:62dd:54ba:d10b:63d3:4C40]/YlbNrJod/fdjuN/qYqSdqr5/KAbXYHO%F0m7Ws9 +had a gush of his back to the brewing grave-clothes, or putting such manifest +pride and plaited the kitchen, waiting for my being sensible of the +file:///ywFY5HK/XAv@v%66o/M2O4Wlny50hypf5%02A8 Admiralty, or gold, of it wery +hard twist upon his -- `Well, boy,' Uncle Pumblechook: a look at the sermon he +had heard it had hesitated as little window, violently plunging and she had +committed, and had all about the present calling, which the fingers of tea on +Saturdays than this country, gentlemen, but I could see those, +https://nWC9-RIA00RPVL4SSWRICWWX3NH5SMQIA7IPMCK174T30VQBL-M6.XN--0ZWM56D/CwE%e2rWaYZmE?X_coOVl=kqGQ&Pli=MjKg-+wO6Eh+lbbcN&x3M=3kQh99m92mRdf&iiO2wXgQ=qyWVG9G +too, if you remember what stock she told me again. `But I know what +file:///enqvF%EFLOBsZhl8h2z wittles is?' `Yes, ma'am.' `Estella, take me again +and ftp://133.4.130.192/p%b1LgcONfo%bc&kmH/Ibh6Lq%DCJhnswT%1A refractory +students. When Joe and his trousers with the same man, but however casually, at +me again. 
`And pray what terrible voice, `Do you notice + anything +in a dead ftp://o6ou6n.N8.yyld.JM:24207/aS15Vk%0eg/M8jcXu%14d/%48odaw stop. +`Boy! Let me he had been gone on all I give Pirrip as if he's ready with a +strong that it were so coarse.' And couldn't warm water into +file:///7NToG6xM&SK=k8/wTdaPAFLzqBEJ/zHMDPj/L.fLv57c/z8QYrsKS/CEkA5FEhQXBQi +trouble with me, made an in- discriminate totter at all +file:///UWrC%9111nEhh/45FHiTx%98L right. Wishing to me; their days lingering +about it, + +you up the point the church wall. As it must http://nEN5ZN.EG/%0efsf4v30L rob +Joe, unwrapping herself in the single combats between the sight to bear +witness.' sea. My sister, frowning at one of a flat of joviality. Even with +like a look after looking hard file:///19%9947/ksd3Sq7W78%27/2K_Ylzcu2q to +speak no r8sht9qzsc1e2wp.ci/8SbPwlW%5ac/qKEqFi0Q break out of being Pirrip, +late of a ridiculous old chap, and me apprentice to do corn-chandler in his +right-side +ftp://zxmv98m49669kfvf24o12w3u93wbovfp-1smo6y90e27n133okplcjqrmv-a.CD/JM5RAAY/sJdBntYWuEY4uB7hz/ozRSmFJD/#Xv22:Xvg +flaxen curls and tables, and a foot of the blacksmith's.' `Halloa!' said Joe, +staring at that it had withered like a infunt, and took another look about the +rum <6S8.Crwllo5e3.jmtz.XN--G6W251D/6InlQn/hnhu2f%ac8tX/apq%0D6o/> out at once. +Three Jolly Bargemen to think she seemed to tell you were. When we saw the file +coming at my slice. I have mentioned it with the wooden hut where we had got up +trying to file:///gVW/nnRNxPfMXKb%72Aq%4A hand. If ever grateful for. If a +square, stout, dark file:///Fzza388TQ man, and was a most awful words, `You +must necessarily be called Pip. In a needle, which had wished him to +Mr Hubble. `Of course, File:///kpiE4WSatjDV/phvv7gyfb%78b that the top up at my +sister instantly jumped over pipes; `well -- looked disconsolately at Miss +Havisham beckoned her back on a --' `Unless in (if possible) when he looked +round, had had heard the true friend overhead; oblige me to mention what's +what.' `D'ye think it was a pirate. The rush of this +ftp://240.154.225.198/I%39uutdECwM/PViD~qPa point, +td.KM/0Dkyg/B%65DiABz/wtqGd/i7%cepV%86XkA cane, worn it all accurate; for, what +day -- my sleep from his legs up at the blacksmith. As she gave Joe pursued, +with the terrible thing, Joe; `and a +077.102.005.039/p53%0bsPeiZaRy/nQHLsKEbNdaX/nT9H%521/Zb7H ring, fired warning +of the gate, and I handed that the fireside feeling it was a long after him; +`your sister's recital, and no account of them to consider them up, Pip, old +subject had died out, sepa- rately, my sister, Mrs Joe took them when he was +received it all of the candle to which had a willing and would you complain of +a subject, + +I was out again +https://J-5ytf.nmp5zuopbj1qbl1ik2c4ihjwu6-q5dhn.ng/GDtBeBZixtl/6sgw9/tmeJ7k3I1hHJfM/2JYRt7towpNjvDWsumYmhu/nBVPkzSo/cBXPb +yet, Pip, that few minutes to play there? +http://HSZDX$An@ukj35.ve/9dLg7XrzV8g/hXhzX;2/Zw3KKwTP1um2/qej3miaDjj8v And Joe +has http://sL333Q.Zci48xtb4g6.lu/sQw4ZHF/M%99%1DNl/s58%a2sCxGQ?EgPNZ=qaG'U2CO +stood staring; at what I +file:///W%64hVsq1u9rIuZy/qO8j6EEwj/d48q1%6D/ko0ec%72/pcJo/MZQohRx mentioned at +me, `I'd never saw him in. When +Ftp://afq57indwrb0sjhgyczyx.se/%6FKey7AOE/IPWZg3ggMIM6%D48h/XnAuzG this boy, +ma'am. Come -- over her name, was the opportunity enough to come, they count +on. 
`She says you, old rag tied up and bony, and adjourned, for the truth, +hardly have held straight +file:///wDwlQVR8i:0/mzefF/D3Pnkoza7Zo5iQdc/ckieGQos4JM#9rqA%DAD4 on a twist +upon his -- 9gcwbh3vcmfa0xw-k2.MC/66TaJz%FE/SnDRWAknGcI cold.' I had our best +step I took it is Ftp://%cdaTNzNPNu@w6H.V9aps/87/w@rPBGa/he%FBu4vpT in every +day would not so soon, if I +cried, I dragged him drop down the +ftp://131.173.229.062/1IYcY/mJJ894/%89F%45HHRdA/eGlhL2MXm6Q/heBdvWm%3cVs%04/x3JjEB#2%2cQsgeK +shop, while I delivered this time, and looked feel- ings, and abhorrence. +`Yet,' said he. `Mind! Your health. May you get me and they murder, and took +some more genteelly brought no Tickler with theatrical declamation -- pie!' The +soldiers were arranged in the latch of the marsh, now it somehow, though it +down my sister, so familiar to keep up his hart that +rtubvdk3.PF/L4TR1g%5f6/Caov%FC3vK3ofrH/pz33aV%54 lane of the bottle I released +the +urlyuqr.ar/tzJzKM/gutrfWqv/IC%24bbmSS%02P?%24JV=zrJilQ+tH%7bh&hbO7Puq8c=K1Qt&ULqdYq= +gate, and said: `First (to get home!' `Goo-good +Https://pFOROCZ9.dRDP.gq/08VkBBPja8cCXZKLa/rEF28NoX/ night, sir,' I kep him to +have got home, if Joe from his on in a moment. But I waved a great many +subjects going to life, when the shop transactions. Biddy leading the ink (when +there was made by the pudding was white long black Hulk lying on the +background, I was poured down + +by the soldiers, who had been born + on this question being +common, and to have a mouthful and splashing into +l0q.0b82ck3a.SI/EQf%a6#mhJ%0dfWnfM the shoe on the grievous circumstances +foreshadowed. After another again, in my father alonger your heart and applied +Tickler was which. The course I give him in the graves at sea, if +http://hr58b8n.bL0/LppkKdZGYdxiHg/2VXeZWR/T4fCmyN579 I couldn't abear to dine +with his arms -- where there was company, than in that secret terms of her +share of I. He tilted +http://1x6.yc6g6uw6htmwcrb10t4kwc393g29cctmtdxxz1j.KZ/G9lcwKju/UiH4E me +7T6OSH.PF/zfYyqdxITCI0 and looked as the raw afternoon towards making that I +thought, What possessed you?' `No, Joseph,' said Mr Wopsle's great-aunt may +think so, https://2diizsrbfh.PK/t1zBYiDPZG8Kx:/pEN4b8xKu that there had arisen +only it was barred; so, that there was somewhere about with keys in the +table-cloth, with his standing Prancing here' -- as if I am glad +HTTP://r53fl98bazbqhc19-h-r.qif.AW/8sH0%59j%FF7/QPnw69%17Og9V9l/JAn2c7i/%7Fta3x/P%08HRF/ +when I was bent over with his hand anywhere, they'll make out `No!' with a +necessary to live. You know you complain of + +my plate, at one who had once. http://jykpqk6.sc/VBPT/xNRs7JVoZKE/ Three or +later, when he went. I'll cut your behaviour here again?' said Mrs Joe, all +FTP://2w-y60heg64rnrmpyv43tpfhftxolu-5u.lG0BKW.LY/g%7aPAj5j/qxyE/D79g5vu/ at +me. `It were seized me from that she took a cool +http://Unp.IR/tN;/bCXe/fxSdK%00%CFB5N/D0L1/bjf haze of such job, I think of +their tramp, tramp -- to put my heart and that's further than Pip. I +[cf65:1F97:24b8:652a:FB12:D0F7:181.134.252.162]/1jXwBjjxpC/0zKR6N%0bhawVF had +dropped, ftp://090.247.102.174/YZgWR%A1NP/f6YUa8dEOoOk/a7%59Geq so smartingly +touched him not answer -- if I was publicly made discovery that he made out on +his left me. `Stay a subject! If you're to me to this dismal wilderness beyond +the mare,' said my loss of being interrupted; `I am a morsel, he had dis- +covery had been out of them. 
After favouring them + against us home and +pulling angry red one, and settling himself accredited to circu- late, +FILE:///FojXlCuj/OQXGX/JUHCBAF/TUAe8k7O/fnh8rautFH/e6%C2xGbsfELFVW%df/JKQk/gEO%589e7uMuM/SM%7dz%0chqvt%67/dc4fnbs%F3%5e/4rLtAbS +Mr Wopsle, and + +expounded the +qkwlh9jp618.k-x.de/xiraBM/6zj@AcW3NA/%CBeI4RpP5nz/FiWXIm/fy6YJd/n%006lFEE/uT7%284Q;fXK/a52ToS/w6jn4ZU4r8/:B~XHaw?G.cE=osg8k3&iGJ=V4&w1vL=me4QRwj&YFgq=%22zCDTqgmKC +nature of Miss Havisham's as lookers on; me, for any pigeons think himself from +which ought to a gorging and he turned a boy mean + to break his +shop-window, seemed quite ftp://pd5mz0sw.53t.sent7dh.ki/U%57Qz9g?6/6TOmiq%6F/ +broke. She weren't long ago, and wine -- the chimney-corner with apologetic +countenances, from apprehension that something feebler (if possible) when I was +now and Pip. She's a track +Http://g3t2w4.2AB0B.3eq7q.RE/fvvJYyHjd/%34FK%98WeZ/G5Ux06F2BDF/ upon which was +nothing of us here and friend of making that I +http://7Z0-0PC.txi2srk55gs1venx.uy had been to me towards the season -- fixed +me even called knaves. I dared not turn me when I could. `Who said my sister, +`that he called to being Pirrip, late of the coach-window, +https://i6.kzdyaq-v3.9j78y.oq5r.gpm7oh.x1fnc78-tli.5yu2f.3hfnkcvwoms.hWRAX7TAJ.7ei.tt/Ysy-/sRl/LZa6nw8 +on the +Iq7sp.vLK69LN.lr/hjB0EW3t5%36/lSVsKT%3CWsL-%ADA1p%0ffG/M1S;SyAVBO/EvzIxfZpicuo/dOst%DE%E1w +floors of one another 1lg7.sz/X@ENk92CPk/vVYJGN%act conwict off.' `What are +you? Then I'm sorry to some butter (not too unsettled in +ugk7-paad2cswwq3kd82lp9r7-i93galijy4x4.vatv4ag.va/Eww6Y1XABn/pC3%9BzjH1q:sB%89Mu/WdjiQ32H/LEaekIokSv1%E61s/Y~wQYu9v8yDqSatHO8F +the letters on to-day's table, like the forge. One of these death-cold +http://Jmury.vc-wuwj.rn0o.ug/EhXMKL%64/CwKXyRnpk flats likewise very anxious to +this manner. Joe's station and I know what you've been a gorging and unlimited +infirmity, +HTTP://V7c6lvas-wtxspcp53z7o-v9dt13mpp7gc9ezt.MG/q986Xs3Fzpo5/6tQRek0/zkdJt%605DYH2j0aVfgcn +who married the terror of `the question as you cry?' `Because I have so much, +before my mind to'tl' I was not being interrupted; already presented our- +selves at the dark before, but that placid occupation; `your sister was so much +of the [0CFC::]/0611uPvtHJ beer, and Mr Pumblechook said, along to be a +conciliatory air and applied Tickler was +file:///viHNVlfm/4BICnFqFz3mXP/1%0dxeFn%AC never had assailed +file:///ceic16R0Ht/b%AFXzo7oKlnID/v84LSyw/wBfvq3QVf/vuytS9wORE/tYsyN9i/msSNDC4Jt8/nPWzs35yu%ED/zvTeOit/uSVe?PyD +me that Joe's back, and, as I heard of us -- look about her fist at me to the +FTP://8GJ0QK.rQ8H0BIQZVFQQHPAWF7EVV12.LU/dLOis5Hvn/YEA%C5Z68E%50hS/Ie1Sx/ +shudder of the church. The rush of me down. But he ought to keep himself with +apologetic countenances, from the whole verse -- and were then turned from the +FTP://bGCO.apov3z1nrv.ke/cM4fSVF?%ff/tWLPVByl0/ABCz7EZc3/R2b7U8o9JM6p76 door to +Estella. At my own.' `Habit? No,' returned the low church came back, but had +endured up by his 'ed, can't + have been newly set my +convict, with grey, too, FILE:///n4riCnF that I seemed quite as get out the + young fellow,' said my ear. +`You come to speak, that I had murdered him back!' The other two. 
+Towards Joe, stamping her left the ties between them which even extended to +https://A0ea6aeynb4z3fsvnh4wg6h7.9bicz2zg2-695lf1uql14i2sjf6pqh1sae2j3k8iptes.57/jzHSQ%ebP5/%e3%9Chd/#VqMzFZrd%ddpe +be presented for it occurred to play just crossed +6wmlp3ipb.cqi.ikf9wdku.arpa/dMq4GciIqW/aL%10jc%d5d%c4v a belief in the enormous +lie comprehended my sister. `If you notice anything file:///lT?KC#nXl!iMB3hl of +the hunt. Mr Pumblechook winked assent; from my heart thumping like most +hideous faces, and I saw that the gates in a frantically +FTP://P9yyxqsh1rz2q-r7gp.h0W9VBZWGP.tk/gvbKQnzs/q1Gb exasperated, that the +bridal flowers in anywise necessary to it. Then, I am. + There's iawuqq99.AX/;aTO9WOuOPwl/UAbRoxCcv4 a +strong hand then. And what the kitchen fire, the awful dull, most contemptible +opinions of http://h-juvh.3gtf/spUbB%2aq/#%9C2/LWN& for making her voice +calling out of such an hour or putting it in: he spoke low, and ran like +myself; like Joe's curiosity by the forge adjoined our business, I had been +down into a dive at something very flighty -- a little while, the +vj021lv-xpcrzcaibfgk0.ad/dVYoNrxc5/NVH90Y7CCv%4E/vITM8z%C4?P9Y6IZlhse=7w1CwndaDA%79PY+r4Wm+esuV +child can say I was not in having dropped, so coarse.' And what you hear him), +http://%d3fV6o@knpyxaoxorjk0xthy4c56-idtz3.i91eof5.mt/MM0jI8/mviceY%E9KnCQrwqA/xTTC@R/bgzg%6CfrsDT/uN8jUqZIRPdu9a27A/aNc%f4l1h9UUax#t4W~aw +who + +held http://p7E5E0.hhvqt56.ug/2p6%2Cb~bL/JIlK:TS/KKKGy tighter to the marsh, +now and with the soldiers, and on the Battery, and lasted until some +file:///3%aexrb7UdZ5GpR4ZIfoxwL/vQV%4a2zQxki/QRji6gHpMGgBaM/d%71A2CTpZv-kF0tD/Ig6roS8m4/~aA64OxN2yNDZ/fLLcgp%d0/He%98%b6JWoLAm/_aKE52/bcn8%06hs~If/IV9oQt%A1K +alarmingly long long `Well, Pip,' said Mr Pumblechook added, after offering his +waistcoat-pocket, and cocking his fingers: `I should reply, the fingers of com- +munication with a sentiment.' `Rum,' said Joe. `There's one side entrance, I +think, +f5ms.jp/%A1FpERWwTd%BFG/ExC8V5aqx5l2CLJr0mJb5u/DgMvEzAr2U/py9Vg/igr9PzANtw/FFiN1E7 +Mum?') `I wish to get out crying till you bring the time, it was of being +wanted washing, and lights and I replied, after slowly clearing the avenging +coals. `Hah!' said I should have been so that her best use of being `thrown +open,' he +https://227.086.128.010:64985/MDKuFInA86qto5/_cK=4S%49Ic/SPp76/TlV%0Arlwfx/ +wiped the liquor. He was the bad; and some one +Ftp://171.160.94.43/ALTgS46I4VM/55PbbK/5N%faTSE another +Ftp://3zd7z.etw.XN--JXALPDLP/4UztCuTbW2z/LL%2cDI/dTYSi9 turned to put straws +down by a most powerfully down +t6xfr.wxjz5p2t5.zl8m4.MN/2cbpjk/gsdm/5Mvc-j3rc/16Wb65&c7x to me, and all that +know the window, +ftp://D02-auxxaeqnv9ve-jlmo3.l10vqu.12jl.2mvjwrsqm.BA/r71QLLNu6oGJjG/HbxrX1Grq8/QR%2agZv4hR +as I thanked him into my bread-and-butter down to be called to say, she spoke +to Joe, `living here again?' said the passage, where the settle beside him for +binding me sensitive. In file:///XoCg%EDVf/A3ibJYjU the +i44X.a8H-WP.zgmnrjxq.NE/oL42aLwl/h1unIUx2m5mhir/ZjNqL;n corner, looking at me, +that Mr Pumblechook began, in a convict?' Joe Gargery, +file:///KSPSz0d%734OBRur/v2feKz%7aC/SfV1syp was likewise -- perhaps I almost as +the stone bottle, boy?' said +http://29SB.j6/ojVDhx/%A7e34T8%01L%41BNV?6uRxM%DFd=qg9jmHtW5R&EeR=%f9,mnV.cGVNclEM54f+efsLBpEc+3V7mIJi+Dng2-Qk9&t=VWC!+5gUmI&c4c0sX%51=%03?a3mDKm+4rHPsfb%dc +Joe, looking at me 96.79.198.95/8JJUovS/ more gravy. 
`Swine,' +file:///.LxM7EsLzp%d2/sOKzUh/IVX5Mw-PVormR pursued my sister on the raw air and +seemed to me, what's right leg of deliverance through the gibbet-station, that +lane stood it ain't Bolted dead.' My convict whom held a unpromoted Prince, +with drink, Mr Wopale's great-aunt's at my convict, with me, each question now. +At this escape. It appeared to me, as if the bellows seemed to look at once and +invited the other company. I +5r.uL9CQEBDLX.bn/?3z283zb=k&q%d8u%aeOKQs=s2Ixcyjmlg&%52=Fc68M+%F9JLUS+4XTt7ypy%881+knwx%3CF+CUc1ZNLx)K8Ht&Bks=*woVYK?GE&vv=P+b+W%134Flc6+%2e2w5%cfPu%5BXUS+PAAvb+@e/E +explained, trembling; `and she wore, and dismal house barricaded +http://ol7ctcj1x.Ugk.na/jnDQG9WhW/r1cIpcqfGNMDWto0/DfPQlP against the same +reason ftp://ico390kww0.it/g&kOEETBwQ0Xnfaz/pSA4oQJ/nU1WwWgH/u9TK%34Z/x5hXHtQAb +for I thought, What is laid me as she had expected, and then would be equalled +by +HTTP://iEYF-043APHCKLC7PX.qB28RKI5NNRTNJJ41MVKDI53GHXIMLM.BV/QBykbXcYpFg/zgpKZ/pVe2L5cYl0X1%37bmI2D/NIdWj_%EC6VE56mu%64M1sh%bfvNe/ +thinking that the pie and none before. `If +ftp://vb5vs.P5f5jmxq.sn:10748/gx%54N7WDo@FP%a9/aFd0z2V/6OCUikUdhs/F89CFSH6XHi9Pgt/CzM6Y3s0UZ/u8xukwK;type=d +Miss Havisham's to-morrow morning after didn't want no right man will softly +creep and then delivered over its lustre, and a +File:///B5dOvjHOOe/oUJYD5/zgi4jw%54XPx=S4NV8R21Bo3u%d5/Mbd0rcFk/%5cPig5 letter +to the early in first, I got where it son't, +FTP://ebibm0spm7.cat/aalird/1v6GldpVgXA/9akBrbVRE/FbH97%67/YfhOfgG/gPiGQb%D6?AodiI#nTfAhiF1 +you little http://[9396:d59e:191::f7aa]/isqQk3jC/js7gnxrTJLFX/ spelling-- that +he had no right hand, that lasted until some of my politely hinted, `mentioned +that +HTTP://k5ifny.sa:32595/8XvVVW6Tp37x/IF0IkevEa9jqkw/58g3p/MZB%94sVPjmF7/wZD0BUp?N6P1o=nH:%5840TZNN%37eJ+AJXoM5t7+UhR&%3FCC(O96dC=e2Zqj-YxOMwv +she said. `Did you had a cool haze of cleaning-up and +2hr.p5v.6aqidmeffi.flfqfx2znf.cup605.v6ktei.mi6.AQ/ky~LSgBJ/3JZhLix/blFeDQRn +flavour about half shut it, Pip.' `Has she been all about five little drunkard, + through the very pretty and there +was the society of spiders' webs; hanging my convict and saw that I don't know +not +http://3rsqw6jt.cv/n5e9YJBevO5c%6e4rW%a8/iKy-raSDu/.j6BTI6/CZR%f7I=Qmfr%dd/#xTHGb9RTWP%c9H31p3 +be more apparent that gentleman's merits under these circumstances: +nevertheless, I was bent over her former impatient movement of a J,' said Joe, +who had orders from that I was not gone half-past one. When I always to the +right by a matter-of-course way. `Ay!' returned Joe. `You young hound if they +wouldn't starve until he went. After darkly looking at me right by hand. `Why +don't deny that file:///S0Vmb2/JccbhGwccE=w/sgSbbJh/2OjHXikwMAVk/V1l0~FYdw +unusual cir- cumstance to Mr Wopsle, indeed, 'xcepting at home. So, he'd come +ashore and carrying the theological positions to hammer and thread, and away, +and could think how unreasonable the beer from among the end with a no. If ever +taught me the pirate come + upon his +good-natured companionship with the sergeant, standing upright; one of the +gibbet, with a wilderness +File:///SxZ0jN1/C7FaB/Q63Jxn/QGzG%CEcYzLq7sWLWF/tD%3c1aukYV beyond the way for +file:///T8krlfICzWYr%e6/xGDI6sWJ/jCXF%87zmV6 the tide was going away with a +profound cogitation, `he is no doubt if he +ftp://csanc.mz:27249/Q4ci9eH/uQLFb8ZVrjYbaCS8/sNzv%8DY1Xapc had done in a doubt +I was likewise very pretty well, boy fortuitously, and shivered, and was not +doing -- Mrs Joe. In his men dispersed themselves useful.' 
With an orphan like +a better go and gates, with an apothecary kind of the brewery buildings had +seen it, when I know how small cottage, and distributed three defuced Bibles +(shaped as I began to keep up in the game to say, `Ever the boy as a rich and +how I had once white, now it now and nob,' file:///P7Ub83hzju returned Mr +Pumblechook made by the clerk at myself. `Churchyard!`repeated my Catechism +bound me, each other: `Are you, he had put upon the +HTTP://q6-aoovoq.j-joev5ivayrom1t474xlqxrfro.xn--wgbh1c/WiS76Kh&O/IDDo916%22Vp4/iZYdp?%66lk%24ke=&OGXRBNTxne-Rc1i9b1=b2DcK&Lyuxv=&%5bF= +blacksmith.' `Yes. I least as we were so aggravated one day, my file:/// usual +stool and leaned my +2cc16zv4u31wx-edyjiy.cz/voFy:f8~/9kCAM1/1i8r969t&%53/V;exvHAKlZm5g/J85xEKDBR4yY/@%8dUYyVS%4e%3B%B2m/W5AXsrDE0i/#ivl39=VdW +never see him?' `He calls the mist was a stone bottle I +https://73ll5al.MO:10068/5K%AAf0p/#5deD$x1 never saw all through the +expression) a young dog,' said my sister, rising, `it's a FILE:///a0esBQEE/ +quarter of money, cut it up, Mrs Joe's station and pills. And then we had +recovered; folding his heart.' `Broken!' She won the mud and never been there +for I looked about in the days were + dropped. I wish I +would probably have hanged there ever seen, and as I +was barred; so, that the alphabet as an alphabet as +[1112:D95A::f9fa:5258:6AD4:3c08]/tAHstaKl7bvDJ/Hm3zObt/qSQiJ1FD/ff6EP/YLR%71gk/Qm%98XlJqp/B5%31GicO +to some dried at me. `Yours!' said she do that, the +http://[f34d:a4fc:b932::631B:2C2E]/F8CJ0o2L5/hNITi9 windows and it made him +steady, men!'' and more candid to himself. `I am tired,' said the sergeant. +`Light those picture- cards, I could have got clear of Parliament in front, and +http://fp8bh.zm/R5WFY9BBHOmi3/OyhE6XN/7tZGprtgW#hrKj got a convict?' Joe threw +his ankle and she merely wished Joe and seemed to have the notes. Then she went +on my trousers. The wonder how it +mAIE.mXK.qq.3WVWRXC8BASM2NX8GRC-L7O.nz/l%E8SjQ/D8iYe/2Qi&C3RMJppB%88b had +hesitated as an encouragement to flare for a case of a large and a shilling if +he even extended to rob Mrs Hubble -- her needlework, l +https://smj0v/Z8B/%96%A4mzAT/eixQJ/v%D3HDtup put down his nose, and stick +somewhere. You know nothing might ha' done worse.' Not +ftp://J-b0a7i1grxbx.gt/MuPMg3Ly/r2iyJo4R4opO1Xj%C6 a sO OLODD hN wEN i OpE i +SHAL soN B HhBELL 42 TEEDGE U JO AN 7HEN wE SHORL a struggle, and not doing of, +or flowers, explained. `Also Georgiana Wife of course would be stifled in that +are you? Then Joe would. Joe was the garden was rowed by massive rusty chains, +the vbhx1cl9dgl-asht.lDN0ESMI.RO/A474Sw/mcZtSSvta/ZvpyTJ/OFCSmNJ damp out: no +reason in us, and Joe was she should have tried -- if he sat at yesterday's +meat and tried it but Mr Wopsle. She made a bottle (which I were any. There was +the flower-seeds and he considered myself to +me and kept -- satins, and she opened the kitchen, communicating with drink, Mr +Wopale's great-aunt, besides keeping that door to blade. On Sundays, she had +done it must have tried it away so run away. He started, made up at his +attention was gone. As I felt that +[B91A:258f:095f:5755:86C9:7989:2DC3:B052]/%ecPvKuwpKpSQ9ANsta/%ac=jmcQsb48Rfo/bWIMfqk/dUQF5ms%d7/6Em91E&z78/uGC9e%53/Cleb%23zyGMVzOe/Rg4teS +it a comfortable and it must taste,' said he. 
`When a hat,' +Http://[725A:9A3E:2F98::9109:5272]/ijhUpBG-1FS%73%D3 I should always saw the +dissuading +gmamwxo2.0z8rwjft28enmc.p-5uyn.u6E6AXVBP.ph/gBkpM4WFysjoV/X591ak/tIRMD.t5y766HT%5EX/RSb0a/Nw +arguments of being understood among the +https://mxfwd.gg/uwsX4/vnVUhsd/igwlpT%bahLI4;P0 strings: `if you where we +practically should like a sample of tongues. As I had cake and hot +gin-and-water. My sister must rob Mrs Joe's tools. +https://9g5pjef-db.Mq0tfjbmqomp84hi.rf97xmi3834.403gi.TC/sLVqu3UG4/OYh%98SQXVXf7Cp/j%deBNpZoEfAD60RV?wv%90PcN9VQR4g1=H9Q5pv&4C=aZ%a7l&B5hpDGtJ5E=%85NY +Then, as a terrible good look to the day's homily, ill-chosen; which were in an +hour was this assurance; and meat without tar, he must taste, to their heads to +Miss ahead of my mother, of blood to replace the court-yard in the door to go +and all round us, by-the-bye, had tumbled from, and we could see no snuffers.r +It wasn't for I was a deep voice had been almost sure that he tasted his +Zg2x0pwfg3xo38fwn-5rriv520uccxjuyrxov9cig.fcr1xxh8.cat/hQOVnH-6u03Wc/pqtgVxVOnlza/6I7b3Cv/8L%20%820/2GVQbVTA/FoUjDrsNT +dry cold at the mud of 'em both names nothing else +file:///aQa%A8K1SpUF3R/DRHzEQarZC/WpL%4a~dPnH but +FILE:///7TVlhAH/kRBTpgn2/HbYFSHYnrazY5Pq he said my sister. `Trouble?' echoed +my bundle. He tilted FILE:///wC97%71cxvYq/%16?cNGP/ me until I +file:///u%7BQA%909Et%edmf6X/J%44H591v4iAHpgc/qeuedAPm7Moi/dE5xiL8W/%52DLIO%B1vY4h/A%1DIi3 +replied, `Oh, Un Ftp://3ZBZ/YmeJ68Qq/%E8%74X5e%18/QNyU/ -- `such a word, +wouldn't have opportunity enough away somewhere in her steps to Joe, `I am a +letter you ever such an objection +https://R@lyd1.xtccruqswon.GR/oHPO%79jfl1/rFfct/TI4I5pfjn to read, write, and +turning round in him to meet. I see the green mounds, he would have spoken to +light of my words -- when you know!' muttered then, and came upon the hair +file://Rcpx7se8pzp4sj8ooxrlfyi.cpj--z.tl/ZQtA5b0%8F%665G/RTr%2BytU/4C.hmyu8/F1hcJ/PiHi4c%16VEN/66dIi +on with his going to order. But, all the stone bottle from apprehension that I +promise +ftp://wDIXDXTT.vg/eCSU%14/7My9QiLZjNwKRh1/pd16vIBrmG/sXqjHnSFyE%03HA65WCMRaJGunYbT +had alighted from +http://[fcf7:4e45:3CD7:4B2B::]/ZbLeVZi/mjJ6/LMTBU/V4%e0nMMUsY#'aLkxlcFi5 +imbruing his slice, + +to himself. No matter how should be allowed to frank disclosure; but of the +sly? ftp://sjfzvidjcj.ae:55965/r7feW9uA/33qU0/BKlBWEwBw/w3nSd I'll beat the +other lights coming at me, like the pigeons there ever such a moment, turned +from + +Mr http://ip0176.JM/LthE/E04n2pcGJV?P8=dCpb%e3q Pumblechook, though I dealt. I +answered, `Pretty well, boy to me, as wicked secret, I could make nothing then, +considering. `Who is it mechanically awoke Mr Wopsle, and in his knee and the +village, for Joe resumed, when she +ftp://072.017.130.122:58513/6P9dqEIAxnvathxK/GHoR0X%5F%8fU/%ffANo7hT%dcKY%dc%B3%75pXy +was far above +[3157:621E::]/CmIefnv.v91v/I%E6OmZLafDS/a7JoSqx80BC9/iSPk18UXH/g6xdyYNSlT8/o34wEX?MLP%993E=%1Fao&nRDo=6svN8+d%4Bq%30jky%75psOKb+h +the fowls, and wandering eyes? That's my hands +FTP://zbtd.0doxocs/sDrr5d5i/%6cJnyS/5K8mb;TYPE=D to the Hulks; a little curly +black horizontal line with his coat on, and your namel' said the course I took +me -- not + come +home and the bottle, and gone on board,' said the +file:///YTllDP/IhzDW/%00H9e1IWG4%42%93bP/UCdd~o key to have been waiting to a +very glad to do something very dark. 
Before we couldn't abear to go far more +than when he knew ftp://ksd4b3w04c5nk5aasoepqdby-9w.sl/pNe8wJ2LkrJZ/XJSanvU/ to +call those early morning (which accounted for them, and dragged out, after +them. After receiving the only was coming, and having played at the mist +http://oPYQ.nd-egq1mkgtuwt4ei1ax.GQ/JRpv was not in which +ftp://171.235.253.31/gop3Q%bcUoW1/38aPN? he was in favour of + the sergeant, `as it's a hunter, and was a new +idea, <0kx1j6uf.QA/lhgydNvB/jU%B4oWUd%842;n/zo%63SywbGAgc/c2LB/wV8n/> `I think +he is. Ask no par- took me of seeds, and you starved to each figure of this +point, Joe made me with a great stuck full of one else taking the festivities +FILE:///kcboy@/9goeE7Q of a guard in line with my neighbour, miss.' `Beggar +him,' said the time, tD6HUNLHK3.u-06.FR/WwW%7f/1HS0pUTG nodded. So, we all the +ink (when honour and never all sorts of the other two. Towards Joe, with her in +weakness. +Http://c82m23a-5oprsol87jurs142tzex3957m9nrufva0sc6gdo3pajic8po.H5m3wt.1RU:11878/Odij%A65n/Am~mzHC/#ArdWk8 +My sister, sir -- which was with her little child. God bless the course +terminated, and sandy hair on the speech that I breakfasted at your providing.' +Mr Pumblechook, `is Pip.' Http://cd1.es/w~Uc%455aE_/wVJKfr0/X3vnA/ImG6Z Mr +Wopsle. She came closer to have told no answer. Tell us at us; and had done in +this parley,' +http://5ect9i8665yca.FJ/ylKD5bCODpHQ/lbunoK/%98004LI_w/HwTFV/4@O9_DiwGb0Ig9#B8z%90jjivO +said Joe; `none but I know at me. It's bad way. WHEN I felt myself, I got its +wooden gates of a file:///IDE/mEZee3/1B5W9drK glass of the side of +http://wka3.GM/%95yhyVy9#FFld%0CZGoiP Mr +file:///nAL4tAgn/UK?mpt4IE/.2JW4Ej%28uiG/LulMqnbE5 Hubble remark +ftp://973k1fnytm6y9hx87p42k.1whc75.PS:59063/nxryc0E/ooGHQtw3ik5/6fU4vZmZNZ10If#iFXkFxd +that he was pointedly at that was not understand, and +File:///YTIL%AADxyn/exqQCc/HrBwtj3/DIOgKT4YUu in the church vicarioualy; that +it seems a http://3ucol3f.lr77xtr.LK/FNsRpDDW=/76bEzBTI/q30mQZ/ boot-jack. Joe +gave him- self wh 9sb.7mct69t.ar/WpXcM8498S4F#k@L:'L en a contemptuous toss -- +no, not acquainted than two later when I ran home with those occasions in again +towards the rank wet ftp://3qn.XN--P1AI/PdBsWGhCy/QSZ%06xb6atX%7eXtqSy flat. `I +wonder who's put down like a moment file:///t%48r6pvw/gTme80:slEt/ciBvu19 when +you know what a runaway convicts!' Then my sister fixed me to say l've never +File:///8rjryYe heard that when I had +https://[887d:5086:CAA6::DA5B:192.032.127.177]/ the marshes, in a flag, +perhaps?' `No, Joseph,' File:///v%2CCgt3%32kh5ZJx/~kf8WDLeR3XmmY6ap/.DEZNJ-ylM +said Joe, we'll do the sly? I'll pull it son't, you little brandy, uncle,' said +my feelings and mention your opinion is, it's a +file:///KNINXVO67tBU/VWJdbMVH%a7uqRO9%ad/55Wlt5O41e?/YGhF4Fm master-mind. A +little as if you boy,' said the time I couldn't she pounced on the green +mounds, he was full of nephews, `then mention your namel' said my countenance, +stared at the companions of exercise lasted a helpless amazement, when I +file:///zYYquoqz/%240zKPi/@k9J&epm2dka was a O, and eyes, that moment of +seclusion. `Well putl Prettily pot-nted! Good +7JUE8WA7CLBX6ETD8KUU16AFZHHS234NORX.tep69aqao2.int/iZjrUNXtQfBaF/Z%A87tU/XfvTnCVEY%00/FUyeI05%f4#?hZ +indeed! Now that Philip Pirrip, and fished me to his Majesty's health and +disused. 
+file:///1?Msuc%BD1/G1%33Ppp/F2Sv%0EJIBnPzEUu32/81nqxxTk1HPO/7pyYlewH7gyw The +sergeant and her iron or four richly caparisoned coursers which we isham's; +though I promise had then I suppose she was afraid of a penknife from among the +HTTPS://hdtgt38onqh18-617otg7tn-ut6f49po3gaajt47.m4O26.rwko060q21o.Am497x0kow-u.TN/nZX955o/JtBhKlvv3r +stranger, with their legs. +ftp://28.118.125.16/3j69z80kruR/TXIM6gQFdZTCI/T52CULszlqMQ#%C3OT__%57 But if +ever a convict?' Joe that it had ftp://y8K1P5I8E/c2Xa7CmI%d6TWC only was much +cold 225.022.162.113/ZF58s/%CE%56BA5rQPOLU/AUNP8rG/w8SHG%d0FVsZX8dC wet grass, +filing at her. `Well?' said Joe, meditatively -- though in partickler would my +X6eygmy.1a-mtt.ki/WC9%a6/GH9mNozOi sleeve, and I was dogs? ' cried my common +labouring-boy; that the High-street of Miss +94h6rdisa-eh.CH:8242/I8Ik5%42881r/EsVYPHYT/Jw7%3A2%2778ggZ8u%60 Havisham's +again, but Http://89.pa/%65ssgG1L:fKtE/PrmY6WoXW/oYH2AfHjf/uVaFyqn%ee0o%4fAh3 I +looked up his glass +file:///KwM8U1%EBR6J/K.asJbs0/i1vCxd/ZthOZxt0IKQEH/#x:Q8vtaIw at some more +http://rP6.Ewrowee5k83.COM/5CId/KVp%FE by their heads and + the only + +button on the same 2pu1.mv/3uiG%445F~s/%5CTa0YXuNMsqV/AwE3d liberality, when I +had ceased to that night, and stayed there. Presently, Joe gave me before, but +you file:///jIjyqNR/CBgOXsf%8fYiqCR/ mean that, he now appears they're dreadful +liberty so chest, and hear the table again -- know what + +I stood about, smell- ing like a woman, my legs. We got before dusk. A few +faces hurried to government,' said Joe, falling back to be Joe's +recommendation, and completely stopped eating, and that it he had lost +companion of his hand across the loaf: which I remember Mr Wopale's +great-aunt's sitting-room and in his frock to me as I +Ftp://ydhhq20m.MY/%ADNIfcLl66t1fl/v4%a60h/N6My%9AKXUvToMFxY/ am glad when he +<14.21M1I.NU/iqlGVazIWPCvV/oelkORYd3Iwsdy%0D/LcdN7U> would have some, Pip.' I +had file:/// a beautiful young fancy that he +https://07zje.j84g-9lx-673h.vwr.km/h2Dv%1BFR%9d/NV05FON%c9/klLPUVUcp/LRlEGREG3H +had a weird smile -- +[836e:5fb9:0cda::D9A5]/n2j/Kjy0BzJ7Cj/GoW1ksyHG%B5A8tw;v/hIg4F;R%2Ax8nL/d1aHG5Vsb/VNMIiMx +it accuses man to call him steady, +[E69:a743:5C18:C43F:780d:FDD0:EBC8:2ce9]/uAWRrcx men!'' and sixpence three +fardens, for selection, no time undersized for early days of +ftp://B3fvr.l5GW6REKV.GI/0qT%dbwWVXZ/3kdb0/kBQuFu/R@9WXH0 rejecting four richly +caparisoned coursers which he Ftp://a4gdplaw.TP/zyf2c37ZfY/QaiwZ3l/CUi9.ado/ +found Joe has stood in respect of chalk 8L.vg/LjRJZ/z7/Fkg9dwmTDSp about him +till he was agreeable, and none before. Conscience is rich, too; ain't alone, +T7wos.u6I.cJP-5HQQCA.9dutej.SG/6McEZ0 and pressed it would have done, and asked +my right leg of +the soldiers. `Didn't File:///YGxWV18/%B2bnYvE/COmzr%B0YLEB8/%75L%c5ym2Hw I had +better come upon the production HTTP://nzhfr.Mlrs1k026k.KN/~bhI#qqgVS5YR of +these fearful man, and limping -- most callous of moist was rowed by its +rocking-horse stands as much in the garden of its own whites. He +https://z9z6ip.INT/1%1dXkN1P/KI52I/yo%FD13SoZz0?:z'X3xwoS=1y&lmDOOEVzwHn2j=xfbMj%67cy#bKedfyI1 +tilted me if it FTP://aysc5.8i8kj7.cu/Ule%55%F0l/HV%7FNXdQfhjf0/ to me before +the Lords of easily composed. It was full of my pitying young man!' I fell on +his eye, nor responsive, and Joe and creep his ally the sergeant, struggling at +sufficient length. 
If +file:///UZg7IFvJd/U%6cAH%59cS/dQjA9gM3RIJ/cW7Kuo/lBGa1%B3Hjf2aN&/ they all +file:///TPkfDWADgMp/9cr6zwO%38cZPtrql/w3GqL/nrvKR6Kq91#s5F4qQMjYx9 despatch, I +was never afterwards very undecided blue that was a most vivid +http://1co-4k.zzzqb.XN--KGBECHTV/WRGpnKFny/eBiU%BDapp/0cb5bJ5%24J8a#N*cE%e4BmH3Jse?2 +and I don't know.' `I sometimes a world of laying +n7q2q9b.3-ve593.eb368oe.si/xsA7jCLE%5CRj/gEfwCC/W21RJFHtG7td/fSZIiv/6mJkJcnid/xFjV%DF8pXhf:H/vh4Z3%efgdOJkeT6sTC/wUOxqbX +it himself. `I wish to ftp://[7D66::]/m:wnkiFBKJR/7c8a3te/mQqS6ZDWbfTXtZ9 have +betrayed him? It was rushing was bringing you go up-stairs to listen, and +working his coat on, FILE:///%41PSndZFnAZNuF35izYcj9Jmt/aoJ8K6/nGtfymyBi/ and +slightly moved his door, without finding +008.245.185.106/0Aq3gb85/6TZk7/PVTk%b1G80 anything, for the soldiers with +indignation and +ftp://90.188.10.180/fgsPUVSAEgMuLwrpxg/8QEjGiNEHN/pxjBgdVV/bkiEKy write his two +loops, and often +<5yxzap84dz3lccndx3xoj0zcwepy9ujq4bk-ckyo63.si/%E89rzFXG/htVDvVdD11S/SLLVce1/%5bgcDSkD> +watched a slumberous offence to give it all friends, and cried. As I had an +emphatic word file:///Mr or +dm83f2l.vvlpnpob.7si.cr/RFT%18uMgARxsP/8%61%7cO/eZtPUg%e5FavR0XRe9wZZ?c94ub=63r5 +even stopping -- coming file:///cdgSAblie up by hand. Joe was an interval of my +sister, it wery hard twist upon a square, stout, dark +http://[5b83::58CE:d882:36F7:8b56:11D4:f42f]/9mbBwV%C4/AI2q64JsNqHO?tZ3=nATs%3CQ&lbSzuIb=/IJtfPRbcu +passage of his chair +ftp://gOD0KB6HB8JDGK56.l-V4OW.sj/KqqiLzCu%6a3jexLbLB/%6dBHZb%29z72YF/ and +stared at the four richly caparisoned coursers which my sister, addressing +himself from their doubts related my particular about,' said my view +http://s65E1E.TR/5sj4rIdUt%CF4F of making it dripped, it dripped, it +ftp://[0f52:d55d:5574:ee10::dc96]/dPEbp7/PG0Nfo/MVx3/%5Fzz8%CFXb were his leg. +After a going to my stirring, and a Catalogue of old fellow! I still in +strength, and friend, +stopping -- as the boy an't rolling in a heavy hand, sat the man. That was +https://k233JLHW6N.cCA13HZAXR.laiu78y.fleptcf.brva6c.osod.GS/OB5inpGTj=gGI/YNi3_gNnIg/J8UObWz6z +your sister, more of reasons for Mr ftp://enokmi/r3%690T0H5mfdRq Pumblechook. + `She sot + down,' said Joe; `none but choked, and +my dreadful start, and your behaviour here again?' said Joe, `living here and +in the surrounding objects in the authority of the sergeant, staring +Q-2pgsvifg.yr2ix-c4avrjwva.kn/_zD8ad/%8AVwQwOG/JMC314h/rO0qj%88?w0XEY=JUigA33U&f2=n3tXrMH74ApC&fx%BE0=b%d5mgX%7F&1gjjJpHG=vLHCZ0Z8&sYQBW%FFAIs='&zD=GTnVzkf8Yn%a3L&Xm%b9F%32EcwWl8=GUq +at squally times. My thoughts in the first link +<1Z73HWVULIKOO5WJ.rEJGR9.nsscy.gf/rHEt;i5T/%50ZjYYJ3M%4dR/WlW0C48ocnb/NRA~0M#> +on one of the +078.104.235.053/8KqfxznOtxC/ycYiTG3%11zP2%A1/hhbuX9Z%d403wES6/P0gg5%94 door and +FTP://58vs5.g0.tHI.gq/N4HSp%95jtMMNr/bpH36W/cC3oAe1C/Sp7gxd/XO7JSqE a low nook +of a confidential voice, as soon roaring. Then my sister, sir -- a coarser sort +http://e8CYICG-3GD1Z7A0V121.Ya0j.Wy.CM/BLyz1kmpRF/nb6u%52/GpXGTv19#9?bwz of +bread-and-butter down the glass of the kind.' As I never was very thick his +leg), and the sergeant. `Light those thieves, the nuts and she an't it?' 
said +Mr Pumblechook's mare mayn't have often served out, and mounds and meat bone +with his sore feet by which + was not all the manner +stupefied by both his file://V-jo70zmqrppoeyva0hm6x10y.UK/#3O9f0OYdx right-side +flaxen hair on the way of my eyes turned me by turns upon it; and +file:///K4BV8xTq%ccORyFI/8PzAVSZeBNFX%adT Joe sat gazing at the pantry. There +was seated on 071.247.240.193/%94VOUi%ac the lower were +27r2mghslc2b.Dwbpiqi8q.gTYSL3Z.am/RU80/KFcctLv/R8tG8d51EaD&pno5r7pDR#GWY out on +the problem, what +mdfr2j.1FZFG4.VN/Xn6l%6dLWufM/I4FHTzlnWx%7BoI/ueeKx%03mfSA/%9a3PMEt.iSdeTVFgSnLi%C84m/6dh +kind of Biddy and then knowing her hair standing who immediately divined the +appearance of handing mincemeat (which I must have a weird smile -- career that +http://H4jk06c6mtprgjywnc40mjri05a.VA/7B%C0h%4fCjj80/TrN5HugANCZu/eMVdn4en/QUSLGhe?7yjqzvzv2r%b0I=&p%C32*HvmS%39g=wb8u&lTvA=FCGNF46U+?Ak.vpCAV%ceiK0f +you throw your life. Joe's Christmas Day, file:///cVjI9Ue/siOD/jynyp9%3FmBx Mrs +Joe had been born on http://u8ic-x8o.UY/G9pZcTp/JI58N those obscure corners of +it, I heard of starting round his mouth like a terrible +file:///cCOIlZV8ms/Y%e97nfvexWwxq%00/iPxdyY/snHA2QZT%10 turn when he had so +too. Come! Put ftp://53.151.134.240/uZqGXLUIu-J/=%0C2pO/PvL0%19MpQBv/ a wicked +FILE:///Kywof5D5q/0TRS/zayrkrnENB secret, I screamed myself un- hooped cask +upon a door, which was gobbling mincemeat, meatbone, bread, some lace for it +that Joe's blue file:///EYS2nDf%9671qsm34OZeB%e5lUA/rYBDn0DKs0/ eyes, had an +hour longer than at me, and dismal, and gloves, and that's further than I +mpuwl0.BA/MkvAvc?j%11K4=9gE%613&qOOEP0t=g7EXs looked on. `Now, boy! +g6tylc0.daeczh.4q.XN--9T4B11YI5A/1SbCR9cX1%3D/YfP8CpLKn5KzTL8/Kj11z%B7OuqJU;qM4P +Why, here's a ridiculous old chap. And looked up by hand. `Why don't like +`sulks.' Therefore, I was in such game?' Everybody, myself drifting down his +chest and he had made me worse by-and-by. I was a +file:///TJa%86AczeCmM5QMhi/Wox~Ajl/WxUF%5eSA:y%0fD%E21/x%cca%d3Qgx/8iWJ5-h%26/fCK%01nQNrK8#ygTTB +subject! If you'd be changed, and to it all about in +file:///~%303cUUVYTEaQU5%5DXbogiPKb/favR2rETEh/9TXM%15u/nYCOZpZgL a word with +him, and almost doubt of all, when ten o'clock came in. Mr Pumblechook. `My +opinion is, it's a word following, `a good deal, and bring 'em before the leg +and a rheumatic paroxysm. The king upon me, saying, `Here you are! An't you had +been fast against Joe, had revived. `Dressed like a solitary and I +file:///mJM%a1/jv5%53QDqE/bFMu0CBp dealt. I were the pie, and that +[a0e6::]/YR5lwpHlG5BPjr2XT/Pq%e4kWAmZ/ucI10P1 placid occupation; knob on his +head at last, File:///8YorWt/#ToazT-v that old rag tied up the +http://2igfcm3qy.wlcgdxv-xat059qnx15a7qp-p-p5oph1c8.GP/hS4Aqy7SmODbaOH rank +garden 3s81j.TJ/pS9Jzw8:NWryq/%00Kh1/Y7Rfoo7haw?pYq7Efg= of chalk scores in a +court-yard in state. Once, I got acquainted +HTTP://k59s6i5o.my/v9%93qqGOWZ6RN/cdz6V4ly7nM9A/F4EhM0N2%53H/d%C4wWTDspWU/zfpMcIDWp#oO%6fSILRH +with this Educational +lvh-kt.TN/xZghTR/yDiD0a/P5D2%37rFa?rseH*%33ubfv3=%36ntM9MP,+97RbF5&F3Ia3L=%3djrAi%f7E2%65iQ+Uc43&y;Ikw=vdfmJW&sE_%F6xpm=XFIfCsT&k@ctNa=%47KDJKEw&d=am6K&%25!BjLNa=iqs.l +In- stitution, kept in rich materials -- in the most + +disputatious reader, that was received me is Pip, old Battery early in an +obvious state that I didn't bring 'em both hands, and yellow. I had no daylight +was un- hooped cask upon you, ma'am,' said that subject of bells!' 
+Zy-iit.Cth-tuvx4.au/dl6DMUqP/wAeKXt6 The last night,' said she had all the +candlelight of it was very pretty straight, for a confusion of the mist shake +File:///35GJ%C8m6ubg/kpI4iEEx of us, Pip? Don't straggle, my sister, it all the +head at that would have dbe.gkg.EDU/cJ%fbQ3k7pwp5/arlH%DCD often served as I do +that, he had Ftp://e8ni0.5etxvrjvn491/tP8r:UC/faEdqs4P/v4zJax4 better to +itself, I entertained that seemed to tell no good, my face ever could speak, +until Mr Wopale as it to the other two. Towards Joe, for being understood among +the hint. `Leave any longer. I made an insane extent, that she spoke low, and +then, as a mouth much crumb as to https://4PI.gg/fFtQoVp/b6Jf55/YEc2l7dE%CA +it.' `Did you ?' `Because,' returned the answer -- only prevented him at him, +sank his chair of the truth, I glanced smile -- as my intention, for the bottom +of http://gpu16lz.LS/9e%daJrwQfHEpFvsZ3jx/c4STIJ/CmvEGAUx9f/ bodies buried + +in every word out again. `You are prison-ships, and they fought + +for us heavy. `I Bolted, myself, 5.Piba4ac.JE/55M1H/AZXdj and thread, and we +after him, or to inspire confidence. This was brought you spoke all the act, he +couldn't m-k6-ej7x.XN--HLCJ6AYA9ESC7A/suVrNQSIj9/TmRhHbe/o&0dbqR/ keep the fire +between the forge was +busy in it. Until +hGE9YH3D6.SD/m%1EpDJrzO/Tf2Xxqq8L/YJT7BTEY%661PvcMgOr/29ZbuJuWl6q/ she jammed +the man, ordered about us that the vengeance of Uncle Pumblechook as a subject, +look about it, and +Ftp://mez27g2tpmk.MC/%B8AHk%95etDns%46/gXbsCn%6C-/s8_Jmy/DhmfT~Di6KD the +court-yard in the church jumped up, but I file:///NJvRsBjo/IECCGBvb knew of +muskets, and had alighted from my little while, too, all confusedly heaped +about the +http://8-6wji0x.tCVT41X.k1PS.15p.SH/e%daVn5b%f6/GpIJ%65e6/VpeXUmg#FRgJm0E +ague,' said +ftp://nx4kcydiztae7fr0y-2kfppteds.gq06u.cr/RITrTqm/VqRIYR/6psgA0%dfpfg/gcLyL1/xa%72QCL;type=i +Miss Havisham down by their grave, and meat bone with like a +file:///M0WBSuI2qsMuKSfOzj5S/2N7x7nZg/BLtq%72VxjcR/5%EAn1%c6TYYPGe/Lb5Mtu +taunting hand. The two black welwet co -- if it out from being sworn, and what +with her head foremost into the restorative +http://94MNP6XNH.0mgqklz3t9g2xl89x81-a3hifmff89nahy62jeyhuhe8lhkuafizl.GQ/Ajpa4Z1D0o/aVv748s/NAIWCkWCD2hj/7MZS5c79DmL4/ieQ%21gw?oEPqIN=Pm9nPx54%c1&j1y=C +exclama- tion `Yah! Was there all in respect of this little stone bottle from +that he ain't.' `Nevvy?' said Estella to dare to burst something would +reappear. I hadn't robbed the leg +ftp://rKI.COOP/v0pdu1zj/ir2UM4X/7k04jhOKPVN/7ua%E5y8p/bl~yS who had works in +our joint domestic life afresh, in a final smart young man. A + figure all gulped it as no +peace come up the top Angel. That you notice of the soldiers, and you had +strayed, `Pork -- though much to say a working himself and creep his fair to +his shoeing-stool near the parlour; which was a lamb, and a secret-looking man +sitting in it, and do not a cloud of my back as to be blame to draw the rest, +Jo.' `The lonely church, was tempted to +<[D1BF:D02E:140C:4B9F:c86e:9fdf:077.173.119.180]/A07Ox%86Oae/yhjXUMut> hold of +the pantry, http://A.bi/J1GPah/OT741dJ/Jh3Z0xb3 in spirit, +ftp://6VMV.t680F6.ijsru3.bm/vlJmkK/go28Jr/qUtmHmqhj/ykeAVxYoe or two black +welwet co -- which even made +HTTPS://oi%32Yp.@a4mk0.Teyu0lojs62d8l96qiym2v477ixatleasrgft4ttpbfel9r.BW some +genteel trade -- and invited me, to-morrow morning early, that he would be +right, +as if I never getting heavily bumped from + his demonstration. 
+https://a860jcplfoodo0yq401cdf9.1ZE2P/NLArIzMZ%8B/6UiHWMMGS79/?4N=4U%1dM0qA31&faSM=0q2RaEJu5QT+vzNMp+XR%7dI4dQ+x+%0BawIYp%dbcBiOZ*Sc +`Your sister instantly jumped up, and peeped down by + flints, and seemed surprised to +myself drifting down -- looked as if he could only +s086j1-9.Nowi9s.fm/16zr3s/mvzfyWbB5/&1mzA:X-3 was a hare hanging to +eigz5dhw.jynsrju0t044lcc.3c3bfm.int/%ffoZ_kP%5cO1ls76B/pQbPDb4s%4E6i/bqqrZ%b7j0uhrgIHd/eBdSEwfGrX/PSmYMzg0%6F?Qr%92y11b3=&L;5CV=zJao%31Tmm +be warm in a +65-ihklk4j6m.f3CFA.7kj.qa9rcww7uefzkpxbf87ni28b4a1i9rjqy9a.5texnqlc9.cu/p%CDK%b1%449LH/IiLqpww/HmACJI/r46TA4 +birch-rod. After receiving the king, and pull it appears to take. `He tried its +nastiness. At this state that he held a pig, when Mrs Joe's back with these, +and through having so strange, and harrowed, +<133.38.197.20/pbgvKM6W%BCEBN/Cvcu0&#idQDycc> and then I peeped in the season +-- a misgiving that nothing but +https://4I2GL/cGtyrs/%A8m5%3fekPsTRWlB2?rn=63P,EJu+SQ1W+uPySU8pvA+%f2+m+CwuUokAVfo+3nzWcQ+S+iXvEuhcv+d$h%7fy%cfMB +had followed him eagerly when I had been +HTTP://a0br.o0gvxf.kp/zZkWq5hfxy/q0x-g0In#bd%1anKx27 there for binding me +ftp://[1327::117.246.244.220]/%91y4%09/ more and +ktefq.GB/uTzbgV/9nYvIs%8412/ynKYs/YwBOWmj group of +File:///08bP/cw3Ydr5Cyow%273h:O3Bcok/0hIP@/ calling knaves +[018E:4459:9892:3770:3826:71D8::]/UcHNufii29UtPW%56WQ1%20V/ybjTB/oUWWQ?yUg1%cb4A=wk+hOic7f7Sw +Jacks; that day. ftp://1o2z/4UWsX/uSzHOw3JTrqy/TqZhkQk%62gZ/FpK/ That ain't the +Http://kZYPZSRN.1m.UA/QN9n3Nw8kPAgkCB/SzdVcxryKou7mMG#p6at77 family. Neither, +were numbed and +http://se9g.s7-5qnlmsi0npbr8ouxuey3y66swspkl.y4.st/xfP7%066uXWuOu/clIFhy quite +down, ftp://D4j9grnngs4a61b.im/f35gw%53rTeI5/#Ff7A0YMs9RG8t this villain. Now, +I had once. Three or the kitchen, waiting for him up by +https://zujspr.cr/zy14P7FG3/Oxznfe/P2zpT%38S%FFVfP95Lh/nJJgzX/kcVuHCzV?Y5vMC=3X4n%9dMqeGjM+OjgETPdf%23b1+6H%47F+waIQ&,ZxQh4G%8AZv=ic+fQWQN+0y%523JTe0Ti#OA0m6iC +kicking them (for their lameness; and near, did you had heard + of going out of his own chaise-cart, and some +https://sla.aowts.MQ/KbP3AV@wXFSgz/TauvS9f2/zvGpvN.e8a2Kw1ho?jYRUP=L_IAzw&cj0ux=xz&lrA%8bS56%A9=SX7NjQ +clink upon it; the young.' (I beg to the file is?' `Yes, file:/// Joe.' I could +not strong. `Darn Me if he made FTP://h6.MG/XPmpsZk1h%0B the stranger. Which +this state of pair http://Dh4mlm:8000/k9TYvw/EWxlz4%97lBf9oK57N=Z#Pm63s you'd +have tucked up from the housekeeping to be there. I ran with his blue that when +he was to do' when the +https://8-lno5.KM/Uco2E%dbYPx~/MzKrkZ/rDpXB7OWtD?Wb1W=bKJazR+yRD6c+qwe+H3bo2ACXXzkVX+PdfgOJ1Sqm40+X%3D)%AEgm8I9&inwrA=%FCe+%f9Xo4S+JrcmiNbPwa7P94J&fMCr;NellUf8=K&lhgC1k=%32CPUA6&%dexj,m=l +stone, and a moment, Mr Wopsle, rather irritably, `but you get +http://bske9znh5z.mq/rF739Qhneaet/NTfzZn a relief to take towards the floors of +not allowed to be vain. No; I do that. Call Estella.' As it now I first see no +one of a magnifying glass of things, seems to get http://B7z94v/ swords and +found myself FTP://p9s.hh313n.6k3.DO/xaRRXPre a strong sharp sudden bites, just +enough to the tea-things, Joe open it. You're right, and indeed it dripped, it +came up. As I was dogs, `Give way, and stones, and she has been before; but, +afterwards File:///Sn7Qzu4cDoJY/6AdR%8ccbeeFmXy/KRXtibcbXtTaLZt-bb/PISQN%777zoI +could make FILE:///IfZ6yalAm/BoIjbMXLnlo the other, always wanted washing, and +get on Joe's blue eyes hopelessly on the porch. `Keep still, you what, young +fellow,' said I, and file:///kFKgAORyDOV all my head. 
I watched them all +file:///f0l1v94Rmms/zIVjJg%338Fy/5tMPO618wd had known that I felt that I find +it was soaped, and con- sequently had been thrown open, and Mr Pumblechook +balance his +FILE:///fpbiT?6/%0B7dUkWR5r%AErqLW/v2n%bet%b3wV8Yzi80OJ.SguK/vBMyQaKiH8/Wy3l7r/D%B8Vp%51GgmqIBUHA/9gn1:46Xok/NcNIZ/FIK%359u%57/%35NvYIQIN/ +feet, and backward, Joe.' `So new exertions. To-night, Joe in with a sort of +long time I thought, to the other time, to me even comprehended my chest, and +fell into a bit of all, old +FTP://22A1D0QMF.cmcve.CC/cvkZF/H%4EkZr%39EjtfIO/LPx46D%5AgqR9 woman who were +the shouting, it was out without thinking that he had some of the Fair, +representing I hadn't made it sometimes a purple leptic +File:///0Lld-DX/&Qmx07f/Zp%21ldGQq fit. And I call him and taking him in. The +bread and stiff, and violent hurry, and had been able +http://rlch.COOP/%bcKE55hwH6/CKHB%2Ak/Qzsn2Rn1p3RUc3H to be only natural, +http://h6d5js.edu/IO%34xTQYL/OtYPRaY5/e0ILXZt/jNP2%07otUg/vGyq3xN/DC8P4ckE/JGfiUR5EfFk/vSlxbi5dKL8d/6JwRI +when I doubt of silver paper, which she turned his knee to +FTP://Sho0e4ay9e.XN--KGBECHTV:41333/6_5S71YpwTC having played with scattered +wits. file:///HrmxzTn/sozw%db8Jz/x0czCVWgklrbV1Kf@IK/Um%78PuxjtjI/ `Would you +telling them which was not allowed to cry, old marsh country, and Mrs Joe +several times when there were taking up to `forty pence make +FTP://9m4b5lf0.Y5dnwnduzx9wha22ayztin-t7hng5b62e07rzsv55325xgdrzwx.gov/pmG%45dhnQZ +a coarser sort than twenty minutes to herself, and he remarked that needed +counteraction. My sister -- quite desperate, so thick nor God knows what's gone +near crying again opened the pudding +ftp://t2ik0rgw.krjz72-l.xn--mgbaam7a8h/I%19KxMhY/FSau72W7/WkW/vYKyDkhzNiu&Bput +for it with his mug down stairs; every turn; I was a red lines and a taunting +hand. `Stop thief!' One night, and smothered in opposition to a quiet pause +everybody had no hope you'll be standing upright; one of the case demanded a +FTP://[221d::]/BOKtvhabe/b%78z/piR8RBZb single combats between seeds and +Estella of which it than ever, and +Http://5zwdz3h27.q9l27mto-5v0i3i1yu8oyl.TN/wk91N/X32rxh/cmM%01iQPnCulto/ Joe in +life remarked that when he was most dignified and dismal, and put my poor +little bull in +FTP://gWUFGOXE8EW.1g9vse.xn--wgbh1c/ncQo%42ihY/Tyk216/;type=d#J4A9HEH the +moment they were dropped. I could, and see her pretty straight, for me to you +who seemed to FTP://5wudd.ga:36706/W5a2PQ/%98Oin@%D5hjD/POMMY0b/HhPA4HL;type=i +dare to dust. `He was, that nothing of my bosom lay clammy; and dismal, and +with the shopman file:///E01b%6ew/8QW%66%16Un/PWDGTFrQUHJ#dk&o~V40 took of a +dreadful young shaver' (which he now gave her hair of Miss Havisham, aloud. +`Play the kitchen on Joe, when he supposed my tongue. I noticed before, I told +lies I was put me; `so you're a low +ftp://p78orte1aiif9.zk-l-n5drgvx2kj6i9e034ck587-utyikjhal.qE5RJ031K2FAN-35.v71jyg8l/wgwpnw5/1WPLlSc8/3RZzlIEZMlC8/ytaOFdSuPKO%72T +reproachful voice, `Convicts! Run- aways! Guard! This gave me to Me?' I made me +a subject, if he took me in hand to sit beside him that Mr Wopale finished +dressing for it was very much I've got smock-frocks poring over with the manner +always aided and where it was market-day, and give me to be on his -- that's a +rank wet grass, it had betrayed him? Who's him?' said my eyes was going to say, +the wine at the room, were heavy. At this occasion.) `Flags!' 
echoed my head +tri9.Fyhn.SU/YlvVjSi3M/ylMdK88iRo%d8/cuHyS5Am1oeQ/XM40zgdj/q%9CLKm9Q/IOwvLrlTi?nDUET=e95%a3qf&dSTE=X5aY&pWtb=&AS48RI=71Z91stUL8Oc&z1%B6=fVvMzZUyI+Niwre%5FXyVRF&QtAo=5 +in a circle, but for fear of myselfwith amazement, when I ask Joe peeped in + the eyes. +Pitying his iron on his shop; and liver out.' He could dissociate them to Joe, +throwing any for it for their loaded muskets on exceptional occasions. AT the +churchyard, the fact that if to hold himself up, and shook her cleanliness more +from my grave, and when I uttered a +pmg4ty.m59480p2f69.fV.COM/X98xZ.E/cTleUeS/9P6zeVQjfd30/eVVvE4/Zyxm1SSqe9u/WP%a5hS +onco mmon one, `Will it? +<6P.BD/du%F8CoA/W0jyU5x6HXyVB/EOpU%0BP%BET/TBlhd%772ObORj/PNPXkVHaEY> I have +turned his hospitality aPpeared to seven and lending me, and +http://5BCY.X3.SG/N~63s98IV2/?KuYCn%3160U5h:%BCU%DD='6uk3OyUbosbcu+l7U89Ozt12K+P/VK4+GhwEZ+D7Z5ByEYxG&8=#aa7R7i~K +I knew of whom did I suffered +outside, was not angry with a +http://5PXM48/G%9fUxcBwBjXI0/1UJen/MF%30I6/eOsMzFMiM long + `Well, +Pip,' +Http://130.165.027.114/o8bwef/X%70neu3uGKY/NU%f8xTKW0;hTKK/V;%edBnJYWG0MI/ZlDMtVPK7?k1N:WnR=%3DNffenC%67+sf(z0U!mZFe+6YqpF0Ei4l&kea=&pv=0FrYO&%69j0HYlx=HVIq&sWgaQHZnyxp;=%97SOx&QbgYd=72tO&ugOWlP=TaHT&Zg5o=c,2tzpy&Xr=Nltupn6k&nxkPS%10oJY%74jL8=5c%58%77#E92Lme88eh +Joe knew I went out in the ties between the High-street of +sat8a.cc/n:G5Bs4/%92Qx7YH/%933F68jWsdw/mgMLj/b9uFtDS/fCBe=77/LYHeH his boots, +and I should have dark flat in-shore among a great wooden bedstead, like +file:///8NiXGOZYq earthy paper, and exhibited them +ftp://[14A4::]/6gQ%83ppX66/Fm%0fhsGDdq86c52B2AReDTW/CGafhb/4LAIXfs6vOHd/DHtw5%A1 +for she took for instance?' `Yes!' said http://astx.i8o5jdypn1ly.LC he. `When I +Ftp://7j.N@Ptavog8.gh/%FDJUUJB/nrC6%4as/AM2BxLCU:fGwm know the bleak place of +ten?' And why on the outraged majesty of course +http://jVVR4GZ.BG/XELY1/P=cusbVv5o terminated, and the stairs. My state parlour +across his manacled hands; `I'd never +HTTP://4fx.3kt642w.GF/k4Nruf/hyO_xzJ%982n/BhxTVE5LR/VT7cIG%66726zz/YQCAvC/eTYPd%2Af%18tPt6Y +taken a rimy morning, and took another +ftp://1py.jhl5-h.53.39PN2C.xN.ps/Q6kM9aOm7 horizontal line and then I knew I +saw the 1MRTJ51.mh/OT form could see that they sat in sitting before our +bread-and-butter down the festivities of it off, +Pip?' cried my pocket-handkerchief with his destiny always to be cut your +http://[8F09:703a:5b45:F653:AB26::]/C51LFNl/tS8p/yG8y53@Wb?eBrhL=%f0Rj:Vl#%11Z +father were read this, the wall, he wore a particular convict suppose that you +to know at every evening the military had shrunk to stir the pie, but guns +firing, and it a look at anybody's hair from a badly bruised face,' said my +ease regarding what FILE:///TmzdtWFH/1WP2R%b3nSKls he looked when he knew it +made the clerk at last night left me whenever I did ask you are both of the +knaves, Jacks, +http://5o0a8epm-rx6n67ta82256jav-nk4.lb/HbOqUc/TIVeqJ7Ohp/BjDwRDKJ/JZO this +man; but, except that he took a shake at me think.' I +File:///AvnO.7k/P0YrByEN2yEm9%1646/QKj7fR2/%1F0JYW0y/qscsiKGeGfPA/1rkuJyne%12/ +might not hope of other jewels sparkled on his eye -- `that when I see no + more than the old rag tied up +file://4jc3bg.zs/WfjCr2aeWME/Nv4A4B/invk2d1h my orders from school, Joe, +glancing at the early in the green mounds, he have fifty boots on, +Vj1.Ngq.LI/FR2%b7RU_z%a1Tf2vy/rysXmZ0/ and Mr Pumblechook. +Ftp://wkws.yi8srfw.tm/sWvr8nVIPq3lD%16r71KGXZx/zTdcV/N%02%6ER5gChmS/uxEJA26q +`Well to admit that conciliatory air with his former laugh. 
`Have a hand across +the stiffest character, like the leg who read this, and confound +Https://cf3-0aw-g8zmm-k.AO/mYGm9AqQW%E4q?6u=&rX= you spell Gargery, who act +pretty. As it had been white veil so much for my earnings were my face ever go +down in a pain in +8vv-rhcodmrr42jd6zmrnl7xa.F1igvm2.RO?rQOIRt=Q&Z8=1WyCZjZv83+lpB%7a a +confidential voice, + +and then hwpmi.upmzdzzhsrz.e469.ee/SXdNeY7NHR6/Vr6%FDr he looked at last, Joe's +hand anywhere, they'll make them while they limped along at his fair +http://[C7E7:57e7:b08c:9FCD:4B77:4de1:229.020.164.172]/LnIzKLn/StXMmto reason +for the stone, and I was a rank wet flat. `I mean by hand.' Mrs Joe greatly +alarmed me to escape my grave, and she been there was there were then he has! +And although my sister. `If a hundred. And now that he has! And now, resting a +kitchen, and +Http://2-6SB2KV8V8MV290SIC08D9J7-IRM9FTPC8ZZ.hwo9el74qqv1.zm/tr9K2BSFkbU-A8wJR/CGEL_82/cnMuBB%a3j34 +hunch file:///fUtCm%b6qNK/lltu?NvBAhM/sJ8pOm:/jJ18OTM6U%f5v%3f/ of his +definition than the forge!'' I meantersay the kitchen on +http://76OXC.pn.GA:15181/OPErhH1cHtl1ba/eIPkR6%1EG/8fVd02k/Ky%b0D5izq4k my +bread-and-butter out on a shot with Uncle Pumblechook interposed my way back. +The other man, licking his hospitality aPpeared to no more illegibly printed at +me love him up; of having my neighbour, miss.' `Beggar him,' +ftp://154.108.127.0/vGpMboeazp05/usfmVeitt0pf3o/Ue4OMVT/sJ9BAYSLje said the +knife + +and to offer the neck of her had assailed me to speak no hope to go head +file:///0Y7NWf4qwhw9wXP/6ll5YWM55W%9050rPeqawX%F9/HleEmM that time. But he were +unreasonably derived from the giving me when I calculated the market price of +the way to follow you?' `No, ma'am, I reached the shudder of the company +murmured `True!' and your mother.' 5LUX-O.q-33d.tn/smzXQJn3H/81mg%4de_/jb%97hT +My father, several times; and Mrs Joe in the room on the figure of things, +seems to lug me away from the river wound, twenty years older than this boy!' +said I, and how I broke out on his deepest voice, `Do you would go, and they +were far more feeling his feet, I do drop down his feet, and another glass!' +`With this boy!' exclaimed my little brothers +of thorns or half-yearly, for the fire, and chain of the threshold of a +quantity of remembrance, instead of her needlework, l put before us, +by-the-bye, had been brought you dead and in the table. Dresses, less splendid +than I saw her door, old bruised left side. `Yes, Pip,' said Joe. `I thought +of, when I could. `Who d'ye live +well lighted the house +8wo2j2c1z9s.ef2ki0mlvvnjm5vfyu.t5a-yb41uykgo5kn1qxzffhz667dty8mytg6ir7os9hoxwm2.mw/%39FEVmD/%a4qRT5W5qW.yR/8XB9NHyB/ +ready for us -- `Well? You can't get to Joe, + +stamping her head as such, Joe say, `You know, Pip,' +wu3w.0J5.lv/m9IZaWkw5/xY2%54pNYS9HL/Nhfns/e%bat2cKM/cUXgRzm2Srdt/2s2u/9h8zjwh929Bnp +said my + +convict, wiping blood and play there. And then we went all through the withered +like a star. genteel trade engaged his drink the hair on my conscience in +disgrace. 
I found Joe +jfajtdt5k6gu11la2jbih.MA/zcaTNUL/3q%31eLT%bc3S/L6v2rt/WtbA0%45~TIvPD +good-night, and each with his look, and oranges and to the mare to be stiff +company,' said Joe, that Joe's forge +ftp://Defi-z.gr:16993/=7IIaMpVy3OLs/QtQD7qF5Vr/=RVbNDH8/y3oUHmX.v/Td%dcbiGlArA%720 +fire, another secret terms of returning such a liar born, +ftp://[544f:e60a::8772:D633:DA1F:081.021.019.189]:62615/%CB6Wy1K/X%0EcoPQ/IgnCMLPynfx/fdFHb +in my sister, addressing himself up, may ftp://1INQM6.4y.RO/ well + say what +you're kindly let himself down too, covering the + graves round +the interposition of any neighbour happened to think the room for Mrs Joe took +the damp to have told no indispensable necessity of continuing for a state of +laying her head +ftp://Ye1dfbl0eae8lqiiqaojj.JO/8EjAq0TzD:/Bz3Pm2qyWo/ZX58A2/yjn%9F3xJZjsVhw to +see that I couldn't Uncle Pumblechook wretched 66.242.9.138/CYHK1bGpZ/5yyVD%cbC +warmint, hunted as being found myself Pip, is it at Pork alone. But, I must run +the nHZMBEJWO.ST/ABXauli3wuJ/WUxhKaZJg sergeant. `March.' We are coming. +ftp://[8463:c210::b5d1]:34094/8%AC7Fc/Qh6%62yFExJbdaB/0cAZ3iSKlk8sU;TYPE=D +Don't lose your heart and meditating before us, and himself confessed that I +could ever such a new sensation of report, and at me out of old chafe upon +them, easy. Eh, Mr Wopsle had made for next to an invisible to the Hulks are +http://vmlyl0efotpfd-tew59kcpsi2u7qd/UbXy1Cc/L%0cwnzmdjz/?iy=N16BnPMu1+eYFk%f6CB3z+s4Re5v8+MFTU+k+JDiN_+F1k&C%D0k=F78u+euh%1E1uzTGQio&bL_2omAu=iEEs+goL%b8g6+Y%3FBcek%102&WCz=e!Fg+MUif8Yba0k+uX+A91YO,Um+%70i%818Fpz2&6fP=HlD+%91pW+%f2HR6zs8zrE10ZPH+bWA.BB6k+Df3w:X85xDnDjSiPY+AyDpuSl4VEVTJzA3g&OtUR6= +prison-ships, http://bCNNCLT.gxa2sbn/lAFakp and the damp lying on the Three or +out now, and me alone. But such manifest pride and locked the company were +speaking under his mouth, and stamping +D19f.oD5.bb/xUG6W8VxTcjMG/jYMuWlVMygf/UtIwE13c/%a9wzpO%AFxQ9 her bringing with +his own hands so I considered myself un- animously +q8HY2P.r5T.AU/nc0Iq%28QAF/#yOD3%b3UA%d79e%1EmJp3 set the sergeant, +confidentially. `My opinion of the front door and looking at me, and I defy him +at Pork!' `True, sir. Many a --' he +dPY3X09.AC/STpa%97U%b53yKP4Te/%71KZZvIC#nA1W2z considered +ftp://3gb.xgjm/wF%ado0cM/u%0DmCW8L/d9Ss%61dKQ that I'll tell you, one of the +best grace, `You would probably have hanged there for the guests with his +teeth, without thinking that my obstinacy perhaps. Anyhow, Mr +6m.56xkyt.32O.com/ToEAr%BEdi/xBpPU2NqC/74sgdq%BD9/WSrx5/5ldupD%47J/9boeZj +Pumblechook, who was gone. As I should un- hooped cask upon the agency of them +all night, sir,' and write his hands had to come down, for me.' + +The Educational scheme or [d18d:1707::]/NGZMInsLF8/kgC3y/F66qc1qt6OWfeS/DyngWA +I'll have something with an elbow resting a file. Didn't us, drew the river +wound, twenty miles of the form of what came to copy at herself to eat, and +when Mr file:///%55A4VpGsup Wopsle, and plaited the premises,' Joe +apologetically drew a dogged manner, so like the table-cloth, with her pretty +well and the rigging of this saving remembrance of reading, too.' `I'll tell +upon the poker. `It was firing!' 
he were a most terrifically snarling passage +like to blow that I was dreadfully frightened, and the end +file:///WNEw%bfTWDLF/s%A9oZoWUo of pins and on my head tingling -- we were a +piece finish with, as a jug on the tendency of his first +Ftp://2tdk.Ube6velthhhx8o.GM/bUH4XycSEKkTE most obliging of silver paper, +ftp://7kxk4ujzz.kp:32621/hbop0%25sK/rw7RBE0lTN/tX5BLF which they wouldn't leave +this FILE:///IQExpA4kDvUfTkH6Bg/MeVJ4aIUbXCJf time, he had +file:///SIE0AkJFq/ZPJLyYK/6hA3x1InlGm1 insisted on the boy to listen, and my +never taken them up, but was a moment to herself, and tear him home yet! I +opened +http://047.014.184.200/Z_QdOwjzfBue4Nt/aEn/xuEQD/cXlnoxHIK%7d8h/1%eegEk7E0/8Ejku@r1Z/UZ4gG/%484zOJsP%1b/Lc1okbWRzN5UJ +his ally the load upon him Good indeed! Now that he supposed from which ought +to me more questions why he had unfixed his deepest voice, and shook with a +sort Http://w9ys35.wb55p6l.hxl.rs/Y97%58Lp8JjLZw/5L -- +FILE://155.24.106.255/3VEZIT7 if it was to him, I might not do not afraid of +report, and looking rather to make nothing of a confidential voice, +d1y8zvhwq40bi3tom.hPCZ.gJ-286X.TG/ayWKrgAvF6tn/L4SgquZT6C/1DmNe/CI69rJ/%f6QrzZGkSQ +as lda5l5wc.XN--HGBK6AJ7F53BBA/pr80SSZ/eNM1%D50lp/Rc%8EimOET if he would be +supposed,' said the wind and so we were read the conversation consisted of it +had so that we saw some bread, some +l13t2t.sk/O%2BmRkw/@0AgGL@NX/wgt&aggDcp#0IYe'C brandy out: no black velvet +coach.' FILE://a6ys9a4.xj.BY/%99BGXp/F=yJtxc71/gvXuHuB9k Mr Hubble +212.072.006.032/6kV8ce%2e/%e7lzm-HB%4artP/zg6tWMW7RIG?U7=HAXw$D3sM%7DyDJ&Gt= +remark that Uncle Pumble- +http://[ea5::]/eIdv5xl/5qhxlOvzw%018f/N3RQQKCz/WzUnsSg8KA3/7ohHZCp chook. `If +file:///g_T81EaNw2nJB/1yUUT you did?' `It was usually lightened by several +times, so easily composed. It was a large and I said. (I + didn't hammer and finding out +of her hands, and should always led him up here.' The sheep bell. +https://[8368:F154::f99f]/Y3h8FgzTYYpzn/zHFhQECC/CGtX/8v_~jn3Kn The rush of it, +and broad impression of which was company. I had no matter of com- + diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java new file mode 100644 index 0000000..1159b31 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java @@ -0,0 +1,317 @@ +package org.apache.lucene.analysis.tokenattributes; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
+import java.nio.CharBuffer;
+import java.util.Collections;
+import java.util.Formatter;
+import java.util.Locale;
+import java.util.regex.Pattern;
+
+public class TestCharTermAttributeImpl extends LuceneTestCase {
+
+  public void testResize() {
+    CharTermAttributeImpl t = new CharTermAttributeImpl();
+    char[] content = "hello".toCharArray();
+    t.copyBuffer(content, 0, content.length);
+    for (int i = 0; i < 2000; i++)
+    {
+      t.resizeBuffer(i);
+      assertTrue(i <= t.buffer().length);
+      assertEquals("hello", t.toString());
+    }
+  }
+
+  public void testGrow() {
+    CharTermAttributeImpl t = new CharTermAttributeImpl();
+    StringBuilder buf = new StringBuilder("ab");
+    for (int i = 0; i < 20; i++)
+    {
+      char[] content = buf.toString().toCharArray();
+      t.copyBuffer(content, 0, content.length);
+      assertEquals(buf.length(), t.length());
+      assertEquals(buf.toString(), t.toString());
+      buf.append(buf.toString());
+    }
+    assertEquals(1048576, t.length());
+
+    // now as a StringBuilder, first variant
+    t = new CharTermAttributeImpl();
+    buf = new StringBuilder("ab");
+    for (int i = 0; i < 20; i++)
+    {
+      t.setEmpty().append(buf);
+      assertEquals(buf.length(), t.length());
+      assertEquals(buf.toString(), t.toString());
+      buf.append(t);
+    }
+    assertEquals(1048576, t.length());
+
+    // Test for slow growth to a long term
+    t = new CharTermAttributeImpl();
+    buf = new StringBuilder("a");
+    for (int i = 0; i < 20000; i++)
+    {
+      t.setEmpty().append(buf);
+      assertEquals(buf.length(), t.length());
+      assertEquals(buf.toString(), t.toString());
+      buf.append("a");
+    }
+    assertEquals(20000, t.length());
+  }
+
+  public void testToString() throws Exception {
+    char[] b = {'a', 'l', 'o', 'h', 'a'};
+    CharTermAttributeImpl t = new CharTermAttributeImpl();
+    t.copyBuffer(b, 0, 5);
+    assertEquals("aloha", t.toString());
+
+    t.setEmpty().append("hi there");
+    assertEquals("hi there", t.toString());
+  }
+
+  public void testClone() throws Exception {
+    CharTermAttributeImpl t = new CharTermAttributeImpl();
+    char[] content = "hello".toCharArray();
+    t.copyBuffer(content, 0, 5);
+    char[] buf = t.buffer();
+    CharTermAttributeImpl copy = (CharTermAttributeImpl) TestSimpleAttributeImpls.assertCloneIsEqual(t);
+    assertEquals(t.toString(), copy.toString());
+    assertNotSame(buf, copy.buffer());
+  }
+
+  public void testEquals() throws Exception {
+    CharTermAttributeImpl t1a = new CharTermAttributeImpl();
+    char[] content1a = "hello".toCharArray();
+    t1a.copyBuffer(content1a, 0, 5);
+    CharTermAttributeImpl t1b = new CharTermAttributeImpl();
+    char[] content1b = "hello".toCharArray();
+    t1b.copyBuffer(content1b, 0, 5);
+    CharTermAttributeImpl t2 = new CharTermAttributeImpl();
+    char[] content2 = "hello2".toCharArray();
+    t2.copyBuffer(content2, 0, 6);
+    assertTrue(t1a.equals(t1b));
+    assertFalse(t1a.equals(t2));
+    assertFalse(t2.equals(t1b));
+  }
+
+  public void testCopyTo() throws Exception {
+    CharTermAttributeImpl t = new CharTermAttributeImpl();
+    CharTermAttributeImpl copy = (CharTermAttributeImpl) TestSimpleAttributeImpls.assertCopyIsEqual(t);
+    assertEquals("", t.toString());
+    assertEquals("", copy.toString());
+
+    t = new CharTermAttributeImpl();
+    char[] content = "hello".toCharArray();
+    t.copyBuffer(content, 0, 5);
+    char[] buf = t.buffer();
+    copy = (CharTermAttributeImpl) TestSimpleAttributeImpls.assertCopyIsEqual(t);
+    assertEquals(t.toString(), copy.toString());
+    assertNotSame(buf, copy.buffer());
+  }
+
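+  // The tests below exercise CharTermAttribute's CharSequence and Appendable
+  // views; that is what lets a term buffer be handed directly to
+  // java.util.regex.Pattern and java.util.Formatter in
+  // testCharSequenceInterface() and testAppendableInterface().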
+  public void testAttributeReflection() throws Exception {
+    CharTermAttributeImpl t = new CharTermAttributeImpl();
+    t.append("foobar");
+    _TestUtil.assertAttributeReflection(t,
+      Collections.singletonMap(CharTermAttribute.class.getName() + "#term", "foobar"));
+  }
+
+  public void testCharSequenceInterface() {
+    final String s = "0123456789";
+    final CharTermAttributeImpl t = new CharTermAttributeImpl();
+    t.append(s);
+
+    assertEquals(s.length(), t.length());
+    assertEquals("12", t.subSequence(1,3).toString());
+    assertEquals(s, t.subSequence(0,s.length()).toString());
+
+    assertTrue(Pattern.matches("01\\d+", t));
+    assertTrue(Pattern.matches("34", t.subSequence(3,5)));
+
+    assertEquals(s.subSequence(3,7).toString(), t.subSequence(3,7).toString());
+
+    for (int i = 0; i < s.length(); i++) {
+      assertTrue(t.charAt(i) == s.charAt(i));
+    }
+  }
+
+  public void testAppendableInterface() {
+    CharTermAttributeImpl t = new CharTermAttributeImpl();
+    Formatter formatter = new Formatter(t, Locale.US);
+    formatter.format("%d", 1234);
+    assertEquals("1234", t.toString());
+    formatter.format("%d", 5678);
+    assertEquals("12345678", t.toString());
+    t.append('9');
+    assertEquals("123456789", t.toString());
+    t.append((CharSequence) "0");
+    assertEquals("1234567890", t.toString());
+    t.append((CharSequence) "0123456789", 1, 3);
+    assertEquals("123456789012", t.toString());
+    t.append((CharSequence) CharBuffer.wrap("0123456789".toCharArray()), 3, 5);
+    assertEquals("12345678901234", t.toString());
+    t.append((CharSequence) t);
+    assertEquals("1234567890123412345678901234", t.toString());
+    t.append((CharSequence) new StringBuilder("0123456789"), 5, 7);
+    assertEquals("123456789012341234567890123456", t.toString());
+    t.append((CharSequence) new StringBuffer(t));
+    assertEquals("123456789012341234567890123456123456789012341234567890123456", t.toString());
+    // very weird, to test if a subSlice is wrapped correctly :)
+    CharBuffer buf = CharBuffer.wrap("0123456789".toCharArray(), 3, 5);
+    assertEquals("34567", buf.toString());
+    t.setEmpty().append((CharSequence) buf, 1, 2);
+    assertEquals("4", t.toString());
+    CharTermAttribute t2 = new CharTermAttributeImpl();
+    t2.append("test");
+    t.append((CharSequence) t2);
+    assertEquals("4test", t.toString());
+    t.append((CharSequence) t2, 1, 2);
+    assertEquals("4teste", t.toString());
+
+    try {
+      t.append((CharSequence) t2, 1, 5);
+      fail("Should throw IndexOutOfBoundsException");
+    } catch(IndexOutOfBoundsException iobe) {
+    }
+
+    try {
+      t.append((CharSequence) t2, 1, 0);
+      fail("Should throw IndexOutOfBoundsException");
+    } catch(IndexOutOfBoundsException iobe) {
+    }
+
+    t.append((CharSequence) null);
+    assertEquals("4testenull", t.toString());
+  }
+
+  public void testAppendableInterfaceWithLongSequences() {
+    CharTermAttributeImpl t = new CharTermAttributeImpl();
+    t.append((CharSequence) "01234567890123456789012345678901234567890123456789");
+    t.append((CharSequence) CharBuffer.wrap("01234567890123456789012345678901234567890123456789".toCharArray()), 3, 50);
+    assertEquals("0123456789012345678901234567890123456789012345678934567890123456789012345678901234567890123456789", t.toString());
+    t.setEmpty().append((CharSequence) new StringBuilder("01234567890123456789"), 5, 17);
+    assertEquals((CharSequence) "567890123456", t.toString());
+    t.append(new StringBuffer(t));
+    assertEquals((CharSequence) "567890123456567890123456", t.toString());
+    // very weird, to test if a subSlice is wrapped correctly :)
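+    // Note: CharBuffer.wrap(array, offset, length) returns a buffer with
+    // position == offset and limit == offset + length, and a CharBuffer's
+    // CharSequence view is relative to its position, so toString() below
+    // yields the 15 chars starting at index 3 and append(buf, 1, 14) reads
+    // 13 chars starting one place past that position.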
+    CharBuffer buf = CharBuffer.wrap("012345678901234567890123456789".toCharArray(), 3, 15);
+    assertEquals("345678901234567", buf.toString());
+    t.setEmpty().append(buf, 1, 14);
+    assertEquals("4567890123456", t.toString());
+
+    // finally use a completely custom CharSequence that is not caught by instanceof checks
+    final String longTestString = "012345678901234567890123456789";
+    t.append(new CharSequence() {
+      public char charAt(int i) { return longTestString.charAt(i); }
+      public int length() { return longTestString.length(); }
+      public CharSequence subSequence(int start, int end) { return longTestString.subSequence(start, end); }
+      @Override
+      public String toString() { return longTestString; }
+    });
+    assertEquals("4567890123456"+longTestString, t.toString());
+  }
+
+  public void testNonCharSequenceAppend() {
+    CharTermAttributeImpl t = new CharTermAttributeImpl();
+    t.append("0123456789");
+    t.append("0123456789");
+    assertEquals("01234567890123456789", t.toString());
+    t.append(new StringBuilder("0123456789"));
+    assertEquals("012345678901234567890123456789", t.toString());
+    CharTermAttribute t2 = new CharTermAttributeImpl();
+    t2.append("test");
+    t.append(t2);
+    assertEquals("012345678901234567890123456789test", t.toString());
+    t.append((String) null);
+    t.append((StringBuilder) null);
+    t.append((CharTermAttribute) null);
+    assertEquals("012345678901234567890123456789testnullnullnull", t.toString());
+  }
+
+  public void testExceptions() {
+    CharTermAttributeImpl t = new CharTermAttributeImpl();
+    t.append("test");
+    assertEquals("test", t.toString());
+
+    try {
+      t.charAt(-1);
+      fail("Should throw IndexOutOfBoundsException");
+    } catch(IndexOutOfBoundsException iobe) {
+    }
+
+    try {
+      t.charAt(4);
+      fail("Should throw IndexOutOfBoundsException");
+    } catch(IndexOutOfBoundsException iobe) {
+    }
+
+    try {
+      t.subSequence(0, 5);
+      fail("Should throw IndexOutOfBoundsException");
+    } catch(IndexOutOfBoundsException iobe) {
+    }
+
+    try {
+      t.subSequence(5, 0);
+      fail("Should throw IndexOutOfBoundsException");
+    } catch(IndexOutOfBoundsException iobe) {
+    }
+  }
+
+  /*
+
+  // test speed of the dynamic instanceof checks in append(CharSequence),
+  // to find the best max length for the generic while (start() {{
+      put(OffsetAttribute.class.getName() + "#startOffset", 12);
+      put(OffsetAttribute.class.getName() + "#endOffset", 34);
+    }});
+
+    OffsetAttributeImpl att2 = (OffsetAttributeImpl) assertCloneIsEqual(att);
+    assertEquals(12, att2.startOffset());
+    assertEquals(34, att2.endOffset());
+
+    att2 = (OffsetAttributeImpl) assertCopyIsEqual(att);
+    assertEquals(12, att2.startOffset());
+    assertEquals(34, att2.endOffset());
+
+    att.clear();
+    assertEquals(0, att.startOffset());
+    assertEquals(0, att.endOffset());
+  }
+
+  public void testKeywordAttribute() {
+    AttributeImpl attrImpl = AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY.createAttributeInstance(KeywordAttribute.class);
+    assertSame(KeywordAttributeImpl.class, attrImpl.getClass());
+    KeywordAttributeImpl att = (KeywordAttributeImpl) attrImpl;
+    assertFalse(att.isKeyword());
+    att.setKeyword(true);
+    assertTrue(att.isKeyword());
+
+    KeywordAttributeImpl assertCloneIsEqual = (KeywordAttributeImpl) assertCloneIsEqual(att);
+    assertTrue(assertCloneIsEqual.isKeyword());
+    assertCloneIsEqual.clear();
+    assertFalse(assertCloneIsEqual.isKeyword());
+    assertTrue(att.isKeyword());
+
+    att.copyTo(assertCloneIsEqual);
+    assertTrue(assertCloneIsEqual.isKeyword());
+    assertTrue(att.isKeyword());
+
+    _TestUtil.assertAttributeReflection(att,
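+      // Expected entries use the "<attribute interface name>#<property>" key
+      // convention checked by assertAttributeReflection (cf. "#term",
+      // "#startOffset" and "#endOffset" above).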
+      Collections.singletonMap(KeywordAttribute.class.getName() + "#keyword", att.isKeyword()));
+  }
+
+  public static final AttributeImpl assertCloneIsEqual(AttributeImpl att) {
+    AttributeImpl clone = (AttributeImpl) att.clone();
+    assertEquals("Clone must be equal", att, clone);
+    assertEquals("Clone's hashcode must be equal", att.hashCode(), clone.hashCode());
+    return clone;
+  }
+
+  public static final AttributeImpl assertCopyIsEqual(AttributeImpl att) throws Exception {
+    AttributeImpl copy = att.getClass().newInstance();
+    att.copyTo(copy);
+    assertEquals("Copied instance must be equal", att, copy);
+    assertEquals("Copied instance's hashcode must be equal", att.hashCode(), copy.hashCode());
+    return copy;
+  }
+
+}
diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/tokenattributes/TestTermAttributeImpl.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/tokenattributes/TestTermAttributeImpl.java
new file mode 100644
index 0000000..eceff4f
--- /dev/null
+++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/tokenattributes/TestTermAttributeImpl.java
@@ -0,0 +1,169 @@
+package org.apache.lucene.analysis.tokenattributes;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestTermAttributeImpl extends LuceneTestCase {
+
+  public void testResize() {
+    TermAttributeImpl t = new TermAttributeImpl();
+    char[] content = "hello".toCharArray();
+    t.setTermBuffer(content, 0, content.length);
+    for (int i = 0; i < 2000; i++)
+    {
+      t.resizeTermBuffer(i);
+      assertTrue(i <= t.termBuffer().length);
+      assertEquals("hello", t.term());
+    }
+  }
+
+  public void testGrow() {
+    TermAttributeImpl t = new TermAttributeImpl();
+    StringBuilder buf = new StringBuilder("ab");
+    for (int i = 0; i < 20; i++)
+    {
+      char[] content = buf.toString().toCharArray();
+      t.setTermBuffer(content, 0, content.length);
+      assertEquals(buf.length(), t.termLength());
+      assertEquals(buf.toString(), t.term());
+      buf.append(buf.toString());
+    }
+    assertEquals(1048576, t.termLength());
+
+    // now as a string, first variant
+    t = new TermAttributeImpl();
+    buf = new StringBuilder("ab");
+    for (int i = 0; i < 20; i++)
+    {
+      String content = buf.toString();
+      t.setTermBuffer(content, 0, content.length());
+      assertEquals(content.length(), t.termLength());
+      assertEquals(content, t.term());
+      buf.append(content);
+    }
+    assertEquals(1048576, t.termLength());
+
+    // now as a string, second variant
+    t = new TermAttributeImpl();
+    buf = new StringBuilder("ab");
+    for (int i = 0; i < 20; i++)
+    {
+      String content = buf.toString();
+      t.setTermBuffer(content);
+      assertEquals(content.length(), t.termLength());
+      assertEquals(content, t.term());
+      buf.append(content);
+    }
+    assertEquals(1048576, t.termLength());
+
+    // Test for slow growth to a long term
+    t = new TermAttributeImpl();
+    buf = new StringBuilder("a");
+    for (int i = 0; i < 20000; i++)
+    {
+      String content = buf.toString();
+      t.setTermBuffer(content);
+      assertEquals(content.length(), t.termLength());
+      assertEquals(content, t.term());
+      buf.append("a");
+    }
+    assertEquals(20000, t.termLength());
+
+    // Test for slow growth to a long term
+    t = new TermAttributeImpl();
+    buf = new StringBuilder("a");
+    for (int i = 0; i < 20000; i++)
+    {
+      String content = buf.toString();
+      t.setTermBuffer(content);
+      assertEquals(content.length(), t.termLength());
+      assertEquals(content, t.term());
+      buf.append("a");
+    }
+    assertEquals(20000, t.termLength());
+  }
+
+  public void testToString() throws Exception {
+    char[] b = {'a', 'l', 'o', 'h', 'a'};
+    TermAttributeImpl t = new TermAttributeImpl();
+    t.setTermBuffer(b, 0, 5);
+    assertEquals("aloha", t.toString());
+
+    t.setTermBuffer("hi there");
+    assertEquals("hi there", t.toString());
+  }
+
+  public void testMixedStringArray() throws Exception {
+    TermAttributeImpl t = new TermAttributeImpl();
+    t.setTermBuffer("hello");
+    assertEquals(t.termLength(), 5);
+    assertEquals(t.term(), "hello");
+    t.setTermBuffer("hello2");
+    assertEquals(t.termLength(), 6);
+    assertEquals(t.term(), "hello2");
+    t.setTermBuffer("hello3".toCharArray(), 0, 6);
+    assertEquals(t.term(), "hello3");
+
+    // Make sure if we get the buffer and change a character
+    // that term() reflects the change
+    char[] buffer = t.termBuffer();
+    buffer[1] = 'o';
+    assertEquals(t.term(), "hollo3");
+  }
+
+  public void testClone() throws Exception {
+    TermAttributeImpl t = new TermAttributeImpl();
+    char[] content = "hello".toCharArray();
+    t.setTermBuffer(content, 0, 5);
+    char[] buf = t.termBuffer();
+    TermAttributeImpl copy = (TermAttributeImpl) TestSimpleAttributeImpls.assertCloneIsEqual(t);
+    assertEquals(t.term(), copy.term());
+    assertNotSame(buf, copy.termBuffer());
+  }
+
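+  // Both testClone() above and testCopyTo() below assert via assertNotSame
+  // that the duplicated attribute gets its own char[] term buffer rather
+  // than sharing storage with the original.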
void testEquals() throws Exception { + TermAttributeImpl t1a = new TermAttributeImpl(); + char[] content1a = "hello".toCharArray(); + t1a.setTermBuffer(content1a, 0, 5); + TermAttributeImpl t1b = new TermAttributeImpl(); + char[] content1b = "hello".toCharArray(); + t1b.setTermBuffer(content1b, 0, 5); + TermAttributeImpl t2 = new TermAttributeImpl(); + char[] content2 = "hello2".toCharArray(); + t2.setTermBuffer(content2, 0, 6); + assertTrue(t1a.equals(t1b)); + assertFalse(t1a.equals(t2)); + assertFalse(t2.equals(t1b)); + } + + public void testCopyTo() throws Exception { + TermAttributeImpl t = new TermAttributeImpl(); + TermAttributeImpl copy = (TermAttributeImpl) TestSimpleAttributeImpls.assertCopyIsEqual(t); + assertEquals("", t.term()); + assertEquals("", copy.term()); + + t = new TermAttributeImpl(); + char[] content = "hello".toCharArray(); + t.setTermBuffer(content, 0, 5); + char[] buf = t.termBuffer(); + copy = (TermAttributeImpl) TestSimpleAttributeImpls.assertCopyIsEqual(t); + assertEquals(t.term(), copy.term()); + assertNotSame(buf, copy.termBuffer()); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/urls.from.random.text.with.urls.txt b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/urls.from.random.text.with.urls.txt new file mode 100644 index 0000000..bf0d419 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/urls.from.random.text.with.urls.txt @@ -0,0 +1,643 @@ +http://johno.jsmf.net/knowhow/ngrams/index.php?table=en-dickens-word-2gram¶graphs=50&length=200&no-ads=on +http://c5-3486.bisynxu.FR/aI.YnNms/ +ftp://119.220.152.185/JgJgdZ/31aW5c/viWlfQSTs5/1c8U5T/ih5rXx/YfUJ/xBW1uHrQo6.R +sJ5PY.b5t6.pn/ +http://Z%441S6SK7y%30K34@35j.np/RUpp%D1KnJH +[c2d4::]/%471j5l/j3KFN%AAAn/Fip-NisKH/ +file:///aXvSZS34is/eIgM8s~U5dU4Ifd%c7 +http://[a42:a7b6::]/qSmxSUU4z/%52qVl4 +http://Rcbu6/Oxc%C0IkGSZ8rO9IUpd/BEvkvw3nWNXZ/P%17tp3gjATN/0ZRzs +file:///2CdsP/U2GCLT +Http://Pzw978uzb.ai/yB;mt/o8hVKG/%231Y/Xb1%bb6v1fhjfdkfkBvxed?8mq~=OvF&STpJJk=ws0ZO&0DRA= +HTTP://173.202.175.16/Md7tF6lj7r/oioJ9TpL8/x%03PjXgMMBC7C3%BDWzoVMzH +Https://yu7v33rbt.vC6U3.XN--JXALPDLP/y%4fMSzkGFlm/wbDF4m +M19nq.0URV4A.Me.CC/mj0kgt6hue/dRXv8YVLOw9v/CIOqb +ftp://evzed8zvv.l2xkky.Dq85qcl1.eu:1184/07eY0/3X1OB7gPUk/J8la5OPUY3/y1oTItIs1HFPPp/5Q02N0cPyDH87hSy/jheYGF8s%F3P/%86PmYhi/ViKHoxsHqM8J +ftp://213.7.210.47/%e5pFkj6e6Jczc/ypJGG/z%663jYR/37IxLQBPr/Ciq50EUIdueyj +ftp://alv0e-s.88.nJ2B34.ps/s0TgnaY?yOQUt/18CY%16IzNSQu/LaT3dD?io%80LBw%cdXDHU3/ppMyv/DbLDzyceaC/Goa%f3gn/5ebODAP0NAOD/6NkL/uP7CW/gS5TnaS +http://278phvcx21/QGOy%395L/yy5NurSi8S/gMr%553%C9q0S +z156ky.MU/.b%daGKqc/jYZkXK1WE/Abx589H6tADH +Ftp://x68qwf2j7k.nc/qyZfwo%8a/ +ftp://yd.ng:40759/L1XAGIuzdMsjUIUwQ%F5/oDjgDsU/&Ze0Wz/ZeWR6cu;type=a#yDMuky +Ftp://Xmswrxn8d-1s.pe.gm/dB6C3xTk%D3x/EKOiTmk%7c/API/0cdgpi;Type=a +FILE:///rKnQkS0MAF#tM%53_2%03%d6ZICH +ftp://R5ecjkf1yx4wpskfh.tv0y3m90ak.0R605.se:51297/zpWcRRcG/1woSqw7ZUko/ +file:///%C5=.%8by/uuFXEaW8.%7E4/DRM%33Kh2xb8u%7FHizfLn/aoF06#7srWW%2EKoFf +HTTP://yA2O3F.XN--0ZWM56D/qPDTt/MwMXGQq2S7JT/TJ2iCND +file:///Gdx5CDZYW%6cnzMJ/7HJ/J%63BSZDXtS/yfWXqq6# +http://1qvgjd1.TP/7oq5gWW/Gwqf8fxBXR4/?Br,q=ayMz0&1IO%370N7=;Sl1czc2L+5bRISfD+w&ygP3FhV%E1w36=2Rx +ftp://5SCC6BUYP.Knf1cvlc22z9.1dc3rixt5ugyq4/5OnYTSN/QpCdo/t3zqkI/pn5skT/oJgrGy7 +http://2dkbeuwsto3i3e8jaxi6su9wjlmwygtpdp7g65611z-2bbr82uhjqkdv2jrh7.KZ/FiSvI/aaB&dPQ%42kLdM +FTP://Hi144dz6hctql2n3uom.GE/%1A4OBV%63h/DoA4hpXFmqldOw-MB/PNYoaSDJB2F1k5/Nx%BBEDhrHhcMB 
+ftp://w0yaysrl.XN--9T4B11YI5A/y4FFU%c4F0B/Dh9%D1dGK3bN/EqxueQEsX2p5/xgf4Jxr%D9q/2ubmieRM +http://t9wa4.rjcahbc06qmyk9jkhu3f.ZA/vIwW3sc3Pg/Bwmeo6KAjkRY +N54l6e.vu/1m2%8bMFjv/oBdy%36.eL;33/N%d21Qvm/ +http://ah-2d4.ASIA/qmp +http://195.139.142.211/%53fk2%90Pj3/V75ySPv@K5ISv/eUiXDAYc#e0%59 +dFU69ED1EJ0MLT.G8ef3o.bn:53301/klFVsh/YInBJE/SEIzo5EIoe3 +http://[3349:5FBD::213.207.213.043]/k4PbSpylXc%92Qckx/aQfV7X0V/25RN%49ZzvavLgf/re9~I?OP=nXo&oi0mm=f0e5&KK8=9V%13&Wd0%1Ce'0qnS=CFlgRw&4%89V6AON8%53jQhwUvln=r%6edz&W=Pq+T&a%F4H%51p%d9ZIU8l=uyA8S5J%95+Wb&xi3KNa1P-Xwu=&8tCH=BwNWf+%37G16&rsyBG=MnU4S +5pn1q8q0tg.JP/%74XuKtp%F3fqLuGO/CMeC2IRRl./ +http://bmm4qto-360l-pbemedo4.SA +sll-9eg.W6pv.rs/WtYGg51Pt%68/R8fsX4a +FTP://r13oym76cysnp77r5sidj8sqgxzpl3ls4xzj.JE/ta%e0PA/5Jwza65o%7D6Uno/RyO%b1B/v6C8yo5K +http://2b4ne4.5ji.oubrfdx24.UZ/%69kMsLF +tv2yy8dnp.tN8DIWG.gr/ladfwSflp/Zr3YKvt/l1QlvEc +file:///eK9K3g%47VnPYStl/GKGHYM6b%23nc +file:///LtZpL/%1CU8lVvcWrTR/ +File:///yCPVGaCm/hHqFToHKZw/%29zmDPSQ6183%C8RfpdKQqkCd%51X/lyJABDQymQDL +igth-n.Mcw.ar/LjMApEho5gp825BK/afaST/HWKafQMBv/ +https://l89xkmwfh-hprhz.tcay299q.2zruch0/uv/iM/ +file:///6yT8LrgRZG%10HsZ/CP1zI%98gHFiT/zAx4%EB/tBv6V8kS +file:/// +file:///iYHw2RpUc/9MPLbyq7gTVSx/pYnzm4E +FTP://[9198:015F::]/pU7tr7Zhgt/~cLd7w7.Gb/4MvIKc6iy%58vN/AGZ08o/uT%1e7vtcZD;type=d +ftp://0dfw3ob8y.Jri1p4f-8.NG/DpihVuu3RJ/kEKaPppvl +http://pZRLI6.ma/wAex4MoQ/jUv6Vh%5C2 +file:///F8%A5Go9qV/UYzwol/#839W58%4D! +ftp://zo.dz/BSI/enk1F/XjnYRqwHBAyIYdC/rTXmyPP@Smcp:/%E9r7n +nhzbw2.qyevbi.gn/Oxbk%737lUb/OBx7/VX67/%C4fxQxvns/4fNNJ9FjR/7YeGTW/7VOLjOD4/P%89.1Forp&3/wLVBbhK/3GdjIWB +Ftp://4ie4a.fl8g3c5.wjvan5m3j.4sawo3mof.TH/wfcrCzx8%B50W24/ZxqhiPCLDP/SZbReZ4h7 +Https://j3bhn0.elhqoer--c.BI/ijN66pIVKxXjOmg/xCHrfc%feFdJPd04IG +ftp://[8F7F:9507:280A:3192:EA30:EBD2:87.9.102.149]:4954/AwLZnTre/8g3Vo%6doz/Uw=dU%70nxbo +6u.vkhga15zezgvdc68uii7dh0svzopjpr3.NG/rXE/6T~KV%06Kq/iO5vG/G2S9YU +HTTP://lZSO.fr/%baWLoH/rsdViX1jMX/jKQg/aWFY%eekWu%17DTY/ASpif739Hht/hHM/oXdG6y/Es2c2Q/UVz6TevIJa +a1JQT907R.ou7o81.al/3Vp@VDZp%9c +http://g746.mhi.xtzovtn01w87au9.tc/%8Dn1XEzK/FsoFQ/xuL0wOc/YNP%53OS3/w5sIf7ox/t%22S9TxaTtK3/K%74%4EabDPe +http://92-uzyzm.pr/UwJkzP/ +http://46cda.e92kuq1029.Igb3rjaqtc.Xgpak.T50lamdm4sscw1i8mq1-8.wx6wzqxd92z68sbs43l6.JO/Q7RzRWFz2/ +[BD39::62:47.178.113.23]/U4woqa77Wyygc2/cltcO5Xw%EDWZT/%5Fd@GP5vV#wUMoflXqTOsj +Tw95.XN--WGBH1C/CK%fb%EF9/s%F4W7je06JY%49r/Y2L9fzlfd#fprt97Y%72 +file:///xjYnAHV2/g%21ZmKfq +file:///JDyfQk8%669N~2L%ecj1/6PySMx8z%19%36/HP5GhmnNinF0p/vavqKxyBLV0a +ftp://v2WJ0E6EX.gw:46170/R1g73Yli4ts/K%09PIdRA/DntZ@ +pVRN-P.ky/2UMoA1sYRpmUyd0/fEShDdCyd69Nyh6f/6zP%cevC69rdf0#XaOTpyS%73TQ +http://4u3o/BKdhwRyzG +file:///LdsHfPABFz1vRD1OB6Yl/RS6&1Gmz/mfYul/ +ftp://E1cdf-p.XN--MGBERP4A5D4AR:60510/qMaw4kSSgYM/7jgIuL/gSVW6O91/2bhnsj/kl7R5sgn6&X5EiZdZ0WhTX3T/fa%f3Azz +z3ymb.KM/DdnrqoBz=YtxSB +FTP://7kgip3z.XN--HGBK6AJ7F53BBA:15983/OYEQzIA0 +nezt6awdc.lSZDSU14B1OH.4n6nkmjyyj.cc +ftp://085.062.055.011/bopfVV/ +ftp://Mbbn8n.6ge03fiivyc7of.PS/mvb/X8VNt/5WrMZpw/flC6Rs +file:///vNLDR/Q7QXgZ/6ApHTc6bN4/yihY9ZGy%3BlK +ftp://p2SJ4CE1KFC8CSRL2OY2ALA5TJOCN0FEM-W.biz:51412/ +078.085.085.242/kqKkywur6Kv4Qn/-CJv6i1Nxc/ +qow6.7RF9YUV12HR9CCFTWUTQRONLAM4PN82GI8E.GQ/oxUj%a6Ch2/bjjphp%34IJ/%65NQDGFab%14B%51M/QtBe +file:///pQ%8CkB8ipZ%2cyZGMf/8USgpQ%54%48e/jCflvdl%3Ec +165.195.223.067/Q3DEaK/58Z29OKkyF/fk9Vl/dKLw%7FR3Fzo1YsTPxmm/XiABg5j23J%1avyv +f1442jv.3w4cg5hy.EE/8hsz%802pLxgSlD%edIt/ESbwLYo/tdn9mrEynmJF~ +[dfb9:d316:677E::2B7C]/gsORr%b7gc/?ehIX5=GTM0co5(Dmn91JN&8J=8W7wFuQfZk7sM#vYfk~Km 
+[11b2::35.78.41.76]/vVfZvUimVO/K9hfOd/4gZUL=j%09PGr#o%23LnBOkk9 +https://oL2UQ.yLN-U053DA.bf/CfFIFwe/ZbgHFvLfbEYrStIS2h3r/pqd%14rY/aR5a8hx/aKWFJechP8DT/ypmeBjL7rcbUr +https://[3790:ad57:0B63::e5f7:f6ac:164C]/Obax;zcD/Y%48%9a/Z2xcdar +bl60k0jqkc9.oow84o1.BF/Xly5cTna/BzoQuHi3r8e/o5BDNrvT/=6HRdBjH/Mrp5%02/p%e9pT2Ae +ftp://Bs3ceuxd8ii66gt.X8wwdpt.BB:27095/3BfkvfzcmTS/FTffh&S/gIWvJ5Kd/AlOQ%3EnO +http://ch43n.51rkj.rze.mq/pJjrSAiuSv/3x/EK%59ReZM9w +zQFC1SPO96J.Jy20d8.xn--0zwm56d:863/0OWpT4dpkMURAGe/nFg/LQBUr%3E/af7dO1 +ftp://Xctk9iigg.cat/u3cX1d/Sx6m3dql/d%46;type=d#0i%3cT1yMkZQ +HTTPS://56aderic0knmip9lkqdqag14.uk:45885/lELiK:/vF%4C5Enwqy/P5NGJ2b/dD6sg1yMV +ftp://vlt.3g45k63viz2.tcnm3.UA:60664/AJ9iqYk%c1/uKbohn2/K%D1kequ4z8rxFpJ +Ftp://2gifamku.jqv10es.MX/yJ0rhtMYX/Y1Wq%F90RYO1F/NT0%aeAG3/r3Act1 +7WO6F.XN--11B5BS3A9AJ6G/1L%f9G0NEu/L2lD/mQGNS9UhgCEb +ftp://mIMU.t4d24n4lyx39.zURN708MCNGK-TJ42GLLBQRJHVENGPO.bw:59930/KmBYQKHfcjNRe/rK3fUjg%0Ad/.zHeVoCaC5/w%A2%F7up9o7J0Eq/ySBVhB +ftp://lv56pdepzu0b0fo-04qtxv5tt2jc0nsaukrhtz5-e3u1vcb517y3b135zl.e0r1hson.dk/3TVoqjp6%1FCFSkt/006VZfho/gxrWxgDawM3Uk +Ftp://7n977.Niyt.2fgkzfhj.q7-DJ.Ow7a.it/5zfRi3PO8/1zfKT9%421tP/?SazEijJq%710COQKWeLE/TdUc%b2u/2AxBw9%4BUN6Zp4Z/KfUZd1MTdPv/L4m1tI3/WJvcK1 +FILE:///a7kRxh8/h43TYOY6J5%31B/ZfuF%9c3/ +[46C8:60FE:7ff2:79cd:69E1::221.191.034.036]/Q2MQ8mttjsMF/UqrKq0W%E6N1#YfB7A8CHYa +https://hnk6fx.2uxg1e9o.pm/I=LKn%a2n4/J&RntX3mUxZ/B1Q.Ilpk3Icq%7fZ/ia:4DLuk8pvsD/mpED3egQJfH/O0es5zrzwWQIC%21K1 +ftp://133.195.101.060/U9x99/nrirgTvZnm/QLNzsm +file:///RN%7EGq55Z%D1E/U0BQ1De/o8a@zHbAMS/GOA4KUcR/uaOR6C%f1Y/u5d7 +http://[f63f:096e:ee87:792d:CD31:A1B2:83FD:7322]/tnFLqVSRa5h1/%EDX1y4cxiv/GIo.OM0/M4lBr/xgHa= +file:///Td=wh:cuTxKx/4B8%dc%616s&sE/snROY6GQc +ftp://1fcu78n.COOP/eDRJd%82k8FEI/7fbDLiQncgOl +http://obp6jiork.KP/pOedzk/Lo1uNQ796m/hjLXBOr%25AB1/ +file:///j3m%a5o5blRxq2/8aDBkHng/OR1ixi5h8kX/nCUz2aDz/ +file:///V1tX7rM/7zk +file:///1qw4T%8BKBi3CKv/dxm6%7f8s78R/%83sF6J/K%33qfB +ftp://tyt7r.u6ier1pxipif5.BW/vSq6akPyGUI/wVJ67VXTQeuKM/yB4zYqPh/0RuHq%58G/rBTgdr5F +Ftp://4dx-s0az06e.Su7ir.SA:16277/HWkL7hR1SW/RzpkWipV/LCYQ6/gLpY%807L6/60H1z96%90xdQ/P9jx4DVu/oFa6c#gQo%57wv0vN +FTP://o--B02WG9T7-BXW-RVAJCJN1IALU9EX65WSEXCRHM.Aeh-m.cat:34416/3q9yW%53m/FJ9&U84ik9&e/R.l/ji0sjWb%5edu12nbNSW5c/YMGfLcesN +HTTP://lMxNbKW@tq1imryvi.P7g5o8np1.SK/um4Z2TESWBSrcN/fNehEdgh/sW%6fCP/b2fqBsG +http://Lgwt071.sn/HPn4x/%46zCwYZzy/wzQVoL2sT%E3Yl?974Zu=X+JuSbGjrO&Xu3Fz%a8%19%5159f0r=afHdI3%F7FNrs&Mb0hjV7d=&I43eztc=1k:3+uSz+kdJP5c+bRkUBkF +izojrse33.9WTVFAANL2Y.ly/i3ae/5%0Br%f5yL3/MsnfAk#T6,v%51Ev +ftp://[8714:3F6E:aa8:c8fc:4F41:b8ee:44.74.99.35]/790Ug0mWq/7yBPb/pzh4dTX +ftp://[ACC9::DD55:A45B:7a6b:177.179.158.116]/i1q3SzWTmO%09p%A3/FWDWq8u2Q/7 +Nw2m4j4.Br9kvjf-9.3wac-fh0uk.nysyu-emjwy.cat/PGDh:oW%5F/H34QSRwe +6f9f3nny.mq/ai%cb2SZP/qfjOd2mpEH/LUZ.fxv/#3NaTgg +ftp://R1x5yr2ij24e42wlojnp1i-b2bsacd01stfe5-10m0-3z6cwb3aflzrgoo.it:8665/oFbo12T%3Bng=x/%B2FcEUXPHAP/Ni0qL%0bPN4#yhp%5dO6 +http://[C794:4d71:ACD4:7AC2::30CE:B0E7]/T8igmbW%6C/DE1%1DyI457M#brpF +HTTPS://rI7HAX2OS.bsajd56xb48.FO/fn9eA4%0A/G96ogw%69SGis/1V0hqVLN6zaQC1 +http://toncwiacr.0px.g7pud.MOBI/EdoW/qUMMnH +file:///LkP1%5BcrQ/bnkvBi6F/Q3IRXB7Kt8mvDZ/ZKwDAp%a3/ +http://6DAK.8I6FGLS.t5YJHK9GCUVU4EB6NO513HBTWAU0XP5.GL/LDO%8CDB%82p9# +file:///%46f%c5KRhPp/skp1X/OdoS-J1foeE/5H5RIWoip +Http://180.036.254.028/VSiroQpjS +d54n.Agqa6.7e4.JOBS +https://5t33av.5u7.RU/SugrkGKg/FDf6cYm5QdHk%b3z +file:///tGHsUEMaQS/VLn1%6Au#uGnrvY +lm.27.jv4quihwsp.mw/mwCDm0cweP/A8wSZIQcZGV/uKBboAnqevGJEQT5d 
+ftp://6g4.qe-s9txq3o8vvr5e.5YWZGPDM9Q.820d8wtribsgglbrnkafno126s8vflph9tfmt0mwew/qC0bInpp/fqxKQLzN/hAj/6PsngV;TYPE=I +file:///aR3sSgC/GJu +w26535-k.Ut2.MS/pQP1Rx/NUKUyRSr/21x/CcgOcN4U/Jzw%C6Ft/n5Mu9X +ftp://75.22.51.21/wFDRPO/NLI1ZSecRAfFEAy/kZ4whP%C3A/ +ftp://1h3yyf3d8sffjx3rsf3k2y7c459c2gx/%2FfoFDEyWygHgKAuo/KhJZkBlC5r3%99/9I8SMy/25_&y0 +Ftp://215.239.176.156/tNfD%09mvdOM%28zx/fc3DTw2nf/#2kySKJ +http://Vyt.4ferfwbkbm.owtk.me/LlUtIjj/BDovC/6vJ4Wbk/ihtBt4d%acVl/ywEBIdg%3dHb/ +ftp://Lq.es/%B1ZPdTZgB2mNFW/qre92rM +file:///IZ47ESCtX%aatQab1/V553gjR?Me/#9%68qPw +file:///Y?GG/BBqMPBJ/nsxX3qP/8P24WdqBxH +ftp://7vl2w.jp/b%a5fBYyDR/ZN%62LG9aYpjSwn0yWg/nG97gndK%69XZ#fet%55XXZhslTNrq5T +79wvzk3.24dyfkxg0f4z-hsqgqqzj2p9n59el0a.XN--DEBA0AD/:8epfLrewivg%488s/2ORX8M3/B0KpeeB/2rbuCnnBF/4P6%1cU6fTGNj/o%3aZMIHdO +Uow9.sF.GP/sF3FCFSbCRWGNJY%aaU/DVXA5nIOWmjc6S/FQXdiBw/Y7~cVmpypgft/vU1%D4z +ftp://[fd77:4982:C37F:a0a1:7651:E09C:117.093.145.017]/2l91g/s%79lJmUiZ/%A5R2qsJ +[62c0::]/d1lmSzoB/5OBVnzn/kOXW%D23 +Http://Ed095eimjy.rlb5698d.kp/_l5uoOO/aA494s?3nSxdIpE=y%79qu+2un1hGR&J%76=8&L%bed=uY5hO+s+IKk1S&Q=HHXEC+Gof86QIRHy&35QY5= +FILE:///#F9Bgl +jyia054.l814D9SNHRRA5RJCCW.kvxga.XN--0ZWM56D/sBbx24%f2Tw2/Sd0Lul0Vg1bbIqW~/lveEw +File:///KKfIe63z/BETB.T%C6sG/RcYgnOycg +ftp://892f7.oel50j.32.9qj1p-g7lgw.MR:48021/XNKbk2PZQXSvOuGnOAnATDt3/XfHyJtvoC/PW7YrSgf#LmGWJgPw +http://sisas.ua/4CU60ZLK4VgY8AR89 +FTP://7qf.hlj.TN/IXOeaf/t%c52Jxwy#YkcAy2 +Ftp://Gbu5t.HT/xad4fgjaN#GLpU3XQd6%7F(cHIz +file:///A1omJiPzafgAm/addqzG%dc%62/Lw1mamTg +http://89qw34ksf0qf6iq264of-1nya4ds7qvpixw8c951aw8wcm3.qxk7usa.N8j1frzfgnkbi9y2.XN--9T4B11YI5A/Unwn3/%97gnj0/GQgJC~OFxsdE8ubC7/IWy450/8%7CQVgdI8/soi0BviZt/Zjs%10i5Xh?qi8t9=rBbPok,Si&*Xl=Q+fT&Hx4%D70=84+8W%18+sV2BU6xCDP%47M&Usbms= +Z7tid0uh.eZMOI-M1.umlsyksuzovqdw6wozbd.BW/m%e684OhC/ErAhpGiG +ftp://tw7d-6yu.im:2055/%66qbqzss/OmPGW;type=d +FTP://zst.tn/QcUpaA/VKvJ2/JN6AKew/iXYIiHm7mfPFmD%21E5/yTQpoiqdbaaS1/LnzOX#VqsobH +eta0q7.2r79g.AC:34736/%abp87fVdPCY/PvO8Uk4WoLF#A*HP1A +https://w9zhko2rttzndzivll92.sbzum.UZ/bgy8l68/Ix72mHu/zlA4CI/IQjc%CD9%255FxJ8A/Dbb%4eTCRu +[2582::]/Mhm%55MWThR4Ne5mZ/xniX3IdG/ +ftp://224.3.121.112/G1w1g%1DdRi/T6Eb_NegqJs +ftp://tn.z-o3vn3n4.5wg7.gs/loxilPpcLnsI/topa0Ez/Na%70Dcde +syt7m.TD/2dxrQQvBXC78/Z754hngiYcM/eM%3CaeYeXX/nmUwguwk97VGL/ +http://isqogte5i.c-3oixcmy.SY/jlPVRlTs4v/enCZWc3Sl1dJ7/M5GTSZx/Ga%cce%63cLzTJvBodJ +bYIAYQ.9mlnx.OM/t1KK3u/iyQFS4EGHN3uKogL3WGG/6wn5Q5ndq8kHO%734cxgEc +Http://wvfftjk.do/a0%644z/?ATzWOxO1k=%85ulHR +http://fnoY09@bm8xcfjyfiremhz9.sr/E4Rrq2/vQjQKj9fwV6r51/mn3x8he7/W4xCQs%FBvrzb +ftp://vxfr4g5ka.kn/TZSPrYGzv/KzuB%731GA +file:///vjS%f1/ktgHPAL/=v0cZ/WTpVo1/i6XlMCkNI/kukAwc8/thWUblm/c4ICXp/f8AHkj%1C4d%9107v%44hN/ +Ftp://t4qxt.hd9ok.aUQ7GIMBGXP.IS/%7ey71ndfLh/m%4A5P%75153tpU0hY73KfO6o/E%7aAkUlK3hX3Fg +FTP://gJ8MRF8UYWFW.iq/cdX7RYOqS/6E6XUh%fcdHS1%dcoDwHgpFId +http://01s0hfwz.TL/C9uEC/K9uWhknP3AxHW/%c56I1zL5Rfdd/sLJeP/2QkQNP/QcW%8aA0A/ +Http://gRWSMJ90XZNPAPHL90FB.zfyopzk/hMq%1fD/A5jQ%efiH4Csr/HTFm14uSXf/jW50yvQ6Mb/EJrahj19Y9Y +http://i0.XN--MGBAAM7A8H/Uy6czi/rrAt8esL4/iL2xLka/B3j&7Inmt7g34 +file:///aZcnMM/Hnr1PCn/wlTztS7SpL +http://2lv8030.fimc0v081i/cyEUoud6w/gfAlE/iQP:8/dZCue4cKVM3bs/JU%d5ZUA1t +ftp://kF0NLTJGD.HM:44827/Y6CgKRiW/4r7G/Db%bb=7xD/tE/t4ooQHdBsrw/ZvgcX/qTCarGQWa~MKW5nn8NF/dcy%1caO%b8/Di%947%2cB +ftp://4ufofbu/pmLZX%f2wJcQO/B%e0b%64oLObaEx&C/QViF1ohg/Rffvf +dYC57.CI/=G0dg +185.224.223.157/h8BdA%FEv/KLK2f%86LS/gwA4rKKHLarf/b.EyE +FTP://uhw3qgl0bvfp568.e5wkz1l.Dug75a1j.US/R%AE5DNL%C4vMl-TXG/BDSu8PXNYU42aY/MR-hx1/mC2:SJqsCN%d7#smDUT 
+File:///q3iMCFXfge/Bh%cdvWuy1w%E7Er/Jmmf7DkqSG%35a/VUvFz#8%510SIu +file:///G%E7R44SI/L0Xsc/c15wyz?8Bs4rN7 +FTP://eQ23LB4U9CX.vcrnx.2fa.k6rjf8b.pe/8L163hbbt/J%26zcQf/lkieT5x/Efa/A2gUk/o%ef9PIBhPODaAn/p8%55Wsfap/BdTfZ4zm%2fbQt/SY7rMh +file:///7RVk/qIRRZ0b/ +FILE:///Rq_/ec93s/HMB24%8esN/%4bO%cayWnOF +File://Yk7ie7.xn--80akhbyknj4f/y4e4%2a0yHu +ftp://4ps9b29prywnt6-1xt9t4cgi8sbwjj6obbw1x-2y-v2tft1eei67i.Hk0u4zwmd7o9z.jp/o4R1sdAnw/Hu408%CB/HdQ6cFhG +ftp://7efqt.LB/EIX~:Q24/b0QhE%751s%F66R7A/IFxxOD2v/uOOPv5jARBJsf +[A645:D622:eb6b:D59B::D48D:f334]/Ulld404y/IM~6P3 +FILE:///%16b72yhVw/2BPPCZg/KwHAJ0X3QT/I49wMwmls2j%15xkYc6qFZ +FTP://octvv.2je8.oJRUDE.02y4htgs.es/zwVuzXoFKJ0k9 +http://[3A16::]/1rhxoXw9Cv/eWk5gHpYJ/v9gRo/un2Ygo91B%A1f2p/15hJ%A5o%A19TLjzzRrGUT +iG4PTCCG.3zti905z3.ci/42j5.oKj/FZmOBY +Http://pclly.36XVKSPBC/Nja5D +148.020.113.014/ASuvNkg/Zcwt4/PjpwkEUVHbjkeKOgL/%f9hibk/NT9kSmJF%1A/5FaP@BkLf/jTre%balt +tnjbgbiparss2x-xav2mitawqn9ema07kfk6kjck.xC1U6J.hm/scUu%E5D/qZ9K%1CX.d3mWJb/-SdvwN/nFS0ZdZDNQA +http://[3173::]/YHDIJlMkv/oFpVHGs/7Dn%61pqA%23/ZnaIIPD%6cj/ +http://i4f8l.sc/WuJNKVuflVGa8/%85hi4B1G/mPs/1KfX%12/WswWA%B3i1OVsF/Z;wC5kkDQ/XIOtrdBl%D9%33 +https://v24gyfj.xfrc5dy6xuz3paev4rggl3xeg3vxzw7cz98pbcgum8xlczt-n.SU/Mb=PxgWX/J04ScMxk8u/oH%A08nv/3oXR85tM/ +Ftp://c82a3i5u.tf/v%D5/%05QNNYI&ssnoF. +file:///MaIzEiaVY/ssIPwkItF%EBIUy +Ukg.sb/Q24uLBUl +HTTP://Aphi-iog2t.PE/SSwgnY7af/VabUxcEU2i/JI%434fkP%7cO#EWmOFU%5cy +file:///FXYZhobB0jX%5BD7PIt8H8u +Http://asn7b.LA/13Qp3t0dY/Mk0ldhZyJP/rRgIZlOu/hqt1qM9NT5tAGD07T +Http://mb2.NI/eOXXAC0MNiEvJ/ul6ydqIPg/3JhlWx21r~sH/ZemaBb7j17X +ftp://7i27:54542/B3rW/LSNLFJ%74J/%e4NHDP1svTU/Kkpr%C1%6cO/2wWp%f4MiYLhgWGSF/u0wNwK0B +ftp://f8X.cat/L7Gj-OSdF/QBrO%f3okEZ/L%bdvAyxC5 +ftp://[6CA9:93a1::]/?y057O5/l9C:/XsBy2so5tX=D%71me/ +file:///%33P.AyK6nB/QkN%011K/iicc3HEIE%C0/v_7Wl%fdzMCBnfC +HTTPS://zv21qs.ekofwyy.f1pd7snnae0n2nzfdclk1sf4hybx97u17piaj5-lul89bxrf775koowj.as/BAc33xOV7 +ftp://ko%5BM@183.207.071.131/tq~2QxL/d%D397GnaQgKtPMOsCp7fyVobgZ/Nhnp4LAKEvQ1V/1xFn%cbR%7BVU3 +https://fiuubt.bc-yrorta.kdn.M8mascygepb0csr.vpifk.G-p35wx.er/4wvko7/Wo9PsbrLI +file:///LRVqPEfRevRI/nHtsA5k4iilQ/22vu%674y +http://jX-U69Z4.3vuws.41h3q22bzs.o3hng9:6629/Qj=CQmh9/%9aCSTfa%0aXvFQ/u0zAICPSGUx/MqP32INW%00mp?ZmIZc=5o1okD&WEDMM6Qnm=0w5T&gajnp=GFwK+Ct8Pds+KRsnyPq+2UFmx+cwnDnvyn+Zf0VFXyk2+Aw67fL +file:///XRDAcY5GGmj3/WoHYehPpF7/HS9LhdHOe%9fS#!SZge2 +file:///UIIGOxv6jvF2%c0/%A8J3%677Gmq8im1zklKhqx/HMhCSY2QcyxvL/ +http://Qhk9z.zm/cOGBen/mBsDycEI5V7L1s%84WUj7863/p%5f~okuRD51b0M?b%F2d%67ujGr=oh8PWUtK&j6uX7baX=&sg3RUocA9W=m5IaF&JWH9G=fyiOtnC3+7RJA+ippw96rvu+BxtGg&F6f1=jmPS&3PE0xX5=TGV%5c5J&%fc@NSEynhuvb=&MkRIt33= +Http://[98cc:433d:2C25:62dd:54ba:d10b:63d3:4C40]/YlbNrJod/fdjuN/qYqSdqr5/KAbXYHO%F0m7Ws9 +file:///ywFY5HK/XAv@v%66o/M2O4Wlny50hypf5%02A8 +https://nWC9-RIA00RPVL4SSWRICWWX3NH5SMQIA7IPMCK174T30VQBL-M6.XN--0ZWM56D/CwE%e2rWaYZmE?X_coOVl=kqGQ&Pli=MjKg-+wO6Eh+lbbcN&x3M=3kQh99m92mRdf&iiO2wXgQ=qyWVG9G +file:///enqvF%EFLOBsZhl8h2z +ftp://133.4.130.192/p%b1LgcONfo%bc&kmH/Ibh6Lq%DCJhnswT%1A +ftp://1xf.ipl4f0y6c4.VA/LHuq~/p2nPbE/0YGGNJB%DEje2psef_B/aKOuMl1Q9 +ftp://o6ou6n.N8.yyld.JM:24207/aS15Vk%0eg/M8jcXu%14d/%48odaw +file:///7NToG6xM&SK=k8/wTdaPAFLzqBEJ/zHMDPj/L.fLv57c/z8QYrsKS/CEkA5FEhQXBQi +file:///UWrC%9111nEhh/45FHiTx%98L +http://35.iN13LEQV.z2d.in/%B2GBtdYtQjc4TTr/gLxjU%B3c?3m8B3t%24eK9%b8=kgc0f+ew+uux%7dOI+pbZ+H%9cS&%56mm6=rkQm+dHPh3gGj+1kC +http://nEN5ZN.EG/%0efsf4v30L +file:///19%9947/ksd3Sq7W78%27/2K_Ylzcu2q +r8sht9qzsc1e2wp.ci/8SbPwlW%5ac/qKEqFi0Q 
+ftp://zxmv98m49669kfvf24o12w3u93wbovfp-1smo6y90e27n133okplcjqrmv-a.CD/JM5RAAY/sJdBntYWuEY4uB7hz/ozRSmFJD/#Xv22:Xvg +6S8.Crwllo5e3.jmtz.XN--G6W251D/6InlQn/hnhu2f%ac8tX/apq%0D6o/ +file:///gVW/nnRNxPfMXKb%72Aq%4A +file:///Fzza388TQ +file:/// +File:///kpiE4WSatjDV/phvv7gyfb%78b +ftp://240.154.225.198/I%39uutdECwM/PViD~qPa +td.KM/0Dkyg/B%65DiABz/wtqGd/i7%cepV%86XkA +077.102.005.039/p53%0bsPeiZaRy/nQHLsKEbNdaX/nT9H%521/Zb7H +https://Pu5aweu-29knkj3k41tw25h7xzm9pck96ey4q0gqzig27u.vLPR1Q4.vg/QANLMxa/gccQ1ekkRDr/?bXRDWO=I%0ap7%f4PB8S&t%a0Uhe1I$j$=Mm +https://J-5ytf.nmp5zuopbj1qbl1ik2c4ihjwu6-q5dhn.ng/GDtBeBZixtl/6sgw9/tmeJ7k3I1hHJfM/2JYRt7towpNjvDWsumYmhu/nBVPkzSo/cBXPb +http://HSZDX$An@ukj35.ve/9dLg7XrzV8g/hXhzX;2/Zw3KKwTP1um2/qej3miaDjj8v +http://sL333Q.Zci48xtb4g6.lu/sQw4ZHF/M%99%1DNl/s58%a2sCxGQ?EgPNZ=qaG'U2CO +file:///W%64hVsq1u9rIuZy/qO8j6EEwj/d48q1%6D/ko0ec%72/pcJo/MZQohRx +Ftp://afq57indwrb0sjhgyczyx.se/%6FKey7AOE/IPWZg3ggMIM6%D48h/XnAuzG +file:///wDwlQVR8i:0/mzefF/D3Pnkoza7Zo5iQdc/ckieGQos4JM#9rqA%DAD4 +9gcwbh3vcmfa0xw-k2.MC/66TaJz%FE/SnDRWAknGcI +Ftp://%cdaTNzNPNu@w6H.V9aps/87/w@rPBGa/he%FBu4vpT +le1u.43cdu0n4.bn/Q0i6uNz/9%275%a3dAS/B%2fpPkCW +ftp://131.173.229.062/1IYcY/mJJ894/%89F%45HHRdA/eGlhL2MXm6Q/heBdvWm%3cVs%04/x3JjEB#2%2cQsgeK +rtubvdk3.PF/L4TR1g%5f6/Caov%FC3vK3ofrH/pz33aV%54 +urlyuqr.ar/tzJzKM/gutrfWqv/IC%24bbmSS%02P?%24JV=zrJilQ+tH%7bh&hbO7Puq8c=K1Qt&ULqdYq= +Https://pFOROCZ9.dRDP.gq/08VkBBPja8cCXZKLa/rEF28NoX/ +https://[5319:CAA9:0242:86EA:8e36:7086:B3E2:ded6]/Jq%C0P@jZ/KoNj84B5AJ=3jGk/7wdasVgHFexe4M/zgEZvK3vh +ftp://Bvc6nmpdhn21400.Vo53pvqm0/u7jz0O3bbFTTegZa +l0q.0b82ck3a.SI/EQf%a6#mhJ%0dfWnfM +http://hr58b8n.bL0/LppkKdZGYdxiHg/2VXeZWR/T4fCmyN579 +http://1x6.yc6g6uw6htmwcrb10t4kwc393g29cctmtdxxz1j.KZ/G9lcwKju/UiH4E +7T6OSH.PF/zfYyqdxITCI0 +https://2diizsrbfh.PK/t1zBYiDPZG8Kx:/pEN4b8xKu +HTTP://r53fl98bazbqhc19-h-r.qif.AW/8sH0%59j%FF7/QPnw69%17Og9V9l/JAn2c7i/%7Fta3x/P%08HRF/ +qvpqmoa.O-0.FI/TDl%E6x1oUoACe/4VUZdMKL8Axud/JEZEF/KOR7Q7?ifYXMx@=&iI'!tR=p&k2Tv=Behew+RFW2c+w8NOK7+?BGH&:TYW.6(=H%B0Jvo9LvAy61V+YjewIUBKHe+lT543+BIss6Rz%25KTjd7+fOp-r+/PvG%fbP9kd4K02Z+IUXHyh&Lb1kab=FDdwA3_Z%81e&iiG=CVrO+1AhtbU1JSvh+Q;ay+Jb8c+%c1L%D4&m?r%0en=8S$wF&5JOA9WI=&kGJ=WjzqGX&Bew@sXE=cl4a+2S8 +http://jykpqk6.sc/VBPT/xNRs7JVoZKE/ +FTP://2w-y60heg64rnrmpyv43tpfhftxolu-5u.lG0BKW.LY/g%7aPAj5j/qxyE/D79g5vu/ +http://Unp.IR/tN;/bCXe/fxSdK%00%CFB5N/D0L1/bjf +[cf65:1F97:24b8:652a:FB12:D0F7:181.134.252.162]/1jXwBjjxpC/0zKR6N%0bhawVF +ftp://090.247.102.174/YZgWR%A1NP/f6YUa8dEOoOk/a7%59Geq +https://Zn.RE:31587/Vam%acYZniEPiY/lBfiLn%F1/dlHe@m0# +FILE:///FojXlCuj/OQXGX/JUHCBAF/TUAe8k7O/fnh8rautFH/e6%C2xGbsfELFVW%df/JKQk/gEO%589e7uMuM/SM%7dz%0chqvt%67/dc4fnbs%F3%5e/4rLtAbS +http://247e/qBmVNrd4AstGuk/JkV%50CBmmp%06/%a5E%34TAY%E7/5WL:W%CB%193Dr=cl9rn&/mA9%651nvah%63hV +qkwlh9jp618.k-x.de/xiraBM/6zj@AcW3NA/%CBeI4RpP5nz/FiWXIm/fy6YJd/n%006lFEE/uT7%284Q;fXK/a52ToS/w6jn4ZU4r8/:B~XHaw?G.cE=osg8k3&iGJ=V4&w1vL=me4QRwj&YFgq=%22zCDTqgmKC +fjrb5z774.SA/PVZsWyA3sMJrb14P%995vIm6/dC5=Hj7?cxCp=bZ(40%15pi +ftp://pd5mz0sw.53t.sent7dh.ki/U%57Qz9g?6/6TOmiq%6F/ +Http://g3t2w4.2AB0B.3eq7q.RE/fvvJYyHjd/%34FK%98WeZ/G5Ux06F2BDF/ +http://7Z0-0PC.txi2srk55gs1venx.uy +https://i6.kzdyaq-v3.9j78y.oq5r.gpm7oh.x1fnc78-tli.5yu2f.3hfnkcvwoms.hWRAX7TAJ.7ei.tt/Ysy-/sRl/LZa6nw8 +Iq7sp.vLK69LN.lr/hjB0EW3t5%36/lSVsKT%3CWsL-%ADA1p%0ffG/M1S;SyAVBO/EvzIxfZpicuo/dOst%DE%E1w +1lg7.sz/X@ENk92CPk/vVYJGN%act +ugk7-paad2cswwq3kd82lp9r7-i93galijy4x4.vatv4ag.va/Eww6Y1XABn/pC3%9BzjH1q:sB%89Mu/WdjiQ32H/LEaekIokSv1%E61s/Y~wQYu9v8yDqSatHO8F 
+http://Jmury.vc-wuwj.rn0o.ug/EhXMKL%64/CwKXyRnpk +HTTP://V7c6lvas-wtxspcp53z7o-v9dt13mpp7gc9ezt.MG/q986Xs3Fzpo5/6tQRek0/zkdJt%605DYH2j0aVfgcn +[0CFC::]/0611uPvtHJ +file:///viHNVlfm/4BICnFqFz3mXP/1%0dxeFn%AC +file:///ceic16R0Ht/b%AFXzo7oKlnID/v84LSyw/wBfvq3QVf/vuytS9wORE/tYsyN9i/msSNDC4Jt8/nPWzs35yu%ED/zvTeOit/uSVe?PyD +FTP://8GJ0QK.rQ8H0BIQZVFQQHPAWF7EVV12.LU/dLOis5Hvn/YEA%C5Z68E%50hS/Ie1Sx/ +FTP://bGCO.apov3z1nrv.ke/cM4fSVF?%ff/tWLPVByl0/ABCz7EZc3/R2b7U8o9JM6p76 +file:///2%f5tf%F7dSLdlRwws/qnKbcUOCCP72RTJ/WTc=Xn%B88/ +FILE:///n4riCnF +ftp://mQEGW184G.Hv3zhea6.ST/iW6mhdm/G9mpZUib4loe +file:/// +https://A0ea6aeynb4z3fsvnh4wg6h7.9bicz2zg2-695lf1uql14i2sjf6pqh1sae2j3k8iptes.57/jzHSQ%ebP5/%e3%9Chd/#VqMzFZrd%ddpe +6wmlp3ipb.cqi.ikf9wdku.arpa/dMq4GciIqW/aL%10jc%d5d%c4v +file:///lT?KC#nXl!iMB3hl +FTP://P9yyxqsh1rz2q-r7gp.h0W9VBZWGP.tk/gvbKQnzs/q1Gb +file:///7KTju7/x2t7Qen83hFitH +iawuqq99.AX/;aTO9WOuOPwl/UAbRoxCcv4 +http://h-juvh.3gtf/spUbB%2aq/#%9C2/LWN& +vj021lv-xpcrzcaibfgk0.ad/dVYoNrxc5/NVH90Y7CCv%4E/vITM8z%C4?P9Y6IZlhse=7w1CwndaDA%79PY+r4Wm+esuV +http://%d3fV6o@knpyxaoxorjk0xthy4c56-idtz3.i91eof5.mt/MM0jI8/mviceY%E9KnCQrwqA/xTTC@R/bgzg%6CfrsDT/uN8jUqZIRPdu9a27A/aNc%f4l1h9UUax#t4W~aw +qc6iz4vjp42.9IZ.l87y.4m79dnm6i.tqhva6e.dumzoy.GG/aNgCtk310/ltjBeHJh5uJx/XMIgU=CSzwD3D/ +http://p7E5E0.hhvqt56.ug/2p6%2Cb~bL/JIlK:TS/KKKGy +file:///3%aexrb7UdZ5GpR4ZIfoxwL/vQV%4a2zQxki/QRji6gHpMGgBaM/d%71A2CTpZv-kF0tD/Ig6roS8m4/~aA64OxN2yNDZ/fLLcgp%d0/He%98%b6JWoLAm/_aKE52/bcn8%06hs~If/IV9oQt%A1K +f5ms.jp/%A1FpERWwTd%BFG/ExC8V5aqx5l2CLJr0mJb5u/DgMvEzAr2U/py9Vg/igr9PzANtw/FFiN1E7 +https://227.086.128.010:64985/MDKuFInA86qto5/_cK=4S%49Ic/SPp76/TlV%0Arlwfx/ +Ftp://171.160.94.43/ALTgS46I4VM/55PbbK/5N%faTSE +Ftp://3zd7z.etw.XN--JXALPDLP/4UztCuTbW2z/LL%2cDI/dTYSi9 +t6xfr.wxjz5p2t5.zl8m4.MN/2cbpjk/gsdm/5Mvc-j3rc/16Wb65&c7x +ftp://D02-auxxaeqnv9ve-jlmo3.l10vqu.12jl.2mvjwrsqm.BA/r71QLLNu6oGJjG/HbxrX1Grq8/QR%2agZv4hR +file:///XoCg%EDVf/A3ibJYjU +i44X.a8H-WP.zgmnrjxq.NE/oL42aLwl/h1unIUx2m5mhir/ZjNqL;n +file:///KSPSz0d%734OBRur/v2feKz%7aC/SfV1syp +http://29SB.j6/ojVDhx/%A7e34T8%01L%41BNV?6uRxM%DFd=qg9jmHtW5R&EeR=%f9,mnV.cGVNclEM54f+efsLBpEc+3V7mIJi+Dng2-Qk9&t=VWC!+5gUmI&c4c0sX%51=%03?a3mDKm+4rHPsfb%dc +96.79.198.95/8JJUovS/ +file:///.LxM7EsLzp%d2/sOKzUh/IVX5Mw-PVormR +5r.uL9CQEBDLX.bn/?3z283zb=k&q%d8u%aeOKQs=s2Ixcyjmlg&%52=Fc68M+%F9JLUS+4XTt7ypy%881+knwx%3CF+CUc1ZNLx)K8Ht&Bks=*woVYK?GE&vv=P+b+W%134Flc6+%2e2w5%cfPu%5BXUS+PAAvb+@e/E +http://ol7ctcj1x.Ugk.na/jnDQG9WhW/r1cIpcqfGNMDWto0/DfPQlP +ftp://ico390kww0.it/g&kOEETBwQ0Xnfaz/pSA4oQJ/nU1WwWgH/u9TK%34Z/x5hXHtQAb +HTTP://iEYF-043APHCKLC7PX.qB28RKI5NNRTNJJ41MVKDI53GHXIMLM.BV/QBykbXcYpFg/zgpKZ/pVe2L5cYl0X1%37bmI2D/NIdWj_%EC6VE56mu%64M1sh%bfvNe/ +ftp://vb5vs.P5f5jmxq.sn:10748/gx%54N7WDo@FP%a9/aFd0z2V/6OCUikUdhs/F89CFSH6XHi9Pgt/CzM6Y3s0UZ/u8xukwK;type=d +File:///B5dOvjHOOe/oUJYD5/zgi4jw%54XPx=S4NV8R21Bo3u%d5/Mbd0rcFk/%5cPig5 +FTP://ebibm0spm7.cat/aalird/1v6GldpVgXA/9akBrbVRE/FbH97%67/YfhOfgG/gPiGQb%D6?AodiI#nTfAhiF1 +http://[9396:d59e:191::f7aa]/isqQk3jC/js7gnxrTJLFX/ +HTTP://k5ifny.sa:32595/8XvVVW6Tp37x/IF0IkevEa9jqkw/58g3p/MZB%94sVPjmF7/wZD0BUp?N6P1o=nH:%5840TZNN%37eJ+AJXoM5t7+UhR&%3FCC(O96dC=e2Zqj-YxOMwv +2hr.p5v.6aqidmeffi.flfqfx2znf.cup605.v6ktei.mi6.AQ/ky~LSgBJ/3JZhLix/blFeDQRn +gtf7abvdn9i7cr2e.YE/-1vj3Mw/P%CEXiCFd2a9/vm +http://3rsqw6jt.cv/n5e9YJBevO5c%6e4rW%a8/iKy-raSDu/.j6BTI6/CZR%f7I=Qmfr%dd/#xTHGb9RTWP%c9H31p3 +file:///S0Vmb2/JccbhGwccE=w/sgSbbJh/2OjHXikwMAVk/V1l0~FYdw +file:///5fXz1pJg/G%A6MIr2J/6gwHl%1C%55Xx/xHPZg7hEg5BzqAVzK.gM65L 
+File:///SxZ0jN1/C7FaB/Q63Jxn/QGzG%CEcYzLq7sWLWF/tD%3c1aukYV +file:///T8krlfICzWYr%e6/xGDI6sWJ/jCXF%87zmV6 +ftp://csanc.mz:27249/Q4ci9eH/uQLFb8ZVrjYbaCS8/sNzv%8DY1Xapc +file:///P7Ub83hzju +HTTP://q6-aoovoq.j-joev5ivayrom1t474xlqxrfro.xn--wgbh1c/WiS76Kh&O/IDDo916%22Vp4/iZYdp?%66lk%24ke=&OGXRBNTxne-Rc1i9b1=b2DcK&Lyuxv=&%5bF= +file:/// +2cc16zv4u31wx-edyjiy.cz/voFy:f8~/9kCAM1/1i8r969t&%53/V;exvHAKlZm5g/J85xEKDBR4yY/@%8dUYyVS%4e%3B%B2m/W5AXsrDE0i/#ivl39=VdW +https://73ll5al.MO:10068/5K%AAf0p/#5deD$x1 +FILE:///a0esBQEE/ +qnta8.f9284.5pvu.af/tHEFme/OOQl%E9GOt/xuKnPxLGVEf%D8#LfL +File:///Vg9klGYqV%f0f9p +[1112:D95A::f9fa:5258:6AD4:3c08]/tAHstaKl7bvDJ/Hm3zObt/qSQiJ1FD/ff6EP/YLR%71gk/Qm%98XlJqp/B5%31GicO +http://[f34d:a4fc:b932::631B:2C2E]/F8CJ0o2L5/hNITi9 +http://fp8bh.zm/R5WFY9BBHOmi3/OyhE6XN/7tZGprtgW#hrKj +mAIE.mXK.qq.3WVWRXC8BASM2NX8GRC-L7O.nz/l%E8SjQ/D8iYe/2Qi&C3RMJppB%88b +https://smj0v/Z8B/%96%A4mzAT/eixQJ/v%D3HDtup +ftp://J-b0a7i1grxbx.gt/MuPMg3Ly/r2iyJo4R4opO1Xj%C6 +vbhx1cl9dgl-asht.lDN0ESMI.RO/A474Sw/mcZtSSvta/ZvpyTJ/OFCSmNJ +file:///pedpH/COpc9b/gtm%d0EBmRz +[B91A:258f:095f:5755:86C9:7989:2DC3:B052]/%ecPvKuwpKpSQ9ANsta/%ac=jmcQsb48Rfo/bWIMfqk/dUQF5ms%d7/6Em91E&z78/uGC9e%53/Cleb%23zyGMVzOe/Rg4teS +Http://[725A:9A3E:2F98::9109:5272]/ijhUpBG-1FS%73%D3 +gmamwxo2.0z8rwjft28enmc.p-5uyn.u6E6AXVBP.ph/gBkpM4WFysjoV/X591ak/tIRMD.t5y766HT%5EX/RSb0a/Nw +https://mxfwd.gg/uwsX4/vnVUhsd/igwlpT%bahLI4;P0 +https://9g5pjef-db.Mq0tfjbmqomp84hi.rf97xmi3834.403gi.TC/sLVqu3UG4/OYh%98SQXVXf7Cp/j%deBNpZoEfAD60RV?wv%90PcN9VQR4g1=H9Q5pv&4C=aZ%a7l&B5hpDGtJ5E=%85NY +Zg2x0pwfg3xo38fwn-5rriv520uccxjuyrxov9cig.fcr1xxh8.cat/hQOVnH-6u03Wc/pqtgVxVOnlza/6I7b3Cv/8L%20%820/2GVQbVTA/FoUjDrsNT +file:///aQa%A8K1SpUF3R/DRHzEQarZC/WpL%4a~dPnH +FILE:///7TVlhAH/kRBTpgn2/HbYFSHYnrazY5Pq +FILE:///wC97%71cxvYq/%16?cNGP/ +file:///u%7BQA%909Et%edmf6X/J%44H591v4iAHpgc/qeuedAPm7Moi/dE5xiL8W/%52DLIO%B1vY4h/A%1DIi3 +Ftp://3ZBZ/YmeJ68Qq/%E8%74X5e%18/QNyU/ +https://R@lyd1.xtccruqswon.GR/oHPO%79jfl1/rFfct/TI4I5pfjn +file://Rcpx7se8pzp4sj8ooxrlfyi.cpj--z.tl/ZQtA5b0%8F%665G/RTr%2BytU/4C.hmyu8/F1hcJ/PiHi4c%16VEN/66dIi +ftp://wDIXDXTT.vg/eCSU%14/7My9QiLZjNwKRh1/pd16vIBrmG/sXqjHnSFyE%03HA65WCMRaJGunYbT +http://[fcf7:4e45:3CD7:4B2B::]/ZbLeVZi/mjJ6/LMTBU/V4%e0nMMUsY#'aLkxlcFi5 +ftp://k2.jALPBG.XN--MGBERP4A5D4AR/NyVb%E0rdacdy/KQxWB%0DFc/Ruh62/qApiRp%fcc7NqG5P/FQd6Yw8Hi +ftp://sjfzvidjcj.ae:55965/r7feW9uA/33qU0/BKlBWEwBw/w3nSd +ftp://2k5.lfssxj9iatcd3056j-rq0/Bq8-ZY8byN/Skg1r%290%40%23/X51QAJ7U/H7Ir4nHaQ8?QOW +http://ip0176.JM/LthE/E04n2pcGJV?P8=dCpb%e3q +ftp://072.017.130.122:58513/6P9dqEIAxnvathxK/GHoR0X%5F%8fU/%ffANo7hT%dcKY%dc%B3%75pXy +[3157:621E::]/CmIefnv.v91v/I%E6OmZLafDS/a7JoSqx80BC9/iSPk18UXH/g6xdyYNSlT8/o34wEX?MLP%993E=%1Fao&nRDo=6svN8+d%4Bq%30jky%75psOKb+h +FTP://zbtd.0doxocs/sDrr5d5i/%6cJnyS/5K8mb;TYPE=D +http://1vkic.cmd-efq.st/%937ikPpb/eZh_3dIzXbtNFVxL9nQ1/7bVwDiamdDs;8zgSZ +file:///YTllDP/IhzDW/%00H9e1IWG4%42%93bP/UCdd~o +ftp://ksd4b3w04c5nk5aasoepqdby-9w.sl/pNe8wJ2LkrJZ/XJSanvU/ +http://oPYQ.nd-egq1mkgtuwt4ei1ax.GQ/JRpv +ftp://171.235.253.31/gop3Q%bcUoW1/38aPN? 
+File:///XoULHUnTn/zYp/#SlAGu +0kx1j6uf.QA/lhgydNvB/jU%B4oWUd%842;n/zo%63SywbGAgc/c2LB/wV8n/ +FILE:///kcboy@/9goeE7Q +tD6HUNLHK3.u-06.FR/WwW%7f/1HS0pUTG +Http://c82m23a-5oprsol87jurs142tzex3957m9nrufva0sc6gdo3pajic8po.H5m3wt.1RU:11878/Odij%A65n/Am~mzHC/#ArdWk8 +Http://cd1.es/w~Uc%455aE_/wVJKfr0/X3vnA/ImG6Z +http://5ect9i8665yca.FJ/ylKD5bCODpHQ/lbunoK/%98004LI_w/HwTFV/4@O9_DiwGb0Ig9#B8z%90jjivO +file:///IDE/mEZee3/1B5W9drK +http://wka3.GM/%95yhyVy9#FFld%0CZGoiP +file:///nAL4tAgn/UK?mpt4IE/.2JW4Ej%28uiG/LulMqnbE5 +ftp://973k1fnytm6y9hx87p42k.1whc75.PS:59063/nxryc0E/ooGHQtw3ik5/6fU4vZmZNZ10If#iFXkFxd +File:///YTIL%AADxyn/exqQCc/HrBwtj3/DIOgKT4YUu +http://3ucol3f.lr77xtr.LK/FNsRpDDW=/76bEzBTI/q30mQZ/ +9sb.7mct69t.ar/WpXcM8498S4F#k@L:'L +ftp://3qn.XN--P1AI/PdBsWGhCy/QSZ%06xb6atX%7eXtqSy +file:///t%48r6pvw/gTme80:slEt/ciBvu19 +File:///8rjryYe +https://[887d:5086:CAA6::DA5B:192.032.127.177]/ +File:///v%2CCgt3%32kh5ZJx/~kf8WDLeR3XmmY6ap/.DEZNJ-ylM +file:///KNINXVO67tBU/VWJdbMVH%a7uqRO9%ad/55Wlt5O41e?/YGhF4Fm +file:///zYYquoqz/%240zKPi/@k9J&epm2dka +7JUE8WA7CLBX6ETD8KUU16AFZHHS234NORX.tep69aqao2.int/iZjrUNXtQfBaF/Z%A87tU/XfvTnCVEY%00/FUyeI05%f4#?hZ +file:///1?Msuc%BD1/G1%33Ppp/F2Sv%0EJIBnPzEUu32/81nqxxTk1HPO/7pyYlewH7gyw +HTTPS://hdtgt38onqh18-617otg7tn-ut6f49po3gaajt47.m4O26.rwko060q21o.Am497x0kow-u.TN/nZX955o/JtBhKlvv3r +ftp://28.118.125.16/3j69z80kruR/TXIM6gQFdZTCI/T52CULszlqMQ#%C3OT__%57 +ftp://y8K1P5I8E/c2Xa7CmI%d6TWC +225.022.162.113/ZF58s/%CE%56BA5rQPOLU/AUNP8rG/w8SHG%d0FVsZX8dC +X6eygmy.1a-mtt.ki/WC9%a6/GH9mNozOi +94h6rdisa-eh.CH:8242/I8Ik5%42881r/EsVYPHYT/Jw7%3A2%2778ggZ8u%60 +Http://89.pa/%65ssgG1L:fKtE/PrmY6WoXW/oYH2AfHjf/uVaFyqn%ee0o%4fAh3 +file:///KwM8U1%EBR6J/K.asJbs0/i1vCxd/ZthOZxt0IKQEH/#x:Q8vtaIw +http://rP6.Ewrowee5k83.COM/5CId/KVp%FE +ftp://l8AAQ4XL0X0HO6MF7.9d.tw/%98Vb%117Uy4/KyUMl9 +Q293qtnuw.vi/6fi1J47ebQ/d2EC4A5OM%FF9_tUNs/dk=?YyGXS=&El=i&Go%cb=fb8&7W95=Cg49VW7B+B3dDs+f'fhi2+6QLTS%bbuJ+IN8+1PE7QyfjCX7tY%7D+cGm4+JkozC,0y+SEO%ac&V1pkpm0GF=0%46pvcEyU2G+2%F5kBuG +2pu1.mv/3uiG%445F~s/%5CTa0YXuNMsqV/AwE3d +file:///jIjyqNR/CBgOXsf%8fYiqCR/ +Voiuuc65jm4ven-9li9.mii5.0h5xt6.KE/qachnQB/nsC%4ai/juYvC3yTiCp%06S8I/LLVvQY#p1jmTyx@W +Ftp://ydhhq20m.MY/%ADNIfcLl66t1fl/v4%a60h/N6My%9AKXUvToMFxY/ +14.21M1I.NU/iqlGVazIWPCvV/oelkORYd3Iwsdy%0D/LcdN7U +file:/// +https://07zje.j84g-9lx-673h.vwr.km/h2Dv%1BFR%9d/NV05FON%c9/klLPUVUcp/LRlEGREG3H +[836e:5fb9:0cda::D9A5]/n2j/Kjy0BzJ7Cj/GoW1ksyHG%B5A8tw;v/hIg4F;R%2Ax8nL/d1aHG5Vsb/VNMIiMx +[E69:a743:5C18:C43F:780d:FDD0:EBC8:2ce9]/uAWRrcx +ftp://B3fvr.l5GW6REKV.GI/0qT%dbwWVXZ/3kdb0/kBQuFu/R@9WXH0 +Ftp://a4gdplaw.TP/zyf2c37ZfY/QaiwZ3l/CUi9.ado/ +8L.vg/LjRJZ/z7/Fkg9dwmTDSp +T7wos.u6I.cJP-5HQQCA.9dutej.SG/6McEZ0 +jJ0D1X6C5CCNWYGOCI4NNFC5A5NYJZTCW65DHS.d1yxpq.TC/EQ%DBYuIdBv +File:///YGxWV18/%B2bnYvE/COmzr%B0YLEB8/%75L%c5ym2Hw +HTTP://nzhfr.Mlrs1k026k.KN/~bhI#qqgVS5YR +https://z9z6ip.INT/1%1dXkN1P/KI52I/yo%FD13SoZz0?:z'X3xwoS=1y&lmDOOEVzwHn2j=xfbMj%67cy#bKedfyI1 +FTP://aysc5.8i8kj7.cu/Ule%55%F0l/HV%7FNXdQfhjf0/ +file:///UZg7IFvJd/U%6cAH%59cS/dQjA9gM3RIJ/cW7Kuo/lBGa1%B3Hjf2aN&/ +file:///TPkfDWADgMp/9cr6zwO%38cZPtrql/w3GqL/nrvKR6Kq91#s5F4qQMjYx9 +http://1co-4k.zzzqb.XN--KGBECHTV/WRGpnKFny/eBiU%BDapp/0cb5bJ5%24J8a#N*cE%e4BmH3Jse?2 +n7q2q9b.3-ve593.eb368oe.si/xsA7jCLE%5CRj/gEfwCC/W21RJFHtG7td/fSZIiv/6mJkJcnid/xFjV%DF8pXhf:H/vh4Z3%efgdOJkeT6sTC/wUOxqbX +ftp://[7D66::]/m:wnkiFBKJR/7c8a3te/mQqS6ZDWbfTXtZ9 +FILE:///%41PSndZFnAZNuF35izYcj9Jmt/aoJ8K6/nGtfymyBi/ +008.245.185.106/0Aq3gb85/6TZk7/PVTk%b1G80 +ftp://90.188.10.180/fgsPUVSAEgMuLwrpxg/8QEjGiNEHN/pxjBgdVV/bkiEKy 
+5yxzap84dz3lccndx3xoj0zcwepy9ujq4bk-ckyo63.si/%E89rzFXG/htVDvVdD11S/SLLVce1/%5bgcDSkD +file:///Mr +dm83f2l.vvlpnpob.7si.cr/RFT%18uMgARxsP/8%61%7cO/eZtPUg%e5FavR0XRe9wZZ?c94ub=63r5 +file:///cdgSAblie +http://[5b83::58CE:d882:36F7:8b56:11D4:f42f]/9mbBwV%C4/AI2q64JsNqHO?tZ3=nATs%3CQ&lbSzuIb=/IJtfPRbcu +ftp://gOD0KB6HB8JDGK56.l-V4OW.sj/KqqiLzCu%6a3jexLbLB/%6dBHZb%29z72YF/ +http://s65E1E.TR/5sj4rIdUt%CF4F +ftp://[0f52:d55d:5574:ee10::dc96]/dPEbp7/PG0Nfo/MVx3/%5Fzz8%CFXb +bdctmj.vzaax2fe.j8S2.ojfq-b1m454.g7I.uy/o0%28WV/Bv9nDwD +https://k233JLHW6N.cCA13HZAXR.laiu78y.fleptcf.brva6c.osod.GS/OB5inpGTj=gGI/YNi3_gNnIg/J8UObWz6z +ftp://enokmi/r3%690T0H5mfdRq +http://s59w.cg/nJoM7yv/Z2T9Xof0hNGhl/N0%6b5Sbrbtjj/ +ftp://qytw0h.hkdt2rm.gd/3a1WJDglP%cfZ +Q-2pgsvifg.yr2ix-c4avrjwva.kn/_zD8ad/%8AVwQwOG/JMC314h/rO0qj%88?w0XEY=JUigA33U&f2=n3tXrMH74ApC&fx%BE0=b%d5mgX%7F&1gjjJpHG=vLHCZ0Z8&sYQBW%FFAIs='&zD=GTnVzkf8Yn%a3L&Xm%b9F%32EcwWl8=GUq +File:///spqq/8F2dG +1Z73HWVULIKOO5WJ.rEJGR9.nsscy.gf/rHEt;i5T/%50ZjYYJ3M%4dR/WlW0C48ocnb/NRA~0M# +078.104.235.053/8KqfxznOtxC/ycYiTG3%11zP2%A1/hhbuX9Z%d403wES6/P0gg5%94 +FTP://58vs5.g0.tHI.gq/N4HSp%95jtMMNr/bpH36W/cC3oAe1C/Sp7gxd/XO7JSqE +http://e8CYICG-3GD1Z7A0V121.Ya0j.Wy.CM/BLyz1kmpRF/nb6u%52/GpXGTv19#9?bwz +File:///Mze0xLtXpPFW&x/_%0aYP7o4Fm/5&809/fsvOYyn~zvJbT +file://V-jo70zmqrppoeyva0hm6x10y.UK/#3O9f0OYdx +file:///K4BV8xTq%ccORyFI/8PzAVSZeBNFX%adT +071.247.240.193/%94VOUi%ac +27r2mghslc2b.Dwbpiqi8q.gTYSL3Z.am/RU80/KFcctLv/R8tG8d51EaD&pno5r7pDR#GWY +mdfr2j.1FZFG4.VN/Xn6l%6dLWufM/I4FHTzlnWx%7BoI/ueeKx%03mfSA/%9a3PMEt.iSdeTVFgSnLi%C84m/6dh +http://H4jk06c6mtprgjywnc40mjri05a.VA/7B%C0h%4fCjj80/TrN5HugANCZu/eMVdn4en/QUSLGhe?7yjqzvzv2r%b0I=&p%C32*HvmS%39g=wb8u&lTvA=FCGNF46U+?Ak.vpCAV%ceiK0f +file:///cVjI9Ue/siOD/jynyp9%3FmBx +http://u8ic-x8o.UY/G9pZcTp/JI58N +file:///cCOIlZV8ms/Y%e97nfvexWwxq%00/iPxdyY/snHA2QZT%10 +ftp://53.151.134.240/uZqGXLUIu-J/=%0C2pO/PvL0%19MpQBv/ +FILE:///Kywof5D5q/0TRS/zayrkrnENB +file:///EYS2nDf%9671qsm34OZeB%e5lUA/rYBDn0DKs0/ +mpuwl0.BA/MkvAvc?j%11K4=9gE%613&qOOEP0t=g7EXs +g6tylc0.daeczh.4q.XN--9T4B11YI5A/1SbCR9cX1%3D/YfP8CpLKn5KzTL8/Kj11z%B7OuqJU;qM4P +file:///TJa%86AczeCmM5QMhi/Wox~Ajl/WxUF%5eSA:y%0fD%E21/x%cca%d3Qgx/8iWJ5-h%26/fCK%01nQNrK8#ygTTB +file:///~%303cUUVYTEaQU5%5DXbogiPKb/favR2rETEh/9TXM%15u/nYCOZpZgL +file:///mJM%a1/jv5%53QDqE/bFMu0CBp +[a0e6::]/YR5lwpHlG5BPjr2XT/Pq%e4kWAmZ/ucI10P1 +File:///8YorWt/#ToazT-v +http://2igfcm3qy.wlcgdxv-xat059qnx15a7qp-p-p5oph1c8.GP/hS4Aqy7SmODbaOH +3s81j.TJ/pS9Jzw8:NWryq/%00Kh1/Y7Rfoo7haw?pYq7Efg= +HTTP://k59s6i5o.my/v9%93qqGOWZ6RN/cdz6V4ly7nM9A/F4EhM0N2%53H/d%C4wWTDspWU/zfpMcIDWp#oO%6fSILRH +lvh-kt.TN/xZghTR/yDiD0a/P5D2%37rFa?rseH*%33ubfv3=%36ntM9MP,+97RbF5&F3Ia3L=%3djrAi%f7E2%65iQ+Uc43&y;Ikw=vdfmJW&sE_%F6xpm=XFIfCsT&k@ctNa=%47KDJKEw&d=am6K&%25!BjLNa=iqs.l +http://Lhe7w4f06qt8tif2af1k6s552hlbk.mfce.cc/DEqiQf/GLpkeKZAxhSO4m +Zy-iit.Cth-tuvx4.au/dl6DMUqP/wAeKXt6 +File:///35GJ%C8m6ubg/kpI4iEEx +dbe.gkg.EDU/cJ%fbQ3k7pwp5/arlH%DCD +Ftp://e8ni0.5etxvrjvn491/tP8r:UC/faEdqs4P/v4zJax4 +https://4PI.gg/fFtQoVp/b6Jf55/YEc2l7dE%CA +http://gpu16lz.LS/9e%daJrwQfHEpFvsZ3jx/c4STIJ/CmvEGAUx9f/ +file://ij9anjtok86ro.uN-BGDQ855IB.sDXAQR.5kr8kz.3J3M8XRM.18r3s0g-6.4rjsmwue0lwao0og17d-5-1.F1h3qgkul29yw2t4p4se5clomncxhmoy.g6c9tbz7.pa/5LMtmbl/1tfIF/pBOV7Hc +HTTPS://bF2RA.kw/1TA9pTTBg/nM/VSRo%85Kt?%62mxNfo=HDowgwkM3&9oPOLH2=yKOxIe+YNtt +5.Piba4ac.JE/55M1H/AZXdj +m-k6-ej7x.XN--HLCJ6AYA9ESC7A/suVrNQSIj9/TmRhHbe/o&0dbqR/ +ftp://242.228.138.8/o%CC_QjILS%17aYH/%caw8CcVZyPRZ/ 
+hGE9YH3D6.SD/m%1EpDJrzO/Tf2Xxqq8L/YJT7BTEY%661PvcMgOr/29ZbuJuWl6q/ +Ftp://mez27g2tpmk.MC/%B8AHk%95etDns%46/gXbsCn%6C-/s8_Jmy/DhmfT~Di6KD +file:///NJvRsBjo/IECCGBvb +http://8-6wji0x.tCVT41X.k1PS.15p.SH/e%daVn5b%f6/GpIJ%65e6/VpeXUmg#FRgJm0E +ftp://nx4kcydiztae7fr0y-2kfppteds.gq06u.cr/RITrTqm/VqRIYR/6psgA0%dfpfg/gcLyL1/xa%72QCL;type=i +file:///M0WBSuI2qsMuKSfOzj5S/2N7x7nZg/BLtq%72VxjcR/5%EAn1%c6TYYPGe/Lb5Mtu +http://94MNP6XNH.0mgqklz3t9g2xl89x81-a3hifmff89nahy62jeyhuhe8lhkuafizl.GQ/Ajpa4Z1D0o/aVv748s/NAIWCkWCD2hj/7MZS5c79DmL4/ieQ%21gw?oEPqIN=Pm9nPx54%c1&j1y=C +ftp://rKI.COOP/v0pdu1zj/ir2UM4X/7k04jhOKPVN/7ua%E5y8p/bl~yS +d-IJA.PS/drbtmJGFEbR0OzDD/wMV2C/krWmMUV85/0AFhGe9 +[D1BF:D02E:140C:4B9F:c86e:9fdf:077.173.119.180]/A07Ox%86Oae/yhjXUMut +http://A.bi/J1GPah/OT741dJ/Jh3Z0xb3 +ftp://6VMV.t680F6.ijsru3.bm/vlJmkK/go28Jr/qUtmHmqhj/ykeAVxYoe +HTTPS://oi%32Yp.@a4mk0.Teyu0lojs62d8l96qiym2v477ixatleasrgft4ttpbfel9r.BW +x37MULG.514yrp5.Vrd68eeufzt.VA/fFMWutSw0d/Gr%BFun3/JH6%DESQV8f#gn+NM2 +http://2.88.82.235/6bhV%BFGDy%ABd/g84ly25/;4AeID# +https://a860jcplfoodo0yq401cdf9.1ZE2P/NLArIzMZ%8B/6UiHWMMGS79/?4N=4U%1dM0qA31&faSM=0q2RaEJu5QT+vzNMp+XR%7dI4dQ+x+%0BawIYp%dbcBiOZ*Sc +ftp://lb.NP:46239/xwyAL/m74%9fqj4gttFLg/ +s086j1-9.Nowi9s.fm/16zr3s/mvzfyWbB5/&1mzA:X-3 +eigz5dhw.jynsrju0t044lcc.3c3bfm.int/%ffoZ_kP%5cO1ls76B/pQbPDb4s%4E6i/bqqrZ%b7j0uhrgIHd/eBdSEwfGrX/PSmYMzg0%6F?Qr%92y11b3=&L;5CV=zJao%31Tmm +65-ihklk4j6m.f3CFA.7kj.qa9rcww7uefzkpxbf87ni28b4a1i9rjqy9a.5texnqlc9.cu/p%CDK%b1%449LH/IiLqpww/HmACJI/r46TA4 +133.38.197.20/pbgvKM6W%BCEBN/Cvcu0&#idQDycc +https://4I2GL/cGtyrs/%A8m5%3fekPsTRWlB2?rn=63P,EJu+SQ1W+uPySU8pvA+%f2+m+CwuUokAVfo+3nzWcQ+S+iXvEuhcv+d$h%7fy%cfMB +HTTP://a0br.o0gvxf.kp/zZkWq5hfxy/q0x-g0In#bd%1anKx27 +ftp://[1327::117.246.244.220]/%91y4%09/ +ktefq.GB/uTzbgV/9nYvIs%8412/ynKYs/YwBOWmj +File:///08bP/cw3Ydr5Cyow%273h:O3Bcok/0hIP@/ +[018E:4459:9892:3770:3826:71D8::]/UcHNufii29UtPW%56WQ1%20V/ybjTB/oUWWQ?yUg1%cb4A=wk+hOic7f7Sw +ftp://1o2z/4UWsX/uSzHOw3JTrqy/TqZhkQk%62gZ/FpK/ +Http://kZYPZSRN.1m.UA/QN9n3Nw8kPAgkCB/SzdVcxryKou7mMG#p6at77 +http://se9g.s7-5qnlmsi0npbr8ouxuey3y66swspkl.y4.st/xfP7%066uXWuOu/clIFhy +ftp://D4j9grnngs4a61b.im/f35gw%53rTeI5/#Ff7A0YMs9RG8t +https://zujspr.cr/zy14P7FG3/Oxznfe/P2zpT%38S%FFVfP95Lh/nJJgzX/kcVuHCzV?Y5vMC=3X4n%9dMqeGjM+OjgETPdf%23b1+6H%47F+waIQ&,ZxQh4G%8AZv=ic+fQWQN+0y%523JTe0Ti#OA0m6iC +http://141.171.118.17/VLnEb4Y +https://sla.aowts.MQ/KbP3AV@wXFSgz/TauvS9f2/zvGpvN.e8a2Kw1ho?jYRUP=L_IAzw&cj0ux=xz&lrA%8bS56%A9=SX7NjQ +file:/// +FTP://h6.MG/XPmpsZk1h%0B +http://Dh4mlm:8000/k9TYvw/EWxlz4%97lBf9oK57N=Z#Pm63s +https://8-lno5.KM/Uco2E%dbYPx~/MzKrkZ/rDpXB7OWtD?Wb1W=bKJazR+yRD6c+qwe+H3bo2ACXXzkVX+PdfgOJ1Sqm40+X%3D)%AEgm8I9&inwrA=%FCe+%f9Xo4S+JrcmiNbPwa7P94J&fMCr;NellUf8=K&lhgC1k=%32CPUA6&%dexj,m=l +http://bske9znh5z.mq/rF739Qhneaet/NTfzZn +http://B7z94v/ +FTP://p9s.hh313n.6k3.DO/xaRRXPre +File:///Sn7Qzu4cDoJY/6AdR%8ccbeeFmXy/KRXtibcbXtTaLZt-bb/PISQN%777zoI +FILE:///IfZ6yalAm/BoIjbMXLnlo +file:///kFKgAORyDOV +file:///f0l1v94Rmms/zIVjJg%338Fy/5tMPO618wd +FILE:///fpbiT?6/%0B7dUkWR5r%AErqLW/v2n%bet%b3wV8Yzi80OJ.SguK/vBMyQaKiH8/Wy3l7r/D%B8Vp%51GgmqIBUHA/9gn1:46Xok/NcNIZ/FIK%359u%57/%35NvYIQIN/ +FTP://22A1D0QMF.cmcve.CC/cvkZF/H%4EkZr%39EjtfIO/LPx46D%5AgqR9 +File:///0Lld-DX/&Qmx07f/Zp%21ldGQq +http://rlch.COOP/%bcKE55hwH6/CKHB%2Ak/Qzsn2Rn1p3RUc3H +http://h6d5js.edu/IO%34xTQYL/OtYPRaY5/e0ILXZt/jNP2%07otUg/vGyq3xN/DC8P4ckE/JGfiUR5EfFk/vSlxbi5dKL8d/6JwRI +FTP://Sho0e4ay9e.XN--KGBECHTV:41333/6_5S71YpwTC +file:///HrmxzTn/sozw%db8Jz/x0czCVWgklrbV1Kf@IK/Um%78PuxjtjI/ 
+FTP://9m4b5lf0.Y5dnwnduzx9wha22ayztin-t7hng5b62e07rzsv55325xgdrzwx.gov/pmG%45dhnQZ +ftp://t2ik0rgw.krjz72-l.xn--mgbaam7a8h/I%19KxMhY/FSau72W7/WkW/vYKyDkhzNiu&Bput +FTP://[221d::]/BOKtvhabe/b%78z/piR8RBZb +Http://5zwdz3h27.q9l27mto-5v0i3i1yu8oyl.TN/wk91N/X32rxh/cmM%01iQPnCulto/ +FTP://gWUFGOXE8EW.1g9vse.xn--wgbh1c/ncQo%42ihY/Tyk216/;type=d#J4A9HEH +FTP://5wudd.ga:36706/W5a2PQ/%98Oin@%D5hjD/POMMY0b/HhPA4HL;type=i +file:///E01b%6ew/8QW%66%16Un/PWDGTFrQUHJ#dk&o~V40 +ftp://p78orte1aiif9.zk-l-n5drgvx2kj6i9e034ck587-utyikjhal.qE5RJ031K2FAN-35.v71jyg8l/wgwpnw5/1WPLlSc8/3RZzlIEZMlC8/ytaOFdSuPKO%72T +tri9.Fyhn.SU/YlvVjSi3M/ylMdK88iRo%d8/cuHyS5Am1oeQ/XM40zgdj/q%9CLKm9Q/IOwvLrlTi?nDUET=e95%a3qf&dSTE=X5aY&pWtb=&AS48RI=71Z91stUL8Oc&z1%B6=fVvMzZUyI+Niwre%5FXyVRF&QtAo=5 +Ftp://Kroc.Ls4-tkd7.sg:58219/9tq-FJyL?Qb/e0alokGZ2/MKTHP3Wsw +pmg4ty.m59480p2f69.fV.COM/X98xZ.E/cTleUeS/9P6zeVQjfd30/eVVvE4/Zyxm1SSqe9u/WP%a5hS +6P.BD/du%F8CoA/W0jyU5x6HXyVB/EOpU%0BP%BET/TBlhd%772ObORj/PNPXkVHaEY +http://5BCY.X3.SG/N~63s98IV2/?KuYCn%3160U5h:%BCU%DD='6uk3OyUbosbcu+l7U89Ozt12K+P/VK4+GhwEZ+D7Z5ByEYxG&8=#aa7R7i~K +https://38yyrnu.UY/8Kl08k%157n9p/TEeDKN/qQnmQFd +http://5PXM48/G%9fUxcBwBjXI0/1UJen/MF%30I6/eOsMzFMiM +Http://s8AL.rc94r4iftx7qeg4cbjjv5.za/mYk9UAydyn4q@w/T7K/dd%8aIXPp +Http://130.165.027.114/o8bwef/X%70neu3uGKY/NU%f8xTKW0;hTKK/V;%edBnJYWG0MI/ZlDMtVPK7?k1N:WnR=%3DNffenC%67+sf(z0U!mZFe+6YqpF0Ei4l&kea=&pv=0FrYO&%69j0HYlx=HVIq&sWgaQHZnyxp;=%97SOx&QbgYd=72tO&ugOWlP=TaHT&Zg5o=c,2tzpy&Xr=Nltupn6k&nxkPS%10oJY%74jL8=5c%58%77#E92Lme88eh +sat8a.cc/n:G5Bs4/%92Qx7YH/%933F68jWsdw/mgMLj/b9uFtDS/fCBe=77/LYHeH +file:///8NiXGOZYq +ftp://[14A4::]/6gQ%83ppX66/Fm%0fhsGDdq86c52B2AReDTW/CGafhb/4LAIXfs6vOHd/DHtw5%A1 +http://astx.i8o5jdypn1ly.LC +Ftp://7j.N@Ptavog8.gh/%FDJUUJB/nrC6%4as/AM2BxLCU:fGwm +file:///LD3OAKQVR +http://jVVR4GZ.BG/XELY1/P=cusbVv5o +HTTP://4fx.3kt642w.GF/k4Nruf/hyO_xzJ%982n/BhxTVE5LR/VT7cIG%66726zz/YQCAvC/eTYPd%2Af%18tPt6Y +ftp://1py.jhl5-h.53.39PN2C.xN.ps/Q6kM9aOm7 +1MRTJ51.mh/OT +file:///RlgHP4tRuBYzCPY/ +http://[8F09:703a:5b45:F653:AB26::]/C51LFNl/tS8p/yG8y53@Wb?eBrhL=%f0Rj:Vl#%11Z +FILE:///TmzdtWFH/1WP2R%b3nSKls +http://5o0a8epm-rx6n67ta82256jav-nk4.lb/HbOqUc/TIVeqJ7Ohp/BjDwRDKJ/JZO +File:///AvnO.7k/P0YrByEN2yEm9%1646/QKj7fR2/%1F0JYW0y/qscsiKGeGfPA/1rkuJyne%12/ +File:///1Hm4/bcNXO0cG%45XJo4RK4/SQGEP5/ELAGqI +file://4jc3bg.zs/WfjCr2aeWME/Nv4A4B/invk2d1h +Vj1.Ngq.LI/FR2%b7RU_z%a1Tf2vy/rysXmZ0/ +Ftp://wkws.yi8srfw.tm/sWvr8nVIPq3lD%16r71KGXZx/zTdcV/N%02%6ER5gChmS/uxEJA26q +Https://cf3-0aw-g8zmm-k.AO/mYGm9AqQW%E4q?6u=&rX= +8vv-rhcodmrr42jd6zmrnl7xa.F1igvm2.RO?rQOIRt=Q&Z8=1WyCZjZv83+lpB%7a +Http://009.130.112.154:65403/z6iLA6cr/%3edXQdq1/yHKzFjDA3nAKTr/Ot4A3f%4DIzccRDaDQcC +hwpmi.upmzdzzhsrz.e469.ee/SXdNeY7NHR6/Vr6%FDr +http://[C7E7:57e7:b08c:9FCD:4B77:4de1:229.020.164.172]/LnIzKLn/StXMmto +Http://2-6SB2KV8V8MV290SIC08D9J7-IRM9FTPC8ZZ.hwo9el74qqv1.zm/tr9K2BSFkbU-A8wJR/CGEL_82/cnMuBB%a3j34 +file:///fUtCm%b6qNK/lltu?NvBAhM/sJ8pOm:/jJ18OTM6U%f5v%3f/ +http://76OXC.pn.GA:15181/OPErhH1cHtl1ba/eIPkR6%1EG/8fVd02k/Ky%b0D5izq4k +ftp://154.108.127.0/vGpMboeazp05/usfmVeitt0pf3o/Ue4OMVT/sJ9BAYSLje +ftp://ivbv0.zCR-0J.lku/6m26/7tElM/%b2%0BI.Ft5AjDVp/oWyMVmsG/3%8E1FE8Y/0zdIl/m3otUSQeI7 +file:///0Y7NWf4qwhw9wXP/6ll5YWM55W%9050rPeqawX%F9/HleEmM +5LUX-O.q-33d.tn/smzXQJn3H/81mg%4de_/jb%97hT +http://84W32/CCKpkt/c0bqCnoQ5Y +ftp://nyqaz.MT/0OfOsU7S1H9BM/OjhdD/izbR4txUY +8wo2j2c1z9s.ef2ki0mlvvnjm5vfyu.t5a-yb41uykgo5kn1qxzffhz667dty8mytg6ir7os9hoxwm2.mw/%39FEVmD/%a4qRT5W5qW.yR/8XB9NHyB/ 
+http://rbf6ezzlhpe.hk/%0DK8/IXXJAsC?mV8vvDI8K=6t9%6EG1Dt+M7N+D5n@Vd79n%d8E+gj+ofnZ%16loobN+f3-S+e,IH&lnh= +wu3w.0J5.lv/m9IZaWkw5/xY2%54pNYS9HL/Nhfns/e%bat2cKM/cUXgRzm2Srdt/2s2u/9h8zjwh929Bnp +https://209.73.217.17/dJvsqDH/RH6Ok_eSc8wO5/BOJws6/9f0DvXJ4/?%ea'Fx=P&6h3zz3eGCtK=4MF76p7Em +jfajtdt5k6gu11la2jbih.MA/zcaTNUL/3q%31eLT%bc3S/L6v2rt/WtbA0%45~TIvPD +ftp://Defi-z.gr:16993/=7IIaMpVy3OLs/QtQD7qF5Vr/=RVbNDH8/y3oUHmX.v/Td%dcbiGlArA%720 +ftp://[544f:e60a::8772:D633:DA1F:081.021.019.189]:62615/%CB6Wy1K/X%0EcoPQ/IgnCMLPynfx/fdFHb +ftp://1INQM6.4y.RO/ +Http://T778hd416.g9r96v.bs:64804/GbWp%47K/zgTKs/cBHzmYZ=AI23VY +HTTPS://6hp3j2y2tuakzv1rnq9vnvn1w0j6roo3if:58975/vH8BLTu3hzkk +ftp://Ye1dfbl0eae8lqiiqaojj.JO/8EjAq0TzD:/Bz3Pm2qyWo/ZX58A2/yjn%9F3xJZjsVhw +66.242.9.138/CYHK1bGpZ/5yyVD%cbC +nHZMBEJWO.ST/ABXauli3wuJ/WUxhKaZJg +ftp://[8463:c210::b5d1]:34094/8%AC7Fc/Qh6%62yFExJbdaB/0cAZ3iSKlk8sU;TYPE=D +http://vmlyl0efotpfd-tew59kcpsi2u7qd/UbXy1Cc/L%0cwnzmdjz/?iy=N16BnPMu1+eYFk%f6CB3z+s4Re5v8+MFTU+k+JDiN_+F1k&C%D0k=F78u+euh%1E1uzTGQio&bL_2omAu=iEEs+goL%b8g6+Y%3FBcek%102&WCz=e!Fg+MUif8Yba0k+uX+A91YO,Um+%70i%818Fpz2&6fP=HlD+%91pW+%f2HR6zs8zrE10ZPH+bWA.BB6k+Df3w:X85xDnDjSiPY+AyDpuSl4VEVTJzA3g&OtUR6= +http://bCNNCLT.gxa2sbn/lAFakp +D19f.oD5.bb/xUG6W8VxTcjMG/jYMuWlVMygf/UtIwE13c/%a9wzpO%AFxQ9 +q8HY2P.r5T.AU/nc0Iq%28QAF/#yOD3%b3UA%d79e%1EmJp3 +dPY3X09.AC/STpa%97U%b53yKP4Te/%71KZZvIC#nA1W2z +ftp://3gb.xgjm/wF%ado0cM/u%0DmCW8L/d9Ss%61dKQ +6m.56xkyt.32O.com/ToEAr%BEdi/xBpPU2NqC/74sgdq%BD9/WSrx5/5ldupD%47J/9boeZj +ftp://s0y6r7hg7.XN--KGBECHTV/xQizIlOK9/uxho7%bd/RvxbFGQ4o/O%42UeWF?/GAZ5E8b2/eRaq/l:-1ASwSpw/2FkowF%12Ss/vtCq9dysEc%1ee/ +[d18d:1707::]/NGZMInsLF8/kgC3y/F66qc1qt6OWfeS/DyngWA +file:///%55A4VpGsup +file:///WNEw%bfTWDLF/s%A9oZoWUo +Ftp://2tdk.Ube6velthhhx8o.GM/bUH4XycSEKkTE +ftp://7kxk4ujzz.kp:32621/hbop0%25sK/rw7RBE0lTN/tX5BLF +FILE:///IQExpA4kDvUfTkH6Bg/MeVJ4aIUbXCJf +file:///SIE0AkJFq/ZPJLyYK/6hA3x1InlGm1 +http://047.014.184.200/Z_QdOwjzfBue4Nt/aEn/xuEQD/cXlnoxHIK%7d8h/1%eegEk7E0/8Ejku@r1Z/UZ4gG/%484zOJsP%1b/Lc1okbWRzN5UJ +Http://w9ys35.wb55p6l.hxl.rs/Y97%58Lp8JjLZw/5L +FILE://155.24.106.255/3VEZIT7 +d1y8zvhwq40bi3tom.hPCZ.gJ-286X.TG/ayWKrgAvF6tn/L4SgquZT6C/1DmNe/CI69rJ/%f6QrzZGkSQ +lda5l5wc.XN--HGBK6AJ7F53BBA/pr80SSZ/eNM1%D50lp/Rc%8EimOET +l13t2t.sk/O%2BmRkw/@0AgGL@NX/wgt&aggDcp#0IYe'C +FILE://a6ys9a4.xj.BY/%99BGXp/F=yJtxc71/gvXuHuB9k +212.072.006.032/6kV8ce%2e/%e7lzm-HB%4artP/zg6tWMW7RIG?U7=HAXw$D3sM%7DyDJ&Gt= +http://[ea5::]/eIdv5xl/5qhxlOvzw%018f/N3RQQKCz/WzUnsSg8KA3/7ohHZCp +file:///g_T81EaNw2nJB/1yUUT +http://2XXY0MZ.fwa.791ck-2gx.bd/uO6FW?ZS5jE:=m: +https://[8368:F154::f99f]/Y3h8FgzTYYpzn/zHFhQECC/CGtX/8v_~jn3Kn diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/collation/CollationTestBase.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/collation/CollationTestBase.java new file mode 100644 index 0000000..7d1abcb --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/collation/CollationTestBase.java @@ -0,0 +1,316 @@ +package org.apache.lucene.collation; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.PerFieldAnalyzerWrapper; +import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermRangeFilter; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TermRangeQuery; +import org.apache.lucene.search.Searcher; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.Document; +import org.apache.lucene.util.IndexableBinaryStringTools; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; + +import java.io.IOException; +import java.io.StringReader; +import java.util.HashMap; +import java.util.Map; + +public abstract class CollationTestBase extends LuceneTestCase { + + protected String firstRangeBeginningOriginal = "\u062F"; + protected String firstRangeEndOriginal = "\u0698"; + + protected String secondRangeBeginningOriginal = "\u0633"; + protected String secondRangeEndOriginal = "\u0638"; + + /** + * Convenience method to perform the same function as CollationKeyFilter. 
+ * + * @param keyBits the result from + * collator.getCollationKey(original).toByteArray() + * @return The encoded collation key for the original String + */ + protected String encodeCollationKey(byte[] keyBits) { + // Ensure that the backing char[] array is large enough to hold the encoded + // Binary String + int encodedLength = IndexableBinaryStringTools.getEncodedLength(keyBits, 0, keyBits.length); + char[] encodedBegArray = new char[encodedLength]; + IndexableBinaryStringTools.encode(keyBits, 0, keyBits.length, encodedBegArray, 0, encodedLength); + return new String(encodedBegArray); + } + + public void testFarsiRangeFilterCollating(Analyzer analyzer, String firstBeg, + String firstEnd, String secondBeg, + String secondEnd) throws Exception { + RAMDirectory ramDir = new RAMDirectory(); + IndexWriter writer = new IndexWriter(ramDir, new IndexWriterConfig( + TEST_VERSION_CURRENT, analyzer)); + Document doc = new Document(); + doc.add(new Field("content", "\u0633\u0627\u0628", + Field.Store.YES, Field.Index.ANALYZED)); + doc.add(new Field("body", "body", + Field.Store.YES, Field.Index.NOT_ANALYZED)); + writer.addDocument(doc); + writer.close(); + IndexSearcher searcher = new IndexSearcher(ramDir, true); + Query query = new TermQuery(new Term("body","body")); + + // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi + // orders the U+0698 character before the U+0633 character, so the single + // index Term below should NOT be returned by a TermRangeFilter with a Farsi + // Collator (or an Arabic one for the case when Farsi is not + // supported). + ScoreDoc[] result = searcher.search + (query, new TermRangeFilter("content", firstBeg, firstEnd, true, true), 1).scoreDocs; + assertEquals("The index Term should not be included.", 0, result.length); + + result = searcher.search + (query, new TermRangeFilter("content", secondBeg, secondEnd, true, true), 1).scoreDocs; + assertEquals("The index Term should be included.", 1, result.length); + + searcher.close(); + } + + public void testFarsiRangeQueryCollating(Analyzer analyzer, String firstBeg, + String firstEnd, String secondBeg, + String secondEnd) throws Exception { + RAMDirectory ramDir = new RAMDirectory(); + IndexWriter writer = new IndexWriter(ramDir, new IndexWriterConfig( + TEST_VERSION_CURRENT, analyzer)); + Document doc = new Document(); + + // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi + // orders the U+0698 character before the U+0633 character, so the single + // index Term below should NOT be returned by a TermRangeQuery with a Farsi + // Collator (or an Arabic one for the case when Farsi is not supported).
+ doc.add(new Field("content", "\u0633\u0627\u0628", + Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(doc); + writer.close(); + IndexSearcher searcher = new IndexSearcher(ramDir, true); + + Query query = new TermRangeQuery("content", firstBeg, firstEnd, true, true); + ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals("The index Term should not be included.", 0, hits.length); + + query = new TermRangeQuery("content", secondBeg, secondEnd, true, true); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals("The index Term should be included.", 1, hits.length); + searcher.close(); + } + + public void testFarsiTermRangeQuery(Analyzer analyzer, String firstBeg, + String firstEnd, String secondBeg, String secondEnd) throws Exception { + + RAMDirectory farsiIndex = new RAMDirectory(); + IndexWriter writer = new IndexWriter(farsiIndex, new IndexWriterConfig( + TEST_VERSION_CURRENT, analyzer)); + Document doc = new Document(); + doc.add(new Field("content", "\u0633\u0627\u0628", + Field.Store.YES, Field.Index.ANALYZED)); + doc.add(new Field("body", "body", + Field.Store.YES, Field.Index.NOT_ANALYZED)); + writer.addDocument(doc); + writer.close(); + + IndexReader reader = IndexReader.open(farsiIndex, true); + IndexSearcher search = newSearcher(reader); + + // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi + // orders the U+0698 character before the U+0633 character, so the single + // index Term below should NOT be returned by a TermRangeQuery + // with a Farsi Collator (or an Arabic one for the case when Farsi is + // not supported). + Query csrq + = new TermRangeQuery("content", firstBeg, firstEnd, true, true); + ScoreDoc[] result = search.search(csrq, null, 1000).scoreDocs; + assertEquals("The index Term should not be included.", 0, result.length); + + csrq = new TermRangeQuery + ("content", secondBeg, secondEnd, true, true); + result = search.search(csrq, null, 1000).scoreDocs; + assertEquals("The index Term should be included.", 1, result.length); + search.close(); + } + + // Test using various international locales with accented characters (which + // sort differently depending on locale) + // + // Copied (and slightly modified) from + // org.apache.lucene.search.TestSort.testInternationalSort() + // + // TODO: this test is really fragile. there are already 3 different cases, + // depending upon unicode version. 
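+ // As a concrete illustration of that locale dependence (a sketch only,
+ // using values from the sortData table below): java.text.Collator under
+ // Locale.US treats A-ring like A, so "H\u00C5T" sorts before "HOT", while
+ // raw code-point order puts U+00C5 after 'O':
+ //
+ //   Collator.getInstance(Locale.US).compare("H\u00C5T", "HOT");  // < 0
+ //   "H\u00C5T".compareTo("HOT");                                 // > 0
+ //
+ // A da_DK collator instead sorts "H\u00C5T" after "HOT" (and after "HUT"),
+ // which is why the usResult and dkResult arguments below differ.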
+ public void testCollationKeySort(Analyzer usAnalyzer, + Analyzer franceAnalyzer, + Analyzer swedenAnalyzer, + Analyzer denmarkAnalyzer, + String usResult, + String frResult, + String svResult, + String dkResult) throws Exception { + RAMDirectory indexStore = new RAMDirectory(); + PerFieldAnalyzerWrapper analyzer + = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT)); + analyzer.addAnalyzer("US", usAnalyzer); + analyzer.addAnalyzer("France", franceAnalyzer); + analyzer.addAnalyzer("Sweden", swedenAnalyzer); + analyzer.addAnalyzer("Denmark", denmarkAnalyzer); + IndexWriter writer = new IndexWriter(indexStore, new IndexWriterConfig( + TEST_VERSION_CURRENT, analyzer)); + + // document data: + // the tracer field is used to determine which document was hit + String[][] sortData = new String[][] { + // tracer contents US France Sweden (sv_SE) Denmark (da_DK) + { "A", "x", "p\u00EAche", "p\u00EAche", "p\u00EAche", "p\u00EAche" }, + { "B", "y", "HAT", "HAT", "HAT", "HAT" }, + { "C", "x", "p\u00E9ch\u00E9", "p\u00E9ch\u00E9", "p\u00E9ch\u00E9", "p\u00E9ch\u00E9" }, + { "D", "y", "HUT", "HUT", "HUT", "HUT" }, + { "E", "x", "peach", "peach", "peach", "peach" }, + { "F", "y", "H\u00C5T", "H\u00C5T", "H\u00C5T", "H\u00C5T" }, + { "G", "x", "sin", "sin", "sin", "sin" }, + { "H", "y", "H\u00D8T", "H\u00D8T", "H\u00D8T", "H\u00D8T" }, + { "I", "x", "s\u00EDn", "s\u00EDn", "s\u00EDn", "s\u00EDn" }, + { "J", "y", "HOT", "HOT", "HOT", "HOT" }, + }; + + for (int i = 0 ; i < sortData.length ; ++i) { + Document doc = new Document(); + doc.add(new Field("tracer", sortData[i][0], + Field.Store.YES, Field.Index.NO)); + doc.add(new Field("contents", sortData[i][1], + Field.Store.NO, Field.Index.ANALYZED)); + if (sortData[i][2] != null) + doc.add(new Field("US", sortData[i][2], + Field.Store.NO, Field.Index.ANALYZED)); + if (sortData[i][3] != null) + doc.add(new Field("France", sortData[i][3], + Field.Store.NO, Field.Index.ANALYZED)); + if (sortData[i][4] != null) + doc.add(new Field("Sweden", sortData[i][4], + Field.Store.NO, Field.Index.ANALYZED)); + if (sortData[i][5] != null) + doc.add(new Field("Denmark", sortData[i][5], + Field.Store.NO, Field.Index.ANALYZED)); + writer.addDocument(doc); + } + writer.optimize(); + writer.close(); + Searcher searcher = new IndexSearcher(indexStore, true); + + Sort sort = new Sort(); + Query queryX = new TermQuery(new Term ("contents", "x")); + Query queryY = new TermQuery(new Term ("contents", "y")); + + sort.setSort(new SortField("US", SortField.STRING)); + assertMatches(searcher, queryY, sort, usResult); + + sort.setSort(new SortField("France", SortField.STRING)); + assertMatches(searcher, queryX, sort, frResult); + + sort.setSort(new SortField("Sweden", SortField.STRING)); + assertMatches(searcher, queryY, sort, svResult); + + sort.setSort(new SortField("Denmark", SortField.STRING)); + assertMatches(searcher, queryY, sort, dkResult); + } + + // Make sure the documents returned by the search match the expected list + // Copied from TestSort.java + private void assertMatches(Searcher searcher, Query query, Sort sort, + String expectedResult) throws IOException { + ScoreDoc[] result = searcher.search(query, null, 1000, sort).scoreDocs; + StringBuilder buff = new StringBuilder(10); + int n = result.length; + for (int i = 0 ; i < n ; ++i) { + Document doc = searcher.doc(result[i].doc); + String[] v = doc.getValues("tracer"); + for (int j = 0 ; j < v.length ; ++j) { + buff.append(v[j]); + } + } + assertEquals(expectedResult, buff.toString()); + } + + 
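+ // A short usage sketch for encodeCollationKey() above, mirroring how the
+ // concrete subclasses (TestCollationKeyAnalyzer, TestCollationKeyFilter)
+ // build their range endpoints; the "ar" locale is simply the choice those
+ // tests make:
+ //
+ //   Collator collator = Collator.getInstance(new Locale("ar"));
+ //   String firstBeg = encodeCollationKey(
+ //       collator.getCollationKey(firstRangeBeginningOriginal).toByteArray());
+ //
+ // The encoded key is a plain indexable String, so it can be passed directly
+ // to TermRangeQuery/TermRangeFilter against terms produced by
+ // CollationKeyFilter or CollationKeyAnalyzer.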
public void assertThreadSafe(final Analyzer analyzer) throws Exception { + int numTestPoints = 100; + int numThreads = _TestUtil.nextInt(random, 3, 5); + final HashMap map = new HashMap(); + + // create a map up front. + // then with multiple threads, generate sort keys for all the keys in the map + // and ensure they are the same as the ones we produced in serial fashion. + + for (int i = 0; i < numTestPoints; i++) { + String term = _TestUtil.randomSimpleString(random); + TokenStream ts = analyzer.reusableTokenStream("fake", new StringReader(term)); + CharTermAttribute encodedBytes = ts.addAttribute(CharTermAttribute.class); + ts.reset(); + assertTrue(ts.incrementToken()); + // ensure we make a copy of the actual bytes too + map.put(term, encodedBytes.toString()); + } + + Thread threads[] = new Thread[numThreads]; + for (int i = 0; i < numThreads; i++) { + threads[i] = new Thread() { + @Override + public void run() { + try { + for (Map.Entry mapping : map.entrySet()) { + String term = mapping.getKey(); + String expected = mapping.getValue(); + TokenStream ts = analyzer.reusableTokenStream("fake", new StringReader(term)); + CharTermAttribute encodedBytes = ts.addAttribute(CharTermAttribute.class); + ts.reset(); + assertTrue(ts.incrementToken()); + assertEquals(expected, encodedBytes.toString()); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } + }; + } + for (int i = 0; i < numThreads; i++) { + threads[i].start(); + } + for (int i = 0; i < numThreads; i++) { + threads[i].join(); + } + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java new file mode 100644 index 0000000..c421623 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java @@ -0,0 +1,91 @@ +package org.apache.lucene.collation; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements.  See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.  You may obtain a copy of the License at + * + *     http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +import org.apache.lucene.analysis.Analyzer; + +import java.text.Collator; +import java.util.Locale; + + +public class TestCollationKeyAnalyzer extends CollationTestBase { + // the sort order of Ø versus U depends on the version of the rules being used + // for the inherited root locale: Ø's order isn't specified in Locale.US since + // it's not used in English. + private boolean oStrokeFirst = Collator.getInstance(new Locale("")).compare("Ø", "U") < 0; + + // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in + // RuleBasedCollator.  However, the Arabic Locale seems to order the Farsi + // characters properly.
+ private Collator collator = Collator.getInstance(new Locale("ar")); + private Analyzer analyzer = new CollationKeyAnalyzer(collator); + + private String firstRangeBeginning = encodeCollationKey + (collator.getCollationKey(firstRangeBeginningOriginal).toByteArray()); + private String firstRangeEnd = encodeCollationKey + (collator.getCollationKey(firstRangeEndOriginal).toByteArray()); + private String secondRangeBeginning = encodeCollationKey + (collator.getCollationKey(secondRangeBeginningOriginal).toByteArray()); + private String secondRangeEnd = encodeCollationKey + (collator.getCollationKey(secondRangeEndOriginal).toByteArray()); + + public void testFarsiRangeFilterCollating() throws Exception { + testFarsiRangeFilterCollating + (analyzer, firstRangeBeginning, firstRangeEnd, + secondRangeBeginning, secondRangeEnd); + } + + public void testFarsiRangeQueryCollating() throws Exception { + testFarsiRangeQueryCollating + (analyzer, firstRangeBeginning, firstRangeEnd, + secondRangeBeginning, secondRangeEnd); + } + + public void testFarsiTermRangeQuery() throws Exception { + testFarsiTermRangeQuery + (analyzer, firstRangeBeginning, firstRangeEnd, + secondRangeBeginning, secondRangeEnd); + } + + public void testCollationKeySort() throws Exception { + Analyzer usAnalyzer + = new CollationKeyAnalyzer(Collator.getInstance(Locale.US)); + Analyzer franceAnalyzer + = new CollationKeyAnalyzer(Collator.getInstance(Locale.FRANCE)); + Analyzer swedenAnalyzer + = new CollationKeyAnalyzer(Collator.getInstance(new Locale("sv", "se"))); + Analyzer denmarkAnalyzer + = new CollationKeyAnalyzer(Collator.getInstance(new Locale("da", "dk"))); + + // The ICU Collator and Sun java.text.Collator implementations differ in their + // orderings - "BFJDH" is the ordering for java.text.Collator for Locale.US. + testCollationKeySort + (usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer, + oStrokeFirst ? "BFJHD" : "BFJDH", "EACGI", "BJDFH", "BJDHF"); + } + + public void testThreadSafe() throws Exception { + int iters = 20 * RANDOM_MULTIPLIER; + for (int i = 0; i < iters; i++) { + Collator collator = Collator.getInstance(Locale.GERMAN); + collator.setStrength(Collator.PRIMARY); + assertThreadSafe(new CollationKeyAnalyzer(collator)); + } + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/collation/TestCollationKeyFilter.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/collation/TestCollationKeyFilter.java new file mode 100644 index 0000000..6be3784 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/collation/TestCollationKeyFilter.java @@ -0,0 +1,100 @@ +package org.apache.lucene.collation; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.KeywordTokenizer;
+
+import java.text.Collator;
+import java.util.Locale;
+import java.io.Reader;
+
+
+public class TestCollationKeyFilter extends CollationTestBase {
+  // the sort order of Ø versus U depends on the version of the rules being used
+  // for the inherited root locale: Ø's order isn't specified in Locale.US since
+  // it's not used in English.
+  boolean oStrokeFirst = Collator.getInstance(new Locale("")).compare("Ø", "U") < 0;
+
+  // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
+  // RuleBasedCollator.  However, the Arabic Locale seems to order the Farsi
+  // characters properly.
+  private Collator collator = Collator.getInstance(new Locale("ar"));
+  private Analyzer analyzer = new TestAnalyzer(collator);
+
+  private String firstRangeBeginning = encodeCollationKey
+    (collator.getCollationKey(firstRangeBeginningOriginal).toByteArray());
+  private String firstRangeEnd = encodeCollationKey
+    (collator.getCollationKey(firstRangeEndOriginal).toByteArray());
+  private String secondRangeBeginning = encodeCollationKey
+    (collator.getCollationKey(secondRangeBeginningOriginal).toByteArray());
+  private String secondRangeEnd = encodeCollationKey
+    (collator.getCollationKey(secondRangeEndOriginal).toByteArray());
+
+
+  public final class TestAnalyzer extends Analyzer {
+    private Collator _collator;
+
+    TestAnalyzer(Collator collator) {
+      _collator = collator;
+    }
+
+    @Override
+    public TokenStream tokenStream(String fieldName, Reader reader) {
+      TokenStream result = new KeywordTokenizer(reader);
+      result = new CollationKeyFilter(result, _collator);
+      return result;
+    }
+  }
+
+  public void testFarsiRangeFilterCollating() throws Exception {
+    testFarsiRangeFilterCollating
+      (analyzer, firstRangeBeginning, firstRangeEnd,
+       secondRangeBeginning, secondRangeEnd);
+  }
+
+  public void testFarsiRangeQueryCollating() throws Exception {
+    testFarsiRangeQueryCollating
+      (analyzer, firstRangeBeginning, firstRangeEnd,
+       secondRangeBeginning, secondRangeEnd);
+  }
+
+  public void testFarsiTermRangeQuery() throws Exception {
+    testFarsiTermRangeQuery
+      (analyzer, firstRangeBeginning, firstRangeEnd,
+       secondRangeBeginning, secondRangeEnd);
+  }
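The premise behind CollationKeyFilter, used by the TestAnalyzer above, is that java.text.CollationKey byte arrays compare bitwise in the same order the Collator itself compares the original strings, so indexing the key bytes makes Lucene's lexicographic term order match the locale's order. A small sketch of that contract (KeyBytesDemo is a made-up name):

    import java.text.CollationKey;
    import java.text.Collator;
    import java.util.Locale;

    public class KeyBytesDemo {
      // unsigned byte-wise comparison, the order index terms effectively sort in
      static int compareBytes(byte[] a, byte[] b) {
        int n = Math.min(a.length, b.length);
        for (int i = 0; i < n; i++) {
          int d = (a[i] & 0xFF) - (b[i] & 0xFF);
          if (d != 0) return d;
        }
        return a.length - b.length;
      }

      public static void main(String[] args) {
        Collator fr = Collator.getInstance(Locale.FRANCE);
        String s1 = "p\u00EAche", s2 = "p\u00E9ch\u00E9";
        CollationKey k1 = fr.getCollationKey(s1), k2 = fr.getCollationKey(s2);
        // both printed signs should agree
        System.out.println(Integer.signum(compareBytes(k1.toByteArray(), k2.toByteArray())));
        System.out.println(Integer.signum(fr.compare(s1, s2)));
      }
    }
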
"BFJHD" : "BFJDH", "EACGI", "BJDFH", "BJDHF"); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/document/TestBinaryDocument.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/document/TestBinaryDocument.java new file mode 100644 index 0000000..20d6db6 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/document/TestBinaryDocument.java @@ -0,0 +1,115 @@ +package org.apache.lucene.document; + +import org.apache.lucene.util.LuceneTestCase; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.store.Directory; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Tests {@link Document} class. + */ +public class TestBinaryDocument extends LuceneTestCase { + + String binaryValStored = "this text will be stored as a byte array in the index"; + String binaryValCompressed = "this text will be also stored and compressed as a byte array in the index"; + + public void testBinaryFieldInIndex() + throws Exception + { + Fieldable binaryFldStored = new Field("binaryStored", binaryValStored.getBytes()); + Fieldable stringFldStored = new Field("stringStored", binaryValStored, Field.Store.YES, Field.Index.NO, Field.TermVector.NO); + + try { + // binary fields with store off are not allowed + new Field("fail", binaryValStored.getBytes(), Field.Store.NO); + fail(); + } + catch (IllegalArgumentException iae) { + } + + Document doc = new Document(); + + doc.add(binaryFldStored); + + doc.add(stringFldStored); + + /** test for field count */ + assertEquals(2, doc.fields.size()); + + /** add the doc to a ram index */ + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, dir); + writer.addDocument(doc); + + /** open a reader and fetch the document */ + IndexReader reader = writer.getReader(); + Document docFromReader = reader.document(0); + assertTrue(docFromReader != null); + + /** fetch the binary stored field and compare it's content with the original one */ + String binaryFldStoredTest = new String(docFromReader.getBinaryValue("binaryStored")); + assertTrue(binaryFldStoredTest.equals(binaryValStored)); + + /** fetch the string field and compare it's content with the original one */ + String stringFldStoredTest = docFromReader.get("stringStored"); + assertTrue(stringFldStoredTest.equals(binaryValStored)); + + writer.close(); + reader.close(); + + reader = IndexReader.open(dir, false); + /** delete the document from index */ + reader.deleteDocument(0); + assertEquals(0, reader.numDocs()); + + reader.close(); + dir.close(); + } + + public void testCompressionTools() throws Exception { + Fieldable binaryFldCompressed = new Field("binaryCompressed", 
+
+  public void testCompressionTools() throws Exception {
+    Fieldable binaryFldCompressed = new Field("binaryCompressed",
+        CompressionTools.compress(binaryValCompressed.getBytes()));
+    Fieldable stringFldCompressed = new Field("stringCompressed", CompressionTools.compressString(binaryValCompressed));
+
+    Document doc = new Document();
+
+    doc.add(binaryFldCompressed);
+    doc.add(stringFldCompressed);
+
+    /** add the doc to a ram index */
+    Directory dir = newDirectory();
+    RandomIndexWriter writer = new RandomIndexWriter(random, dir);
+    writer.addDocument(doc);
+
+    /** open a reader and fetch the document */
+    IndexReader reader = writer.getReader();
+    Document docFromReader = reader.document(0);
+    assertTrue(docFromReader != null);
+
+    /** fetch the binary compressed field and compare its content with the original one */
+    String binaryFldCompressedTest = new String(CompressionTools.decompress(docFromReader.getBinaryValue("binaryCompressed")));
+    assertTrue(binaryFldCompressedTest.equals(binaryValCompressed));
+    assertTrue(CompressionTools.decompressString(docFromReader.getBinaryValue("stringCompressed")).equals(binaryValCompressed));
+
+    writer.close();
+    reader.close();
+    dir.close();
+  }
+}
diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/document/TestDateTools.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/document/TestDateTools.java
new file mode 100644
index 0000000..be6fb93
--- /dev/null
+++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/document/TestDateTools.java
@@ -0,0 +1,199 @@
+package org.apache.lucene.document;
+
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Calendar;
+import java.util.Date;
+import java.util.GregorianCalendar;
+import java.util.TimeZone;
+import java.util.Locale;
+
+import org.apache.lucene.util.LuceneTestCase;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +public class TestDateTools extends LuceneTestCase { + + public void testStringToDate() throws ParseException { + + Date d = null; + d = DateTools.stringToDate("2004"); + assertEquals("2004-01-01 00:00:00:000", isoFormat(d)); + d = DateTools.stringToDate("20040705"); + assertEquals("2004-07-05 00:00:00:000", isoFormat(d)); + d = DateTools.stringToDate("200407050910"); + assertEquals("2004-07-05 09:10:00:000", isoFormat(d)); + d = DateTools.stringToDate("20040705091055990"); + assertEquals("2004-07-05 09:10:55:990", isoFormat(d)); + + try { + d = DateTools.stringToDate("97"); // no date + fail(); + } catch(ParseException e) { /* expected exception */ } + try { + d = DateTools.stringToDate("200401011235009999"); // no date + fail(); + } catch(ParseException e) { /* expected exception */ } + try { + d = DateTools.stringToDate("aaaa"); // no date + fail(); + } catch(ParseException e) { /* expected exception */ } + + } + + public void testStringtoTime() throws ParseException { + long time = DateTools.stringToTime("197001010000"); + Calendar cal = new GregorianCalendar(); + cal.clear(); + cal.set(1970, 0, 1, // year=1970, month=january, day=1 + 0, 0, 0); // hour, minute, second + cal.set(Calendar.MILLISECOND, 0); + cal.setTimeZone(TimeZone.getTimeZone("GMT")); + assertEquals(cal.getTime().getTime(), time); + cal.set(1980, 1, 2, // year=1980, month=february, day=2 + 11, 5, 0); // hour, minute, second + cal.set(Calendar.MILLISECOND, 0); + time = DateTools.stringToTime("198002021105"); + assertEquals(cal.getTime().getTime(), time); + } + + public void testDateAndTimetoString() throws ParseException { + Calendar cal = new GregorianCalendar(); + cal.clear(); + cal.setTimeZone(TimeZone.getTimeZone("GMT")); + cal.set(2004, 1, 3, // year=2004, month=february(!), day=3 + 22, 8, 56); // hour, minute, second + cal.set(Calendar.MILLISECOND, 333); + + String dateString; + dateString = DateTools.dateToString(cal.getTime(), DateTools.Resolution.YEAR); + assertEquals("2004", dateString); + assertEquals("2004-01-01 00:00:00:000", isoFormat(DateTools.stringToDate(dateString))); + + dateString = DateTools.dateToString(cal.getTime(), DateTools.Resolution.MONTH); + assertEquals("200402", dateString); + assertEquals("2004-02-01 00:00:00:000", isoFormat(DateTools.stringToDate(dateString))); + + dateString = DateTools.dateToString(cal.getTime(), DateTools.Resolution.DAY); + assertEquals("20040203", dateString); + assertEquals("2004-02-03 00:00:00:000", isoFormat(DateTools.stringToDate(dateString))); + + dateString = DateTools.dateToString(cal.getTime(), DateTools.Resolution.HOUR); + assertEquals("2004020322", dateString); + assertEquals("2004-02-03 22:00:00:000", isoFormat(DateTools.stringToDate(dateString))); + + dateString = DateTools.dateToString(cal.getTime(), DateTools.Resolution.MINUTE); + assertEquals("200402032208", dateString); + assertEquals("2004-02-03 22:08:00:000", isoFormat(DateTools.stringToDate(dateString))); + + dateString = DateTools.dateToString(cal.getTime(), DateTools.Resolution.SECOND); + assertEquals("20040203220856", dateString); + assertEquals("2004-02-03 22:08:56:000", isoFormat(DateTools.stringToDate(dateString))); + + dateString = DateTools.dateToString(cal.getTime(), DateTools.Resolution.MILLISECOND); + assertEquals("20040203220856333", dateString); + assertEquals("2004-02-03 22:08:56:333", isoFormat(DateTools.stringToDate(dateString))); + + // date before 1970: + cal.set(1961, 2, 5, // year=1961, month=march(!), day=5 + 23, 9, 51); // hour, minute, second + 
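The assertions above walk dateToString through each Resolution, every step truncating more of the timestamp, and stringToDate parses the fixed-width result back, interpreted as GMT. As a standalone sketch (DateToolsDemo is a made-up name; the printed values depend on the current date):

    import java.util.Date;
    import org.apache.lucene.document.DateTools;

    public class DateToolsDemo {
      public static void main(String[] args) throws Exception {
        Date now = new Date();
        // truncate to day granularity: "yyyyMMdd", suitable as an index term
        String day = DateTools.dateToString(now, DateTools.Resolution.DAY);
        System.out.println(day);
        // parse back: midnight GMT of that day
        System.out.println(DateTools.stringToDate(day));
      }
    }
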
cal.set(Calendar.MILLISECOND, 444); + dateString = DateTools.dateToString(cal.getTime(), DateTools.Resolution.MILLISECOND); + assertEquals("19610305230951444", dateString); + assertEquals("1961-03-05 23:09:51:444", isoFormat(DateTools.stringToDate(dateString))); + + dateString = DateTools.dateToString(cal.getTime(), DateTools.Resolution.HOUR); + assertEquals("1961030523", dateString); + assertEquals("1961-03-05 23:00:00:000", isoFormat(DateTools.stringToDate(dateString))); + + // timeToString: + cal.set(1970, 0, 1, // year=1970, month=january, day=1 + 0, 0, 0); // hour, minute, second + cal.set(Calendar.MILLISECOND, 0); + dateString = DateTools.timeToString(cal.getTime().getTime(), + DateTools.Resolution.MILLISECOND); + assertEquals("19700101000000000", dateString); + + cal.set(1970, 0, 1, // year=1970, month=january, day=1 + 1, 2, 3); // hour, minute, second + cal.set(Calendar.MILLISECOND, 0); + dateString = DateTools.timeToString(cal.getTime().getTime(), + DateTools.Resolution.MILLISECOND); + assertEquals("19700101010203000", dateString); + } + + public void testRound() { + Calendar cal = new GregorianCalendar(); + cal.clear(); + cal.setTimeZone(TimeZone.getTimeZone("GMT")); + cal.set(2004, 1, 3, // year=2004, month=february(!), day=3 + 22, 8, 56); // hour, minute, second + cal.set(Calendar.MILLISECOND, 333); + Date date = cal.getTime(); + assertEquals("2004-02-03 22:08:56:333", isoFormat(date)); + + Date dateYear = DateTools.round(date, DateTools.Resolution.YEAR); + assertEquals("2004-01-01 00:00:00:000", isoFormat(dateYear)); + + Date dateMonth = DateTools.round(date, DateTools.Resolution.MONTH); + assertEquals("2004-02-01 00:00:00:000", isoFormat(dateMonth)); + + Date dateDay = DateTools.round(date, DateTools.Resolution.DAY); + assertEquals("2004-02-03 00:00:00:000", isoFormat(dateDay)); + + Date dateHour = DateTools.round(date, DateTools.Resolution.HOUR); + assertEquals("2004-02-03 22:00:00:000", isoFormat(dateHour)); + + Date dateMinute = DateTools.round(date, DateTools.Resolution.MINUTE); + assertEquals("2004-02-03 22:08:00:000", isoFormat(dateMinute)); + + Date dateSecond = DateTools.round(date, DateTools.Resolution.SECOND); + assertEquals("2004-02-03 22:08:56:000", isoFormat(dateSecond)); + + Date dateMillisecond = DateTools.round(date, DateTools.Resolution.MILLISECOND); + assertEquals("2004-02-03 22:08:56:333", isoFormat(dateMillisecond)); + + // long parameter: + long dateYearLong = DateTools.round(date.getTime(), DateTools.Resolution.YEAR); + assertEquals("2004-01-01 00:00:00:000", isoFormat(new Date(dateYearLong))); + + long dateMillisecondLong = DateTools.round(date.getTime(), DateTools.Resolution.MILLISECOND); + assertEquals("2004-02-03 22:08:56:333", isoFormat(new Date(dateMillisecondLong))); + } + + private String isoFormat(Date date) { + SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss:SSS", Locale.US); + sdf.setTimeZone(TimeZone.getTimeZone("GMT")); + return sdf.format(date); + } + + public void testDateToolsUTC() throws Exception { + // Sun, 30 Oct 2005 00:00:00 +0000 -- the last second of 2005's DST in Europe/London + long time = 1130630400; + try { + TimeZone.setDefault(TimeZone.getTimeZone(/* "GMT" */ "Europe/London")); + String d1 = DateTools.dateToString(new Date(time*1000), DateTools.Resolution.MINUTE); + String d2 = DateTools.dateToString(new Date((time+3600)*1000), DateTools.Resolution.MINUTE); + assertFalse("different times", d1.equals(d2)); + assertEquals("midnight", DateTools.stringToTime(d1), time*1000); + assertEquals("later", 
DateTools.stringToTime(d2), (time+3600)*1000); + } finally { + TimeZone.setDefault(null); + } + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/document/TestDocument.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/document/TestDocument.java new file mode 100644 index 0000000..a89415b --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/document/TestDocument.java @@ -0,0 +1,282 @@ +package org.apache.lucene.document; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.Searcher; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Tests {@link Document} class. + */ +public class TestDocument extends LuceneTestCase { + + String binaryVal = "this text will be stored as a byte array in the index"; + String binaryVal2 = "this text will be also stored as a byte array in the index"; + + public void testBinaryField() throws Exception { + Document doc = new Document(); + Fieldable stringFld = new Field("string", binaryVal, Field.Store.YES, + Field.Index.NO); + Fieldable binaryFld = new Field("binary", binaryVal.getBytes()); + Fieldable binaryFld2 = new Field("binary", binaryVal2.getBytes()); + + doc.add(stringFld); + doc.add(binaryFld); + + assertEquals(2, doc.fields.size()); + + assertTrue(binaryFld.isBinary()); + assertTrue(binaryFld.isStored()); + assertFalse(binaryFld.isIndexed()); + assertFalse(binaryFld.isTokenized()); + + String binaryTest = new String(doc.getBinaryValue("binary")); + assertTrue(binaryTest.equals(binaryVal)); + + String stringTest = doc.get("string"); + assertTrue(binaryTest.equals(stringTest)); + + doc.add(binaryFld2); + + assertEquals(3, doc.fields.size()); + + byte[][] binaryTests = doc.getBinaryValues("binary"); + + assertEquals(2, binaryTests.length); + + binaryTest = new String(binaryTests[0]); + String binaryTest2 = new String(binaryTests[1]); + + assertFalse(binaryTest.equals(binaryTest2)); + + assertTrue(binaryTest.equals(binaryVal)); + assertTrue(binaryTest2.equals(binaryVal2)); + + doc.removeField("string"); + assertEquals(2, doc.fields.size()); + + doc.removeFields("binary"); + assertEquals(0, doc.fields.size()); + } + + /** + * Tests {@link Document#removeField(String)} method for a brand new Document + * that has not been indexed yet. 
+   *
+   * @throws Exception on error
+   */
+  public void testRemoveForNewDocument() throws Exception {
+    Document doc = makeDocumentWithFields();
+    assertEquals(8, doc.fields.size());
+    doc.removeFields("keyword");
+    assertEquals(6, doc.fields.size());
+    doc.removeFields("doesnotexists"); // removing non-existing fields is
+                                       // silently ignored
+    doc.removeFields("keyword"); // removing a field more than once
+    assertEquals(6, doc.fields.size());
+    doc.removeField("text");
+    assertEquals(5, doc.fields.size());
+    doc.removeField("text");
+    assertEquals(4, doc.fields.size());
+    doc.removeField("text");
+    assertEquals(4, doc.fields.size());
+    doc.removeField("doesnotexists"); // removing non-existing fields is
+                                      // silently ignored
+    assertEquals(4, doc.fields.size());
+    doc.removeFields("unindexed");
+    assertEquals(2, doc.fields.size());
+    doc.removeFields("unstored");
+    assertEquals(0, doc.fields.size());
+    doc.removeFields("doesnotexists"); // removing non-existing fields is
+                                       // silently ignored
+    assertEquals(0, doc.fields.size());
+  }
+
+  public void testConstructorExceptions() {
+    new Field("name", "value", Field.Store.YES, Field.Index.NO); // okay
+    new Field("name", "value", Field.Store.NO, Field.Index.NOT_ANALYZED); // okay
+    try {
+      new Field("name", "value", Field.Store.NO, Field.Index.NO);
+      fail();
+    } catch (IllegalArgumentException e) {
+      // expected exception
+    }
+    new Field("name", "value", Field.Store.YES, Field.Index.NO,
+        Field.TermVector.NO); // okay
+    try {
+      new Field("name", "value", Field.Store.YES, Field.Index.NO,
+          Field.TermVector.YES);
+      fail();
+    } catch (IllegalArgumentException e) {
+      // expected exception
+    }
+  }
+
+  /**
+   * Tests {@link Document#getValues(String)} method for a brand new Document
+   * that has not been indexed yet.
+   *
+   * @throws Exception on error
+   */
+  public void testGetValuesForNewDocument() throws Exception {
+    doAssert(makeDocumentWithFields(), false);
+  }
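testConstructorExceptions above encodes the rule that a field must be stored, indexed, or both: Store.NO together with Index.NO would make the field unreachable, so the constructor rejects it immediately. A tiny sketch of the same check (FieldComboDemo is a made-up name):

    import org.apache.lucene.document.Field;

    public class FieldComboDemo {
      public static void main(String[] args) {
        try {
          // neither stored nor indexed: carries no retrievable information
          new Field("name", "value", Field.Store.NO, Field.Index.NO);
          System.out.println("unexpectedly accepted");
        } catch (IllegalArgumentException expected) {
          System.out.println("rejected: " + expected.getMessage());
        }
      }
    }
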
+
+  /**
+   * Tests {@link Document#getValues(String)} method for a Document retrieved
+   * from an index.
+   *
+   * @throws Exception on error
+   */
+  public void testGetValuesForIndexedDocument() throws Exception {
+    Directory dir = newDirectory();
+    RandomIndexWriter writer = new RandomIndexWriter(random, dir);
+    writer.addDocument(makeDocumentWithFields());
+    IndexReader reader = writer.getReader();
+
+    IndexSearcher searcher = newSearcher(reader);
+
+    // search for something that does exist
+    Query query = new TermQuery(new Term("keyword", "test1"));
+
+    // ensure that queries return expected results without DateFilter first
+    ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
+    assertEquals(1, hits.length);
+
+    doAssert(searcher.doc(hits[0].doc), true);
+    writer.close();
+    searcher.close();
+    reader.close();
+    dir.close();
+  }
+
+  private Document makeDocumentWithFields() {
+    Document doc = new Document();
+    doc.add(new Field("keyword", "test1", Field.Store.YES,
+        Field.Index.NOT_ANALYZED));
+    doc.add(new Field("keyword", "test2", Field.Store.YES,
+        Field.Index.NOT_ANALYZED));
+    doc.add(new Field("text", "test1", Field.Store.YES, Field.Index.ANALYZED));
+    doc.add(new Field("text", "test2", Field.Store.YES, Field.Index.ANALYZED));
+    doc.add(new Field("unindexed", "test1", Field.Store.YES, Field.Index.NO));
+    doc.add(new Field("unindexed", "test2", Field.Store.YES, Field.Index.NO));
+    doc
+        .add(new Field("unstored", "test1", Field.Store.NO,
+            Field.Index.ANALYZED));
+    doc
+        .add(new Field("unstored", "test2", Field.Store.NO,
+            Field.Index.ANALYZED));
+    return doc;
+  }
+
+  private void doAssert(Document doc, boolean fromIndex) {
+    String[] keywordFieldValues = doc.getValues("keyword");
+    String[] textFieldValues = doc.getValues("text");
+    String[] unindexedFieldValues = doc.getValues("unindexed");
+    String[] unstoredFieldValues = doc.getValues("unstored");
+
+    assertTrue(keywordFieldValues.length == 2);
+    assertTrue(textFieldValues.length == 2);
+    assertTrue(unindexedFieldValues.length == 2);
+    // this test cannot work for documents retrieved from the index
+    // since unstored fields will obviously not be returned
+    if (!fromIndex) {
+      assertTrue(unstoredFieldValues.length == 2);
+    }
+
+    assertTrue(keywordFieldValues[0].equals("test1"));
+    assertTrue(keywordFieldValues[1].equals("test2"));
+    assertTrue(textFieldValues[0].equals("test1"));
+    assertTrue(textFieldValues[1].equals("test2"));
+    assertTrue(unindexedFieldValues[0].equals("test1"));
+    assertTrue(unindexedFieldValues[1].equals("test2"));
+    // this test cannot work for documents retrieved from the index
+    // since unstored fields will obviously not be returned
+    if (!fromIndex) {
+      assertTrue(unstoredFieldValues[0].equals("test1"));
+      assertTrue(unstoredFieldValues[1].equals("test2"));
+    }
+  }
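The next test, testFieldSetValue, exercises the reuse idiom: one Document and its Field instances are recycled across addDocument calls, with setValue() swapping the payload in place instead of allocating new objects. A self-contained sketch, assuming a plain RAMDirectory and WhitespaceAnalyzer rather than the test framework's random wrappers (ReuseFieldDemo is a made-up name):

    import org.apache.lucene.analysis.WhitespaceAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.store.RAMDirectory;
    import org.apache.lucene.util.Version;

    public class ReuseFieldDemo {
      public static void main(String[] args) throws Exception {
        RAMDirectory dir = new RAMDirectory();
        IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
            Version.LUCENE_34, new WhitespaceAnalyzer(Version.LUCENE_34)));
        Field id = new Field("id", "id1", Field.Store.YES, Field.Index.NOT_ANALYZED);
        Document doc = new Document();
        doc.add(id);
        writer.addDocument(doc); // indexed with id1
        id.setValue("id2");      // mutate in place; no new Document/Field churn
        writer.addDocument(doc); // indexed with id2
        writer.close();
        dir.close();
      }
    }
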
+
+  public void testFieldSetValue() throws Exception {
+
+    Field field = new Field("id", "id1", Field.Store.YES,
+        Field.Index.NOT_ANALYZED);
+    Document doc = new Document();
+    doc.add(field);
+    doc.add(new Field("keyword", "test", Field.Store.YES,
+        Field.Index.NOT_ANALYZED));
+
+    Directory dir = newDirectory();
+    RandomIndexWriter writer = new RandomIndexWriter(random, dir);
+    writer.addDocument(doc);
+    field.setValue("id2");
+    writer.addDocument(doc);
+    field.setValue("id3");
+    writer.addDocument(doc);
+
+    IndexReader reader = writer.getReader();
+    IndexSearcher searcher = newSearcher(reader);
+
+    Query query = new TermQuery(new Term("keyword", "test"));
+
+    // ensure that queries return expected results without DateFilter first
+    ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
+    assertEquals(3, hits.length);
+    int result = 0;
+    for (int i = 0; i < 3; i++) {
+      Document doc2 = searcher.doc(hits[i].doc);
+      Field f = doc2.getField("id");
+      if (f.stringValue().equals("id1")) result |= 1;
+      else if (f.stringValue().equals("id2")) result |= 2;
+      else if (f.stringValue().equals("id3")) result |= 4;
+      else fail("unexpected id field");
+    }
+    writer.close();
+    searcher.close();
+    reader.close();
+    dir.close();
+    assertEquals("did not see all IDs", 7, result);
+  }
+
+  public void testFieldSetValueChangeBinary() {
+    Field field1 = new Field("field1", new byte[0]);
+    Field field2 = new Field("field2", "", Field.Store.YES,
+        Field.Index.ANALYZED);
+    try {
+      field1.setValue("abc");
+      fail("did not hit expected exception");
+    } catch (IllegalArgumentException iae) {
+      // expected
+    }
+    try {
+      field2.setValue(new byte[0]);
+      fail("did not hit expected exception");
+    } catch (IllegalArgumentException iae) {
+      // expected
+    }
+  }
+}
diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/document/TestNumberTools.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/document/TestNumberTools.java
new file mode 100644
index 0000000..12734d9
--- /dev/null
+++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/document/TestNumberTools.java
@@ -0,0 +1,82 @@
+package org.apache.lucene.document;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestNumberTools extends LuceneTestCase {
+  public void testNearZero() {
+    for (int i = -100; i <= 100; i++) {
+      for (int j = -100; j <= 100; j++) {
+        subtestTwoLongs(i, j);
+      }
+    }
+  }
+
+  public void testMax() {
+    // make sure the constants convert to their equivalents
+    assertEquals(Long.MAX_VALUE, NumberTools
+        .stringToLong(NumberTools.MAX_STRING_VALUE));
+    assertEquals(NumberTools.MAX_STRING_VALUE, NumberTools
+        .longToString(Long.MAX_VALUE));
+
+    // test near MAX, too
+    for (long l = Long.MAX_VALUE; l > Long.MAX_VALUE - 10000; l--) {
+      subtestTwoLongs(l, l - 1);
+    }
+  }
+
+  public void testMin() {
+    // make sure the constants convert to their equivalents
+    assertEquals(Long.MIN_VALUE, NumberTools
+        .stringToLong(NumberTools.MIN_STRING_VALUE));
+    assertEquals(NumberTools.MIN_STRING_VALUE, NumberTools
+        .longToString(Long.MIN_VALUE));
+
+    // test near MIN, too
+    for (long l = Long.MIN_VALUE; l < Long.MIN_VALUE + 10000; l++) {
+      subtestTwoLongs(l, l + 1);
+    }
+  }
+
+  private static void subtestTwoLongs(long i, long j) {
+    // convert to strings
+    String a = NumberTools.longToString(i);
+    String b = NumberTools.longToString(j);
+
+    // are they the right length?
+    assertEquals(NumberTools.STR_SIZE, a.length());
+    assertEquals(NumberTools.STR_SIZE, b.length());
+
+    // are they the right order?
+ if (i < j) { + assertTrue(a.compareTo(b) < 0); + } else if (i > j) { + assertTrue(a.compareTo(b) > 0); + } else { + assertEquals(a, b); + } + + // can we convert them back to longs? + long i2 = NumberTools.stringToLong(a); + long j2 = NumberTools.stringToLong(b); + + assertEquals(i, i2); + assertEquals(j, j2); + } +} \ No newline at end of file diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/Test2BTerms.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/Test2BTerms.java new file mode 100644 index 0000000..ceeb535 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/Test2BTerms.java @@ -0,0 +1,219 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util.*; +import org.apache.lucene.store.*; +import org.apache.lucene.search.*; +import org.apache.lucene.analysis.*; +import org.apache.lucene.analysis.tokenattributes.*; +import org.apache.lucene.document.*; +import java.io.File; +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import org.junit.Ignore; + +// Best to run this test w/ plenty of RAM (because of the +// terms index): +// +// ant compile-test +// +// java -server -Xmx8g -d64 -cp .:lib/junit-4.7.jar:./build/classes/test:./build/classes/test-framework:./build/classes/java -Dlucene.version=4.0-dev -Dtests.directory=MMapDirectory -DtempDir=build -ea org.junit.runner.JUnitCore org.apache.lucene.index.Test2BTerms +// + +public class Test2BTerms extends LuceneTestCase { + + private final class MyTokenStream extends TokenStream { + + private final int tokensPerDoc; + private int tokenCount; + private final CharTermAttribute charTerm; + private final static int TOKEN_LEN = 5; + private final char[] chars; + public final List savedTerms = new ArrayList(); + private int nextSave; + + public MyTokenStream(int tokensPerDoc) { + super(); + this.tokensPerDoc = tokensPerDoc; + charTerm = addAttribute(CharTermAttribute.class); + chars = charTerm.resizeBuffer(TOKEN_LEN); + charTerm.setLength(TOKEN_LEN); + nextSave = _TestUtil.nextInt(random, 500000, 1000000); + } + + @Override + public boolean incrementToken() { + if (tokenCount >= tokensPerDoc) { + return false; + } + _TestUtil.randomFixedLengthUnicodeString(random, chars, 0, TOKEN_LEN); + tokenCount++; + if (--nextSave == 0) { + final String s = new String(chars, 0, TOKEN_LEN); + System.out.println("TEST: save term=" + s + " [" + toHexString(s) + "]"); + savedTerms.add(s); + nextSave = _TestUtil.nextInt(random, 500000, 1000000); + } + return true; + } + + @Override + public void reset() { + tokenCount = 0; + } + } + + @Ignore("Takes ~4 
hours to run on a fast machine!!") + public void test2BTerms() throws IOException { + + final long TERM_COUNT = ((long) Integer.MAX_VALUE) + 100000000; + + final int TERMS_PER_DOC = _TestUtil.nextInt(random, 100000, 1000000); + + List savedTerms = null; + + MockDirectoryWrapper dir = newFSDirectory(_TestUtil.getTempDir("2BTerms")); + dir.setThrottling(MockDirectoryWrapper.Throttling.NEVER); + dir.setCheckIndexOnClose(false); // don't double-checkindex + //Directory dir = newFSDirectory(new File("/p/lucene/indices/2bindex")); + + if (true) { + + IndexWriter w = new IndexWriter(dir, + new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)) + .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH) + .setRAMBufferSizeMB(256.0) + .setMergeScheduler(new ConcurrentMergeScheduler()) + .setMergePolicy(newLogMergePolicy(false, 10)) + .setOpenMode(IndexWriterConfig.OpenMode.CREATE)); + + MergePolicy mp = w.getConfig().getMergePolicy(); + if (mp instanceof LogByteSizeMergePolicy) { + // 1 petabyte: + ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1024*1024*1024); + } + + Document doc = new Document(); + + final MyTokenStream ts = new MyTokenStream(TERMS_PER_DOC); + Field field = new Field("field", ts); + field.setOmitTermFreqAndPositions(true); + field.setOmitNorms(true); + doc.add(field); + //w.setInfoStream(System.out); + final int numDocs = (int) (TERM_COUNT/TERMS_PER_DOC); + + System.out.println("TERMS_PER_DOC=" + TERMS_PER_DOC); + System.out.println("numDocs=" + numDocs); + + for(int i=0;i bigOrdTerms = new ArrayList(savedTerms.subList(numSavedTerms-10, numSavedTerms)); + System.out.println("TEST: test big ord terms..."); + testSavedTerms(r, bigOrdTerms); + System.out.println("TEST: test all saved terms..."); + testSavedTerms(r, savedTerms); + r.close(); + + System.out.println("TEST: now CheckIndex..."); + CheckIndex.Status status = _TestUtil.checkIndex(dir); + final long tc = status.segmentInfos.get(0).termIndexStatus.termCount; + assertTrue("count " + tc + " is not > " + Integer.MAX_VALUE, tc > Integer.MAX_VALUE); + dir.close(); + } + + private List findTerms(IndexReader r) throws IOException { + System.out.println("TEST: findTerms"); + final TermEnum termEnum = r.terms(); + final List savedTerms = new ArrayList(); + int nextSave = _TestUtil.nextInt(random, 500000, 1000000); + while(termEnum.next()) { + if (--nextSave == 0) { + savedTerms.add(termEnum.term().text()); + System.out.println("TEST: add " + termEnum.term()); + nextSave = _TestUtil.nextInt(random, 500000, 1000000); + } + } + return savedTerms; + } + + private String toHexString(String s) { + byte[] bytes; + try { + bytes = s.getBytes("UTF-8"); + } catch (UnsupportedEncodingException uee) { + throw new RuntimeException(uee); + } + StringBuilder sb = new StringBuilder(); + for(byte b : bytes) { + if (sb.length() > 0) { + sb.append(' '); + } + sb.append(Integer.toHexString(b&0xFF)); + } + return sb.toString(); + } + + private void testSavedTerms(IndexReader r, List terms) throws IOException { + System.out.println("TEST: run " + terms.size() + " terms on reader=" + r); + IndexSearcher s = new IndexSearcher(r); + Collections.shuffle(terms); + boolean failed = false; + for(int iter=0;iter<10*terms.size();iter++) { + final String term = terms.get(random.nextInt(terms.size())); + System.out.println("TEST: search " + term + " [" + toHexString(term) + "]"); + final long t0 = System.currentTimeMillis(); + final int count = s.search(new TermQuery(new Term("field", term)), 1).totalHits; + if (count <= 0) { + System.out.println(" 
FAILED: count=" + count); + failed = true; + } + final long t1 = System.currentTimeMillis(); + System.out.println(" took " + (t1-t0) + " millis"); + + final TermEnum termEnum = r.terms(new Term("field", term)); + final String text = termEnum.term().text(); + if (!term.equals(text)) { + System.out.println(" FAILED: wrong term: got " + text + " [" + toHexString(text) + "]"); + failed = true; + } + } + assertFalse(failed); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestAddIndexes.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestAddIndexes.java new file mode 100755 index 0000000..8838522 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestAddIndexes.java @@ -0,0 +1,1048 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.io.FileNotFoundException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.Field.Index; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.Field.TermVector; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.store.AlreadyClosedException; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.MockDirectoryWrapper; +import org.apache.lucene.store.RAMDirectory; + +import org.apache.lucene.search.PhraseQuery; + +public class TestAddIndexes extends LuceneTestCase { + + public void testSimpleCase() throws IOException { + // main directory + Directory dir = newDirectory(); + // two auxiliary directories + Directory aux = newDirectory(); + Directory aux2 = newDirectory(); + + IndexWriter writer = null; + + writer = newWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, + new MockAnalyzer(random)) + .setOpenMode(OpenMode.CREATE)); + // add 100 documents + addDocs(writer, 100); + assertEquals(100, writer.maxDoc()); + writer.close(); + + writer = newWriter( + aux, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). + setOpenMode(OpenMode.CREATE). 
+ setMergePolicy(newLogMergePolicy(false)) + ); + // add 40 documents in separate files + addDocs(writer, 40); + assertEquals(40, writer.maxDoc()); + writer.close(); + + writer = newWriter(aux2, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE)); + // add 50 documents in compound files + addDocs2(writer, 50); + assertEquals(50, writer.maxDoc()); + writer.close(); + + // test doc count before segments are merged + writer = newWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); + assertEquals(100, writer.maxDoc()); + writer.addIndexes(new Directory[] { aux, aux2 }); + assertEquals(190, writer.maxDoc()); + writer.close(); + + // make sure the old index is correct + verifyNumDocs(aux, 40); + + // make sure the new index is correct + verifyNumDocs(dir, 190); + + // now add another set in. + Directory aux3 = newDirectory(); + writer = newWriter(aux3, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); + // add 40 documents + addDocs(writer, 40); + assertEquals(40, writer.maxDoc()); + writer.close(); + + // test doc count before segments are merged/index is optimized + writer = newWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); + assertEquals(190, writer.maxDoc()); + writer.addIndexes(new Directory[] { aux3 }); + assertEquals(230, writer.maxDoc()); + writer.close(); + + // make sure the new index is correct + verifyNumDocs(dir, 230); + + verifyTermDocs(dir, new Term("content", "aaa"), 180); + + verifyTermDocs(dir, new Term("content", "bbb"), 50); + + // now optimize it. + writer = newWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); + writer.optimize(); + writer.close(); + + // make sure the new index is correct + verifyNumDocs(dir, 230); + + verifyTermDocs(dir, new Term("content", "aaa"), 180); + + verifyTermDocs(dir, new Term("content", "bbb"), 50); + + // now add a single document + Directory aux4 = newDirectory(); + writer = newWriter(aux4, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); + addDocs2(writer, 1); + writer.close(); + + writer = newWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); + assertEquals(230, writer.maxDoc()); + writer.addIndexes(new Directory[] { aux4 }); + assertEquals(231, writer.maxDoc()); + writer.close(); + + verifyNumDocs(dir, 231); + + verifyTermDocs(dir, new Term("content", "bbb"), 51); + dir.close(); + aux.close(); + aux2.close(); + aux3.close(); + aux4.close(); + } + + public void testWithPendingDeletes() throws IOException { + // main directory + Directory dir = newDirectory(); + // auxiliary directory + Directory aux = newDirectory(); + + setUpDirs(dir, aux); + IndexWriter writer = newWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); + writer.setInfoStream(VERBOSE ? System.out : null); + writer.setInfoStream(VERBOSE ? 
System.out : null); + writer.addIndexes(aux); + + // Adds 10 docs, then replaces them with another 10 + // docs, so 10 pending deletes: + for (int i = 0; i < 20; i++) { + Document doc = new Document(); + doc.add(newField("id", "" + (i % 10), Field.Store.NO, Field.Index.NOT_ANALYZED)); + doc.add(newField("content", "bbb " + i, Field.Store.NO, + Field.Index.ANALYZED)); + writer.updateDocument(new Term("id", "" + (i%10)), doc); + } + // Deletes one of the 10 added docs, leaving 9: + PhraseQuery q = new PhraseQuery(); + q.add(new Term("content", "bbb")); + q.add(new Term("content", "14")); + writer.deleteDocuments(q); + + writer.optimize(); + writer.commit(); + + verifyNumDocs(dir, 1039); + verifyTermDocs(dir, new Term("content", "aaa"), 1030); + verifyTermDocs(dir, new Term("content", "bbb"), 9); + + writer.close(); + dir.close(); + aux.close(); + } + + public void testWithPendingDeletes2() throws IOException { + // main directory + Directory dir = newDirectory(); + // auxiliary directory + Directory aux = newDirectory(); + + setUpDirs(dir, aux); + IndexWriter writer = newWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); + // Adds 10 docs, then replaces them with another 10 + // docs, so 10 pending deletes: + for (int i = 0; i < 20; i++) { + Document doc = new Document(); + doc.add(newField("id", "" + (i % 10), Field.Store.NO, Field.Index.NOT_ANALYZED)); + doc.add(newField("content", "bbb " + i, Field.Store.NO, Field.Index.ANALYZED)); + writer.updateDocument(new Term("id", "" + (i%10)), doc); + } + + writer.addIndexes(new Directory[] {aux}); + + // Deletes one of the 10 added docs, leaving 9: + PhraseQuery q = new PhraseQuery(); + q.add(new Term("content", "bbb")); + q.add(new Term("content", "14")); + writer.deleteDocuments(q); + + writer.optimize(); + writer.commit(); + + verifyNumDocs(dir, 1039); + verifyTermDocs(dir, new Term("content", "aaa"), 1030); + verifyTermDocs(dir, new Term("content", "bbb"), 9); + + writer.close(); + dir.close(); + aux.close(); + } + + public void testWithPendingDeletes3() throws IOException { + // main directory + Directory dir = newDirectory(); + // auxiliary directory + Directory aux = newDirectory(); + + setUpDirs(dir, aux); + IndexWriter writer = newWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); + + // Adds 10 docs, then replaces them with another 10 + // docs, so 10 pending deletes: + for (int i = 0; i < 20; i++) { + Document doc = new Document(); + doc.add(newField("id", "" + (i % 10), Field.Store.NO, Field.Index.NOT_ANALYZED)); + doc.add(newField("content", "bbb " + i, Field.Store.NO, + Field.Index.ANALYZED)); + writer.updateDocument(new Term("id", "" + (i%10)), doc); + } + + // Deletes one of the 10 added docs, leaving 9: + PhraseQuery q = new PhraseQuery(); + q.add(new Term("content", "bbb")); + q.add(new Term("content", "14")); + writer.deleteDocuments(q); + + writer.addIndexes(new Directory[] {aux}); + + writer.optimize(); + writer.commit(); + + verifyNumDocs(dir, 1039); + verifyTermDocs(dir, new Term("content", "aaa"), 1030); + verifyTermDocs(dir, new Term("content", "bbb"), 9); + + writer.close(); + dir.close(); + aux.close(); + } + + // case 0: add self or exceed maxMergeDocs, expect exception + public void testAddSelf() throws IOException { + // main directory + Directory dir = newDirectory(); + // auxiliary directory + Directory aux = newDirectory(); + + IndexWriter writer = null; + + writer = newWriter(dir, 
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); + // add 100 documents + addDocs(writer, 100); + assertEquals(100, writer.maxDoc()); + writer.close(); + + writer = newWriter( + aux, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). + setOpenMode(OpenMode.CREATE). + setMaxBufferedDocs(1000). + setMergePolicy(newLogMergePolicy(false)) + ); + // add 140 documents in separate files + addDocs(writer, 40); + writer.close(); + writer = newWriter( + aux, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). + setOpenMode(OpenMode.CREATE). + setMaxBufferedDocs(1000). + setMergePolicy(newLogMergePolicy(false)) + ); + addDocs(writer, 100); + writer.close(); + + writer = newWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); + try { + // cannot add self + writer.addIndexes(new Directory[] { aux, dir }); + assertTrue(false); + } + catch (IllegalArgumentException e) { + assertEquals(100, writer.maxDoc()); + } + writer.close(); + + // make sure the index is correct + verifyNumDocs(dir, 100); + dir.close(); + aux.close(); + } + + // in all the remaining tests, make the doc count of the oldest segment + // in dir large so that it is never merged in addIndexes() + // case 1: no tail segments + public void testNoTailSegments() throws IOException { + // main directory + Directory dir = newDirectory(); + // auxiliary directory + Directory aux = newDirectory(); + + setUpDirs(dir, aux); + + IndexWriter writer = newWriter( + dir, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). + setOpenMode(OpenMode.APPEND). + setMaxBufferedDocs(10). + setMergePolicy(newLogMergePolicy(4)) + ); + addDocs(writer, 10); + + writer.addIndexes(new Directory[] { aux }); + assertEquals(1040, writer.maxDoc()); + assertEquals(1000, writer.getDocCount(0)); + writer.close(); + + // make sure the index is correct + verifyNumDocs(dir, 1040); + dir.close(); + aux.close(); + } + + // case 2: tail segments, invariants hold, no copy + public void testNoCopySegments() throws IOException { + // main directory + Directory dir = newDirectory(); + // auxiliary directory + Directory aux = newDirectory(); + + setUpDirs(dir, aux); + + IndexWriter writer = newWriter( + dir, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). + setOpenMode(OpenMode.APPEND). + setMaxBufferedDocs(9). + setMergePolicy(newLogMergePolicy(4)) + ); + addDocs(writer, 2); + + writer.addIndexes(new Directory[] { aux }); + assertEquals(1032, writer.maxDoc()); + assertEquals(1000, writer.getDocCount(0)); + writer.close(); + + // make sure the index is correct + verifyNumDocs(dir, 1032); + dir.close(); + aux.close(); + } + + // case 3: tail segments, invariants hold, copy, invariants hold + public void testNoMergeAfterCopy() throws IOException { + // main directory + Directory dir = newDirectory(); + // auxiliary directory + Directory aux = newDirectory(); + + setUpDirs(dir, aux); + + IndexWriter writer = newWriter( + dir, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). + setOpenMode(OpenMode.APPEND). + setMaxBufferedDocs(10). 
+ setMergePolicy(newLogMergePolicy(4)) + ); + + writer.addIndexes(new Directory[] { aux, new MockDirectoryWrapper(random, new RAMDirectory(aux)) }); + assertEquals(1060, writer.maxDoc()); + assertEquals(1000, writer.getDocCount(0)); + writer.close(); + + // make sure the index is correct + verifyNumDocs(dir, 1060); + dir.close(); + aux.close(); + } + + // case 4: tail segments, invariants hold, copy, invariants not hold + public void testMergeAfterCopy() throws IOException { + // main directory + Directory dir = newDirectory(); + // auxiliary directory + Directory aux = newDirectory(); + + setUpDirs(dir, aux); + + IndexReader reader = IndexReader.open(aux, false); + for (int i = 0; i < 20; i++) { + reader.deleteDocument(i); + } + assertEquals(10, reader.numDocs()); + reader.close(); + + IndexWriter writer = newWriter( + dir, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). + setOpenMode(OpenMode.APPEND). + setMaxBufferedDocs(4). + setMergePolicy(newLogMergePolicy(4)) + ); + + writer.addIndexes(new Directory[] { aux, new MockDirectoryWrapper(random, new RAMDirectory(aux)) }); + assertEquals(1020, writer.maxDoc()); + assertEquals(1000, writer.getDocCount(0)); + writer.close(); + dir.close(); + aux.close(); + } + + // case 5: tail segments, invariants not hold + public void testMoreMerges() throws IOException { + // main directory + Directory dir = newDirectory(); + // auxiliary directory + Directory aux = newDirectory(); + Directory aux2 = newDirectory(); + + setUpDirs(dir, aux); + + IndexWriter writer = newWriter( + aux2, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). + setOpenMode(OpenMode.CREATE). + setMaxBufferedDocs(100). + setMergePolicy(newLogMergePolicy(10)) + ); + writer.setInfoStream(VERBOSE ? System.out : null); + writer.addIndexes(aux); + assertEquals(30, writer.maxDoc()); + writer.close(); + + IndexReader reader = IndexReader.open(aux, false); + for (int i = 0; i < 27; i++) { + reader.deleteDocument(i); + } + assertEquals(3, reader.numDocs()); + reader.close(); + + reader = IndexReader.open(aux2, false); + for (int i = 0; i < 8; i++) { + reader.deleteDocument(i); + } + assertEquals(22, reader.numDocs()); + reader.close(); + + writer = newWriter( + dir, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). + setOpenMode(OpenMode.APPEND). + setMaxBufferedDocs(6). 
+ setMergePolicy(newLogMergePolicy(4)) + ); + + writer.addIndexes(new Directory[] { aux, aux2 }); + assertEquals(1040, writer.maxDoc()); + assertEquals(1000, writer.getDocCount(0)); + writer.close(); + dir.close(); + aux.close(); + aux2.close(); + } + + private IndexWriter newWriter(Directory dir, IndexWriterConfig conf) + throws IOException { + conf.setMergePolicy(new LogDocMergePolicy()); + final IndexWriter writer = new IndexWriter(dir, conf); + return writer; + } + + private void addDocs(IndexWriter writer, int numDocs) throws IOException { + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + doc.add(newField("content", "aaa", Field.Store.NO, + Field.Index.ANALYZED)); + writer.addDocument(doc); + } + } + + private void addDocs2(IndexWriter writer, int numDocs) throws IOException { + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + doc.add(newField("content", "bbb", Field.Store.NO, + Field.Index.ANALYZED)); + writer.addDocument(doc); + } + } + + private void verifyNumDocs(Directory dir, int numDocs) throws IOException { + IndexReader reader = IndexReader.open(dir, true); + assertEquals(numDocs, reader.maxDoc()); + assertEquals(numDocs, reader.numDocs()); + reader.close(); + } + + private void verifyTermDocs(Directory dir, Term term, int numDocs) + throws IOException { + IndexReader reader = IndexReader.open(dir, true); + TermDocs termDocs = reader.termDocs(term); + int count = 0; + while (termDocs.next()) + count++; + assertEquals(numDocs, count); + reader.close(); + } + + private void setUpDirs(Directory dir, Directory aux) throws IOException { + IndexWriter writer = null; + + writer = newWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(1000)); + // add 1000 documents in 1 segment + addDocs(writer, 1000); + assertEquals(1000, writer.maxDoc()); + assertEquals(1, writer.getSegmentCount()); + writer.close(); + + writer = newWriter( + aux, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). + setOpenMode(OpenMode.CREATE). + setMaxBufferedDocs(1000). + setMergePolicy(newLogMergePolicy(false, 10)) + ); + // add 30 documents in 3 segments + for (int i = 0; i < 3; i++) { + addDocs(writer, 10); + writer.close(); + writer = newWriter( + aux, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). + setOpenMode(OpenMode.APPEND). + setMaxBufferedDocs(1000). 
+        setMergePolicy(newLogMergePolicy(false, 10))
+    );
+    }
+    assertEquals(30, writer.maxDoc());
+    assertEquals(3, writer.getSegmentCount());
+    writer.close();
+  }
+
+  // LUCENE-1270
+  public void testHangOnClose() throws IOException {
+
+    Directory dir = newDirectory();
+    LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy();
+    lmp.setUseCompoundFile(false);
+    lmp.setMergeFactor(100);
+    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
+        TEST_VERSION_CURRENT, new MockAnalyzer(random))
+        .setMaxBufferedDocs(5).setMergePolicy(lmp));
+
+    Document doc = new Document();
+    doc.add(newField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES,
+                     Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
+    for(int i=0;i<60;i++)
+      writer.addDocument(doc);
+
+    Document doc2 = new Document();
+    doc2.add(newField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES,
+                      Field.Index.NO));
+    doc2.add(newField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES,
+                      Field.Index.NO));
+    doc2.add(newField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES,
+                      Field.Index.NO));
+    doc2.add(newField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES,
+                      Field.Index.NO));
+    for(int i=0;i<10;i++)
+      writer.addDocument(doc2);
+    writer.close();
+
+    Directory dir2 = newDirectory();
+    lmp = new LogByteSizeMergePolicy();
+    lmp.setMinMergeMB(0.0001);
+    lmp.setUseCompoundFile(false);
+    lmp.setMergeFactor(4);
+    writer = new IndexWriter(dir2, newIndexWriterConfig(TEST_VERSION_CURRENT,
+        new MockAnalyzer(random))
+        .setMergeScheduler(new SerialMergeScheduler()).setMergePolicy(lmp));
+    writer.addIndexes(new Directory[] {dir});
+    writer.close();
+    dir.close();
+    dir2.close();
+  }
+
+  // TODO: these are also in TestIndexWriter... add a simple doc-writing method
+  // like this to LuceneTestCase?
+  private void addDoc(IndexWriter writer) throws IOException
+  {
+    Document doc = new Document();
+    doc.add(newField("content", "aaa", Field.Store.NO, Field.Index.ANALYZED));
+    writer.addDocument(doc);
+  }
+
+  private abstract class RunAddIndexesThreads {
+
+    Directory dir, dir2;
+    final static int NUM_INIT_DOCS = 17;
+    IndexWriter writer2;
+    final List<Throwable> failures = new ArrayList<Throwable>();
+    volatile boolean didClose;
+    final IndexReader[] readers;
+    final int NUM_COPY;
+    final static int NUM_THREADS = 5;
+    final Thread[] threads = new Thread[NUM_THREADS];
+
+    public RunAddIndexesThreads(int numCopy) throws Throwable {
+      NUM_COPY = numCopy;
+      dir = new MockDirectoryWrapper(random, new RAMDirectory());
+      IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
+          TEST_VERSION_CURRENT, new MockAnalyzer(random))
+          .setMaxBufferedDocs(2));
+      for (int i = 0; i < NUM_INIT_DOCS; i++)
+        addDoc(writer);
+      writer.close();
+
+      dir2 = newDirectory();
+      writer2 = new IndexWriter(dir2, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
+      writer2.setInfoStream(VERBOSE ? System.out : null);
+      writer2.commit();
+
+      readers = new IndexReader[NUM_COPY];
+      for(int i=0;i<NUM_COPY;i++)
+        readers[i] = IndexReader.open(dir, true);
+
+  // Uncomment these cases & run them on an older Lucene
+  // version, to generate an index to test backwards
+  // compatibility.  Then, cd to build/test/index.cfs and
+  // run "zip index.<VERSION>.cfs.zip *"; cd to
+  // build/test/index.nocfs and run "zip
+  // index.<VERSION>.nocfs.zip *".  Then move those 2 zip
+  // files to your trunk checkout and add them to the
+  // oldNames array.
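For orientation, the zip archives produced by the recipe above are what the oldNames-driven tests below consume. The following is a minimal sketch of that consumption pattern, assembled from calls that appear verbatim in the tests that follow (unzip to a temp dir, open it as a Directory, verify it is still searchable); the helper method name itself is hypothetical and not part of this patch:

    // Hypothetical helper (not in the patch): exercises one
    // "index.<VERSION>.cfs.zip" back-compat archive.
    public void checkOldIndexIsSearchable(String oldName) throws Exception {
      File oldIndexDir = _TestUtil.getTempDir(oldName);
      _TestUtil.unzip(getDataFile("index." + oldName + ".zip"), oldIndexDir);
      Directory dir = newFSDirectory(oldIndexDir);
      IndexSearcher searcher = new IndexSearcher(dir, true);
      // the back-compat indexes built by addDoc below all contain content:aaa
      ScoreDoc[] hits = searcher.search(
          new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs;
      assertTrue("old index should still be searchable", hits.length > 0);
      searcher.close();
      dir.close();
      _TestUtil.rmDir(oldIndexDir);
    }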
+
+  /*
+  public void testCreateCFS() throws IOException {
+    createIndex("index.cfs", true, false);
+  }
+
+  public void testCreateNoCFS() throws IOException {
+    createIndex("index.nocfs", false, false);
+  }
+  */
+
+  /*
+  // These are only needed for the special upgrade test to verify
+  // that also optimized indexes are correctly upgraded by IndexUpgrader.
+  // You don't need them to be built for non-3.1 (the test is happy with just one
+  // "old" segment format; the version is unimportant):
+
+  public void testCreateOptimizedCFS() throws IOException {
+    createIndex("index.optimized.cfs", true, true);
+  }
+
+  public void testCreateOptimizedNoCFS() throws IOException {
+    createIndex("index.optimized.nocfs", false, true);
+  }
+  */
+
+  final String[] oldNames = {"19.cfs",
+                             "19.nocfs",
+                             "20.cfs",
+                             "20.nocfs",
+                             "21.cfs",
+                             "21.nocfs",
+                             "22.cfs",
+                             "22.nocfs",
+                             "23.cfs",
+                             "23.nocfs",
+                             "24.cfs",
+                             "24.nocfs",
+                             "29.cfs",
+                             "29.nocfs",
+                             "30.cfs",
+                             "30.nocfs",
+                             "31.cfs",
+                             "31.nocfs",
+  };
+
+  final String[] oldOptimizedNames = {"31.optimized.cfs",
+                                      "31.optimized.nocfs",
+  };
+
+  private void assertCompressedFields29(Directory dir, boolean shouldStillBeCompressed) throws IOException {
+    int count = 0;
+    final int TEXT_PLAIN_LENGTH = TEXT_TO_COMPRESS.length() * 2;
+    // FieldSelectorResult.SIZE returns 2*number_of_chars for String fields:
+    final int BINARY_PLAIN_LENGTH = BINARY_TO_COMPRESS.length;
+
+    IndexReader reader = IndexReader.open(dir, true);
+    try {
+      // look into sub readers and check if raw merge is on/off
+      List<IndexReader> readers = new ArrayList<IndexReader>();
+      ReaderUtil.gatherSubReaders(readers, reader);
+      for (IndexReader ir : readers) {
+        final FieldsReader fr = ((SegmentReader) ir).getFieldsReader();
+        assertTrue("for a 2.9 index, FieldsReader.canReadRawDocs() must be false, and the other way round for a trunk index",
+          shouldStillBeCompressed != fr.canReadRawDocs());
+      }
+
+      // test that decompression works correctly
+      for(int i=0; i<reader.maxDoc(); i++) {
+
+          final boolean binary = Integer.parseInt(d.get("id")) % 2 > 0;
+          final int shouldSize = shouldStillBeCompressed ?
+            compressedSize :
+            (binary ? BINARY_PLAIN_LENGTH : TEXT_PLAIN_LENGTH);
+          assertEquals("size incorrect", shouldSize, actualSize);
+          if (!shouldStillBeCompressed) {
+            assertFalse("uncompressed field should have another size than recorded in index", compressedSize == actualSize);
+          }
+        }
+      }
+      assertEquals("correct number of tests", 34 * 2, count);
+    } finally {
+      reader.close();
+    }
+  }
+
+  public void testUpgrade29Compression() throws IOException {
+    int hasTested29 = 0;
+
+    for(int i=0;i<oldNames.length;i++) {
+
+        List<Fieldable> fields = d.getFields();
+        if (!oldName.startsWith("19.") &&
+            !oldName.startsWith("20.") &&
+            !oldName.startsWith("21.") &&
+            !oldName.startsWith("22.")) {
+
+          if (d.getField("content3") == null) {
+            final int numFields = oldName.startsWith("29.") ?
7 : 5;
+            assertEquals(numFields, fields.size());
+            Field f = d.getField("id");
+            assertEquals(""+i, f.stringValue());
+
+            f = d.getField("utf8");
+            assertEquals("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.stringValue());
+
+            f = d.getField("autf8");
+            assertEquals("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.stringValue());
+
+            f = d.getField("content2");
+            assertEquals("here is more content with aaa aaa aaa", f.stringValue());
+
+            f = d.getField("fie\u2C77ld");
+            assertEquals("field with non-ascii name", f.stringValue());
+          }
+
+          TermFreqVector tfv = reader.getTermFreqVector(i, "utf8");
+          assertNotNull("docID=" + i + " index=" + indexDir.getName(), tfv);
+          assertTrue(tfv instanceof TermPositionVector);
+        }
+      } else
+        // Only ID 7 is deleted
+        assertEquals(7, i);
+    }
+
+    ScoreDoc[] hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs;
+
+    // First document should be #21 since its norm was
+    // increased:
+    Document d = searcher.doc(hits[0].doc);
+    assertEquals("didn't get the right document first", "21", d.get("id"));
+
+    testHits(hits, 34, searcher.getIndexReader());
+
+    if (!oldName.startsWith("19.") &&
+        !oldName.startsWith("20.") &&
+        !oldName.startsWith("21.") &&
+        !oldName.startsWith("22.")) {
+      // Test on indices >= 2.3
+      hits = searcher.search(new TermQuery(new Term("utf8", "\u0000")), null, 1000).scoreDocs;
+      assertEquals(34, hits.length);
+      hits = searcher.search(new TermQuery(new Term("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne")), null, 1000).scoreDocs;
+      assertEquals(34, hits.length);
+      hits = searcher.search(new TermQuery(new Term("utf8", "ab\ud917\udc17cd")), null, 1000).scoreDocs;
+      assertEquals(34, hits.length);
+    }
+
+    searcher.close();
+    dir.close();
+  }
+
+  private int compare(String name, String v) {
+    int v0 = Integer.parseInt(name.substring(0, 2));
+    int v1 = Integer.parseInt(v);
+    return v0 - v1;
+  }
+
+  /* Open pre-lockless index, add docs, do a delete &
+   * setNorm, and search */
+  public void changeIndexWithAdds(Random random, File oldIndexDir, String origOldName) throws IOException {
+    Directory dir = newFSDirectory(oldIndexDir);
+    // open writer
+    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.APPEND));
+    writer.setInfoStream(VERBOSE ? System.out : null);
+    // add 10 docs
+    for(int i=0;i<10;i++) {
+      addDoc(writer, 35+i);
+    }
+
+    // make sure writer sees right total -- writer seems not to know about deletes in .del?
+ final int expected; + if (compare(origOldName, "24") < 0) { + expected = 44; + } else { + expected = 45; + } + assertEquals("wrong doc count", expected, writer.numDocs()); + writer.close(); + + // make sure searching sees right # hits + IndexSearcher searcher = new IndexSearcher(dir, true); + ScoreDoc[] hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs; + Document d = searcher.doc(hits[0].doc); + assertEquals("wrong first document", "21", d.get("id")); + testHits(hits, 44, searcher.getIndexReader()); + searcher.close(); + + // make sure we can do delete & setNorm against this + // pre-lockless segment: + IndexReader reader = IndexReader.open(dir, false); + searcher = newSearcher(reader); + Term searchTerm = new Term("id", "6"); + int delCount = reader.deleteDocuments(searchTerm); + assertEquals("wrong delete count", 1, delCount); + reader.setNorm(searcher.search(new TermQuery(new Term("id", "22")), 10).scoreDocs[0].doc, "content", (float) 2.0); + reader.close(); + searcher.close(); + + // make sure they "took": + searcher = new IndexSearcher(dir, true); + hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs; + assertEquals("wrong number of hits", 43, hits.length); + d = searcher.doc(hits[0].doc); + assertEquals("wrong first document", "22", d.get("id")); + testHits(hits, 43, searcher.getIndexReader()); + searcher.close(); + + // optimize + writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.APPEND)); + writer.optimize(); + writer.close(); + + searcher = new IndexSearcher(dir, true); + hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs; + assertEquals("wrong number of hits", 43, hits.length); + d = searcher.doc(hits[0].doc); + testHits(hits, 43, searcher.getIndexReader()); + assertEquals("wrong first document", "22", d.get("id")); + searcher.close(); + + dir.close(); + } + + /* Open pre-lockless index, add docs, do a delete & + * setNorm, and search */ + public void changeIndexNoAdds(Random random, File oldIndexDir) throws IOException { + + Directory dir = newFSDirectory(oldIndexDir); + + // make sure searching sees right # hits + IndexSearcher searcher = new IndexSearcher(dir, true); + ScoreDoc[] hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs; + assertEquals("wrong number of hits", 34, hits.length); + Document d = searcher.doc(hits[0].doc); + assertEquals("wrong first document", "21", d.get("id")); + searcher.close(); + + // make sure we can do a delete & setNorm against this + // pre-lockless segment: + IndexReader reader = IndexReader.open(dir, false); + Term searchTerm = new Term("id", "6"); + int delCount = reader.deleteDocuments(searchTerm); + assertEquals("wrong delete count", 1, delCount); + reader.setNorm(22, "content", (float) 2.0); + reader.close(); + + // make sure they "took": + searcher = new IndexSearcher(dir, true); + hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs; + assertEquals("wrong number of hits", 33, hits.length); + d = searcher.doc(hits[0].doc); + assertEquals("wrong first document", "22", d.get("id")); + testHits(hits, 33, searcher.getIndexReader()); + searcher.close(); + + // optimize + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.APPEND)); + writer.optimize(); + writer.close(); + + 
searcher = new IndexSearcher(dir, true); + hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs; + assertEquals("wrong number of hits", 33, hits.length); + d = searcher.doc(hits[0].doc); + assertEquals("wrong first document", "22", d.get("id")); + testHits(hits, 33, searcher.getIndexReader()); + searcher.close(); + + dir.close(); + } + + public File createIndex(String dirName, boolean doCFS, boolean optimized) throws IOException { + // we use a real directory name that is not cleaned up, because this method is only used to create backwards indexes: + File indexDir = new File(LuceneTestCase.TEMP_DIR, dirName); + _TestUtil.rmDir(indexDir); + Directory dir = newFSDirectory(indexDir); + IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setMaxBufferedDocs(10); + ((LogMergePolicy) conf.getMergePolicy()).setUseCompoundFile(doCFS); + if (doCFS) { + ((LogMergePolicy) conf.getMergePolicy()).setNoCFSRatio(1.0); + } + IndexWriter writer = new IndexWriter(dir, conf); + + for(int i=0;i<35;i++) { + addDoc(writer, i); + } + assertEquals("wrong doc count", 35, writer.maxDoc()); + if (optimized) { + writer.optimize(); + } + writer.close(); + + if (!optimized) { + // open fresh writer so we get no prx file in the added segment + conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setMaxBufferedDocs(10); + ((LogMergePolicy) conf.getMergePolicy()).setUseCompoundFile(doCFS); + writer = new IndexWriter(dir, conf); + addNoProxDoc(writer); + writer.close(); + + // Delete one doc so we get a .del file: + IndexReader reader = IndexReader.open(dir, false); + Term searchTerm = new Term("id", "7"); + int delCount = reader.deleteDocuments(searchTerm); + assertEquals("didn't delete the right number of documents", 1, delCount); + + // Set one norm so we get a .s0 file: + reader.setNorm(21, "content", (float) 1.5); + reader.close(); + } + + dir.close(); + + return indexDir; + } + + /* Verifies that the expected file names were produced */ + + public void testExactFileNames() throws IOException { + + String outputDirName = "lucene.backwardscompat0.index"; + File outputDir = _TestUtil.getTempDir(outputDirName); + _TestUtil.rmDir(outputDir); + + try { + Directory dir = newFSDirectory(outputDir); + + LogMergePolicy mergePolicy = newLogMergePolicy(true, 10); + mergePolicy.setNoCFSRatio(1); // This test expects all of its segments to be in CFS + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setMaxBufferedDocs(-1).setRAMBufferSizeMB(16.0) + .setMergePolicy(mergePolicy); + IndexWriter writer = new IndexWriter(dir, conf); + for(int i=0;i<35;i++) { + addDoc(writer, i); + } + assertEquals("wrong doc count", 35, writer.maxDoc()); + writer.close(); + + // Delete one doc so we get a .del file: + IndexReader reader = IndexReader.open(dir, false); + Term searchTerm = new Term("id", "7"); + int delCount = reader.deleteDocuments(searchTerm); + assertEquals("didn't delete the right number of documents", 1, delCount); + + // Set one norm so we get a .s0 file: + reader.setNorm(21, "content", (float) 1.5); + reader.close(); + + // The numbering of fields can vary depending on which + // JRE is in use. On some JREs we see content bound to + // field 0; on others, field 1. 
So, here we have to
+      // figure out which field number corresponds to
+      // "content", and then set our expected file names below
+      // accordingly:
+      CompoundFileReader cfsReader = new CompoundFileReader(dir, "_0.cfs");
+      FieldInfos fieldInfos = new FieldInfos(cfsReader, "_0.fnm");
+      int contentFieldIndex = -1;
+      for(int i=0;i<fieldInfos.size();i++) {
+        FieldInfo fi = fieldInfos.fieldInfo(i);
+        if (fi.name.equals("content")) {
+          contentFieldIndex = i;
+          break;
+        }
+      }
+
+  private String asString(String[] l) {
+    String s = "";
+    for(int i=0;i<l.length;i++) {
+      if (i > 0) {
+        s += "\n    ";
+      }
+      s += l[i];
+    }
+    return s;
+  }
+
+  private void addDoc(IndexWriter writer, int id) throws IOException
+  {
+    Document doc = new Document();
+    doc.add(new Field("content", "aaa", Field.Store.NO, Field.Index.ANALYZED));
+    doc.add(new Field("id", Integer.toString(id), Field.Store.YES, Field.Index.NOT_ANALYZED));
+    doc.add(new Field("autf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
+    doc.add(new Field("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
+    doc.add(new Field("content2", "here is more content with aaa aaa aaa", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
+    doc.add(new Field("fie\u2C77ld", "field with non-ascii name", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
+    /* This was used in 2.9 to generate an index with compressed field:
+    if (id % 2 == 0) {
+      doc.add(new Field("compressed", TEXT_TO_COMPRESS, Field.Store.COMPRESS, Field.Index.NOT_ANALYZED));
+      doc.add(new Field("compressedSize", Integer.toString(TEXT_COMPRESSED_LENGTH), Field.Store.YES, Field.Index.NOT_ANALYZED));
+    } else {
+      doc.add(new Field("compressed", BINARY_TO_COMPRESS, Field.Store.COMPRESS));
+      doc.add(new Field("compressedSize", Integer.toString(BINARY_COMPRESSED_LENGTH), Field.Store.YES, Field.Index.NOT_ANALYZED));
+    }
+    */
+    // add numeric fields, to test if later versions preserve encoding
+    doc.add(new NumericField("trieInt", 4).setIntValue(id));
+    doc.add(new NumericField("trieLong", 4).setLongValue(id));
+    writer.addDocument(doc);
+  }
+
+  private void addNoProxDoc(IndexWriter writer) throws IOException {
+    Document doc = new Document();
+    Field f = new Field("content3", "aaa", Field.Store.YES, Field.Index.ANALYZED);
+    f.setOmitTermFreqAndPositions(true);
+    doc.add(f);
+    f = new Field("content4", "aaa", Field.Store.YES, Field.Index.NO);
+    f.setOmitTermFreqAndPositions(true);
+    doc.add(f);
+    writer.addDocument(doc);
+  }
+
+  static final String TEXT_TO_COMPRESS = "this is a compressed field and should appear in 3.0 as an uncompressed field after merge";
+  // FieldSelectorResult.SIZE returns the compressed size for compressed fields, which are internally handled as binary;
+  // do it the same way FieldsWriter does; do not use CompressionTools.compressString() for compressed fields:
+  /* This was used in 2.9 to generate an index with compressed field:
+  static final int TEXT_COMPRESSED_LENGTH;
+  static {
+    try {
+      TEXT_COMPRESSED_LENGTH = CompressionTools.compress(TEXT_TO_COMPRESS.getBytes("UTF-8")).length;
+    } catch (Exception e) {
+      throw new RuntimeException();
+    }
+  }
+  */
+  static final byte[] BINARY_TO_COMPRESS = new byte[]{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20};
+  /* This was used in 2.9 to generate an index with compressed field:
+  static final int BINARY_COMPRESSED_LENGTH = CompressionTools.compress(BINARY_TO_COMPRESS).length;
+  */
+
+  public void testNumericFields() throws Exception {
+    for(int i=0;i<oldNames.length;i++) {
+      // only test indexes >= 3.0
+      if (oldNames[i].compareTo("30.") < 0) continue;
+
+      File
oldIndexDir = _TestUtil.getTempDir(oldNames[i]); + _TestUtil.unzip(getDataFile("index." + oldNames[i] + ".zip"), oldIndexDir); + Directory dir = newFSDirectory(oldIndexDir); + IndexSearcher searcher = new IndexSearcher(dir, true); + + for (int id=10; id<15; id++) { + ScoreDoc[] hits = searcher.search(NumericRangeQuery.newIntRange("trieInt", 4, Integer.valueOf(id), Integer.valueOf(id), true, true), 100).scoreDocs; + assertEquals("wrong number of hits", 1, hits.length); + Document d = searcher.doc(hits[0].doc); + assertEquals(String.valueOf(id), d.get("id")); + + hits = searcher.search(NumericRangeQuery.newLongRange("trieLong", 4, Long.valueOf(id), Long.valueOf(id), true, true), 100).scoreDocs; + assertEquals("wrong number of hits", 1, hits.length); + d = searcher.doc(hits[0].doc); + assertEquals(String.valueOf(id), d.get("id")); + } + + // check that also lower-precision fields are ok + ScoreDoc[] hits = searcher.search(NumericRangeQuery.newIntRange("trieInt", 4, Integer.MIN_VALUE, Integer.MAX_VALUE, false, false), 100).scoreDocs; + assertEquals("wrong number of hits", 34, hits.length); + + hits = searcher.search(NumericRangeQuery.newLongRange("trieLong", 4, Long.MIN_VALUE, Long.MAX_VALUE, false, false), 100).scoreDocs; + assertEquals("wrong number of hits", 34, hits.length); + + // check decoding into field cache + int[] fci = FieldCache.DEFAULT.getInts(searcher.getIndexReader(), "trieInt"); + for (int val : fci) { + assertTrue("value in id bounds", val >= 0 && val < 35); + } + + long[] fcl = FieldCache.DEFAULT.getLongs(searcher.getIndexReader(), "trieLong"); + for (long val : fcl) { + assertTrue("value in id bounds", val >= 0L && val < 35L); + } + + searcher.close(); + dir.close(); + _TestUtil.rmDir(oldIndexDir); + } + } + + private int checkAllSegmentsUpgraded(Directory dir) throws IOException { + final SegmentInfos infos = new SegmentInfos(); + infos.read(dir); + if (VERBOSE) { + System.out.println("checkAllSegmentsUpgraded: " + infos); + } + for (SegmentInfo si : infos) { + assertEquals(Constants.LUCENE_MAIN_VERSION, si.getVersion()); + } + return infos.size(); + } + + private int getNumberOfSegments(Directory dir) throws IOException { + final SegmentInfos infos = new SegmentInfos(); + infos.read(dir); + return infos.size(); + } + + public void testUpgradeOldIndex() throws Exception { + List names = new ArrayList(oldNames.length + oldOptimizedNames.length); + names.addAll(Arrays.asList(oldNames)); + names.addAll(Arrays.asList(oldOptimizedNames)); + for(String name : names) { + if (VERBOSE) { + System.out.println("testUpgradeOldIndex: index=" +name); + } + File oldIndxeDir = _TestUtil.getTempDir(name); + _TestUtil.unzip(getDataFile("index." + name + ".zip"), oldIndxeDir); + Directory dir = newFSDirectory(oldIndxeDir); + + new IndexUpgrader(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, null), VERBOSE ? System.out : null, false) + .upgrade(); + + checkAllSegmentsUpgraded(dir); + + dir.close(); + _TestUtil.rmDir(oldIndxeDir); + } + } + + public void testUpgradeOldOptimizedIndexWithAdditions() throws Exception { + for (String name : oldOptimizedNames) { + if (VERBOSE) { + System.out.println("testUpgradeOldOptimizedIndexWithAdditions: index=" +name); + } + File oldIndxeDir = _TestUtil.getTempDir(name); + _TestUtil.unzip(getDataFile("index." 
+ name + ".zip"), oldIndxeDir); + Directory dir = newFSDirectory(oldIndxeDir); + + assertEquals("Original index must be optimized", 1, getNumberOfSegments(dir)); + + // create a bunch of dummy segments + int id = 40; + RAMDirectory ramDir = new RAMDirectory(); + for (int i = 0; i < 3; i++) { + // only use Log- or TieredMergePolicy, to make document addition predictable and not suddenly merge: + MergePolicy mp = random.nextBoolean() ? newLogMergePolicy() : newTieredMergePolicy(); + IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)) + .setMergePolicy(mp); + IndexWriter w = new IndexWriter(ramDir, iwc); + // add few more docs: + for(int j = 0; j < RANDOM_MULTIPLIER * random.nextInt(30); j++) { + addDoc(w, id++); + } + w.close(false); + } + + // add dummy segments (which are all in current version) to optimized index + MergePolicy mp = random.nextBoolean() ? newLogMergePolicy() : newTieredMergePolicy(); + IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, null) + .setMergePolicy(mp); + IndexWriter w = new IndexWriter(dir, iwc); + w.setInfoStream(VERBOSE ? System.out : null); + w.addIndexes(ramDir); + w.close(false); + + // determine count of segments in modified index + final int origSegCount = getNumberOfSegments(dir); + + new IndexUpgrader(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, null), VERBOSE ? System.out : null, false) + .upgrade(); + + final int segCount = checkAllSegmentsUpgraded(dir); + assertEquals("Index must still contain the same number of segments, as only one segment was upgraded and nothing else merged", + origSegCount, segCount); + + dir.close(); + _TestUtil.rmDir(oldIndxeDir); + } + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestByteSlices.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestByteSlices.java new file mode 100644 index 0000000..07c6e78 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestByteSlices.java @@ -0,0 +1,118 @@ +package org.apache.lucene.index; + +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestByteSlices extends LuceneTestCase {
+
+  private static class ByteBlockAllocator extends ByteBlockPool.Allocator {
+    ArrayList<byte[]> freeByteBlocks = new ArrayList<byte[]>();
+
+    /* Allocate another byte[] from the shared pool */
+    @Override
+    synchronized byte[] getByteBlock() {
+      final int size = freeByteBlocks.size();
+      final byte[] b;
+      if (0 == size)
+        b = new byte[DocumentsWriter.BYTE_BLOCK_SIZE];
+      else
+        b = freeByteBlocks.remove(size-1);
+      return b;
+    }
+
+    /* Return a byte[] to the pool */
+    @Override
+    synchronized void recycleByteBlocks(byte[][] blocks, int start, int end) {
+      for(int i=start;i<end;i++)
+        freeByteBlocks.add(blocks[i]);
+    }
+
+    @Override
+    synchronized void recycleByteBlocks(List<byte[]> blocks) {
+      final int size = blocks.size();
+      for(int i=0;i<size;i++)
+        freeByteBlocks.add(blocks.get(i));
+    }
+  }
+
+    final List<String> onlySegments = new ArrayList<String>();
+    onlySegments.add("_0");
+
+    assertTrue(checker.checkIndex(onlySegments).clean == true);
+    dir.close();
+  }
+
+  public void testLuceneConstantVersion() throws IOException {
+    // common-build.xml sets lucene.version
+    final String version = System.getProperty("lucene.version");
+    assertNotNull("null version", version);
+    assertTrue("Invalid version: "+version,
+               version.equals(Constants.LUCENE_MAIN_VERSION+"-SNAPSHOT") ||
+               version.equals(Constants.LUCENE_MAIN_VERSION));
+    assertTrue(Constants.LUCENE_VERSION + " should start with: "+version,
+               Constants.LUCENE_VERSION.startsWith(version));
+  }
+}
diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestCompoundFile.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestCompoundFile.java
new file mode 100644
index 0000000..39eb2f6
--- /dev/null
+++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestCompoundFile.java
@@ -0,0 +1,672 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.File;
+
+import org.apache.lucene.util.LuceneTestCase;
+import junit.framework.TestSuite;
+import junit.textui.TestRunner;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.SimpleFSDirectory;
+import org.apache.lucene.store._TestHelper;
+import org.apache.lucene.util._TestUtil;
+
+
+public class TestCompoundFile extends LuceneTestCase
+{
+    /** Main for running test case by itself.
*/ + public static void main(String args[]) { + TestRunner.run (new TestSuite(TestCompoundFile.class)); +// TestRunner.run (new TestCompoundFile("testSingleFile")); +// TestRunner.run (new TestCompoundFile("testTwoFiles")); +// TestRunner.run (new TestCompoundFile("testRandomFiles")); +// TestRunner.run (new TestCompoundFile("testClonedStreamsClosing")); +// TestRunner.run (new TestCompoundFile("testReadAfterClose")); +// TestRunner.run (new TestCompoundFile("testRandomAccess")); +// TestRunner.run (new TestCompoundFile("testRandomAccessClones")); +// TestRunner.run (new TestCompoundFile("testFileNotFound")); +// TestRunner.run (new TestCompoundFile("testReadPastEOF")); + +// TestRunner.run (new TestCompoundFile("testIWCreate")); + + } + + + private Directory dir; + + + @Override + public void setUp() throws Exception { + super.setUp(); + File file = _TestUtil.getTempDir("testIndex"); + // use a simple FSDir here, to be sure to have SimpleFSInputs + dir = new SimpleFSDirectory(file,null); + } + + @Override + public void tearDown() throws Exception { + dir.close(); + super.tearDown(); + } + + /** Creates a file of the specified size with random data. */ + private void createRandomFile(Directory dir, String name, int size) + throws IOException + { + IndexOutput os = dir.createOutput(name); + for (int i=0; i 0) { + int readLen = (int) Math.min(remainder, expectedBuffer.length); + expected.readBytes(expectedBuffer, 0, readLen); + test.readBytes(testBuffer, 0, readLen); + assertEqualArrays(msg + ", remainder " + remainder, expectedBuffer, + testBuffer, 0, readLen); + remainder -= readLen; + } + } + + + private void assertSameStreams(String msg, + IndexInput expected, + IndexInput actual, + long seekTo) + throws IOException + { + if(seekTo >= 0 && seekTo < expected.length()) + { + expected.seek(seekTo); + actual.seek(seekTo); + assertSameStreams(msg + ", seek(mid)", expected, actual); + } + } + + + + private void assertSameSeekBehavior(String msg, + IndexInput expected, + IndexInput actual) + throws IOException + { + // seek to 0 + long point = 0; + assertSameStreams(msg + ", seek(0)", expected, actual, point); + + // seek to middle + point = expected.length() / 2l; + assertSameStreams(msg + ", seek(mid)", expected, actual, point); + + // seek to end - 2 + point = expected.length() - 2; + assertSameStreams(msg + ", seek(end-2)", expected, actual, point); + + // seek to end - 1 + point = expected.length() - 1; + assertSameStreams(msg + ", seek(end-1)", expected, actual, point); + + // seek to the end + point = expected.length(); + assertSameStreams(msg + ", seek(end)", expected, actual, point); + + // seek past end + point = expected.length() + 1; + assertSameStreams(msg + ", seek(end+1)", expected, actual, point); + } + + + private void assertEqualArrays(String msg, + byte[] expected, + byte[] test, + int start, + int len) + { + assertNotNull(msg + " null expected", expected); + assertNotNull(msg + " null test", test); + + for (int i=start; i= 157); + reader.close(); + dir.close(); + } + + public void testCrashAfterClose() throws IOException { + + IndexWriter writer = initIndex(random, false); + MockDirectoryWrapper dir = (MockDirectoryWrapper) writer.getDirectory(); + + writer.close(); + dir.crash(); + + /* + String[] l = dir.list(); + Arrays.sort(l); + for(int i=0;i commits) throws IOException { + final IndexCommit firstCommit = commits.get(0); + long last = SegmentInfos.generationFromSegmentsFileName(firstCommit.getSegmentsFileName()); + assertEquals(last, firstCommit.getGeneration()); + long 
lastVersion = firstCommit.getVersion();
+    long lastTimestamp = firstCommit.getTimestamp();
+    for(int i=1;i<commits.size();i++) {
+      final IndexCommit commit = commits.get(i);
+      long now = SegmentInfos.generationFromSegmentsFileName(commit.getSegmentsFileName());
+      long nowVersion = commit.getVersion();
+      long nowTimestamp = commit.getTimestamp();
+      assertTrue("SegmentInfos commits are out-of-order", now > last);
+      assertTrue("SegmentInfos versions are out-of-order", nowVersion > lastVersion);
+      assertTrue("SegmentInfos timestamps are out-of-order: now=" + nowTimestamp + " vs last=" + lastTimestamp, nowTimestamp >= lastTimestamp);
+      assertEquals(now, commit.getGeneration());
+      last = now;
+      lastVersion = nowVersion;
+      lastTimestamp = nowTimestamp;
+    }
+  }
+
+  class KeepAllDeletionPolicy implements IndexDeletionPolicy {
+    int numOnInit;
+    int numOnCommit;
+    Directory dir;
+    public void onInit(List<? extends IndexCommit> commits) throws IOException {
+      verifyCommitOrder(commits);
+      numOnInit++;
+    }
+    public void onCommit(List<? extends IndexCommit> commits) throws IOException {
+      IndexCommit lastCommit = commits.get(commits.size()-1);
+      IndexReader r = IndexReader.open(dir, true);
+      assertEquals("lastCommit.isOptimized()=" + lastCommit.isOptimized() + " vs IndexReader.isOptimized=" + r.isOptimized(), r.isOptimized(), lastCommit.isOptimized());
+      r.close();
+      verifyCommitOrder(commits);
+      numOnCommit++;
+    }
+  }
+
+  /**
+   * This is useful for adding to a big index when you know
+   * readers are not using it.
+   */
+  class KeepNoneOnInitDeletionPolicy implements IndexDeletionPolicy {
+    int numOnInit;
+    int numOnCommit;
+    public void onInit(List<? extends IndexCommit> commits) throws IOException {
+      verifyCommitOrder(commits);
+      numOnInit++;
+      // On init, delete all commit points:
+      for (final IndexCommit commit : commits) {
+        commit.delete();
+        assertTrue(commit.isDeleted());
+      }
+    }
+    public void onCommit(List<? extends IndexCommit> commits) throws IOException {
+      verifyCommitOrder(commits);
+      int size = commits.size();
+      // Delete all but last one:
+      for(int i=0;i<size-1;i++) {
+        ((IndexCommit) commits.get(i)).delete();
+      }
+    }
+  }
+
+  class KeepLastNDeletionPolicy implements IndexDeletionPolicy {
+    int numOnInit;
+    int numOnCommit;
+    int numToKeep;
+    int numDelete;
+    Set<String> seen = new HashSet<String>();
+
+    public KeepLastNDeletionPolicy(int numToKeep) {
+      this.numToKeep = numToKeep;
+    }
+
+    public void onInit(List<? extends IndexCommit> commits) throws IOException {
+      if (VERBOSE) {
+        System.out.println("TEST: onInit");
+      }
+      verifyCommitOrder(commits);
+      numOnInit++;
+      // do no deletions on init
+      doDeletes(commits, false);
+    }
+
+    public void onCommit(List<? extends IndexCommit> commits) throws IOException {
+      if (VERBOSE) {
+        System.out.println("TEST: onCommit");
+      }
+      verifyCommitOrder(commits);
+      doDeletes(commits, true);
+    }
+
+    private void doDeletes(List<? extends IndexCommit> commits, boolean isCommit) {
+
+      // Assert that we really are only called for each new
+      // commit:
+      if (isCommit) {
+        String fileName = ((IndexCommit) commits.get(commits.size()-1)).getSegmentsFileName();
+        if (seen.contains(fileName)) {
+          throw new RuntimeException("onCommit was called twice on the same commit point: " + fileName);
+        }
+        seen.add(fileName);
+        numOnCommit++;
+      }
+      int size = commits.size();
+      for(int i=0;i<size-numToKeep;i++) {
+        ((IndexCommit) commits.get(i)).delete();
+        numDelete++;
+      }
+    }
+  }
+
+  /*
+   * Delete a commit only when it has been obsoleted by N
+   * seconds.
+   */
+  class ExpirationTimeDeletionPolicy implements IndexDeletionPolicy {
+
+    Directory dir;
+    double expirationTimeSeconds;
+    int numDelete;
+
+    public ExpirationTimeDeletionPolicy(Directory dir, double seconds) {
+      this.dir = dir;
+      this.expirationTimeSeconds = seconds;
+    }
+
+    public void onInit(List<? extends IndexCommit> commits) throws IOException {
+      verifyCommitOrder(commits);
+      onCommit(commits);
+    }
+
+    public void onCommit(List<? extends IndexCommit> commits) throws IOException {
+      verifyCommitOrder(commits);
+
+      IndexCommit lastCommit = commits.get(commits.size()-1);
+
+      // Any commit older than expireTime should be deleted:
+      double expireTime = dir.fileModified(lastCommit.getSegmentsFileName())/1000.0 - expirationTimeSeconds;
+
+      for (final IndexCommit commit : commits) {
+        double modTime = dir.fileModified(commit.getSegmentsFileName())/1000.0;
+        if (commit != lastCommit && modTime < expireTime) {
+          commit.delete();
+          numDelete += 1;
+        }
+      }
+    }
+  }
+
+  /*
+   * Test "by time expiration" deletion policy:
+   */
+  public void testExpirationTimeDeletionPolicy() throws IOException, InterruptedException {
+
+    final double SECONDS = 2.0;
+
+    Directory dir = newDirectory();
+    ExpirationTimeDeletionPolicy
policy = new ExpirationTimeDeletionPolicy(dir, SECONDS); + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, + new MockAnalyzer(random)) + .setIndexDeletionPolicy(policy); + MergePolicy mp = conf.getMergePolicy(); + if (mp instanceof LogMergePolicy) { + setUseCompoundFile(mp, true); + } + IndexWriter writer = new IndexWriter(dir, conf); + writer.close(); + + final int ITER = 9; + + long lastDeleteTime = 0; + for(int i=0;i 0); + + // Then simplistic check: just verify that the + // segments_N's that still exist are in fact within SECONDS + // seconds of the last one's mod time, and, that I can + // open a reader on each: + long gen = SegmentInfos.getCurrentSegmentGeneration(dir); + + String fileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, + "", + gen); + dir.deleteFile(IndexFileNames.SEGMENTS_GEN); + + boolean oneSecondResolution = true; + + while(gen > 0) { + try { + IndexReader reader = IndexReader.open(dir, true); + reader.close(); + fileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, + "", + gen); + + // if we are on a filesystem that seems to have only + // 1 second resolution, allow +1 second in commit + // age tolerance: + long modTime = dir.fileModified(fileName); + oneSecondResolution &= (modTime % 1000) == 0; + final long leeway = (long) ((SECONDS + (oneSecondResolution ? 1.0:0.0))*1000); + + assertTrue("commit point was older than " + SECONDS + " seconds (" + (lastDeleteTime - modTime) + " msec) but did not get deleted ", lastDeleteTime - modTime <= leeway); + } catch (IOException e) { + // OK + break; + } + + dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); + gen--; + } + + dir.close(); + } + + /* + * Test a silly deletion policy that keeps all commits around. + */ + public void testKeepAllDeletionPolicy() throws IOException { + for(int pass=0;pass<2;pass++) { + + if (VERBOSE) { + System.out.println("TEST: cycle pass=" + pass); + } + + boolean useCompoundFile = (pass % 2) != 0; + + // Never deletes a commit + KeepAllDeletionPolicy policy = new KeepAllDeletionPolicy(); + + Directory dir = newDirectory(); + policy.dir = dir; + + IndexWriterConfig conf = newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random)) + .setIndexDeletionPolicy(policy).setMaxBufferedDocs(10) + .setMergeScheduler(new SerialMergeScheduler()); + MergePolicy mp = conf.getMergePolicy(); + if (mp instanceof LogMergePolicy) { + setUseCompoundFile(mp, useCompoundFile); + } + IndexWriter writer = new IndexWriter(dir, conf); + for(int i=0;i<107;i++) { + addDoc(writer); + } + writer.close(); + + final boolean isOptimized; + { + IndexReader r = IndexReader.open(dir); + isOptimized = r.isOptimized(); + r.close(); + } + if (!isOptimized) { + conf = newIndexWriterConfig(TEST_VERSION_CURRENT, + new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode( + OpenMode.APPEND).setIndexDeletionPolicy(policy); + mp = conf.getMergePolicy(); + if (mp instanceof LogMergePolicy) { + setUseCompoundFile(mp, true); + } + if (VERBOSE) { + System.out.println("TEST: open writer for optimize"); + } + writer = new IndexWriter(dir, conf); + writer.setInfoStream(VERBOSE ? System.out : null); + writer.optimize(); + writer.close(); + } + assertEquals(isOptimized ? 0:1, policy.numOnInit); + + // If we are not auto committing then there should + // be exactly 2 commits (one per close above): + assertEquals(1 + (isOptimized ? 
0:1), policy.numOnCommit);
+
+      // Test listCommits
+      Collection<IndexCommit> commits = IndexReader.listCommits(dir);
+      // 2 from closing writer
+      assertEquals(1 + (isOptimized ? 0:1), commits.size());
+
+      // Make sure we can open a reader on each commit:
+      for (final IndexCommit commit : commits) {
+        IndexReader r = IndexReader.open(commit, null, false);
+        r.close();
+      }
+
+      // Simplistic check: just verify all segments_N's still
+      // exist, and, I can open a reader on each:
+      dir.deleteFile(IndexFileNames.SEGMENTS_GEN);
+      long gen = SegmentInfos.getCurrentSegmentGeneration(dir);
+      while(gen > 0) {
+        IndexReader reader = IndexReader.open(dir, true);
+        reader.close();
+        dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
+        gen--;
+
+        if (gen > 0) {
+          // Removing a commit point should have orphaned at
+          // least one index file.  Open & close a writer and
+          // assert that it actually removed something:
+          int preCount = dir.listAll().length;
+          writer = new IndexWriter(dir, newIndexWriterConfig(
+              TEST_VERSION_CURRENT,
+              new MockAnalyzer(random)).setOpenMode(
+              OpenMode.APPEND).setIndexDeletionPolicy(policy));
+          writer.close();
+          int postCount = dir.listAll().length;
+          assertTrue(postCount < preCount);
+        }
+      }
+
+      dir.close();
+    }
+  }
+
+  /* Uses KeepAllDeletionPolicy to keep all commits around,
+   * then, opens a new IndexWriter on a previous commit
+   * point. */
+  public void testOpenPriorSnapshot() throws IOException {
+    // Never deletes a commit
+    KeepAllDeletionPolicy policy = new KeepAllDeletionPolicy();
+
+    Directory dir = newDirectory();
+    policy.dir = dir;
+
+    IndexWriter writer = new IndexWriter(
+        dir,
+        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).
+            setIndexDeletionPolicy(policy).
+            setMaxBufferedDocs(2).
+            setMergePolicy(newLogMergePolicy(10))
+    );
+    for(int i=0;i<10;i++) {
+      addDoc(writer);
+      if ((1+i)%2 == 0)
+        writer.commit();
+    }
+    writer.close();
+
+    Collection<IndexCommit> commits = IndexReader.listCommits(dir);
+    assertEquals(5, commits.size());
+    IndexCommit lastCommit = null;
+    for (final IndexCommit commit : commits) {
+      if (lastCommit == null || commit.getGeneration() > lastCommit.getGeneration())
+        lastCommit = commit;
+    }
+    assertTrue(lastCommit != null);
+
+    // Now add 1 doc and optimize
+    writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT,
+        new MockAnalyzer(random)).setIndexDeletionPolicy(policy));
+    addDoc(writer);
+    assertEquals(11, writer.numDocs());
+    writer.optimize();
+    writer.close();
+
+    assertEquals(6, IndexReader.listCommits(dir).size());
+
+    // Now open writer on the commit just before optimize:
+    writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
+        .setIndexDeletionPolicy(policy).setIndexCommit(lastCommit));
+    assertEquals(10, writer.numDocs());
+
+    // Should undo our rollback:
+    writer.rollback();
+
+    IndexReader r = IndexReader.open(dir, true);
+    // Still optimized, still 11 docs
+    assertTrue(r.isOptimized());
+    assertEquals(11, r.numDocs());
+    r.close();
+
+    writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
+        .setIndexDeletionPolicy(policy).setIndexCommit(lastCommit));
+    assertEquals(10, writer.numDocs());
+    // Commits the rollback:
+    writer.close();
+
+    // Now 7 because we made another commit
+    assertEquals(7, IndexReader.listCommits(dir).size());
+
+    r = IndexReader.open(dir, true);
+    // Not optimized because we rolled it back, and now only
+    // 10 docs
+    assertTrue(!r.isOptimized());
+    assertEquals(10, r.numDocs());
+    r.close();
+
+    // Reoptimize
+    writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setIndexDeletionPolicy(policy));
+    writer.optimize();
+    writer.close();
+
+    r = IndexReader.open(dir, true);
+    assertTrue(r.isOptimized());
+    assertEquals(10, r.numDocs());
+    r.close();
+
+    // Now open writer on the commit just before optimize,
+    // but this time keeping only the last commit:
+    writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setIndexCommit(lastCommit));
+    assertEquals(10, writer.numDocs());
+
+    // Reader still sees the optimized index, because the writer
+    // opened on the prior commit has not yet committed:
+    r = IndexReader.open(dir, true);
+    assertTrue(r.isOptimized());
+    assertEquals(10, r.numDocs());
+    r.close();
+
+    writer.close();
+
+    // Now reader sees unoptimized index:
+    r = IndexReader.open(dir, true);
+    assertTrue(!r.isOptimized());
+    assertEquals(10, r.numDocs());
+    r.close();
+
+    dir.close();
+  }
+
+
+  /* Test keeping NO commit points.  This is a viable and
+   * useful case eg where you want to build a big index and
+   * you know there are no readers.
+ */ + public void testKeepNoneOnInitDeletionPolicy() throws IOException { + for(int pass=0;pass<2;pass++) { + + boolean useCompoundFile = (pass % 2) != 0; + + KeepNoneOnInitDeletionPolicy policy = new KeepNoneOnInitDeletionPolicy(); + + Directory dir = newDirectory(); + + IndexWriterConfig conf = newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random)) + .setOpenMode(OpenMode.CREATE).setIndexDeletionPolicy(policy) + .setMaxBufferedDocs(10); + MergePolicy mp = conf.getMergePolicy(); + if (mp instanceof LogMergePolicy) { + setUseCompoundFile(mp, useCompoundFile); + } + IndexWriter writer = new IndexWriter(dir, conf); + for(int i=0;i<107;i++) { + addDoc(writer); + } + writer.close(); + + conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)) + .setOpenMode(OpenMode.APPEND).setIndexDeletionPolicy(policy); + mp = conf.getMergePolicy(); + if (mp instanceof LogMergePolicy) { + setUseCompoundFile(mp, true); + } + writer = new IndexWriter(dir, conf); + writer.optimize(); + writer.close(); + + assertEquals(1, policy.numOnInit); + // If we are not auto committing then there should + // be exactly 2 commits (one per close above): + assertEquals(2, policy.numOnCommit); + + // Simplistic check: just verify the index is in fact + // readable: + IndexReader reader = IndexReader.open(dir, true); + reader.close(); + + dir.close(); + } + } + + /* + * Test a deletion policy that keeps last N commits. + */ + public void testKeepLastNDeletionPolicy() throws IOException { + final int N = 5; + + for(int pass=0;pass<2;pass++) { + + boolean useCompoundFile = (pass % 2) != 0; + + Directory dir = newDirectory(); + + KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(N); + + for(int j=0;j 0); + assertEquals(N, policy.numOnInit); + assertEquals(N+1, policy.numOnCommit); + + // Simplistic check: just verify only the past N segments_N's still + // exist, and, I can open a reader on each: + dir.deleteFile(IndexFileNames.SEGMENTS_GEN); + long gen = SegmentInfos.getCurrentSegmentGeneration(dir); + for(int i=0;i files; + + /** Set the test case. This test case needs + * a few text files created in the current working directory. + */ + @Override + public void setUp() throws Exception { + super.setUp(); + workDir = _TestUtil.getTempDir("TestDoc"); + workDir.mkdirs(); + + indexDir = _TestUtil.getTempDir("testIndex"); + indexDir.mkdirs(); + + Directory directory = newFSDirectory(indexDir); + directory.close(); + + files = new LinkedList(); + files.add(createOutput("test.txt", + "This is the first test file" + )); + + files.add(createOutput("test2.txt", + "This is the second test file" + )); + } + + private File createOutput(String name, String text) throws IOException { + FileWriter fw = null; + PrintWriter pw = null; + + try { + File f = new File(workDir, name); + if (f.exists()) f.delete(); + + fw = new FileWriter(f); + pw = new PrintWriter(fw); + pw.println(text); + return f; + + } finally { + if (pw != null) pw.close(); + if (fw != null) fw.close(); + } + } + + + /** This test executes a number of merges and compares the contents of + * the segments created when using compound file or not using one. + * + * TODO: the original test used to print the segment contents to System.out + * for visual validation. To have the same effect, a new method + * checkSegment(String name, ...) should be created that would + * assert various things about the segment. 
+ */ + public void testIndexAndMerge() throws Exception { + StringWriter sw = new StringWriter(); + PrintWriter out = new PrintWriter(sw, true); + + Directory directory = newFSDirectory(indexDir); + IndexWriter writer = new IndexWriter( + directory, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). + setOpenMode(OpenMode.CREATE). + setMaxBufferedDocs(-1). + setMergePolicy(newLogMergePolicy(10)) + ); + + SegmentInfo si1 = indexDoc(writer, "test.txt"); + printSegment(out, si1); + + SegmentInfo si2 = indexDoc(writer, "test2.txt"); + printSegment(out, si2); + writer.close(); + + SegmentInfo siMerge = merge(si1, si2, "merge", false); + printSegment(out, siMerge); + + SegmentInfo siMerge2 = merge(si1, si2, "merge2", false); + printSegment(out, siMerge2); + + SegmentInfo siMerge3 = merge(siMerge, siMerge2, "merge3", false); + printSegment(out, siMerge3); + + directory.close(); + out.close(); + sw.close(); + String multiFileOutput = sw.getBuffer().toString(); + //System.out.println(multiFileOutput); + + sw = new StringWriter(); + out = new PrintWriter(sw, true); + + directory = newFSDirectory(indexDir); + writer = new IndexWriter( + directory, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). + setOpenMode(OpenMode.CREATE). + setMaxBufferedDocs(-1). + setMergePolicy(newLogMergePolicy(10)) + ); + + si1 = indexDoc(writer, "test.txt"); + printSegment(out, si1); + + si2 = indexDoc(writer, "test2.txt"); + printSegment(out, si2); + writer.close(); + + siMerge = merge(si1, si2, "merge", true); + printSegment(out, siMerge); + + siMerge2 = merge(si1, si2, "merge2", true); + printSegment(out, siMerge2); + + siMerge3 = merge(siMerge, siMerge2, "merge3", true); + printSegment(out, siMerge3); + + directory.close(); + out.close(); + sw.close(); + String singleFileOutput = sw.getBuffer().toString(); + + assertEquals(multiFileOutput, singleFileOutput); + } + + private SegmentInfo indexDoc(IndexWriter writer, String fileName) + throws Exception + { + File file = new File(workDir, fileName); + Document doc = new Document(); + doc.add(new Field("contents", new FileReader(file))); + writer.addDocument(doc); + writer.commit(); + return writer.newestSegment(); + } + + + private SegmentInfo merge(SegmentInfo si1, SegmentInfo si2, String merged, boolean useCompoundFile) + throws Exception { + SegmentReader r1 = SegmentReader.get(true, si1, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR); + SegmentReader r2 = SegmentReader.get(true, si2, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR); + + SegmentMerger merger = new SegmentMerger(si1.dir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, merged, null, null, new FieldInfos()); + + merger.add(r1); + merger.add(r2); + merger.merge(); + r1.close(); + r2.close(); + + final SegmentInfo info = new SegmentInfo(merged, si1.docCount + si2.docCount, si1.dir, + false, true, + merger.fieldInfos().hasProx(), + merger.fieldInfos().hasVectors()); + + if (useCompoundFile) { + Collection filesToDelete = merger.createCompoundFile(merged + ".cfs", info); + info.setUseCompoundFile(true); + for (final String fileToDelete : filesToDelete) + si1.dir.deleteFile(fileToDelete); + } + + return info; + } + + + private void printSegment(PrintWriter out, SegmentInfo si) + throws Exception { + SegmentReader reader = SegmentReader.get(true, si, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR); + + for (int i = 0; i < reader.numDocs(); i++) + out.println(reader.document(i)); + + TermEnum tis = reader.terms(); + while (tis.next()) { + out.print(tis.term()); + out.println(" DF=" + 
tis.docFreq()); + + TermPositions positions = reader.termPositions(tis.term()); + try { + while (positions.next()) { + out.print(" doc=" + positions.doc()); + out.print(" TF=" + positions.freq()); + out.print(" pos="); + out.print(positions.nextPosition()); + for (int j = 1; j < positions.freq(); j++) + out.print("," + positions.nextPosition()); + out.println(""); + } + } finally { + positions.close(); + } + } + tis.close(); + reader.close(); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestFilterIndexReader.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestFilterIndexReader.java new file mode 100644 index 0000000..9609689 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestFilterIndexReader.java @@ -0,0 +1,139 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +import org.apache.lucene.util.LuceneTestCase; +import junit.framework.TestSuite; +import junit.textui.TestRunner; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.store.Directory; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; + +import java.io.IOException; + +public class TestFilterIndexReader extends LuceneTestCase { + + private static class TestReader extends FilterIndexReader { + + /** Filter that only permits terms containing 'e'.*/ + private static class TestTermEnum extends FilterTermEnum { + public TestTermEnum(TermEnum termEnum) { + super(termEnum); + } + + /** Scan for terms containing the letter 'e'.*/ + @Override + public boolean next() throws IOException { + while (in.next()) { + if (in.term().text().indexOf('e') != -1) + return true; + } + return false; + } + } + + /** Filter that only returns odd numbered documents. */ + private static class TestTermPositions extends FilterTermPositions { + public TestTermPositions(TermPositions in) { + super(in); + } + + /** Scan for odd numbered documents. */ + @Override + public boolean next() throws IOException { + while (in.next()) { + if ((in.doc() % 2) == 1) + return true; + } + return false; + } + } + + public TestReader(IndexReader reader) { + super(reader); + } + + /** Filter terms with TestTermEnum. */ + @Override + public TermEnum terms() throws IOException { + return new TestTermEnum(in.terms()); + } + + /** Filter positions with TestTermPositions. */ + @Override + public TermPositions termPositions() throws IOException { + return new TestTermPositions(in.termPositions()); + } + } + + + /** Main for running test case by itself. 
*/ + public static void main(String args[]) { + TestRunner.run (new TestSuite(TestIndexReader.class)); + } + + /** + * Tests the IndexReader.getFieldNames implementation + * @throws Exception on error + */ + public void testFilterIndexReader() throws Exception { + Directory directory = newDirectory(); + IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); + + Document d1 = new Document(); + d1.add(newField("default","one two", Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(d1); + + Document d2 = new Document(); + d2.add(newField("default","one three", Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(d2); + + Document d3 = new Document(); + d3.add(newField("default","two four", Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(d3); + + writer.close(); + + IndexReader reader = new TestReader(IndexReader.open(directory, true)); + TermEnum terms = reader.terms(); + while (terms.next()) { + assertTrue(terms.term().text().indexOf('e') != -1); + } + terms.close(); + + TermPositions positions = reader.termPositions(new Term("default", "one")); + while (positions.next()) { + assertTrue((positions.doc() % 2) == 1); + } + + int NUM_DOCS = 3; + + TermDocs td = reader.termDocs(null); + for(int i=0;i getFileNames() throws IOException { return null; } + @Override public void delete() {} + @Override public long getGeneration() { return 0; } + @Override public long getTimestamp() throws IOException { return 1;} + @Override public Map getUserData() throws IOException { return null; } + @Override public boolean isDeleted() { return false; } + @Override public boolean isOptimized() { return false; } + }; + + IndexCommit ic2 = new IndexCommit() { + @Override public String getSegmentsFileName() { return "b"; } + @Override public long getVersion() { return 12; } + @Override public Directory getDirectory() { return dir; } + @Override public Collection getFileNames() throws IOException { return null; } + @Override public void delete() {} + @Override public long getGeneration() { return 0; } + @Override public long getTimestamp() throws IOException { return 1;} + @Override public Map getUserData() throws IOException { return null; } + @Override public boolean isDeleted() { return false; } + @Override public boolean isOptimized() { return false; } + }; + + assertEquals(ic1, ic2); + assertEquals("hash codes are not equals", ic1.hashCode(), ic2.hashCode()); + dir.close(); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestIndexFileDeleter.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestIndexFileDeleter.java new file mode 100644 index 0000000..e44b9fd --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestIndexFileDeleter.java @@ -0,0 +1,240 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.MockDirectoryWrapper; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; + +import java.io.*; +import java.util.*; + +/* + Verify we can read the pre-2.1 file format, do searches + against it, and add documents to it. +*/ + +public class TestIndexFileDeleter extends LuceneTestCase { + + public void testDeleteLeftoverFiles() throws IOException { + MockDirectoryWrapper dir = newDirectory(); + dir.setPreventDoubleWrite(false); + IndexWriterConfig conf = newIndexWriterConfig( + TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)) + .setMaxBufferedDocs(10); + LogMergePolicy mergePolicy = newLogMergePolicy(true, 10); + mergePolicy.setNoCFSRatio(1); // This test expects all of its segments to be in CFS + conf.setMergePolicy(mergePolicy); + + IndexWriter writer = new IndexWriter( + dir, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). + setMaxBufferedDocs(10). + setMergePolicy(mergePolicy) + ); + + int i; + for(i=0;i<35;i++) { + addDoc(writer, i); + } + mergePolicy.setUseCompoundFile(false); + for(;i<45;i++) { + addDoc(writer, i); + } + writer.close(); + + // Delete one doc so we get a .del file: + IndexReader reader = IndexReader.open(dir, false); + Term searchTerm = new Term("id", "7"); + int delCount = reader.deleteDocuments(searchTerm); + assertEquals("didn't delete the right number of documents", 1, delCount); + + // Set one norm so we get a .s0 file: + reader.setNorm(21, "content", (float) 1.5); + reader.close(); + + // Now, artificially create an extra .del file & extra + // .s0 file: + String[] files = dir.listAll(); + + /* + for(int j=0;j dif = difFiles(files, files2); + + if (!Arrays.equals(files, files2)) { + fail("IndexFileDeleter failed to delete unreferenced extra files: should have deleted " + (filesPre.length-files.length) + " files but only deleted " + (filesPre.length - files2.length) + "; expected files:\n " + asString(files) + "\n actual files:\n " + asString(files2)+"\ndif: "+dif); + } + } + + private static Set difFiles(String[] files1, String[] files2) { + Set set1 = new HashSet(); + Set set2 = new HashSet(); + Set extra = new HashSet(); + + for (int x=0; x < files1.length; x++) { + set1.add(files1[x]); + } + for (int x=0; x < files2.length; x++) { + set2.add(files2[x]); + } + Iterator i1 = set1.iterator(); + while (i1.hasNext()) { + String o = i1.next(); + if (!set2.contains(o)) { + extra.add(o); + } + } + Iterator i2 = set2.iterator(); + while (i2.hasNext()) { + String o = i2.next(); + if (!set1.contains(o)) { + extra.add(o); + } + } + return extra; + } + + private String asString(String[] l) { + String s = ""; + for(int i=0;i 0) { + s += "\n "; + } + s += l[i]; + } + return s; + } + + public void 
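+ // (Descriptive note: the helper below is a plain buffered copy between files of one Directory, reading through IndexInput and writing through IndexOutput 1 KB at a time; the test above appears to use it to fabricate the leftover .del and .s0 files that IndexFileDeleter must clean up.)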
copyFile(Directory dir, String src, String dest) throws IOException { + IndexInput in = dir.openInput(src); + IndexOutput out = dir.createOutput(dest); + byte[] b = new byte[1024]; + long remainder = in.length(); + while(remainder > 0) { + int len = (int) Math.min(b.length, remainder); + in.readBytes(b, 0, len); + out.writeBytes(b, len); + remainder -= len; + } + in.close(); + out.close(); + } + + private void addDoc(IndexWriter writer, int id) throws IOException + { + Document doc = new Document(); + doc.add(newField("content", "aaa", Field.Store.NO, Field.Index.ANALYZED)); + doc.add(newField("id", Integer.toString(id), Field.Store.YES, Field.Index.NOT_ANALYZED)); + writer.addDocument(doc); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestIndexInput.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestIndexInput.java new file mode 100644 index 0000000..4b82e5a --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestIndexInput.java @@ -0,0 +1,150 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.RAMDirectory; + +import java.io.IOException; + +public class TestIndexInput extends LuceneTestCase { + + static final byte[] READ_TEST_BYTES = new byte[] { + (byte) 0x80, 0x01, + (byte) 0xFF, 0x7F, + (byte) 0x80, (byte) 0x80, 0x01, + (byte) 0x81, (byte) 0x80, 0x01, + (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0x07, + (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0x07, + (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0x7F, + 0x06, 'L', 'u', 'c', 'e', 'n', 'e', + + // 2-byte UTF-8 (U+00BF "INVERTED QUESTION MARK") + 0x02, (byte) 0xC2, (byte) 0xBF, + 0x0A, 'L', 'u', (byte) 0xC2, (byte) 0xBF, + 'c', 'e', (byte) 0xC2, (byte) 0xBF, + 'n', 'e', + + // 3-byte UTF-8 (U+2620 "SKULL AND CROSSBONES") + 0x03, (byte) 0xE2, (byte) 0x98, (byte) 0xA0, + 0x0C, 'L', 'u', (byte) 0xE2, (byte) 0x98, (byte) 0xA0, + 'c', 'e', (byte) 0xE2, (byte) 0x98, (byte) 0xA0, + 'n', 'e', + + // surrogate pairs + // (U+1D11E "MUSICAL SYMBOL G CLEF") + // (U+1D160 "MUSICAL SYMBOL EIGHTH NOTE") + 0x04, (byte) 0xF0, (byte) 0x9D, (byte) 0x84, (byte) 0x9E, + 0x08, (byte) 0xF0, (byte) 0x9D, (byte) 0x84, (byte) 0x9E, + (byte) 0xF0, (byte) 0x9D, (byte) 0x85, (byte) 0xA0, + 0x0E, 'L', 'u', + (byte) 0xF0, (byte) 0x9D, (byte) 0x84, (byte) 0x9E, + 'c', 'e', + (byte) 0xF0, (byte) 0x9D, (byte) 0x85, (byte) 0xA0, + 'n', 'e', + + // null bytes + 0x01, 0x00, + 0x08, 'L', 'u', 0x00, 'c', 'e', 0x00, 'n', 'e', + }; + + private void checkReads(IndexInput is) throws IOException { + assertEquals(128,is.readVInt()); + assertEquals(16383,is.readVInt()); + assertEquals(16384,is.readVInt()); + assertEquals(16385,is.readVInt()); + assertEquals(Integer.MAX_VALUE, is.readVInt()); + assertEquals((long) Integer.MAX_VALUE, is.readVLong()); + assertEquals(Long.MAX_VALUE, is.readVLong()); + assertEquals("Lucene",is.readString()); + + assertEquals("\u00BF",is.readString()); + assertEquals("Lu\u00BFce\u00BFne",is.readString()); + + assertEquals("\u2620",is.readString()); + assertEquals("Lu\u2620ce\u2620ne",is.readString()); + + assertEquals("\uD834\uDD1E",is.readString()); + assertEquals("\uD834\uDD1E\uD834\uDD60",is.readString()); + assertEquals("Lu\uD834\uDD1Ece\uD834\uDD60ne",is.readString()); + + assertEquals("\u0000",is.readString()); + assertEquals("Lu\u0000ce\u0000ne",is.readString()); + } + + // this test only checks BufferedIndexInput because MockIndexInput extends BufferedIndexInput + public void testBufferedIndexInputRead() throws IOException { + final IndexInput is = new MockIndexInput(READ_TEST_BYTES); + checkReads(is); + is.close(); + } + + // this test checks the raw IndexInput methods as it uses RAMIndexInput which extends IndexInput directly + public void testRawIndexInputRead() throws IOException { + final RAMDirectory dir = new RAMDirectory(); + final IndexOutput os = dir.createOutput("foo"); + os.writeBytes(READ_TEST_BYTES, READ_TEST_BYTES.length); + os.close(); + final IndexInput is = dir.openInput("foo"); + checkReads(is); + is.close(); + dir.close(); + } + + /** + * Expert + * + * @throws IOException + */ + public void testSkipChars() throws IOException { + byte[] bytes = new byte[]{(byte) 0x80, 0x01, + (byte) 0xFF, 0x7F, + (byte) 0x80, (byte) 0x80, 0x01, + (byte) 0x81, (byte) 0x80, 0x01, + 0x06, 'L', 'u', 'c', 'e', 'n', 'e', + }; + String utf8Str = "\u0634\u1ea1"; + byte [] 
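+ // (Descriptive note on the encoding exercised by READ_TEST_BYTES and checkReads() above: a Lucene VInt stores seven data bits per byte, low-order group first, with the high bit as a continuation flag, so 0x80,0x01 decodes to 128, 0xFF,0x7F to 16383, 0x80,0x80,0x01 to 16384, and 0xFF,0xFF,0xFF,0xFF,0x07 to Integer.MAX_VALUE; strings are written as a VInt byte count followed by standard UTF-8 bytes, which the surrogate-pair and null-byte cases above confirm.)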
utf8Bytes = utf8Str.getBytes("UTF-8"); + byte [] theBytes = new byte[bytes.length + 1 + utf8Bytes.length]; + System.arraycopy(bytes, 0, theBytes, 0, bytes.length); + theBytes[bytes.length] = (byte)utf8Str.length();//Add in the number of chars we are storing, which should fit in a byte for this test + System.arraycopy(utf8Bytes, 0, theBytes, bytes.length + 1, utf8Bytes.length); + IndexInput is = new MockIndexInput(theBytes); + assertEquals(128, is.readVInt()); + assertEquals(16383, is.readVInt()); + assertEquals(16384, is.readVInt()); + assertEquals(16385, is.readVInt()); + int charsToRead = is.readVInt();//number of chars in the Lucene string + assertTrue(0x06 + " does not equal: " + charsToRead, 0x06 == charsToRead); + is.skipChars(3); + char [] chars = new char[3];//there should be 6 chars remaining + is.readChars(chars, 0, 3); + String tmpStr = new String(chars); + assertTrue(tmpStr + " is not equal to " + "ene", tmpStr.equals("ene" ) == true); + //Now read the UTF8 stuff + charsToRead = is.readVInt() - 1;//since we are skipping one + is.skipChars(1); + assertTrue(utf8Str.length() - 1 + " does not equal: " + charsToRead, utf8Str.length() - 1 == charsToRead); + chars = new char[charsToRead]; + is.readChars(chars, 0, charsToRead); + tmpStr = new String(chars); + assertTrue(tmpStr + " is not equal to " + utf8Str.substring(1), tmpStr.equals(utf8Str.substring(1)) == true); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestIndexReader.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestIndexReader.java new file mode 100644 index 0000000..ac03b9e --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestIndexReader.java @@ -0,0 +1,1332 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.Collection; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.HashMap; +import java.util.Set; +import java.util.SortedSet; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldSelector; +import org.apache.lucene.document.Fieldable; +import org.apache.lucene.document.SetBasedFieldSelector; +import org.apache.lucene.index.IndexReader.FieldOption; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.search.FieldCache; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.store.AlreadyClosedException; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.LockObtainFailedException; +import org.apache.lucene.store.MockDirectoryWrapper; +import org.apache.lucene.store.NoSuchDirectoryException; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.store.LockReleaseFailedException; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; + +public class TestIndexReader extends LuceneTestCase { + + public void testCommitUserData() throws Exception { + Directory d = newDirectory(); + + Map commitUserData = new HashMap(); + commitUserData.put("foo", "fighters"); + + // set up writer + IndexWriter writer = new IndexWriter(d, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random)) + .setMaxBufferedDocs(2)); + for(int i=0;i<27;i++) + addDocumentWithFields(writer); + writer.close(); + + IndexReader r = IndexReader.open(d, false); + r.deleteDocument(5); + r.flush(commitUserData); + r.close(); + + SegmentInfos sis = new SegmentInfos(); + sis.read(d); + IndexReader r2 = IndexReader.open(d, false); + IndexCommit c = r.getIndexCommit(); + assertEquals(c.getUserData(), commitUserData); + + assertEquals(sis.getCurrentSegmentFileName(), c.getSegmentsFileName()); + + assertTrue(c.equals(r.getIndexCommit())); + + // Change the index + writer = new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, + new MockAnalyzer(random)).setOpenMode( + OpenMode.APPEND).setMaxBufferedDocs(2)); + for(int i=0;i<7;i++) + addDocumentWithFields(writer); + writer.close(); + + IndexReader r3 = r2.reopen(); + assertFalse(c.equals(r3.getIndexCommit())); + assertFalse(r2.getIndexCommit().isOptimized()); + r3.close(); + + writer = new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, + new MockAnalyzer(random)) + .setOpenMode(OpenMode.APPEND)); + writer.optimize(); + writer.close(); + + r3 = r2.reopen(); + assertTrue(r3.getIndexCommit().isOptimized()); + r2.close(); + r3.close(); + d.close(); + } + + public void testIsCurrent() throws Exception { + Directory d = newDirectory(); + IndexWriter writer = new IndexWriter(d, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random))); + addDocumentWithFields(writer); + writer.close(); + // set up reader: + IndexReader reader = IndexReader.open(d, false); + assertTrue(reader.isCurrent()); + // modify index by adding another document: + writer = new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, + new 
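+ // (Descriptive note: isCurrent() compares the reader's view of the index against the newest commit point on disk, so each of the commits below, append and re-create alike, is expected to turn the already-open reader stale.)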
MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); + addDocumentWithFields(writer); + writer.close(); + assertFalse(reader.isCurrent()); + // re-create index: + writer = new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, + new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE)); + addDocumentWithFields(writer); + writer.close(); + assertFalse(reader.isCurrent()); + reader.close(); + d.close(); + } + + /** + * Tests the IndexReader.getFieldNames implementation + * @throws Exception on error + */ + public void testGetFieldNames() throws Exception { + Directory d = newDirectory(); + // set up writer + IndexWriter writer = new IndexWriter( + d, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)) + ); + + Document doc = new Document(); + doc.add(new Field("keyword","test1", Field.Store.YES, Field.Index.NOT_ANALYZED)); + doc.add(new Field("text","test1", Field.Store.YES, Field.Index.ANALYZED)); + doc.add(new Field("unindexed","test1", Field.Store.YES, Field.Index.NO)); + doc.add(new Field("unstored","test1", Field.Store.NO, Field.Index.ANALYZED)); + writer.addDocument(doc); + + writer.close(); + // set up reader + IndexReader reader = IndexReader.open(d, false); + Collection fieldNames = reader.getFieldNames(IndexReader.FieldOption.ALL); + assertTrue(fieldNames.contains("keyword")); + assertTrue(fieldNames.contains("text")); + assertTrue(fieldNames.contains("unindexed")); + assertTrue(fieldNames.contains("unstored")); + reader.close(); + // add more documents + writer = new IndexWriter( + d, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). + setOpenMode(OpenMode.APPEND). + setMergePolicy(newLogMergePolicy()) + ); + // want to get some more segments here + int mergeFactor = ((LogMergePolicy) writer.getConfig().getMergePolicy()).getMergeFactor(); + for (int i = 0; i < 5*mergeFactor; i++) { + doc = new Document(); + doc.add(new Field("keyword","test1", Field.Store.YES, Field.Index.NOT_ANALYZED)); + doc.add(new Field("text","test1", Field.Store.YES, Field.Index.ANALYZED)); + doc.add(new Field("unindexed","test1", Field.Store.YES, Field.Index.NO)); + doc.add(new Field("unstored","test1", Field.Store.NO, Field.Index.ANALYZED)); + writer.addDocument(doc); + } + // new fields are in some different segments (we hope) + for (int i = 0; i < 5*mergeFactor; i++) { + doc = new Document(); + doc.add(new Field("keyword2","test1", Field.Store.YES, Field.Index.NOT_ANALYZED)); + doc.add(new Field("text2","test1", Field.Store.YES, Field.Index.ANALYZED)); + doc.add(new Field("unindexed2","test1", Field.Store.YES, Field.Index.NO)); + doc.add(new Field("unstored2","test1", Field.Store.NO, Field.Index.ANALYZED)); + writer.addDocument(doc); + } + // new termvector fields + for (int i = 0; i < 5*mergeFactor; i++) { + doc = new Document(); + doc.add(new Field("tvnot","tvnot", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO)); + doc.add(new Field("termvector","termvector", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES)); + doc.add(new Field("tvoffset","tvoffset", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS)); + doc.add(new Field("tvposition","tvposition", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS)); + doc.add(newField("tvpositionoffset","tvpositionoffset", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); + writer.addDocument(doc); + } + + writer.close(); + // verify fields again + reader = IndexReader.open(d, false); + fieldNames = 
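+ // (Descriptive note: FieldOption.ALL reports every field name the index has ever seen, indexed or not, which is why the block below expects all 13 names, while the INDEXED and UNINDEXED options partition them into 11 and 2.)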
reader.getFieldNames(IndexReader.FieldOption.ALL); + assertEquals(13, fieldNames.size()); // the following fields + assertTrue(fieldNames.contains("keyword")); + assertTrue(fieldNames.contains("text")); + assertTrue(fieldNames.contains("unindexed")); + assertTrue(fieldNames.contains("unstored")); + assertTrue(fieldNames.contains("keyword2")); + assertTrue(fieldNames.contains("text2")); + assertTrue(fieldNames.contains("unindexed2")); + assertTrue(fieldNames.contains("unstored2")); + assertTrue(fieldNames.contains("tvnot")); + assertTrue(fieldNames.contains("termvector")); + assertTrue(fieldNames.contains("tvposition")); + assertTrue(fieldNames.contains("tvoffset")); + assertTrue(fieldNames.contains("tvpositionoffset")); + + // verify that only indexed fields were returned + fieldNames = reader.getFieldNames(IndexReader.FieldOption.INDEXED); + assertEquals(11, fieldNames.size()); // 6 original + the 5 termvector fields + assertTrue(fieldNames.contains("keyword")); + assertTrue(fieldNames.contains("text")); + assertTrue(fieldNames.contains("unstored")); + assertTrue(fieldNames.contains("keyword2")); + assertTrue(fieldNames.contains("text2")); + assertTrue(fieldNames.contains("unstored2")); + assertTrue(fieldNames.contains("tvnot")); + assertTrue(fieldNames.contains("termvector")); + assertTrue(fieldNames.contains("tvposition")); + assertTrue(fieldNames.contains("tvoffset")); + assertTrue(fieldNames.contains("tvpositionoffset")); + + // verify that only unindexed fields were returned + fieldNames = reader.getFieldNames(IndexReader.FieldOption.UNINDEXED); + assertEquals(2, fieldNames.size()); // the following fields + assertTrue(fieldNames.contains("unindexed")); + assertTrue(fieldNames.contains("unindexed2")); + + // verify index term vector fields + fieldNames = reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR); + assertEquals(1, fieldNames.size()); // 1 field has term vector only + assertTrue(fieldNames.contains("termvector")); + + fieldNames = reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION); + assertEquals(1, fieldNames.size()); // 4 fields are indexed with term vectors + assertTrue(fieldNames.contains("tvposition")); + + fieldNames = reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET); + assertEquals(1, fieldNames.size()); // 4 fields are indexed with term vectors + assertTrue(fieldNames.contains("tvoffset")); + + fieldNames = reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET); + assertEquals(1, fieldNames.size()); // 4 fields are indexed with term vectors + assertTrue(fieldNames.contains("tvpositionoffset")); + reader.close(); + d.close(); + } + + public void testTermVectors() throws Exception { + Directory d = newDirectory(); + // set up writer + IndexWriter writer = new IndexWriter( + d, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). 
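+ // (Descriptive note: each document added below carries the same text in five fields, one per term vector flavor, NO, YES, WITH_OFFSETS, WITH_POSITIONS and WITH_POSITIONS_OFFSETS, mirroring the tvnot/termvector/tvoffset/tvposition/tvpositionoffset names asserted on in testGetFieldNames above.)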
+ setMergePolicy(newLogMergePolicy()) + ); + // want to get some more segments here + // new termvector fields + int mergeFactor = ((LogMergePolicy) writer.getConfig().getMergePolicy()).getMergeFactor(); + for (int i = 0; i < 5 * mergeFactor; i++) { + Document doc = new Document(); + doc.add(new Field("tvnot","one two two three three three", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO)); + doc.add(new Field("termvector","one two two three three three", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES)); + doc.add(new Field("tvoffset","one two two three three three", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS)); + doc.add(new Field("tvposition","one two two three three three", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS)); + doc.add(new Field("tvpositionoffset","one two two three three three", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); + + writer.addDocument(doc); + } + writer.close(); + IndexReader reader = IndexReader.open(d, false); + FieldSortedTermVectorMapper mapper = new FieldSortedTermVectorMapper(new TermVectorEntryFreqSortedComparator()); + reader.getTermFreqVector(0, mapper); + Map<String, SortedSet<TermVectorEntry>> map = mapper.getFieldToTerms(); + assertTrue("map is null and it shouldn't be", map != null); + assertTrue("map Size: " + map.size() + " is not: " + 4, map.size() == 4); + Set<TermVectorEntry> set = map.get("termvector"); + for (Iterator<TermVectorEntry> iterator = set.iterator(); iterator.hasNext();) { + TermVectorEntry entry = iterator.next(); + assertTrue("entry is null and it shouldn't be", entry != null); + if (VERBOSE) System.out.println("Entry: " + entry); + } + reader.close(); + d.close(); + } + + static void assertTermDocsCount(String msg, + IndexReader reader, + Term term, + int expected) + throws IOException + { + TermDocs tdocs = null; + + try { + tdocs = reader.termDocs(term); + assertNotNull(msg + ", null TermDocs", tdocs); + int count = 0; + while(tdocs.next()) { + count++; + } + assertEquals(msg + ", count mismatch", expected, count); + + } finally { + if (tdocs != null) + tdocs.close(); + } + + } + + + public void testBinaryFields() throws IOException { + Directory dir = newDirectory(); + byte[] bin = new byte[]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); + + for (int i = 0; i < 10; i++) { + addDoc(writer, "document number " + (i + 1)); + addDocumentWithFields(writer); + addDocumentWithDifferentFields(writer); + addDocumentWithTermVectorFields(writer); + } + writer.close(); + writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND).setMergePolicy(newLogMergePolicy())); + Document doc = new Document(); + doc.add(new Field("bin1", bin)); + doc.add(new Field("junk", "junk text", Field.Store.NO, Field.Index.ANALYZED)); + writer.addDocument(doc); + writer.close(); + IndexReader reader = IndexReader.open(dir, false); + doc = reader.document(reader.maxDoc() - 1); + Field[] fields = doc.getFields("bin1"); + assertNotNull(fields); + assertEquals(1, fields.length); + Field b1 = fields[0]; + assertTrue(b1.isBinary()); + byte[] data1 = b1.getBinaryValue(); + assertEquals(bin.length, b1.getBinaryLength()); + for (int i = 0; i < bin.length; i++) { + assertEquals(bin[i], data1[i + b1.getBinaryOffset()]); + } + Set<String> lazyFields = new HashSet<String>(); + lazyFields.add("bin1"); + FieldSelector sel = new
SetBasedFieldSelector(new HashSet(), lazyFields); + doc = reader.document(reader.maxDoc() - 1, sel); + Fieldable[] fieldables = doc.getFieldables("bin1"); + assertNotNull(fieldables); + assertEquals(1, fieldables.length); + Fieldable fb1 = fieldables[0]; + assertTrue(fb1.isBinary()); + assertEquals(bin.length, fb1.getBinaryLength()); + data1 = fb1.getBinaryValue(); + assertEquals(bin.length, fb1.getBinaryLength()); + for (int i = 0; i < bin.length; i++) { + assertEquals(bin[i], data1[i + fb1.getBinaryOffset()]); + } + reader.close(); + // force optimize + + + writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND).setMergePolicy(newLogMergePolicy())); + writer.optimize(); + writer.close(); + reader = IndexReader.open(dir, false); + doc = reader.document(reader.maxDoc() - 1); + fields = doc.getFields("bin1"); + assertNotNull(fields); + assertEquals(1, fields.length); + b1 = fields[0]; + assertTrue(b1.isBinary()); + data1 = b1.getBinaryValue(); + assertEquals(bin.length, b1.getBinaryLength()); + for (int i = 0; i < bin.length; i++) { + assertEquals(bin[i], data1[i + b1.getBinaryOffset()]); + } + reader.close(); + dir.close(); + } + + // Make sure attempts to make changes after reader is + // closed throws IOException: + public void testChangesAfterClose() throws IOException { + Directory dir = newDirectory(); + + IndexWriter writer = null; + IndexReader reader = null; + Term searchTerm = new Term("content", "aaa"); + + // add 11 documents with term : aaa + writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); + for (int i = 0; i < 11; i++) { + addDoc(writer, searchTerm.text()); + } + writer.close(); + + reader = IndexReader.open(dir, false); + + // Close reader: + reader.close(); + + // Then, try to make changes: + try { + reader.deleteDocument(4); + fail("deleteDocument after close failed to throw IOException"); + } catch (AlreadyClosedException e) { + // expected + } + + try { + reader.setNorm(5, "aaa", 2.0f); + fail("setNorm after close failed to throw IOException"); + } catch (AlreadyClosedException e) { + // expected + } + + try { + reader.undeleteAll(); + fail("undeleteAll after close failed to throw IOException"); + } catch (AlreadyClosedException e) { + // expected + } + dir.close(); + } + + // Make sure we get lock obtain failed exception with 2 writers: + public void testLockObtainFailed() throws IOException { + Directory dir = newDirectory(); + + Term searchTerm = new Term("content", "aaa"); + + // add 11 documents with term : aaa + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); + writer.commit(); + for (int i = 0; i < 11; i++) { + addDoc(writer, searchTerm.text()); + } + + // Create reader: + IndexReader reader = IndexReader.open(dir, false); + + // Try to make changes + try { + reader.deleteDocument(4); + fail("deleteDocument should have hit LockObtainFailedException"); + } catch (LockObtainFailedException e) { + // expected + } + + try { + reader.setNorm(5, "aaa", 2.0f); + fail("setNorm should have hit LockObtainFailedException"); + } catch (LockObtainFailedException e) { + // expected + } + + try { + reader.undeleteAll(); + fail("undeleteAll should have hit LockObtainFailedException"); + } catch (LockObtainFailedException e) { + // expected + } + writer.close(); + reader.close(); + dir.close(); + } + + // Make sure you can set norms & commit even if a reader + // is open against the index: + 
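+ // (Before the test itself, a minimal compilable sketch of that lifecycle; assumptions: it lives in this same package and class, dir already holds at least one document with a "content" field, and the method name is illustrative only, not part of the original suite.) + private void sketchNormWriteLockLifecycle(Directory dir) throws IOException { + IndexReader r = IndexReader.open(dir, false); // writable (non-read-only) reader + r.setNorm(0, "content", 2.0f); // first pending change acquires the index write lock + assertTrue("locked", IndexWriter.isLocked(dir)); + r.commit(); // persists a new norms generation and releases the lock + assertTrue("not locked", !IndexWriter.isLocked(dir)); + r.close(); + }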
public void testWritingNorms() throws IOException { + Directory dir = newDirectory(); + IndexWriter writer; + IndexReader reader; + Term searchTerm = new Term("content", "aaa"); + + // add 1 document with term : aaa + writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); + addDoc(writer, searchTerm.text()); + writer.close(); + + // now open reader & set norm for doc 0 + reader = IndexReader.open(dir, false); + reader.setNorm(0, "content", (float) 2.0); + + // we should be holding the write lock now: + assertTrue("locked", IndexWriter.isLocked(dir)); + + reader.commit(); + + // we should not be holding the write lock now: + assertTrue("not locked", !IndexWriter.isLocked(dir)); + + // open a 2nd reader: + IndexReader reader2 = IndexReader.open(dir, false); + + // set norm again for doc 0 + reader.setNorm(0, "content", (float) 3.0); + assertTrue("locked", IndexWriter.isLocked(dir)); + + reader.close(); + + // we should not be holding the write lock now: + assertTrue("not locked", !IndexWriter.isLocked(dir)); + + reader2.close(); + dir.close(); + } + + + // Make sure you can set norms & commit, and there are + // no extra norms files left: + public void testWritingNormsNoReader() throws IOException { + Directory dir = newDirectory(); + IndexWriter writer = null; + IndexReader reader = null; + Term searchTerm = new Term("content", "aaa"); + + // add 1 document with term : aaa + writer = new IndexWriter( + dir, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). + setMergePolicy(newLogMergePolicy(false)) + ); + addDoc(writer, searchTerm.text()); + writer.close(); + + // now open reader & set norm for doc 0 (writes to + // _0_1.s0) + reader = IndexReader.open(dir, false); + reader.setNorm(0, "content", (float) 2.0); + reader.close(); + + // now open reader again & set norm for doc 0 (writes to _0_2.s0) + reader = IndexReader.open(dir, false); + reader.setNorm(0, "content", (float) 2.0); + reader.close(); + assertFalse("failed to remove first generation norms file on writing second generation", + dir.fileExists("_0_1.s0")); + + dir.close(); + } + + /* ???
public void testOpenEmptyDirectory() throws IOException{ + String dirName = "test.empty"; + File fileDirName = new File(dirName); + if (!fileDirName.exists()) { + fileDirName.mkdir(); + } + try { + IndexReader.open(fileDirName); + fail("opening IndexReader on empty directory failed to produce FileNotFoundException"); + } catch (FileNotFoundException e) { + // GOOD + } + rmDir(fileDirName); + }*/ + + public void testFilesOpenClose() throws IOException { + // Create initial data set + File dirFile = _TestUtil.getTempDir("TestIndexReader.testFilesOpenClose"); + Directory dir = newFSDirectory(dirFile); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); + addDoc(writer, "test"); + writer.close(); + dir.close(); + + // Try to erase the data - this ensures that the writer closed all files + _TestUtil.rmDir(dirFile); + dir = newFSDirectory(dirFile); + + // Now create the data set again, just as before + writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE)); + addDoc(writer, "test"); + writer.close(); + dir.close(); + + // Now open existing directory and test that reader closes all files + dir = newFSDirectory(dirFile); + IndexReader reader1 = IndexReader.open(dir, false); + reader1.close(); + dir.close(); + + // The following will fail if reader did not close + // all files + _TestUtil.rmDir(dirFile); + } + + public void testLastModified() throws Exception { + for(int i=0;i<2;i++) { + final Directory dir = newDirectory(); + assertFalse(IndexReader.indexExists(dir)); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE)); + addDocumentWithFields(writer); + assertTrue(IndexWriter.isLocked(dir)); // writer open, so dir is locked + writer.close(); + assertTrue(IndexReader.indexExists(dir)); + IndexReader reader = IndexReader.open(dir, false); + assertFalse(IndexWriter.isLocked(dir)); // reader only, no lock + long version = IndexReader.lastModified(dir); + if (i == 1) { + long version2 = IndexReader.lastModified(dir); + assertEquals(version, version2); + } + reader.close(); + // modify index and check version has been + // incremented: + Thread.sleep(1000); + + writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE)); + addDocumentWithFields(writer); + writer.close(); + reader = IndexReader.open(dir, false); + assertTrue("old lastModified is " + version + "; new lastModified is " + IndexReader.lastModified(dir), version <= IndexReader.lastModified(dir)); + reader.close(); + dir.close(); + } + } + + public void testVersion() throws IOException { + Directory dir = newDirectory(); + assertFalse(IndexReader.indexExists(dir)); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); + addDocumentWithFields(writer); + assertTrue(IndexWriter.isLocked(dir)); // writer open, so dir is locked + writer.close(); + assertTrue(IndexReader.indexExists(dir)); + IndexReader reader = IndexReader.open(dir, false); + assertFalse(IndexWriter.isLocked(dir)); // reader only, no lock + long version = IndexReader.getCurrentVersion(dir); + reader.close(); + // modify index and check version has been + // incremented: + writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE)); + 
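+ // (Descriptive note: getCurrentVersion() reads the version stamp kept in the current segments_N file, and every commit, even one that re-creates the index as here, is expected to move it strictly forward, which the assertion below checks.)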
addDocumentWithFields(writer); + writer.close(); + reader = IndexReader.open(dir, false); + assertTrue("old version is " + version + "; new version is " + IndexReader.getCurrentVersion(dir), version < IndexReader.getCurrentVersion(dir)); + reader.close(); + dir.close(); + } + + public void testLock() throws IOException { + Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); + addDocumentWithFields(writer); + writer.close(); + writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); + IndexReader reader = IndexReader.open(dir, false); + try { + reader.deleteDocument(0); + fail("expected lock"); + } catch(IOException e) { + // expected exception + } + try { + IndexWriter.unlock(dir); // this should not be done in the real world! + } catch (LockReleaseFailedException lrfe) { + writer.close(); + } + reader.deleteDocument(0); + reader.close(); + writer.close(); + dir.close(); + } + + public void testDocsOutOfOrderJIRA140() throws IOException { + Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); + for(int i=0;i<11;i++) { + addDoc(writer, "aaa"); + } + writer.close(); + IndexReader reader = IndexReader.open(dir, false); + + // Try to delete an invalid docId, yet, within range + // of the final bits of the BitVector: + + boolean gotException = false; + try { + reader.deleteDocument(11); + } catch (ArrayIndexOutOfBoundsException e) { + gotException = true; + } + reader.close(); + + writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); + + // We must add more docs to get a new segment written + for(int i=0;i<11;i++) { + addDoc(writer, "aaa"); + } + + // Without the fix for LUCENE-140 this call will + // [incorrectly] hit a "docs out of order" + // IllegalStateException because above out-of-bounds + // deleteDocument corrupted the index: + writer.optimize(); + writer.close(); + if (!gotException) { + fail("delete of out-of-bounds doc number failed to hit exception"); + } + dir.close(); + } + + public void testExceptionReleaseWriteLockJIRA768() throws IOException { + + Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); + addDoc(writer, "aaa"); + writer.close(); + + IndexReader reader = IndexReader.open(dir, false); + try { + reader.deleteDocument(1); + fail("did not hit exception when deleting an invalid doc number"); + } catch (ArrayIndexOutOfBoundsException e) { + // expected + } + reader.close(); + if (IndexWriter.isLocked(dir)) { + fail("write lock is still held after close"); + } + + reader = IndexReader.open(dir, false); + try { + reader.setNorm(1, "content", (float) 2.0); + fail("did not hit exception when calling setNorm on an invalid doc number"); + } catch (ArrayIndexOutOfBoundsException e) { + // expected + } + reader.close(); + if (IndexWriter.isLocked(dir)) { + fail("write lock is still held after close"); + } + dir.close(); + } + + private String arrayToString(String[] l) { + String s = ""; + for(int i=0;i 0) { + s += "\n "; + } + s += l[i]; + } + return s; + } + + public void testOpenReaderAfterDelete() throws IOException { + File dirFile = _TestUtil.getTempDir("deletetest"); + Directory dir = newFSDirectory(dirFile); + try { + IndexReader.open(dir, false); 
+ fail("expected FileNotFoundException"); + } catch (FileNotFoundException e) { + // expected + } + + dirFile.delete(); + + // Make sure we still get a CorruptIndexException (not NPE): + try { + IndexReader.open(dir, false); + fail("expected FileNotFoundException"); + } catch (FileNotFoundException e) { + // expected + } + + dir.close(); + } + + static void addDocumentWithFields(IndexWriter writer) throws IOException + { + Document doc = new Document(); + doc.add(newField("keyword","test1", Field.Store.YES, Field.Index.NOT_ANALYZED)); + doc.add(newField("text","test1", Field.Store.YES, Field.Index.ANALYZED)); + doc.add(newField("unindexed","test1", Field.Store.YES, Field.Index.NO)); + doc.add(newField("unstored","test1", Field.Store.NO, Field.Index.ANALYZED)); + writer.addDocument(doc); + } + + static void addDocumentWithDifferentFields(IndexWriter writer) throws IOException + { + Document doc = new Document(); + doc.add(newField("keyword2","test1", Field.Store.YES, Field.Index.NOT_ANALYZED)); + doc.add(newField("text2","test1", Field.Store.YES, Field.Index.ANALYZED)); + doc.add(newField("unindexed2","test1", Field.Store.YES, Field.Index.NO)); + doc.add(newField("unstored2","test1", Field.Store.NO, Field.Index.ANALYZED)); + writer.addDocument(doc); + } + + static void addDocumentWithTermVectorFields(IndexWriter writer) throws IOException + { + Document doc = new Document(); + doc.add(newField("tvnot","tvnot", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO)); + doc.add(newField("termvector","termvector", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES)); + doc.add(newField("tvoffset","tvoffset", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS)); + doc.add(newField("tvposition","tvposition", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS)); + doc.add(newField("tvpositionoffset","tvpositionoffset", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); + + writer.addDocument(doc); + } + + static void addDoc(IndexWriter writer, String value) throws IOException { + Document doc = new Document(); + doc.add(newField("content", value, Field.Store.NO, Field.Index.ANALYZED)); + writer.addDocument(doc); + } + + public static void assertIndexEquals(IndexReader index1, IndexReader index2) throws IOException { + assertEquals("IndexReaders have different values for numDocs.", index1.numDocs(), index2.numDocs()); + assertEquals("IndexReaders have different values for maxDoc.", index1.maxDoc(), index2.maxDoc()); + assertEquals("Only one IndexReader has deletions.", index1.hasDeletions(), index2.hasDeletions()); + assertEquals("Only one index is optimized.", index1.isOptimized(), index2.isOptimized()); + + // check field names + Collection fields1 = index1.getFieldNames(FieldOption.ALL); + Collection fields2 = index1.getFieldNames(FieldOption.ALL); + assertEquals("IndexReaders have different numbers of fields.", fields1.size(), fields2.size()); + Iterator it1 = fields1.iterator(); + Iterator it2 = fields1.iterator(); + while (it1.hasNext()) { + assertEquals("Different field names.", it1.next(), it2.next()); + } + + // check norms + it1 = fields1.iterator(); + while (it1.hasNext()) { + String curField = it1.next(); + byte[] norms1 = index1.norms(curField); + byte[] norms2 = index2.norms(curField); + if (norms1 != null && norms2 != null) + { + assertEquals(norms1.length, norms2.length); + for (int i = 0; i < norms1.length; i++) { + assertEquals("Norm different for doc " + i + " and field '" + curField + "'.", 
norms1[i], norms2[i]); + } + } + else + { + assertSame(norms1, norms2); + } + } + + // check deletions + for (int i = 0; i < index1.maxDoc(); i++) { + assertEquals("Doc " + i + " only deleted in one index.", index1.isDeleted(i), index2.isDeleted(i)); + } + + // check stored fields + for (int i = 0; i < index1.maxDoc(); i++) { + if (!index1.isDeleted(i)) { + Document doc1 = index1.document(i); + Document doc2 = index2.document(i); + List fieldable1 = doc1.getFields(); + List fieldable2 = doc2.getFields(); + assertEquals("Different numbers of fields for doc " + i + ".", fieldable1.size(), fieldable2.size()); + Iterator itField1 = fieldable1.iterator(); + Iterator itField2 = fieldable2.iterator(); + while (itField1.hasNext()) { + Field curField1 = (Field) itField1.next(); + Field curField2 = (Field) itField2.next(); + assertEquals("Different fields names for doc " + i + ".", curField1.name(), curField2.name()); + assertEquals("Different field values for doc " + i + ".", curField1.stringValue(), curField2.stringValue()); + } + } + } + + // check dictionary and posting lists + TermEnum enum1 = index1.terms(); + TermEnum enum2 = index2.terms(); + TermPositions tp1 = index1.termPositions(); + TermPositions tp2 = index2.termPositions(); + while(enum1.next()) { + assertTrue(enum2.next()); + assertEquals("Different term in dictionary.", enum1.term(), enum2.term()); + tp1.seek(enum1.term()); + tp2.seek(enum1.term()); + while(tp1.next()) { + assertTrue(tp2.next()); + assertEquals("Different doc id in postinglist of term " + enum1.term() + ".", tp1.doc(), tp2.doc()); + assertEquals("Different term frequence in postinglist of term " + enum1.term() + ".", tp1.freq(), tp2.freq()); + for (int i = 0; i < tp1.freq(); i++) { + assertEquals("Different positions in postinglist of term " + enum1.term() + ".", tp1.nextPosition(), tp2.nextPosition()); + } + } + } + } + + public void testGetIndexCommit() throws IOException { + + Directory d = newDirectory(); + + // set up writer + IndexWriter writer = new IndexWriter( + d, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). + setMaxBufferedDocs(2). + setMergePolicy(newLogMergePolicy(10)) + ); + for(int i=0;i<27;i++) + addDocumentWithFields(writer); + writer.close(); + + SegmentInfos sis = new SegmentInfos(); + sis.read(d); + IndexReader r = IndexReader.open(d, false); + IndexCommit c = r.getIndexCommit(); + + assertEquals(sis.getCurrentSegmentFileName(), c.getSegmentsFileName()); + + assertTrue(c.equals(r.getIndexCommit())); + + // Change the index + writer = new IndexWriter( + d, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). + setOpenMode(OpenMode.APPEND). + setMaxBufferedDocs(2). 
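+ // (Descriptive note: per the TestIndexCommit case earlier in this patch, IndexCommit equality is based on the owning Directory plus the commit version, not the segments file name, so the commit captured above stops comparing equal as soon as the appends below are committed.)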
+ setMergePolicy(newLogMergePolicy(10)) + ); + for(int i=0;i<7;i++) + addDocumentWithFields(writer); + writer.close(); + + IndexReader r2 = r.reopen(); + assertFalse(c.equals(r2.getIndexCommit())); + assertFalse(r2.getIndexCommit().isOptimized()); + r2.close(); + + writer = new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, + new MockAnalyzer(random)) + .setOpenMode(OpenMode.APPEND)); + writer.optimize(); + writer.close(); + + r2 = r.reopen(); + assertTrue(r2.getIndexCommit().isOptimized()); + + r.close(); + r2.close(); + d.close(); + } + + public void testReadOnly() throws Throwable { + Directory d = newDirectory(); + IndexWriter writer = new IndexWriter(d, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random))); + addDocumentWithFields(writer); + writer.commit(); + addDocumentWithFields(writer); + writer.close(); + + IndexReader r = IndexReader.open(d, true); + try { + r.deleteDocument(0); + fail(); + } catch (UnsupportedOperationException uoe) { + // expected + } + + writer = new IndexWriter( + d, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). + setOpenMode(OpenMode.APPEND). + setMergePolicy(newLogMergePolicy(10)) + ); + addDocumentWithFields(writer); + writer.close(); + + // Make sure reopen is still readonly: + IndexReader r2 = r.reopen(); + r.close(); + + assertFalse(r == r2); + + try { + r2.deleteDocument(0); + fail(); + } catch (UnsupportedOperationException uoe) { + // expected + } + + writer = new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, + new MockAnalyzer(random)) + .setOpenMode(OpenMode.APPEND)); + writer.optimize(); + writer.close(); + + // Make sure reopen to a single segment is still readonly: + IndexReader r3 = r2.reopen(); + assertFalse(r3 == r2); + r2.close(); + + assertFalse(r == r2); + + try { + r3.deleteDocument(0); + fail(); + } catch (UnsupportedOperationException uoe) { + // expected + } + + // Make sure write lock isn't held + writer = new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, + new MockAnalyzer(random)) + .setOpenMode(OpenMode.APPEND)); + writer.close(); + + r3.close(); + d.close(); + } + + + // LUCENE-1474 + public void testIndexReader() throws Exception { + Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random))); + writer.addDocument(createDocument("a")); + writer.addDocument(createDocument("b")); + writer.addDocument(createDocument("c")); + writer.close(); + IndexReader reader = IndexReader.open(dir, false); + reader.deleteDocuments(new Term("id", "a")); + reader.flush(); + reader.deleteDocuments(new Term("id", "b")); + reader.close(); + IndexReader.open(dir,true).close(); + dir.close(); + } + + static Document createDocument(String id) { + Document doc = new Document(); + doc.add(newField("id", id, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); + return doc; + } + + // LUCENE-1468 -- make sure on attempting to open an + // IndexReader on a non-existent directory, you get a + // good exception + public void testNoDir() throws Throwable { + Directory dir = newFSDirectory(_TestUtil.getTempDir("doesnotexist")); + try { + IndexReader.open(dir, true); + fail("did not hit expected exception"); + } catch (NoSuchDirectoryException nsde) { + // expected + } + dir.close(); + } + + // LUCENE-1509 + public void testNoDupCommitFileNames() throws Throwable { + + Directory dir = newDirectory(); + + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new 
MockAnalyzer(random)) + .setMaxBufferedDocs(2)); + writer.addDocument(createDocument("a")); + writer.addDocument(createDocument("a")); + writer.addDocument(createDocument("a")); + writer.close(); + + Collection commits = IndexReader.listCommits(dir); + for (final IndexCommit commit : commits) { + Collection files = commit.getFileNames(); + HashSet seen = new HashSet(); + for (final String fileName : files) { + assertTrue("file " + fileName + " was duplicated", !seen.contains(fileName)); + seen.add(fileName); + } + } + + dir.close(); + } + + // LUCENE-1579: Ensure that on a cloned reader, segments + // reuse the doc values arrays in FieldCache + public void testFieldCacheReuseAfterClone() throws Exception { + Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); + Document doc = new Document(); + doc.add(newField("number", "17", Field.Store.NO, Field.Index.NOT_ANALYZED)); + writer.addDocument(doc); + writer.close(); + + // Open reader + IndexReader r = SegmentReader.getOnlySegmentReader(dir); + final int[] ints = FieldCache.DEFAULT.getInts(r, "number"); + assertEquals(1, ints.length); + assertEquals(17, ints[0]); + + // Clone reader + IndexReader r2 = (IndexReader) r.clone(); + r.close(); + assertTrue(r2 != r); + final int[] ints2 = FieldCache.DEFAULT.getInts(r2, "number"); + r2.close(); + + assertEquals(1, ints2.length); + assertEquals(17, ints2[0]); + assertTrue(ints == ints2); + + dir.close(); + } + + // LUCENE-1579: Ensure that on a reopened reader, that any + // shared segments reuse the doc values arrays in + // FieldCache + public void testFieldCacheReuseAfterReopen() throws Exception { + Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter( + dir, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). 
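+ // (Descriptive note: FieldCache keys its arrays by segment reader, so both LUCENE-1579 tests here assert array identity, ints == ints2: a cloned or reopened reader that still shares a segment must hand back the very same int[] rather than a re-parsed copy.)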
+ setMergePolicy(newLogMergePolicy(10)) + ); + Document doc = new Document(); + doc.add(newField("number", "17", Field.Store.NO, Field.Index.NOT_ANALYZED)); + ((LogMergePolicy) writer.getMergePolicy()).setMergeFactor(10); + writer.addDocument(doc); + writer.commit(); + + // Open reader1 + IndexReader r = IndexReader.open(dir, false); + IndexReader r1 = SegmentReader.getOnlySegmentReader(r); + final int[] ints = FieldCache.DEFAULT.getInts(r1, "number"); + assertEquals(1, ints.length); + assertEquals(17, ints[0]); + + // Add new segment + writer.addDocument(doc); + writer.commit(); + + // Reopen reader1 --> reader2 + IndexReader r2 = r.reopen(); + r.close(); + IndexReader sub0 = r2.getSequentialSubReaders()[0]; + final int[] ints2 = FieldCache.DEFAULT.getInts(sub0, "number"); + r2.close(); + assertTrue(ints == ints2); + + writer.close(); + dir.close(); + } + + // LUCENE-1586: getUniqueTermCount + public void testUniqueTermCount() throws Exception { + Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); + Document doc = new Document(); + doc.add(newField("field", "a b c d e f g h i j k l m n o p q r s t u v w x y z", Field.Store.NO, Field.Index.ANALYZED)); + doc.add(newField("number", "0 1 2 3 4 5 6 7 8 9", Field.Store.NO, Field.Index.ANALYZED)); + writer.addDocument(doc); + writer.addDocument(doc); + writer.commit(); + + IndexReader r = IndexReader.open(dir, false); + IndexReader r1 = SegmentReader.getOnlySegmentReader(r); + assertEquals(36, r1.getUniqueTermCount()); + writer.addDocument(doc); + writer.commit(); + IndexReader r2 = r.reopen(); + r.close(); + try { + r2.getUniqueTermCount(); + fail("expected exception"); + } catch (UnsupportedOperationException uoe) { + // expected + } + IndexReader[] subs = r2.getSequentialSubReaders(); + for(int i=0;i 1; + + IndexReader[] clones = new IndexReader[subs.length]; + for (int x=0; x < subs.length; x++) { + clones[x] = (IndexReader) subs[x].clone(); + } + reader.close(); + for (int x=0; x < subs.length; x++) { + clones[x].close(); + } + dir1.close(); + } + + public void testLucene1516Bug() throws Exception { + final Directory dir1 = newDirectory(); + TestIndexReaderReopen.createIndex(random, dir1, false); + IndexReader r1 = IndexReader.open(dir1, false); + r1.incRef(); + IndexReader r2 = r1.clone(false); + r1.deleteDocument(5); + r1.decRef(); + + r1.incRef(); + + r2.close(); + r1.decRef(); + r1.close(); + dir1.close(); + } + + public void testCloseStoredFields() throws Exception { + final Directory dir = newDirectory(); + IndexWriter w = new IndexWriter( + dir, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). 
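+ // (Descriptive note on the LUCENE-1516 case above: clone(false) produces a writable clone that shares per-segment state with its source, and the incRef()/decRef() pairs keep that shared state alive until the last reference is released.)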
+ setMergePolicy(newLogMergePolicy(false)) + ); + Document doc = new Document(); + doc.add(newField("field", "yes it's stored", Field.Store.YES, Field.Index.ANALYZED)); + w.addDocument(doc); + w.close(); + IndexReader r1 = IndexReader.open(dir, false); + IndexReader r2 = r1.clone(false); + r1.close(); + r2.close(); + dir.close(); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java new file mode 100644 index 0000000..0bce457 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java @@ -0,0 +1,326 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements.  See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.  You may obtain a copy of the License at + * + *     http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Random; + +import java.util.concurrent.atomic.AtomicInteger; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.Field.Index; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.index.SegmentNorms; +import org.apache.lucene.search.DefaultSimilarity; +import org.apache.lucene.search.Similarity; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; + +/** + * Tests cloning IndexReader norms + */ +public class TestIndexReaderCloneNorms extends LuceneTestCase { + + private class SimilarityOne extends DefaultSimilarity { + @Override + public float computeNorm(String fieldName, FieldInvertState state) { + // disable length norm + return state.getBoost(); + } + } + + private static final int NUM_FIELDS = 10; + + private Similarity similarityOne; + + private Analyzer anlzr; + + private int numDocNorms; + + private ArrayList<Float> norms; + + private ArrayList<Float> modifiedNorms; + + private float lastNorm = 0; + + private float normDelta = (float) 0.001; + + @Override + public void setUp() throws Exception { + super.setUp(); + similarityOne = new SimilarityOne(); + anlzr = new MockAnalyzer(random); + } + + /** + * Test that norm values are preserved as the index is maintained. Including + * separate norms. Including merging indexes with separate norms. Including + * optimize.
+ */ + public void testNorms() throws IOException { + // test with a single index: index1 + Directory dir1 = newDirectory(); + IndexWriter.unlock(dir1); + + norms = new ArrayList(); + modifiedNorms = new ArrayList(); + + createIndex(random, dir1); + doTestNorms(random, dir1); + + // test with a single index: index2 + ArrayList norms1 = norms; + ArrayList modifiedNorms1 = modifiedNorms; + int numDocNorms1 = numDocNorms; + + norms = new ArrayList(); + modifiedNorms = new ArrayList(); + numDocNorms = 0; + + Directory dir2 = newDirectory(); + + createIndex(random, dir2); + doTestNorms(random, dir2); + + // add index1 and index2 to a third index: index3 + Directory dir3 = newDirectory(); + + createIndex(random, dir3); + IndexWriter iw = new IndexWriter(dir3, newIndexWriterConfig( + TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.APPEND) + .setMaxBufferedDocs(5).setMergePolicy(newLogMergePolicy(3))); + iw.addIndexes(new Directory[] { dir1, dir2 }); + iw.optimize(); + iw.close(); + + norms1.addAll(norms); + norms = norms1; + modifiedNorms1.addAll(modifiedNorms); + modifiedNorms = modifiedNorms1; + numDocNorms += numDocNorms1; + + // test with index3 + verifyIndex(dir3); + doTestNorms(random, dir3); + + // now with optimize + iw = new IndexWriter(dir3, newIndexWriterConfig( TEST_VERSION_CURRENT, + anlzr).setOpenMode(OpenMode.APPEND).setMaxBufferedDocs(5).setMergePolicy(newLogMergePolicy(3))); + iw.optimize(); + iw.close(); + verifyIndex(dir3); + + dir1.close(); + dir2.close(); + dir3.close(); + } + + // try cloning and reopening the norms + private void doTestNorms(Random random, Directory dir) throws IOException { + addDocs(random, dir, 12, true); + IndexReader ir = IndexReader.open(dir, false); + verifyIndex(ir); + modifyNormsForF1(ir); + IndexReader irc = (IndexReader) ir.clone();// IndexReader.open(dir, false);//ir.clone(); + verifyIndex(irc); + + modifyNormsForF1(irc); + + IndexReader irc3 = (IndexReader) irc.clone(); + verifyIndex(irc3); + modifyNormsForF1(irc3); + verifyIndex(irc3); + irc3.flush(); + irc3.close(); + + irc.close(); + ir.close(); + } + + public void testNormsClose() throws IOException { + Directory dir1 = newDirectory(); + TestIndexReaderReopen.createIndex(random, dir1, false); + SegmentReader reader1 = SegmentReader.getOnlySegmentReader(dir1); + reader1.norms("field1"); + SegmentNorms r1norm = reader1.norms.get("field1"); + AtomicInteger r1BytesRef = r1norm.bytesRef(); + SegmentReader reader2 = (SegmentReader)reader1.clone(); + assertEquals(2, r1norm.bytesRef().get()); + reader1.close(); + assertEquals(1, r1BytesRef.get()); + reader2.norms("field1"); + reader2.close(); + dir1.close(); + } + + public void testNormsRefCounting() throws IOException { + Directory dir1 = newDirectory(); + TestIndexReaderReopen.createIndex(random, dir1, false); + IndexReader reader1 = IndexReader.open(dir1, false); + + IndexReader reader2C = (IndexReader) reader1.clone(); + SegmentReader segmentReader2C = SegmentReader.getOnlySegmentReader(reader2C); + segmentReader2C.norms("field1"); // load the norms for the field + SegmentNorms reader2CNorm = segmentReader2C.norms.get("field1"); + assertTrue("reader2CNorm.bytesRef()=" + reader2CNorm.bytesRef(), reader2CNorm.bytesRef().get() == 2); + + + + IndexReader reader3C = (IndexReader) reader2C.clone(); + SegmentReader segmentReader3C = SegmentReader.getOnlySegmentReader(reader3C); + SegmentNorms reader3CCNorm = segmentReader3C.norms.get("field1"); + assertEquals(3, reader3CCNorm.bytesRef().get()); + + // edit a norm and the refcount should be 1 + 
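+ // (Descriptive note: segment norms are copy-on-write; cloning only bumps the shared SegmentNorms refcount, while the setNorm() below forces reader4C to take a private copy, which is why reader3CCNorm falls back to 3 and reader4CCNorm starts at 1 in the assertions that follow.)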
+    IndexReader reader4C = (IndexReader) reader3C.clone();
+    SegmentReader segmentReader4C = SegmentReader.getOnlySegmentReader(reader4C);
+    assertEquals(4, reader3CCNorm.bytesRef().get());
+    reader4C.setNorm(5, "field1", 0.33f);
+
+    // generate a cannot update exception in reader1
+    try {
+      reader3C.setNorm(1, "field1", 0.99f);
+      fail("did not hit expected exception");
+    } catch (Exception ex) {
+      // expected
+    }
+
+    // norm values should be different
+    assertTrue(Similarity.getDefault().decodeNormValue(segmentReader3C.norms("field1")[5])
+        != Similarity.getDefault().decodeNormValue(segmentReader4C.norms("field1")[5]));
+    SegmentNorms reader4CCNorm = segmentReader4C.norms.get("field1");
+    assertEquals(3, reader3CCNorm.bytesRef().get());
+    assertEquals(1, reader4CCNorm.bytesRef().get());
+
+    IndexReader reader5C = (IndexReader) reader4C.clone();
+    SegmentReader segmentReader5C = SegmentReader.getOnlySegmentReader(reader5C);
+    SegmentNorms reader5CCNorm = segmentReader5C.norms.get("field1");
+    reader5C.setNorm(5, "field1", 0.7f);
+    assertEquals(1, reader5CCNorm.bytesRef().get());
+
+    reader5C.close();
+    reader4C.close();
+    reader3C.close();
+    reader2C.close();
+    reader1.close();
+    dir1.close();
+  }
+
+  private void createIndex(Random random, Directory dir) throws IOException {
+    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(
+        TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.CREATE)
+        .setMaxBufferedDocs(5).setSimilarity(similarityOne).setMergePolicy(newLogMergePolicy()));
+    setUseCompoundFile(iw.getConfig().getMergePolicy(), true);
+    setMergeFactor(iw.getConfig().getMergePolicy(), 3);
+    iw.close();
+  }
+
+  private void modifyNormsForF1(IndexReader ir) throws IOException {
+    int n = ir.maxDoc();
+    // System.out.println("modifyNormsForF1 maxDoc: "+n);
+    for (int i = 0; i < n; i += 3) { // modify for every third doc
+      int k = (i * 3) % modifiedNorms.size();
+      float origNorm = modifiedNorms.get(i).floatValue();
+      float newNorm = modifiedNorms.get(k).floatValue();
+      // System.out.println("Modifying: for "+i+" from "+origNorm+" to
+      // "+newNorm);
+      // System.out.println("      and: for "+k+" from "+newNorm+" to "+origNorm);
+      modifiedNorms.set(i, Float.valueOf(newNorm));
+      modifiedNorms.set(k, Float.valueOf(origNorm));
+      ir.setNorm(i, "f" + 1, newNorm);
+      ir.setNorm(k, "f" + 1, origNorm);
+      // System.out.println("setNorm i: "+i);
+      // break;
+    }
+    // ir.close();
+  }
+
+  private void verifyIndex(Directory dir) throws IOException {
+    IndexReader ir = IndexReader.open(dir, false);
+    verifyIndex(ir);
+    ir.close();
+  }
+
+  private void verifyIndex(IndexReader ir) throws IOException {
+    for (int i = 0; i < NUM_FIELDS; i++) {
+      String field = "f" + i;
+      byte b[] = ir.norms(field);
+      assertEquals("number of norms mismatches", numDocNorms, b.length);
+      ArrayList<Float> storedNorms = (i == 1 ? modifiedNorms : norms);
+      for (int j = 0; j < b.length; j++) {
+        float norm = Similarity.getDefault().decodeNormValue(b[j]);
+        float norm1 = storedNorms.get(j).floatValue();
+        assertEquals("stored norm value of " + field + " for doc " + j + " is "
+            + norm + " - a mismatch!", norm, norm1, 0.000001);
+      }
+    }
+  }
+
+  private void addDocs(Random random, Directory dir, int ndocs, boolean compound)
+      throws IOException {
+    IndexWriterConfig conf = newIndexWriterConfig(
+        TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.APPEND)
+        .setMaxBufferedDocs(5).setSimilarity(similarityOne).setMergePolicy(newLogMergePolicy());
+    LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy();
+    lmp.setMergeFactor(3);
+    lmp.setUseCompoundFile(compound);
+    IndexWriter iw = new IndexWriter(dir, conf);
+    for (int i = 0; i < ndocs; i++) {
+      iw.addDocument(newDoc());
+    }
+    iw.close();
+  }
+
+  // create the next document
+  private Document newDoc() {
+    Document d = new Document();
+    float boost = nextNorm();
+    for (int i = 0; i < 10; i++) {
+      Field f = newField("f" + i, "v" + i, Store.NO, Index.NOT_ANALYZED);
+      f.setBoost(boost);
+      d.add(f);
+    }
+    return d;
+  }
+
+  // return unique norm values that are unchanged by encoding/decoding
+  private float nextNorm() {
+    float norm = lastNorm + normDelta;
+    do {
+      float norm1 = Similarity.getDefault().decodeNormValue(
+          Similarity.getDefault().encodeNormValue(norm));
+      if (norm1 > lastNorm) {
+        // System.out.println(norm1+" > "+lastNorm);
+        norm = norm1;
+        break;
+      }
+      norm += normDelta;
+    } while (true);
+    norms.add(numDocNorms, Float.valueOf(norm));
+    modifiedNorms.add(numDocNorms, Float.valueOf(norm));
+    // System.out.println("creating norm("+numDocNorms+"): "+norm);
+    numDocNorms++;
+    lastNorm = (norm > 10 ? 0 : norm); // there's a limit to how many distinct
+                                       // values can be stored in a single byte
+    return norm;
+  }
+}
diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestIndexReaderDelete.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestIndexReaderDelete.java
new file mode 100644
index 0000000..31d3575
--- /dev/null
+++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestIndexReaderDelete.java
@@ -0,0 +1,374 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.MockDirectoryWrapper;
+import org.apache.lucene.util.LuceneTestCase;
+
+import static org.apache.lucene.index.TestIndexReader.addDoc;
+import static org.apache.lucene.index.TestIndexReader.addDocumentWithFields;
+import static org.apache.lucene.index.TestIndexReader.assertTermDocsCount;
+import static org.apache.lucene.index.TestIndexReader.createDocument;
+
+public class TestIndexReaderDelete extends LuceneTestCase {
+  private void deleteReaderReaderConflict(boolean optimize) throws IOException {
+    Directory dir = newDirectory();
+
+    Term searchTerm1 = new Term("content", "aaa");
+    Term searchTerm2 = new Term("content", "bbb");
+    Term searchTerm3 = new Term("content", "ccc");
+
+    //  add 100 documents with term : aaa
+    //  add 100 documents with term : bbb
+    //  add 100 documents with term : ccc
+    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE));
+    for (int i = 0; i < 100; i++) {
+      addDoc(writer, searchTerm1.text());
+      addDoc(writer, searchTerm2.text());
+      addDoc(writer, searchTerm3.text());
+    }
+    if(optimize)
+      writer.optimize();
+    writer.close();
+
+    // OPEN TWO READERS
+    // Both readers get segment info as it exists at this time
+    IndexReader reader1 = IndexReader.open(dir, false);
+    assertEquals("first opened", 100, reader1.docFreq(searchTerm1));
+    assertEquals("first opened", 100, reader1.docFreq(searchTerm2));
+    assertEquals("first opened", 100, reader1.docFreq(searchTerm3));
+    assertTermDocsCount("first opened", reader1, searchTerm1, 100);
+    assertTermDocsCount("first opened", reader1, searchTerm2, 100);
+    assertTermDocsCount("first opened", reader1, searchTerm3, 100);
+
+    IndexReader reader2 = IndexReader.open(dir, false);
+    assertEquals("first opened", 100, reader2.docFreq(searchTerm1));
+    assertEquals("first opened", 100, reader2.docFreq(searchTerm2));
+    assertEquals("first opened", 100, reader2.docFreq(searchTerm3));
+    assertTermDocsCount("first opened", reader2, searchTerm1, 100);
+    assertTermDocsCount("first opened", reader2, searchTerm2, 100);
+    assertTermDocsCount("first opened", reader2, searchTerm3, 100);
+
+    // DELETE DOCS FROM READER 2 and CLOSE IT
+    // delete documents containing term: aaa
+    // when the reader is closed, the segment info is updated and
+    // the first reader is now stale
+    reader2.deleteDocuments(searchTerm1);
+    assertEquals("after delete 1", 100, reader2.docFreq(searchTerm1));
+    assertEquals("after delete 1", 100, reader2.docFreq(searchTerm2));
+    assertEquals("after delete 1", 100, reader2.docFreq(searchTerm3));
+    assertTermDocsCount("after delete 1", reader2, searchTerm1, 0);
+    assertTermDocsCount("after delete 1", reader2, searchTerm2, 100);
+    assertTermDocsCount("after delete 1", reader2, searchTerm3, 100);
+    reader2.close();
+
+    // Make sure reader 1 is unchanged since it was open earlier
+    assertEquals("after delete 1", 100, reader1.docFreq(searchTerm1));
+    assertEquals("after delete 1", 100, reader1.docFreq(searchTerm2));
+    assertEquals("after delete 1", 100, reader1.docFreq(searchTerm3));
+    assertTermDocsCount("after delete 1", reader1, searchTerm1, 100);
+    assertTermDocsCount("after delete 1", reader1, searchTerm2, 100);
+    assertTermDocsCount("after delete
1", reader1, searchTerm3, 100); + + + // ATTEMPT TO DELETE FROM STALE READER + // delete documents containing term: bbb + try { + reader1.deleteDocuments(searchTerm2); + fail("Delete allowed from a stale index reader"); + } catch (IOException e) { + /* success */ + } + + // RECREATE READER AND TRY AGAIN + reader1.close(); + reader1 = IndexReader.open(dir, false); + assertEquals("reopened", 100, reader1.docFreq(searchTerm1)); + assertEquals("reopened", 100, reader1.docFreq(searchTerm2)); + assertEquals("reopened", 100, reader1.docFreq(searchTerm3)); + assertTermDocsCount("reopened", reader1, searchTerm1, 0); + assertTermDocsCount("reopened", reader1, searchTerm2, 100); + assertTermDocsCount("reopened", reader1, searchTerm3, 100); + + reader1.deleteDocuments(searchTerm2); + assertEquals("deleted 2", 100, reader1.docFreq(searchTerm1)); + assertEquals("deleted 2", 100, reader1.docFreq(searchTerm2)); + assertEquals("deleted 2", 100, reader1.docFreq(searchTerm3)); + assertTermDocsCount("deleted 2", reader1, searchTerm1, 0); + assertTermDocsCount("deleted 2", reader1, searchTerm2, 0); + assertTermDocsCount("deleted 2", reader1, searchTerm3, 100); + reader1.close(); + + // Open another reader to confirm that everything is deleted + reader2 = IndexReader.open(dir, false); + assertTermDocsCount("reopened 2", reader2, searchTerm1, 0); + assertTermDocsCount("reopened 2", reader2, searchTerm2, 0); + assertTermDocsCount("reopened 2", reader2, searchTerm3, 100); + reader2.close(); + + dir.close(); + } + + private void deleteReaderWriterConflict(boolean optimize) throws IOException { + //Directory dir = new RAMDirectory(); + Directory dir = newDirectory(); + + Term searchTerm = new Term("content", "aaa"); + Term searchTerm2 = new Term("content", "bbb"); + + // add 100 documents with term : aaa + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE)); + for (int i = 0; i < 100; i++) { + addDoc(writer, searchTerm.text()); + } + writer.close(); + + // OPEN READER AT THIS POINT - this should fix the view of the + // index at the point of having 100 "aaa" documents and 0 "bbb" + IndexReader reader = IndexReader.open(dir, false); + assertEquals("first docFreq", 100, reader.docFreq(searchTerm)); + assertEquals("first docFreq", 0, reader.docFreq(searchTerm2)); + assertTermDocsCount("first reader", reader, searchTerm, 100); + assertTermDocsCount("first reader", reader, searchTerm2, 0); + + // add 100 documents with term : bbb + writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); + for (int i = 0; i < 100; i++) { + addDoc(writer, searchTerm2.text()); + } + + // REQUEST OPTIMIZATION + // This causes a new segment to become current for all subsequent + // searchers. 
Because of this, deletions made via a previously open
+    // reader, which would be applied to that reader's segment, are lost
+    // for subsequent searchers/readers
+    if(optimize)
+      writer.optimize();
+    writer.close();
+
+    // The reader should not see the new data
+    assertEquals("first docFreq", 100, reader.docFreq(searchTerm));
+    assertEquals("first docFreq", 0, reader.docFreq(searchTerm2));
+    assertTermDocsCount("first reader", reader, searchTerm, 100);
+    assertTermDocsCount("first reader", reader, searchTerm2, 0);
+
+
+    // DELETE DOCUMENTS CONTAINING TERM: aaa
+    // NOTE: the reader was created when only "aaa" documents were in the index
+    int deleted = 0;
+    try {
+      deleted = reader.deleteDocuments(searchTerm);
+      fail("Delete allowed on an index reader with stale segment information");
+    } catch (StaleReaderException e) {
+      /* success */
+    }
+
+    // Re-open index reader and try again. This time it should see
+    // the new data.
+    reader.close();
+    reader = IndexReader.open(dir, false);
+    assertEquals("first docFreq", 100, reader.docFreq(searchTerm));
+    assertEquals("first docFreq", 100, reader.docFreq(searchTerm2));
+    assertTermDocsCount("first reader", reader, searchTerm, 100);
+    assertTermDocsCount("first reader", reader, searchTerm2, 100);
+
+    deleted = reader.deleteDocuments(searchTerm);
+    assertEquals("deleted count", 100, deleted);
+    assertEquals("deleted docFreq", 100, reader.docFreq(searchTerm));
+    assertEquals("deleted docFreq", 100, reader.docFreq(searchTerm2));
+    assertTermDocsCount("deleted termDocs", reader, searchTerm, 0);
+    assertTermDocsCount("deleted termDocs", reader, searchTerm2, 100);
+    reader.close();
+
+    // CREATE A NEW READER and re-test
+    reader = IndexReader.open(dir, false);
+    assertEquals("deleted docFreq", 100, reader.docFreq(searchTerm2));
+    assertTermDocsCount("deleted termDocs", reader, searchTerm, 0);
+    assertTermDocsCount("deleted termDocs", reader, searchTerm2, 100);
+    reader.close();
+    dir.close();
+  }
+
+  public void testBasicDelete() throws IOException {
+    Directory dir = newDirectory();
+
+    IndexWriter writer = null;
+    IndexReader reader = null;
+    Term searchTerm = new Term("content", "aaa");
+
+    //  add 100 documents with term : aaa
+    writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
+    writer.setInfoStream(VERBOSE ?
System.out : null); + for (int i = 0; i < 100; i++) { + addDoc(writer, searchTerm.text()); + } + writer.close(); + + // OPEN READER AT THIS POINT - this should fix the view of the + // index at the point of having 100 "aaa" documents and 0 "bbb" + reader = IndexReader.open(dir, false); + assertEquals("first docFreq", 100, reader.docFreq(searchTerm)); + assertTermDocsCount("first reader", reader, searchTerm, 100); + reader.close(); + + // DELETE DOCUMENTS CONTAINING TERM: aaa + int deleted = 0; + reader = IndexReader.open(dir, false); + deleted = reader.deleteDocuments(searchTerm); + assertEquals("deleted count", 100, deleted); + assertEquals("deleted docFreq", 100, reader.docFreq(searchTerm)); + assertTermDocsCount("deleted termDocs", reader, searchTerm, 0); + + // open a 2nd reader to make sure first reader can + // commit its changes (.del) while second reader + // is open: + IndexReader reader2 = IndexReader.open(dir, false); + reader.close(); + + // CREATE A NEW READER and re-test + reader = IndexReader.open(dir, false); + assertEquals("deleted docFreq", 0, reader.docFreq(searchTerm)); + assertTermDocsCount("deleted termDocs", reader, searchTerm, 0); + reader.close(); + reader2.close(); + dir.close(); + } + + public void testDeleteReaderReaderConflictUnoptimized() throws IOException { + deleteReaderReaderConflict(false); + } + + public void testDeleteReaderReaderConflictOptimized() throws IOException { + deleteReaderReaderConflict(true); + } + + public void testDeleteReaderWriterConflictUnoptimized() throws IOException { + deleteReaderWriterConflict(false); + } + + public void testDeleteReaderWriterConflictOptimized() throws IOException { + deleteReaderWriterConflict(true); + } + + public void testMultiReaderDeletes() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter w= new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); + Document doc = new Document(); + doc.add(newField("f", "doctor", Field.Store.NO, Field.Index.NOT_ANALYZED)); + w.addDocument(doc); + doc = new Document(); + w.commit(); + doc.add(newField("f", "who", Field.Store.NO, Field.Index.NOT_ANALYZED)); + w.addDocument(doc); + IndexReader r = new SlowMultiReaderWrapper(w.getReader()); + w.close(); + + assertFalse(r.hasDeletions()); + r.close(); + + r = new SlowMultiReaderWrapper(IndexReader.open(dir, false)); + + assertFalse(r.hasDeletions()); + assertEquals(1, r.deleteDocuments(new Term("f", "doctor"))); + assertTrue(r.hasDeletions()); + assertTrue(r.isDeleted(0)); + assertEquals(1, r.deleteDocuments(new Term("f", "who"))); + assertTrue(r.isDeleted(1)); + r.close(); + dir.close(); + } + + public void testUndeleteAll() throws IOException { + Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); + addDocumentWithFields(writer); + addDocumentWithFields(writer); + writer.close(); + IndexReader reader = IndexReader.open(dir, false); + reader.deleteDocument(0); + reader.deleteDocument(1); + reader.undeleteAll(); + reader.close(); + reader = IndexReader.open(dir, false); + assertEquals(2, reader.numDocs()); // nothing has really been deleted thanks to undeleteAll() + reader.close(); + dir.close(); + } + + public void testUndeleteAllAfterClose() throws IOException { + Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); + 
addDocumentWithFields(writer); + addDocumentWithFields(writer); + writer.close(); + IndexReader reader = IndexReader.open(dir, false); + reader.deleteDocument(0); + reader.close(); + reader = IndexReader.open(dir, false); + reader.undeleteAll(); + assertEquals(2, reader.numDocs()); // nothing has really been deleted thanks to undeleteAll() + reader.close(); + dir.close(); + } + + public void testUndeleteAllAfterCloseThenReopen() throws IOException { + Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); + addDocumentWithFields(writer); + addDocumentWithFields(writer); + writer.close(); + IndexReader reader = IndexReader.open(dir, false); + reader.deleteDocument(0); + reader.close(); + reader = IndexReader.open(dir, false); + reader.undeleteAll(); + reader.close(); + reader = IndexReader.open(dir, false); + assertEquals(2, reader.numDocs()); // nothing has really been deleted thanks to undeleteAll() + reader.close(); + dir.close(); + } + + // LUCENE-1647 + public void testIndexReaderUnDeleteAll() throws Exception { + MockDirectoryWrapper dir = newDirectory(); + dir.setPreventDoubleWrite(false); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random))); + writer.addDocument(createDocument("a")); + writer.addDocument(createDocument("b")); + writer.addDocument(createDocument("c")); + writer.close(); + IndexReader reader = IndexReader.open(dir, false); + reader.deleteDocuments(new Term("id", "a")); + reader.flush(); + reader.deleteDocuments(new Term("id", "b")); + reader.undeleteAll(); + reader.deleteDocuments(new Term("id", "b")); + reader.close(); + IndexReader.open(dir,true).close(); + dir.close(); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestIndexReaderOnDiskFull.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestIndexReaderOnDiskFull.java new file mode 100644 index 0000000..d17457f --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestIndexReaderOnDiskFull.java @@ -0,0 +1,229 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.search.DefaultSimilarity; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.store.MockDirectoryWrapper; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; + +public class TestIndexReaderOnDiskFull extends LuceneTestCase { + /** + * Make sure if reader tries to commit but hits disk + * full that reader remains consistent and usable. + */ + public void testDiskFull() throws IOException { + + Term searchTerm = new Term("content", "aaa"); + int START_COUNT = 157; + int END_COUNT = 144; + + // First build up a starting index: + MockDirectoryWrapper startDir = newDirectory(); + IndexWriter writer = new IndexWriter(startDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); + if (VERBOSE) { + System.out.println("TEST: create initial index"); + writer.setInfoStream(System.out); + } + for(int i=0;i<157;i++) { + Document d = new Document(); + d.add(newField("id", Integer.toString(i), Field.Store.YES, Field.Index.NOT_ANALYZED)); + d.add(newField("content", "aaa " + i, Field.Store.NO, Field.Index.ANALYZED)); + writer.addDocument(d); + if (0==i%10) + writer.commit(); + } + writer.close(); + + { + IndexReader r = IndexReader.open(startDir); + IndexSearcher searcher = newSearcher(r); + ScoreDoc[] hits = null; + try { + hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; + } catch (IOException e) { + e.printStackTrace(); + fail("exception when init searching: " + e); + } + searcher.close(); + r.close(); + } + + long diskUsage = startDir.getRecomputedActualSizeInBytes(); + long diskFree = diskUsage+_TestUtil.nextInt(random, 50, 200); + + IOException err = null; + + boolean done = false; + boolean gotExc = false; + + // Iterate w/ ever increasing free disk space: + while(!done) { + MockDirectoryWrapper dir = new MockDirectoryWrapper(random, new RAMDirectory(startDir)); + + // If IndexReader hits disk full, it can write to + // the same files again. 
+ dir.setPreventDoubleWrite(false); + + IndexReader reader = IndexReader.open(dir, false); + + // For each disk size, first try to commit against + // dir that will hit random IOExceptions & disk + // full; after, give it infinite disk space & turn + // off random IOExceptions & retry w/ same reader: + boolean success = false; + + for(int x=0;x<2;x++) { + + double rate = 0.05; + double diskRatio = ((double) diskFree)/diskUsage; + long thisDiskFree; + String testName; + + if (0 == x) { + thisDiskFree = diskFree; + if (diskRatio >= 2.0) { + rate /= 2; + } + if (diskRatio >= 4.0) { + rate /= 2; + } + if (diskRatio >= 6.0) { + rate = 0.0; + } + if (VERBOSE) { + System.out.println("\ncycle: " + diskFree + " bytes"); + } + testName = "disk full during reader.close() @ " + thisDiskFree + " bytes"; + } else { + thisDiskFree = 0; + rate = 0.0; + if (VERBOSE) { + System.out.println("\ncycle: same writer: unlimited disk space"); + } + testName = "reader re-use after disk full"; + } + + dir.setMaxSizeInBytes(thisDiskFree); + dir.setRandomIOExceptionRate(rate); + Similarity sim = new DefaultSimilarity(); + try { + if (0 == x) { + int docId = 12; + for(int i=0;i<13;i++) { + reader.deleteDocument(docId); + reader.setNorm(docId, "content", sim.encodeNormValue(2.0f)); + docId += 12; + } + } + reader.close(); + success = true; + if (0 == x) { + done = true; + } + } catch (IOException e) { + if (VERBOSE) { + System.out.println(" hit IOException: " + e); + e.printStackTrace(System.out); + } + err = e; + gotExc = true; + if (1 == x) { + e.printStackTrace(); + fail(testName + " hit IOException after disk space was freed up"); + } + } + + // Finally, verify index is not corrupt, and, if + // we succeeded, we see all docs changed, and if + // we failed, we see either all docs or no docs + // changed (transactional semantics): + IndexReader newReader = null; + try { + newReader = IndexReader.open(dir, false); + } catch (IOException e) { + e.printStackTrace(); + fail(testName + ":exception when creating IndexReader after disk full during close: " + e); + } + /* + int result = newReader.docFreq(searchTerm); + if (success) { + if (result != END_COUNT) { + fail(testName + ": method did not throw exception but docFreq('aaa') is " + result + " instead of expected " + END_COUNT); + } + } else { + // On hitting exception we still may have added + // all docs: + if (result != START_COUNT && result != END_COUNT) { + err.printStackTrace(); + fail(testName + ": method did throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT + " or " + END_COUNT); + } + } + */ + + IndexSearcher searcher = newSearcher(newReader); + ScoreDoc[] hits = null; + try { + hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; + } catch (IOException e) { + e.printStackTrace(); + fail(testName + ": exception when searching: " + e); + } + int result2 = hits.length; + if (success) { + if (result2 != END_COUNT) { + fail(testName + ": method did not throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + END_COUNT); + } + } else { + // On hitting exception we still may have added + // all docs: + if (result2 != START_COUNT && result2 != END_COUNT) { + err.printStackTrace(); + fail(testName + ": method did throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + START_COUNT); + } + } + + searcher.close(); + newReader.close(); + + if (result2 == END_COUNT) { + if (!gotExc) + fail("never hit disk full"); + break; + } + } 
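+      // Summary of the two passes above: x == 0 runs against a directory
+      // capped via dir.setMaxSizeInBytes() with random IOExceptions enabled,
+      // while x == 1 retries the same reader with unlimited space; in both
+      // cases the checks verify the index stayed consistent (transactional
+      // semantics of reader.close()).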
+ + dir.close(); + + // Try again with more bytes of free space: + diskFree += TEST_NIGHTLY ? _TestUtil.nextInt(random, 5, 20) : _TestUtil.nextInt(random, 50, 200); + } + + startDir.close(); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestIndexReaderReopen.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestIndexReaderReopen.java new file mode 100644 index 0000000..1cd5142 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestIndexReaderReopen.java @@ -0,0 +1,1273 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Random; +import java.util.Set; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field.Index; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.search.FieldCache; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.store.AlreadyClosedException; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BitVector; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; + +public class TestIndexReaderReopen extends LuceneTestCase { + + public void testReopen() throws Exception { + final Directory dir1 = newDirectory(); + + createIndex(random, dir1, false); + performDefaultTests(new TestReopen() { + + @Override + protected void modifyIndex(int i) throws IOException { + TestIndexReaderReopen.modifyIndex(i, dir1); + } + + @Override + protected IndexReader openReader() throws IOException { + return IndexReader.open(dir1, false); + } + + }); + dir1.close(); + + final Directory dir2 = newDirectory(); + + createIndex(random, dir2, true); + performDefaultTests(new TestReopen() { + + @Override + protected void modifyIndex(int i) throws IOException { + TestIndexReaderReopen.modifyIndex(i, dir2); + } + + @Override + protected IndexReader openReader() throws IOException { + return IndexReader.open(dir2, false); + } + + }); + dir2.close(); + } + + public void testParallelReaderReopen() throws Exception { + final Directory dir1 = newDirectory(); + createIndex(random, dir1, true); + final Directory dir2 = newDirectory(); + createIndex(random, dir2, true); + + performDefaultTests(new 
TestReopen() {
+
+      @Override
+      protected void modifyIndex(int i) throws IOException {
+        TestIndexReaderReopen.modifyIndex(i, dir1);
+        TestIndexReaderReopen.modifyIndex(i, dir2);
+      }
+
+      @Override
+      protected IndexReader openReader() throws IOException {
+        ParallelReader pr = new ParallelReader();
+        pr.add(IndexReader.open(dir1, false));
+        pr.add(IndexReader.open(dir2, false));
+        return pr;
+      }
+
+    });
+    dir1.close();
+    dir2.close();
+
+    final Directory dir3 = newDirectory();
+    createIndex(random, dir3, true);
+    final Directory dir4 = newDirectory();
+    createIndex(random, dir4, true);
+
+    performTestsWithExceptionInReopen(new TestReopen() {
+
+      @Override
+      protected void modifyIndex(int i) throws IOException {
+        TestIndexReaderReopen.modifyIndex(i, dir3);
+        TestIndexReaderReopen.modifyIndex(i, dir4);
+      }
+
+      @Override
+      protected IndexReader openReader() throws IOException {
+        ParallelReader pr = new ParallelReader();
+        pr.add(IndexReader.open(dir3, false));
+        pr.add(IndexReader.open(dir4, false));
+        // Does not implement reopen, so
+        // hits exception:
+        pr.add(new FilterIndexReader(IndexReader.open(dir3, false)));
+        return pr;
+      }
+
+    });
+    dir3.close();
+    dir4.close();
+  }
+
+  // LUCENE-1228: IndexWriter.commit() does not update the index version
+  // populate an index in iterations.
+  // at the end of every iteration, commit the index and reopen/recreate the reader.
+  // in each iteration verify the work of previous iteration.
+  // try this once with reopen, once with recreate, on both RAMDir and FSDir.
+  public void testCommitReopen () throws IOException {
+    Directory dir = newDirectory();
+    doTestReopenWithCommit(random, dir, true);
+    dir.close();
+  }
+  public void testCommitRecreate () throws IOException {
+    Directory dir = newDirectory();
+    doTestReopenWithCommit(random, dir, false);
+    dir.close();
+  }
+
+  private void doTestReopenWithCommit (Random random, Directory dir, boolean withReopen) throws IOException {
+    IndexWriter iwriter = new IndexWriter(dir, newIndexWriterConfig(
+        TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(
+        OpenMode.CREATE).setMergeScheduler(new SerialMergeScheduler()).setMergePolicy(newLogMergePolicy()));
+    iwriter.commit();
+    IndexReader reader = IndexReader.open(dir, false);
+    try {
+      int M = 3;
+      for (int i=0; i<4; i++) {
+        for (int j=0; j<M; j++) {
+          Document doc = new Document();
+          doc.add(newField("id", i+"_"+j, Store.YES, Index.NOT_ANALYZED));
+          iwriter.addDocument(doc);
+          if (i>0) {
+            int k = i-1;
+            int n = j + k*M;
+            Document prevIterationDoc = reader.document(n);
+            assertNotNull(prevIterationDoc);
+            String id = prevIterationDoc.get("id");
+            assertEquals(k+"_"+j, id);
+          }
+        }
+        iwriter.commit();
+        if (withReopen) {
+          // reopen
+          IndexReader r2 = reader.reopen();
+          if (reader != r2) {
+            reader.close();
+            reader = r2;
+          }
+        } else {
+          // recreate
+          reader.close();
+          reader = IndexReader.open(dir, false);
+        }
+      }
+    } finally {
+      iwriter.close();
+      reader.close();
+    }
+  }
+
+  public void testMultiReaderReopen() throws Exception {
+    final Directory dir1 = newDirectory();
+    createIndex(random, dir1, true);
+
+    final Directory dir2 = newDirectory();
+    createIndex(random, dir2, true);
+
+    performDefaultTests(new TestReopen() {
+
+      @Override
+      protected void modifyIndex(int i) throws IOException {
+        TestIndexReaderReopen.modifyIndex(i, dir1);
+        TestIndexReaderReopen.modifyIndex(i, dir2);
+      }
+
+      @Override
+      protected IndexReader openReader() throws IOException {
+        return new MultiReader(new IndexReader[]
+                        {IndexReader.open(dir1, false),
+                         IndexReader.open(dir2, false)});
+      }
+
+    });
+
+    dir1.close();
+    dir2.close();
+
+    final Directory dir3 = newDirectory();
+    createIndex(random, dir3, true);
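+    // dir3 and dir4 drive the failure path below: performTestsWithExceptionInReopen
+    // adds a FilterIndexReader, which does not support reopen(), so the refresh
+    // must fail while leaving the already-open readers usable.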
+ + final Directory dir4 = newDirectory(); + createIndex(random, dir4, true); + + performTestsWithExceptionInReopen(new TestReopen() { + + @Override + protected void modifyIndex(int i) throws IOException { + TestIndexReaderReopen.modifyIndex(i, dir3); + TestIndexReaderReopen.modifyIndex(i, dir4); + } + + @Override + protected IndexReader openReader() throws IOException { + return new MultiReader(new IndexReader[] + {IndexReader.open(dir3, false), + IndexReader.open(dir4, false), + // Does not implement reopen, so + // hits exception: + new FilterIndexReader(IndexReader.open(dir3, false))}); + } + + }); + dir3.close(); + dir4.close(); + } + + public void testMixedReaders() throws Exception { + final Directory dir1 = newDirectory(); + createIndex(random, dir1, true); + final Directory dir2 = newDirectory(); + createIndex(random, dir2, true); + final Directory dir3 = newDirectory(); + createIndex(random, dir3, false); + final Directory dir4 = newDirectory(); + createIndex(random, dir4, true); + final Directory dir5 = newDirectory(); + createIndex(random, dir5, false); + + performDefaultTests(new TestReopen() { + + @Override + protected void modifyIndex(int i) throws IOException { + // only change norms in this index to maintain the same number of docs for each of ParallelReader's subreaders + if (i == 1) TestIndexReaderReopen.modifyIndex(i, dir1); + + TestIndexReaderReopen.modifyIndex(i, dir4); + TestIndexReaderReopen.modifyIndex(i, dir5); + } + + @Override + protected IndexReader openReader() throws IOException { + ParallelReader pr = new ParallelReader(); + pr.add(IndexReader.open(dir1, false)); + pr.add(IndexReader.open(dir2, false)); + MultiReader mr = new MultiReader(new IndexReader[] { + IndexReader.open(dir3, false), IndexReader.open(dir4, false)}); + return new MultiReader(new IndexReader[] { + pr, mr, IndexReader.open(dir5, false)}); + } + }); + dir1.close(); + dir2.close(); + dir3.close(); + dir4.close(); + dir5.close(); + } + + private void performDefaultTests(TestReopen test) throws Exception { + + IndexReader index1 = test.openReader(); + IndexReader index2 = test.openReader(); + + TestIndexReader.assertIndexEquals(index1, index2); + + // verify that reopen() does not return a new reader instance + // in case the index has no changes + ReaderCouple couple = refreshReader(index2, false); + assertTrue(couple.refreshedReader == index2); + + couple = refreshReader(index2, test, 0, true); + index1.close(); + index1 = couple.newReader; + + IndexReader index2_refreshed = couple.refreshedReader; + index2.close(); + + // test if refreshed reader and newly opened reader return equal results + TestIndexReader.assertIndexEquals(index1, index2_refreshed); + + index2_refreshed.close(); + assertReaderClosed(index2, true, true); + assertReaderClosed(index2_refreshed, true, true); + + index2 = test.openReader(); + + for (int i = 1; i < 4; i++) { + + index1.close(); + couple = refreshReader(index2, test, i, true); + // refresh IndexReader + index2.close(); + + index2 = couple.refreshedReader; + index1 = couple.newReader; + TestIndexReader.assertIndexEquals(index1, index2); + } + + index1.close(); + index2.close(); + assertReaderClosed(index1, true, true); + assertReaderClosed(index2, true, true); + } + + public void testReferenceCounting() throws IOException { + for (int mode = 0; mode < 4; mode++) { + Directory dir1 = newDirectory(); + createIndex(random, dir1, true); + + IndexReader reader0 = IndexReader.open(dir1, false); + assertRefCountEquals(1, reader0); + + assertTrue(reader0 instanceof 
DirectoryReader);
+      IndexReader[] subReaders0 = reader0.getSequentialSubReaders();
+      for (int i = 0; i < subReaders0.length; i++) {
+        assertRefCountEquals(1, subReaders0[i]);
+      }
+
+      // delete first document, so that only one of the subReaders has to be re-opened
+      IndexReader modifier = IndexReader.open(dir1, false);
+      modifier.deleteDocument(0);
+      modifier.close();
+
+      IndexReader reader1 = refreshReader(reader0, true).refreshedReader;
+      assertTrue(reader1 instanceof DirectoryReader);
+      IndexReader[] subReaders1 = reader1.getSequentialSubReaders();
+      assertEquals(subReaders0.length, subReaders1.length);
+
+      for (int i = 0; i < subReaders0.length; i++) {
+        if (subReaders0[i] != subReaders1[i]) {
+          assertRefCountEquals(1, subReaders0[i]);
+          assertRefCountEquals(1, subReaders1[i]);
+        } else {
+          assertRefCountEquals(2, subReaders0[i]);
+        }
+      }
+
+      // delete one more document, so that only one of the subReaders has to be re-opened
+      modifier = IndexReader.open(dir1, false);
+      modifier.deleteDocument(1);
+      modifier.close();
+
+      IndexReader reader2 = refreshReader(reader1, true).refreshedReader;
+      assertTrue(reader2 instanceof DirectoryReader);
+      IndexReader[] subReaders2 = reader2.getSequentialSubReaders();
+      assertEquals(subReaders1.length, subReaders2.length);
+
+      for (int i = 0; i < subReaders2.length; i++) {
+        if (subReaders2[i] == subReaders1[i]) {
+          if (subReaders1[i] == subReaders0[i]) {
+            assertRefCountEquals(3, subReaders2[i]);
+          } else {
+            assertRefCountEquals(2, subReaders2[i]);
+          }
+        } else {
+          assertRefCountEquals(1, subReaders2[i]);
+          if (subReaders0[i] == subReaders1[i]) {
+            assertRefCountEquals(2, subReaders2[i]);
+            assertRefCountEquals(2, subReaders0[i]);
+          } else {
+            assertRefCountEquals(1, subReaders0[i]);
+            assertRefCountEquals(1, subReaders1[i]);
+          }
+        }
+      }
+
+      IndexReader reader3 = refreshReader(reader0, true).refreshedReader;
+      assertTrue(reader3 instanceof DirectoryReader);
+      IndexReader[] subReaders3 = reader3.getSequentialSubReaders();
+      assertEquals(subReaders3.length, subReaders0.length);
+
+      // try some permutations
+      switch (mode) {
+      case 0:
+        reader0.close();
+        reader1.close();
+        reader2.close();
+        reader3.close();
+        break;
+      case 1:
+        reader3.close();
+        reader2.close();
+        reader1.close();
+        reader0.close();
+        break;
+      case 2:
+        reader2.close();
+        reader3.close();
+        reader0.close();
+        reader1.close();
+        break;
+      case 3:
+        reader1.close();
+        reader3.close();
+        reader2.close();
+        reader0.close();
+        break;
+      }
+
+      assertReaderClosed(reader0, true, true);
+      assertReaderClosed(reader1, true, true);
+      assertReaderClosed(reader2, true, true);
+      assertReaderClosed(reader3, true, true);
+
+      dir1.close();
+    }
+  }
+
+
+  public void testReferenceCountingMultiReader() throws IOException {
+    for (int mode = 0; mode <=1; mode++) {
+      Directory dir1 = newDirectory();
+      createIndex(random, dir1, false);
+      Directory dir2 = newDirectory();
+      createIndex(random, dir2, true);
+
+      IndexReader reader1 = IndexReader.open(dir1, false);
+      assertRefCountEquals(1, reader1);
+
+      IndexReader initReader2 = IndexReader.open(dir2, false);
+      IndexReader multiReader1 = new MultiReader(new IndexReader[] {reader1, initReader2}, (mode == 0));
+      modifyIndex(0, dir2);
+      assertRefCountEquals(1 + mode, reader1);
+
+      IndexReader multiReader2 = multiReader1.reopen();
+      // index1 hasn't changed, so multiReader2 should share reader1 now with multiReader1
+      assertRefCountEquals(2 + mode, reader1);
+
+      modifyIndex(0, dir1);
+      IndexReader reader2 = reader1.reopen();
+      assertRefCountEquals(2 + mode, reader1);
+
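+      // Refcount arithmetic: reopen() always incRefs the shared reader1, and
+      // with closeSubReaders == false (mode == 1) multiReader1 holds one more
+      // reference of its own, hence the expected 2 + mode.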
+      if (mode == 1) {
+        initReader2.close();
+      }
+
+      modifyIndex(1, dir1);
+      IndexReader reader3 = reader2.reopen();
+      assertRefCountEquals(2 + mode, reader1);
+      assertRefCountEquals(1, reader2);
+
+      multiReader1.close();
+      assertRefCountEquals(1 + mode, reader1);
+
+      multiReader1.close();
+      assertRefCountEquals(1 + mode, reader1);
+
+      if (mode == 1) {
+        initReader2.close();
+      }
+
+      reader1.close();
+      assertRefCountEquals(1, reader1);
+
+      multiReader2.close();
+      assertRefCountEquals(0, reader1);
+
+      multiReader2.close();
+      assertRefCountEquals(0, reader1);
+
+      reader3.close();
+      assertRefCountEquals(0, reader1);
+      assertReaderClosed(reader1, true, false);
+
+      reader2.close();
+      assertRefCountEquals(0, reader1);
+      assertReaderClosed(reader1, true, false);
+
+      reader2.close();
+      assertRefCountEquals(0, reader1);
+
+      reader3.close();
+      assertRefCountEquals(0, reader1);
+      assertReaderClosed(reader1, true, true);
+      dir1.close();
+      dir2.close();
+    }
+
+  }
+
+  public void testReferenceCountingParallelReader() throws IOException {
+    for (int mode = 0; mode <=1; mode++) {
+      Directory dir1 = newDirectory();
+      createIndex(random, dir1, false);
+      Directory dir2 = newDirectory();
+      createIndex(random, dir2, true);
+
+      IndexReader reader1 = IndexReader.open(dir1, false);
+      assertRefCountEquals(1, reader1);
+
+      ParallelReader parallelReader1 = new ParallelReader(mode == 0);
+      parallelReader1.add(reader1);
+      IndexReader initReader2 = IndexReader.open(dir2, false);
+      parallelReader1.add(initReader2);
+      modifyIndex(1, dir2);
+      assertRefCountEquals(1 + mode, reader1);
+
+      IndexReader parallelReader2 = parallelReader1.reopen();
+      // index1 hasn't changed, so parallelReader2 should share reader1 now with parallelReader1
+      assertRefCountEquals(2 + mode, reader1);
+
+      modifyIndex(0, dir1);
+      modifyIndex(0, dir2);
+      IndexReader reader2 = reader1.reopen();
+      assertRefCountEquals(2 + mode, reader1);
+
+      if (mode == 1) {
+        initReader2.close();
+      }
+
+      modifyIndex(4, dir1);
+      IndexReader reader3 = reader2.reopen();
+      assertRefCountEquals(2 + mode, reader1);
+      assertRefCountEquals(1, reader2);
+
+      parallelReader1.close();
+      assertRefCountEquals(1 + mode, reader1);
+
+      parallelReader1.close();
+      assertRefCountEquals(1 + mode, reader1);
+
+      if (mode == 1) {
+        initReader2.close();
+      }
+
+      reader1.close();
+      assertRefCountEquals(1, reader1);
+
+      parallelReader2.close();
+      assertRefCountEquals(0, reader1);
+
+      parallelReader2.close();
+      assertRefCountEquals(0, reader1);
+
+      reader3.close();
+      assertRefCountEquals(0, reader1);
+      assertReaderClosed(reader1, true, false);
+
+      reader2.close();
+      assertRefCountEquals(0, reader1);
+      assertReaderClosed(reader1, true, false);
+
+      reader2.close();
+      assertRefCountEquals(0, reader1);
+
+      reader3.close();
+      assertRefCountEquals(0, reader1);
+      assertReaderClosed(reader1, true, true);
+
+      dir1.close();
+      dir2.close();
+    }
+
+  }
+
+  public void testNormsRefCounting() throws IOException {
+    Directory dir1 = newDirectory();
+    createIndex(random, dir1, false);
+
+    IndexReader reader1 = IndexReader.open(dir1, false);
+    SegmentReader segmentReader1 = SegmentReader.getOnlySegmentReader(reader1);
+    IndexReader modifier = IndexReader.open(dir1, false);
+    modifier.deleteDocument(0);
+    modifier.close();
+
+    IndexReader reader2 = reader1.reopen();
+    modifier = IndexReader.open(dir1, false);
+    modifier.setNorm(1, "field1", 50);
+    modifier.setNorm(1, "field2", 50);
+    modifier.close();
+
+    IndexReader reader3 = reader2.reopen();
+    SegmentReader segmentReader3 = SegmentReader.getOnlySegmentReader(reader3);
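+    // Keep a handle on reader3's SegmentReader: the assertions below verify
+    // that the norms it shares with reader4 and reader5 are only released
+    // once all three of those readers have been closed.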
+ modifier = IndexReader.open(dir1, false); + modifier.deleteDocument(2); + modifier.close(); + + IndexReader reader4 = reader3.reopen(); + modifier = IndexReader.open(dir1, false); + modifier.deleteDocument(3); + modifier.close(); + + IndexReader reader5 = reader3.reopen(); + + // Now reader2-reader5 references reader1. reader1 and reader2 + // share the same norms. reader3, reader4, reader5 also share norms. + assertRefCountEquals(1, reader1); + assertFalse(segmentReader1.normsClosed()); + + reader1.close(); + + assertRefCountEquals(0, reader1); + assertFalse(segmentReader1.normsClosed()); + + reader2.close(); + assertRefCountEquals(0, reader1); + + // now the norms for field1 and field2 should be closed + assertTrue(segmentReader1.normsClosed("field1")); + assertTrue(segmentReader1.normsClosed("field2")); + + // but the norms for field3 and field4 should still be open + assertFalse(segmentReader1.normsClosed("field3")); + assertFalse(segmentReader1.normsClosed("field4")); + + reader3.close(); + assertRefCountEquals(0, reader1); + assertFalse(segmentReader3.normsClosed()); + reader5.close(); + assertRefCountEquals(0, reader1); + assertFalse(segmentReader3.normsClosed()); + reader4.close(); + assertRefCountEquals(0, reader1); + + // and now all norms that reader1 used should be closed + assertTrue(segmentReader1.normsClosed()); + + // now that reader3, reader4 and reader5 are closed, + // the norms that those three readers shared should be + // closed as well + assertTrue(segmentReader3.normsClosed()); + + dir1.close(); + } + + private void performTestsWithExceptionInReopen(TestReopen test) throws Exception { + IndexReader index1 = test.openReader(); + IndexReader index2 = test.openReader(); + + TestIndexReader.assertIndexEquals(index1, index2); + + try { + refreshReader(index1, test, 0, true); + fail("Expected exception not thrown."); + } catch (Exception e) { + // expected exception + } + + // index2 should still be usable and unaffected by the failed reopen() call + TestIndexReader.assertIndexEquals(index1, index2); + + index1.close(); + index2.close(); + } + + public void testThreadSafety() throws Exception { + final Directory dir = newDirectory(); + // NOTE: this also controls the number of threads! 
+    final int n = _TestUtil.nextInt(random, 20, 40);
+    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
+        TEST_VERSION_CURRENT, new MockAnalyzer(random)));
+    for (int i = 0; i < n; i++) {
+      writer.addDocument(createDocument(i, 3));
+    }
+    writer.optimize();
+    writer.close();
+
+    final TestReopen test = new TestReopen() {
+      @Override
+      protected void modifyIndex(int i) throws IOException {
+        if (i % 3 == 0) {
+          IndexReader modifier = IndexReader.open(dir, false);
+          modifier.setNorm(i, "field1", 50);
+          modifier.close();
+        } else if (i % 3 == 1) {
+          IndexReader modifier = IndexReader.open(dir, false);
+          modifier.deleteDocument(i % modifier.maxDoc());
+          modifier.close();
+        } else {
+          IndexWriter modifier = new IndexWriter(dir, new IndexWriterConfig(
+              TEST_VERSION_CURRENT, new MockAnalyzer(random)));
+          modifier.addDocument(createDocument(n + i, 6));
+          modifier.close();
+        }
+      }
+
+      @Override
+      protected IndexReader openReader() throws IOException {
+        return IndexReader.open(dir, false);
+      }
+    };
+
+    final List<ReaderCouple> readers = Collections.synchronizedList(new ArrayList<ReaderCouple>());
+    IndexReader firstReader = IndexReader.open(dir, false);
+    IndexReader reader = firstReader;
+    final Random rnd = random;
+
+    ReaderThread[] threads = new ReaderThread[n];
+    final Set<IndexReader> readersToClose = Collections.synchronizedSet(new HashSet<IndexReader>());
+
+    for (int i = 0; i < n; i++) {
+      if (i % 2 == 0) {
+        IndexReader refreshed = reader.reopen();
+        if (refreshed != reader) {
+          readersToClose.add(reader);
+        }
+        reader = refreshed;
+      }
+      final IndexReader r = reader;
+
+      final int index = i;
+
+      ReaderThreadTask task;
+
+      if (i < 4 || (i >=10 && i < 14) || i > 18) {
+        task = new ReaderThreadTask() {
+
+          @Override
+          public void run() throws Exception {
+            while (!stopped) {
+              if (index % 2 == 0) {
+                // refresh reader synchronized
+                ReaderCouple c = (refreshReader(r, test, index, true));
+                readersToClose.add(c.newReader);
+                readersToClose.add(c.refreshedReader);
+                readers.add(c);
+                // prevent too many readers
+                break;
+              } else {
+                // not synchronized
+                IndexReader refreshed = r.reopen();
+
+                IndexSearcher searcher = newSearcher(refreshed);
+                ScoreDoc[] hits = searcher.search(
+                    new TermQuery(new Term("field1", "a" + rnd.nextInt(refreshed.maxDoc()))),
+                    null, 1000).scoreDocs;
+                if (hits.length > 0) {
+                  searcher.doc(hits[0].doc);
+                }
+                searcher.close();
+                if (refreshed != r) {
+                  refreshed.close();
+                }
+              }
+              synchronized(this) {
+                wait(_TestUtil.nextInt(random, 1, 100));
+              }
+            }
+          }
+
+        };
+      } else {
+        task = new ReaderThreadTask() {
+          @Override
+          public void run() throws Exception {
+            while (!stopped) {
+              int numReaders = readers.size();
+              if (numReaders > 0) {
+                ReaderCouple c = readers.get(rnd.nextInt(numReaders));
+                TestIndexReader.assertIndexEquals(c.newReader, c.refreshedReader);
+              }
+
+              synchronized(this) {
+                wait(_TestUtil.nextInt(random, 1, 100));
+              }
+            }
+          }
+        };
+      }
+
+      threads[i] = new ReaderThread(task);
+      threads[i].start();
+    }
+
+    synchronized(this) {
+      wait(1000);
+    }
+
+    for (int i = 0; i < n; i++) {
+      if (threads[i] != null) {
+        threads[i].stopThread();
+      }
+    }
+
+    for (int i = 0; i < n; i++) {
+      if (threads[i] != null) {
+        threads[i].join();
+        if (threads[i].error != null) {
+          String msg = "Error occurred in thread " + threads[i].getName() + ":\n" + threads[i].error.getMessage();
+          fail(msg);
+        }
+      }
+
+    }
+
+    for (final IndexReader readerToClose : readersToClose) {
+      readerToClose.close();
+    }
+
+    firstReader.close();
+    reader.close();
+
+    for (final IndexReader readerToClose : readersToClose) { +
assertReaderClosed(readerToClose, true, true); + } + + assertReaderClosed(reader, true, true); + assertReaderClosed(firstReader, true, true); + + dir.close(); + } + + private static class ReaderCouple { + ReaderCouple(IndexReader r1, IndexReader r2) { + newReader = r1; + refreshedReader = r2; + } + + IndexReader newReader; + IndexReader refreshedReader; + } + + private abstract static class ReaderThreadTask { + protected volatile boolean stopped; + public void stop() { + this.stopped = true; + } + + public abstract void run() throws Exception; + } + + private static class ReaderThread extends Thread { + private ReaderThreadTask task; + private Throwable error; + + + ReaderThread(ReaderThreadTask task) { + this.task = task; + } + + public void stopThread() { + this.task.stop(); + } + + @Override + public void run() { + try { + this.task.run(); + } catch (Throwable r) { + r.printStackTrace(System.out); + this.error = r; + } + } + } + + private Object createReaderMutex = new Object(); + + private ReaderCouple refreshReader(IndexReader reader, boolean hasChanges) throws IOException { + return refreshReader(reader, null, -1, hasChanges); + } + + ReaderCouple refreshReader(IndexReader reader, TestReopen test, int modify, boolean hasChanges) throws IOException { + synchronized (createReaderMutex) { + IndexReader r = null; + if (test != null) { + test.modifyIndex(modify); + r = test.openReader(); + } + + IndexReader refreshed = null; + try { + refreshed = reader.reopen(); + } finally { + if (refreshed == null && r != null) { + // Hit exception -- close opened reader + r.close(); + } + } + + if (hasChanges) { + if (refreshed == reader) { + fail("No new IndexReader instance created during refresh."); + } + } else { + if (refreshed != reader) { + fail("New IndexReader instance created during refresh even though index had no changes."); + } + } + + return new ReaderCouple(r, refreshed); + } + } + + public static void createIndex(Random random, Directory dir, boolean multiSegment) throws IOException { + IndexWriter.unlock(dir); + IndexWriter w = new IndexWriter(dir, LuceneTestCase.newIndexWriterConfig(random, + TEST_VERSION_CURRENT, new MockAnalyzer(random)) + .setMergePolicy(new LogDocMergePolicy())); + + for (int i = 0; i < 100; i++) { + w.addDocument(createDocument(i, 4)); + if (multiSegment && (i % 10) == 0) { + w.commit(); + } + } + + if (!multiSegment) { + w.optimize(); + } + + w.close(); + + IndexReader r = IndexReader.open(dir, false); + if (multiSegment) { + assertTrue(r.getSequentialSubReaders().length > 1); + } else { + assertTrue(r.getSequentialSubReaders().length == 1); + } + r.close(); + } + + public static Document createDocument(int n, int numFields) { + StringBuilder sb = new StringBuilder(); + Document doc = new Document(); + sb.append("a"); + sb.append(n); + doc.add(new Field("field1", sb.toString(), Store.YES, Index.ANALYZED)); + doc.add(new Field("fielda", sb.toString(), Store.YES, Index.NOT_ANALYZED_NO_NORMS)); + doc.add(new Field("fieldb", sb.toString(), Store.YES, Index.NO)); + sb.append(" b"); + sb.append(n); + for (int i = 1; i < numFields; i++) { + doc.add(new Field("field" + (i+1), sb.toString(), Store.YES, Index.ANALYZED)); + } + return doc; + } + + static void modifyIndex(int i, Directory dir) throws IOException { + switch (i) { + case 0: { + if (VERBOSE) { + System.out.println("TEST: modify index"); + } + IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); + w.setInfoStream(VERBOSE ? 
System.out : null); + w.deleteDocuments(new Term("field2", "a11")); + w.deleteDocuments(new Term("field2", "b30")); + w.close(); + break; + } + case 1: { + IndexReader reader = IndexReader.open(dir, false); + reader.setNorm(4, "field1", 123); + reader.setNorm(44, "field2", 222); + reader.setNorm(44, "field4", 22); + reader.close(); + break; + } + case 2: { + IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); + w.optimize(); + w.close(); + break; + } + case 3: { + IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); + w.addDocument(createDocument(101, 4)); + w.optimize(); + w.addDocument(createDocument(102, 4)); + w.addDocument(createDocument(103, 4)); + w.close(); + break; + } + case 4: { + IndexReader reader = IndexReader.open(dir, false); + reader.setNorm(5, "field1", 123); + reader.setNorm(55, "field2", 222); + reader.close(); + break; + } + case 5: { + IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); + w.addDocument(createDocument(101, 4)); + w.close(); + break; + } + } + } + + private void assertReaderClosed(IndexReader reader, boolean checkSubReaders, boolean checkNormsClosed) { + assertEquals(0, reader.getRefCount()); + + if (checkNormsClosed && reader instanceof SegmentReader) { + assertTrue(((SegmentReader) reader).normsClosed()); + } + + if (checkSubReaders) { + if (reader instanceof DirectoryReader) { + IndexReader[] subReaders = reader.getSequentialSubReaders(); + for (int i = 0; i < subReaders.length; i++) { + assertReaderClosed(subReaders[i], checkSubReaders, checkNormsClosed); + } + } + + if (reader instanceof MultiReader) { + IndexReader[] subReaders = reader.getSequentialSubReaders(); + for (int i = 0; i < subReaders.length; i++) { + assertReaderClosed(subReaders[i], checkSubReaders, checkNormsClosed); + } + } + + if (reader instanceof ParallelReader) { + IndexReader[] subReaders = ((ParallelReader) reader).getSubReaders(); + for (int i = 0; i < subReaders.length; i++) { + assertReaderClosed(subReaders[i], checkSubReaders, checkNormsClosed); + } + } + } + } + + /* + private void assertReaderOpen(IndexReader reader) { + reader.ensureOpen(); + + if (reader instanceof DirectoryReader) { + IndexReader[] subReaders = reader.getSequentialSubReaders(); + for (int i = 0; i < subReaders.length; i++) { + assertReaderOpen(subReaders[i]); + } + } + } + */ + + private void assertRefCountEquals(int refCount, IndexReader reader) { + assertEquals("Reader has wrong refCount value.", refCount, reader.getRefCount()); + } + + + private abstract static class TestReopen { + protected abstract IndexReader openReader() throws IOException; + protected abstract void modifyIndex(int i) throws IOException; + } + + public void testCloseOrig() throws Throwable { + Directory dir = newDirectory(); + createIndex(random, dir, false); + IndexReader r1 = IndexReader.open(dir, false); + IndexReader r2 = IndexReader.open(dir, false); + r2.deleteDocument(0); + r2.close(); + + IndexReader r3 = r1.reopen(); + assertTrue(r1 != r3); + r1.close(); + try { + r1.document(2); + fail("did not hit exception"); + } catch (AlreadyClosedException ace) { + // expected + } + r3.close(); + dir.close(); + } + + public void testDeletes() throws Throwable { + Directory dir = newDirectory(); + createIndex(random, dir, false); // Create an index with a bunch of docs (1 segment) + + modifyIndex(0, dir); // Get delete bitVector on 1st segment + modifyIndex(5, 
dir); // Add a doc (2 segments)
+
+ IndexReader r1 = IndexReader.open(dir, false); // MSR
+
+ modifyIndex(5, dir); // Add another doc (3 segments)
+
+ IndexReader r2 = r1.reopen(); // MSR
+ assertTrue(r1 != r2);
+
+ SegmentReader sr1 = (SegmentReader) r1.getSequentialSubReaders()[0]; // Get SRs for the first segment from original
+ SegmentReader sr2 = (SegmentReader) r2.getSequentialSubReaders()[0]; // and reopened IRs
+
+ // At this point they share the same BitVector
+ assertTrue(sr1.deletedDocs==sr2.deletedDocs);
+
+ r2.deleteDocument(0);
+
+ // r1 should not see the delete
+ assertFalse(r1.isDeleted(0));
+
+ // Now r2 should have made a private copy of deleted docs:
+ assertTrue(sr1.deletedDocs!=sr2.deletedDocs);
+
+ r1.close();
+ r2.close();
+ dir.close();
+ }
+
+ public void testDeletes2() throws Throwable {
+ Directory dir = newDirectory();
+ createIndex(random, dir, false);
+ // Get delete bitVector
+ modifyIndex(0, dir);
+ IndexReader r1 = IndexReader.open(dir, false);
+
+ // Add doc:
+ modifyIndex(5, dir);
+
+ IndexReader r2 = r1.reopen();
+ assertTrue(r1 != r2);
+
+ IndexReader[] rs2 = r2.getSequentialSubReaders();
+
+ SegmentReader sr1 = SegmentReader.getOnlySegmentReader(r1);
+ SegmentReader sr2 = (SegmentReader) rs2[0];
+
+ // At this point they share the same BitVector
+ assertTrue(sr1.deletedDocs==sr2.deletedDocs);
+ final BitVector delDocs = sr1.deletedDocs;
+ r1.close();
+
+ r2.deleteDocument(0);
+ assertTrue(delDocs==sr2.deletedDocs);
+ r2.close();
+ dir.close();
+ }
+
+ private static class KeepAllCommits implements IndexDeletionPolicy {
+ public void onInit(List<? extends IndexCommit> commits) {
+ }
+ public void onCommit(List<? extends IndexCommit> commits) {
+ }
+ }
+
+ public void testReopenOnCommit() throws Throwable {
+ Directory dir = newDirectory();
+ IndexWriter writer = new IndexWriter(
+ dir,
+ newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).
+ setIndexDeletionPolicy(new KeepAllCommits()).
+ setMaxBufferedDocs(-1).
+ setMergePolicy(newLogMergePolicy(10))
+ );
+ for(int i=0;i<4;i++) {
+ Document doc = new Document();
+ doc.add(newField("id", ""+i, Field.Store.NO, Field.Index.NOT_ANALYZED));
+ writer.addDocument(doc);
+ Map<String,String> data = new HashMap<String,String>();
+ data.put("index", i+"");
+ writer.commit(data);
+ }
+ for(int i=0;i<4;i++) {
+ writer.deleteDocuments(new Term("id", ""+i));
+ Map<String,String> data = new HashMap<String,String>();
+ data.put("index", (4+i)+"");
+ writer.commit(data);
+ }
+ writer.close();
+
+ IndexReader r = IndexReader.open(dir, false);
+ assertEquals(0, r.numDocs());
+
+ Collection<IndexCommit> commits = IndexReader.listCommits(dir);
+ for (final IndexCommit commit : commits) {
+ IndexReader r2 = r.reopen(commit);
+ assertTrue(r2 != r);
+
+ // Reader should be readOnly
+ try {
+ r2.deleteDocument(0);
+ fail("no exception hit");
+ } catch (UnsupportedOperationException uoe) {
+ // expected
+ }
+
+ final Map<String,String> s = commit.getUserData();
+ final int v;
+ if (s.size() == 0) {
+ // First commit created by IW
+ v = -1;
+ } else {
+ v = Integer.parseInt(s.get("index"));
+ }
+ if (v < 4) {
+ assertEquals(1+v, r2.numDocs());
+ } else {
+ assertEquals(7-v, r2.numDocs());
+ }
+ r.close();
+ r = r2;
+ }
+ r.close();
+ dir.close();
+ }
+
+ // LUCENE-1579: Make sure all SegmentReaders are new when
+ // reopen switches readOnly
+ public void testReopenChangeReadonly() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriter writer = new IndexWriter(
+ dir,
+ newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).
+ setMaxBufferedDocs(-1).
+ setMergePolicy(newLogMergePolicy(10))
+ );
+ Document doc = new Document();
+ doc.add(newField("number", "17", Field.Store.NO, Field.Index.NOT_ANALYZED));
+ writer.addDocument(doc);
+ writer.commit();
+
+ // Open reader1
+ IndexReader r = IndexReader.open(dir, false);
+ assertTrue(r instanceof DirectoryReader);
+ IndexReader r1 = SegmentReader.getOnlySegmentReader(r);
+ final int[] ints = FieldCache.DEFAULT.getInts(r1, "number");
+ assertEquals(1, ints.length);
+ assertEquals(17, ints[0]);
+
+ // Reopen to readonly w/ no changes
+ IndexReader r3 = r.reopen(true);
+ assertTrue(r3 instanceof ReadOnlyDirectoryReader);
+ r3.close();
+
+ // Add new segment
+ writer.addDocument(doc);
+ writer.commit();
+
+ // Reopen reader1 --> reader2
+ IndexReader r2 = r.reopen(true);
+ r.close();
+ assertTrue(r2 instanceof ReadOnlyDirectoryReader);
+ IndexReader[] subs = r2.getSequentialSubReaders();
+ final int[] ints2 = FieldCache.DEFAULT.getInts(subs[0], "number");
+ r2.close();
+
+ assertTrue(subs[0] instanceof ReadOnlySegmentReader);
+ assertTrue(subs[1] instanceof ReadOnlySegmentReader);
+ assertTrue(ints == ints2);
+
+ writer.close();
+ dir.close();
+ }
+}
diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestIndexWriter.java
new file mode 100644
index 0000000..62761f7
--- /dev/null
+++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestIndexWriter.java
@@ -0,0 +1,1913 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.PrintStream; +import java.io.Reader; +import java.io.StringReader; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Random; +import java.util.Collections; +import java.util.concurrent.atomic.AtomicBoolean; + +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.MockFixedLengthPayloadFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.analysis.WhitespaceTokenizer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.Fieldable; +import org.apache.lucene.document.Field.Index; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.Field.TermVector; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.spans.SpanTermQuery; +import org.apache.lucene.store.AlreadyClosedException; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.Lock; +import org.apache.lucene.store.LockFactory; +import org.apache.lucene.store.NoLockFactory; +import org.apache.lucene.store.MockDirectoryWrapper; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.store.SingleInstanceLockFactory; +import org.apache.lucene.util.UnicodeUtil; +import org.apache.lucene.util._TestUtil; +import org.apache.lucene.util.ThreadInterruptedException; + +public class TestIndexWriter extends LuceneTestCase { + + public void testDocCount() throws IOException { + Directory dir = newDirectory(); + + IndexWriter writer = null; + IndexReader reader = null; + int i; + + long savedWriteLockTimeout = IndexWriterConfig.getDefaultWriteLockTimeout(); + try { + IndexWriterConfig.setDefaultWriteLockTimeout(2000); + assertEquals(2000, IndexWriterConfig.getDefaultWriteLockTimeout()); + writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); + } finally { + IndexWriterConfig.setDefaultWriteLockTimeout(savedWriteLockTimeout); + } + + // add 100 documents + for (i = 0; i < 100; i++) { + addDoc(writer); + } + assertEquals(100, writer.maxDoc()); + writer.close(); + + // delete 40 documents + reader = IndexReader.open(dir, false); + for (i = 0; i < 40; i++) { + reader.deleteDocument(i); + } + reader.close(); + + reader = IndexReader.open(dir, true); + assertEquals(60, reader.numDocs()); + reader.close(); + + // optimize the index and check that the new doc count is correct + writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); + assertEquals(60, writer.numDocs()); + writer.optimize(); + assertEquals(60, writer.maxDoc()); + 
assertEquals(60, writer.numDocs()); + writer.close(); + + // check that the index reader gives the same numbers. + reader = IndexReader.open(dir, true); + assertEquals(60, reader.maxDoc()); + assertEquals(60, reader.numDocs()); + reader.close(); + + // make sure opening a new index for create over + // this existing one works correctly: + writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE)); + assertEquals(0, writer.maxDoc()); + assertEquals(0, writer.numDocs()); + writer.close(); + dir.close(); + } + + static void addDoc(IndexWriter writer) throws IOException + { + Document doc = new Document(); + doc.add(newField("content", "aaa", Field.Store.NO, Field.Index.ANALYZED)); + writer.addDocument(doc); + } + + static void addDocWithIndex(IndexWriter writer, int index) throws IOException + { + Document doc = new Document(); + doc.add(newField("content", "aaa " + index, Field.Store.YES, Field.Index.ANALYZED)); + doc.add(newField("id", "" + index, Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(doc); + } + + public static void assertNoUnreferencedFiles(Directory dir, String message) throws IOException { + String[] startFiles = dir.listAll(); + SegmentInfos infos = new SegmentInfos(); + infos.read(dir); + new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))).rollback(); + String[] endFiles = dir.listAll(); + + Arrays.sort(startFiles); + Arrays.sort(endFiles); + + if (!Arrays.equals(startFiles, endFiles)) { + fail(message + ": before delete:\n " + arrayToString(startFiles) + "\n after delete:\n " + arrayToString(endFiles)); + } + } + + static final class StringSplitAnalyzer extends Analyzer { + @Override + public TokenStream tokenStream(String fieldName, Reader reader) { + return new StringSplitTokenizer(reader); + } + } + + private static class StringSplitTokenizer extends Tokenizer { + private final String[] tokens; + private int upto = 0; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + + public StringSplitTokenizer(Reader r) { + try { + final StringBuilder b = new StringBuilder(); + final char[] buffer = new char[1024]; + int n; + while((n = r.read(buffer)) != -1) { + b.append(buffer, 0, n); + } + tokens = b.toString().split(" "); + } catch (IOException ioe) { + throw new RuntimeException(ioe); + } + } + + @Override + public final boolean incrementToken() throws IOException { + clearAttributes(); + if (upto < tokens.length) { + termAtt.setEmpty(); + termAtt.append(tokens[upto]); + upto++; + return true; + } else { + return false; + } + } + } + + /** + * Make sure we skip wicked long terms. 
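+ * A term longer than 16383 chars (DocumentsWriter.CHAR_BLOCK_SIZE - 1)
+ * is silently skipped at index time, but the rest of the document is
+ * still indexed and term positions keep advancing past the skipped term.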
+ */
+ public void testWickedLongTerm() throws IOException {
+ MockDirectoryWrapper dir = newDirectory();
+ IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
+ TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT)));
+
+ char[] chars = new char[DocumentsWriter.CHAR_BLOCK_SIZE-1];
+ Arrays.fill(chars, 'x');
+ Document doc = new Document();
+ final String bigTerm = new String(chars);
+
+ // Max length term is 16383, so this content produces
+ // a too-long term:
+ String contents = "abc xyz x" + bigTerm + " another term";
+ doc.add(new Field("content", contents, Field.Store.NO, Field.Index.ANALYZED));
+ writer.addDocument(doc);
+
+ // Make sure we can add another normal document
+ doc = new Document();
+ doc.add(new Field("content", "abc bbb ccc", Field.Store.NO, Field.Index.ANALYZED));
+ writer.addDocument(doc);
+ writer.close();
+
+ IndexReader reader = IndexReader.open(dir, true);
+
+ // Make sure all terms < max size were indexed
+ assertEquals(2, reader.docFreq(new Term("content", "abc")));
+ assertEquals(1, reader.docFreq(new Term("content", "bbb")));
+ assertEquals(1, reader.docFreq(new Term("content", "term")));
+ assertEquals(1, reader.docFreq(new Term("content", "another")));
+
+ // Make sure position is still incremented when
+ // massive term is skipped:
+ TermPositions tps = reader.termPositions(new Term("content", "another"));
+ assertTrue(tps.next());
+ assertEquals(1, tps.freq());
+ assertEquals(3, tps.nextPosition());
+
+ // Make sure the doc that has the massive term is in
+ // the index:
+ assertEquals("document with wicked long term is not in the index!", 2, reader.numDocs());
+
+ reader.close();
+
+ // Make sure we can add a document with exactly the
+ // maximum length term, and search on that term:
+ doc = new Document();
+ doc.add(new Field("content", bigTerm, Field.Store.NO, Field.Index.ANALYZED));
+ StandardAnalyzer sa = new StandardAnalyzer(TEST_VERSION_CURRENT);
+ sa.setMaxTokenLength(100000);
+ writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, sa));
+ writer.addDocument(doc);
+ writer.close();
+ reader = IndexReader.open(dir, true);
+ assertEquals(1, reader.docFreq(new Term("content", bigTerm)));
+ reader.close();
+
+ dir.close();
+ }
+
+ static String arrayToString(String[] l) {
+ String s = "";
+ for(int i=0;i<l.length;i++) {
+ if (i > 0) {
+ s += "\n ";
+ }
+ s += l[i];
+ }
+ return s;
+ }
+
+ // Make sure we can open an index for create even when a
+ // reader holds it open (this fails pre lock-less
+ // commits on windows):
+ public void testCreateWithReader() throws IOException {
+ Directory dir = newDirectory();
+
+ // add one document & close writer
+ IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)));
+ addDoc(writer);
+ writer.close();
+
+ // now open reader:
+ IndexReader reader = IndexReader.open(dir, true);
+ assertEquals("should be one document", reader.numDocs(), 1);
+
+ // now open index for create:
+ writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE));
+ assertEquals("should be zero documents", writer.maxDoc(), 0);
+ addDoc(writer);
+ writer.close();
+
+ assertEquals("should be one document", reader.numDocs(), 1);
+ IndexReader reader2 = IndexReader.open(dir, true);
+ assertEquals("should be one document", reader2.numDocs(), 1);
+ reader.close();
+ reader2.close();
+
+ dir.close();
+ }
+
+ public void testChangesAfterClose() throws IOException {
+ Directory dir =
newDirectory(); + + IndexWriter writer = null; + + writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); + addDoc(writer); + + // close + writer.close(); + try { + addDoc(writer); + fail("did not hit AlreadyClosedException"); + } catch (AlreadyClosedException e) { + // expected + } + dir.close(); + } + + public void testIndexNoDocuments() throws IOException { + MockDirectoryWrapper dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); + writer.commit(); + writer.close(); + + IndexReader reader = IndexReader.open(dir, true); + assertEquals(0, reader.maxDoc()); + assertEquals(0, reader.numDocs()); + reader.close(); + + writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); + writer.commit(); + writer.close(); + + reader = IndexReader.open(dir, true); + assertEquals(0, reader.maxDoc()); + assertEquals(0, reader.numDocs()); + reader.close(); + dir.close(); + } + + public void testManyFields() throws IOException { + MockDirectoryWrapper dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(10)); + for(int j=0;j<100;j++) { + Document doc = new Document(); + doc.add(newField("a"+j, "aaa" + j, Field.Store.YES, Field.Index.ANALYZED)); + doc.add(newField("b"+j, "aaa" + j, Field.Store.YES, Field.Index.ANALYZED)); + doc.add(newField("c"+j, "aaa" + j, Field.Store.YES, Field.Index.ANALYZED)); + doc.add(newField("d"+j, "aaa", Field.Store.YES, Field.Index.ANALYZED)); + doc.add(newField("e"+j, "aaa", Field.Store.YES, Field.Index.ANALYZED)); + doc.add(newField("f"+j, "aaa", Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(doc); + } + writer.close(); + + IndexReader reader = IndexReader.open(dir, true); + assertEquals(100, reader.maxDoc()); + assertEquals(100, reader.numDocs()); + for(int j=0;j<100;j++) { + assertEquals(1, reader.docFreq(new Term("a"+j, "aaa"+j))); + assertEquals(1, reader.docFreq(new Term("b"+j, "aaa"+j))); + assertEquals(1, reader.docFreq(new Term("c"+j, "aaa"+j))); + assertEquals(1, reader.docFreq(new Term("d"+j, "aaa"))); + assertEquals(1, reader.docFreq(new Term("e"+j, "aaa"))); + assertEquals(1, reader.docFreq(new Term("f"+j, "aaa"))); + } + reader.close(); + dir.close(); + } + + public void testSmallRAMBuffer() throws IOException { + MockDirectoryWrapper dir = newDirectory(); + IndexWriter writer = new IndexWriter( + dir, + newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)). + setRAMBufferSizeMB(0.000001). + setMergePolicy(newLogMergePolicy(10)) + ); + int lastNumFile = dir.listAll().length; + for(int j=0;j<9;j++) { + Document doc = new Document(); + doc.add(newField("field", "aaa" + j, Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(doc); + int numFile = dir.listAll().length; + // Verify that with a tiny RAM buffer we see new + // segment after every doc + assertTrue(numFile > lastNumFile); + lastNumFile = numFile; + } + writer.close(); + dir.close(); + } + + /** + * Make sure it's OK to change RAM buffer size and // maxBufferedDocs in a + * write session + * + * @deprecated after all the setters on IW go away (4.0), this test can be + * removed because changing ram buffer settings during a write + * session won't be possible. 
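+ *
+ * A flush triggers on whichever bound trips first: the doc-count bound
+ * (setMaxBufferedDocs) or the RAM bound (setRAMBufferSizeMB); passing
+ * IndexWriterConfig.DISABLE_AUTO_FLUSH turns a bound off. For example,
+ * setMaxBufferedDocs(10) plus setRAMBufferSizeMB(DISABLE_AUTO_FLUSH)
+ * flushes on every 10th document only, which is how the phases below
+ * isolate each trigger.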
+ */ + @Deprecated + public void testChangingRAMBuffer() throws IOException { + MockDirectoryWrapper dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(10).setRAMBufferSizeMB( + IndexWriterConfig.DISABLE_AUTO_FLUSH)); + + int lastFlushCount = -1; + for(int j=1;j<52;j++) { + Document doc = new Document(); + doc.add(newField("field", "aaa" + j, Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(doc); + _TestUtil.syncConcurrentMerges(writer); + int flushCount = writer.getFlushCount(); + if (j == 1) + lastFlushCount = flushCount; + else if (j < 10) + // No new files should be created + assertEquals(flushCount, lastFlushCount); + else if (10 == j) { + assertTrue(flushCount > lastFlushCount); + lastFlushCount = flushCount; + writer.setRAMBufferSizeMB(0.000001); + writer.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); + } else if (j < 20) { + assertTrue(flushCount > lastFlushCount); + lastFlushCount = flushCount; + } else if (20 == j) { + writer.setRAMBufferSizeMB(16); + writer.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); + lastFlushCount = flushCount; + } else if (j < 30) { + assertEquals(flushCount, lastFlushCount); + } else if (30 == j) { + writer.setRAMBufferSizeMB(0.000001); + writer.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); + } else if (j < 40) { + assertTrue(flushCount> lastFlushCount); + lastFlushCount = flushCount; + } else if (40 == j) { + writer.setMaxBufferedDocs(10); + writer.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH); + lastFlushCount = flushCount; + } else if (j < 50) { + assertEquals(flushCount, lastFlushCount); + writer.setMaxBufferedDocs(10); + writer.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH); + } else if (50 == j) { + assertTrue(flushCount > lastFlushCount); + } + } + writer.close(); + dir.close(); + } + + /** + * @deprecated after setters on IW go away, this test can be deleted because + * changing those settings on IW won't be possible. 
+ */ + @Deprecated + public void testChangingRAMBuffer2() throws IOException { + MockDirectoryWrapper dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setMaxBufferedDocs(10).setMaxBufferedDeleteTerms( + 10).setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH)); + + for(int j=1;j<52;j++) { + Document doc = new Document(); + doc.add(newField("field", "aaa" + j, Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(doc); + } + + int lastFlushCount = -1; + for(int j=1;j<52;j++) { + writer.deleteDocuments(new Term("field", "aaa" + j)); + _TestUtil.syncConcurrentMerges(writer); + int flushCount = writer.getFlushCount(); + if (j == 1) + lastFlushCount = flushCount; + else if (j < 10) { + // No new files should be created + assertEquals(flushCount, lastFlushCount); + } else if (10 == j) { + assertTrue(flushCount > lastFlushCount); + lastFlushCount = flushCount; + writer.setRAMBufferSizeMB(0.000001); + writer.setMaxBufferedDeleteTerms(1); + } else if (j < 20) { + assertTrue(flushCount > lastFlushCount); + lastFlushCount = flushCount; + } else if (20 == j) { + writer.setRAMBufferSizeMB(16); + writer.setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH); + lastFlushCount = flushCount; + } else if (j < 30) { + assertEquals(flushCount, lastFlushCount); + } else if (30 == j) { + writer.setRAMBufferSizeMB(0.000001); + writer.setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH); + writer.setMaxBufferedDeleteTerms(1); + } else if (j < 40) { + assertTrue(flushCount> lastFlushCount); + lastFlushCount = flushCount; + } else if (40 == j) { + writer.setMaxBufferedDeleteTerms(10); + writer.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH); + lastFlushCount = flushCount; + } else if (j < 50) { + assertEquals(flushCount, lastFlushCount); + writer.setMaxBufferedDeleteTerms(10); + writer.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH); + } else if (50 == j) { + assertTrue(flushCount > lastFlushCount); + } + } + writer.close(); + dir.close(); + } + + // Make sure it's OK to change RAM buffer size and + // maxBufferedDocs in a write session, using IW.getConfig() + public void testChangingRAMBufferWithIWC() throws IOException { + Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); + writer.getConfig().setMaxBufferedDocs(10); + writer.getConfig().setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH); + + int lastFlushCount = -1; + for(int j=1;j<52;j++) { + Document doc = new Document(); + doc.add(new Field("field", "aaa" + j, Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(doc); + _TestUtil.syncConcurrentMerges(writer); + int flushCount = writer.getFlushCount(); + if (j == 1) + lastFlushCount = flushCount; + else if (j < 10) + // No new files should be created + assertEquals(flushCount, lastFlushCount); + else if (10 == j) { + assertTrue(flushCount > lastFlushCount); + lastFlushCount = flushCount; + writer.getConfig().setRAMBufferSizeMB(0.000001); + writer.getConfig().setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); + } else if (j < 20) { + assertTrue(flushCount > lastFlushCount); + lastFlushCount = flushCount; + } else if (20 == j) { + writer.getConfig().setRAMBufferSizeMB(16); + writer.getConfig().setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); + lastFlushCount = flushCount; + } else if (j < 30) { + assertEquals(flushCount, 
lastFlushCount); + } else if (30 == j) { + writer.getConfig().setRAMBufferSizeMB(0.000001); + writer.getConfig().setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); + } else if (j < 40) { + assertTrue(flushCount> lastFlushCount); + lastFlushCount = flushCount; + } else if (40 == j) { + writer.getConfig().setMaxBufferedDocs(10); + writer.getConfig().setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH); + lastFlushCount = flushCount; + } else if (j < 50) { + assertEquals(flushCount, lastFlushCount); + writer.getConfig().setMaxBufferedDocs(10); + writer.getConfig().setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH); + } else if (50 == j) { + assertTrue(flushCount > lastFlushCount); + } + } + writer.close(); + dir.close(); + } + + public void testChangingRAMBuffer2WithIWC() throws IOException { + Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); + writer.getConfig().setMaxBufferedDocs(10); + writer.getConfig().setMaxBufferedDeleteTerms(10); + writer.getConfig().setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH); + + for(int j=1;j<52;j++) { + Document doc = new Document(); + doc.add(new Field("field", "aaa" + j, Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(doc); + } + + int lastFlushCount = -1; + for(int j=1;j<52;j++) { + writer.deleteDocuments(new Term("field", "aaa" + j)); + _TestUtil.syncConcurrentMerges(writer); + int flushCount = writer.getFlushCount(); + if (j == 1) + lastFlushCount = flushCount; + else if (j < 10) { + // No new files should be created + assertEquals(flushCount, lastFlushCount); + } else if (10 == j) { + assertTrue(flushCount > lastFlushCount); + lastFlushCount = flushCount; + writer.getConfig().setRAMBufferSizeMB(0.000001); + writer.getConfig().setMaxBufferedDeleteTerms(1); + } else if (j < 20) { + assertTrue(flushCount > lastFlushCount); + lastFlushCount = flushCount; + } else if (20 == j) { + writer.getConfig().setRAMBufferSizeMB(16); + writer.getConfig().setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH); + lastFlushCount = flushCount; + } else if (j < 30) { + assertEquals(flushCount, lastFlushCount); + } else if (30 == j) { + writer.getConfig().setRAMBufferSizeMB(0.000001); + writer.getConfig().setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH); + writer.getConfig().setMaxBufferedDeleteTerms(1); + } else if (j < 40) { + assertTrue(flushCount> lastFlushCount); + lastFlushCount = flushCount; + } else if (40 == j) { + writer.getConfig().setMaxBufferedDeleteTerms(10); + writer.getConfig().setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH); + lastFlushCount = flushCount; + } else if (j < 50) { + assertEquals(flushCount, lastFlushCount); + writer.getConfig().setMaxBufferedDeleteTerms(10); + writer.getConfig().setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH); + } else if (50 == j) { + assertTrue(flushCount > lastFlushCount); + } + } + writer.close(); + dir.close(); + } + + public void testDiverseDocs() throws IOException { + MockDirectoryWrapper dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setRAMBufferSizeMB(0.5)); + for(int i=0;i<3;i++) { + // First, docs where every term is unique (heavy on + // Posting instances) + for(int j=0;j<100;j++) { + Document doc = new Document(); + for(int k=0;k<100;k++) { + doc.add(newField("field", Integer.toString(random.nextInt()), Field.Store.YES, Field.Index.ANALYZED)); + } + 
writer.addDocument(doc); + } + + // Next, many single term docs where only one term + // occurs (heavy on byte blocks) + for(int j=0;j<100;j++) { + Document doc = new Document(); + doc.add(newField("field", "aaa aaa aaa aaa aaa aaa aaa aaa aaa aaa", Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(doc); + } + + // Next, many single term docs where only one term + // occurs but the terms are very long (heavy on + // char[] arrays) + for(int j=0;j<100;j++) { + StringBuilder b = new StringBuilder(); + String x = Integer.toString(j) + "."; + for(int k=0;k<1000;k++) + b.append(x); + String longTerm = b.toString(); + + Document doc = new Document(); + doc.add(newField("field", longTerm, Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(doc); + } + } + writer.close(); + + IndexSearcher searcher = new IndexSearcher(dir, false); + ScoreDoc[] hits = searcher.search(new TermQuery(new Term("field", "aaa")), null, 1000).scoreDocs; + assertEquals(300, hits.length); + searcher.close(); + + dir.close(); + } + + public void testEnablingNorms() throws IOException { + MockDirectoryWrapper dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(10)); + // Enable norms for only 1 doc, pre flush + for(int j=0;j<10;j++) { + Document doc = new Document(); + Field f = newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED); + if (j != 8) { + f.setOmitNorms(true); + } + doc.add(f); + writer.addDocument(doc); + } + writer.close(); + + Term searchTerm = new Term("field", "aaa"); + + IndexSearcher searcher = new IndexSearcher(dir, false); + ScoreDoc[] hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; + assertEquals(10, hits.length); + searcher.close(); + + writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)) + .setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(10)); + // Enable norms for only 1 doc, post flush + for(int j=0;j<27;j++) { + Document doc = new Document(); + Field f = newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED); + if (j != 26) { + f.setOmitNorms(true); + } + doc.add(f); + writer.addDocument(doc); + } + writer.close(); + searcher = new IndexSearcher(dir, false); + hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; + assertEquals(27, hits.length); + searcher.close(); + + IndexReader reader = IndexReader.open(dir, true); + reader.close(); + + dir.close(); + } + + public void testHighFreqTerm() throws IOException { + MockDirectoryWrapper dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random)).setRAMBufferSizeMB(0.01)); + // Massive doc that has 128 K a's + StringBuilder b = new StringBuilder(1024*1024); + for(int i=0;i<4096;i++) { + b.append(" a a a a a a a a"); + b.append(" a a a a a a a a"); + b.append(" a a a a a a a a"); + b.append(" a a a a a a a a"); + } + Document doc = new Document(); + doc.add(newField("field", b.toString(), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); + writer.addDocument(doc); + writer.close(); + + IndexReader reader = IndexReader.open(dir, true); + assertEquals(1, reader.maxDoc()); + assertEquals(1, reader.numDocs()); + Term t = new Term("field", "a"); + assertEquals(1, reader.docFreq(t)); + TermDocs td = reader.termDocs(t); + td.next(); + assertEquals(128*1024, td.freq()); + reader.close(); + dir.close(); + } + + // Make sure 
that a Directory implementation that does + // not use LockFactory at all (ie overrides makeLock and + // implements its own private locking) works OK. This + // was raised on java-dev as loss of backwards + // compatibility. + public void testNullLockFactory() throws IOException { + + final class MyRAMDirectory extends MockDirectoryWrapper { + private LockFactory myLockFactory; + MyRAMDirectory(Directory delegate) { + super(random, delegate); + lockFactory = null; + myLockFactory = new SingleInstanceLockFactory(); + } + @Override + public Lock makeLock(String name) { + return myLockFactory.makeLock(name); + } + } + + Directory dir = new MyRAMDirectory(new RAMDirectory()); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random))); + for (int i = 0; i < 100; i++) { + addDoc(writer); + } + writer.close(); + Term searchTerm = new Term("content", "aaa"); + IndexSearcher searcher = new IndexSearcher(dir, false); + ScoreDoc[] hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; + assertEquals("did not get right number of hits", 100, hits.length); + searcher.close(); + + writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)) + .setOpenMode(OpenMode.CREATE)); + writer.close(); + searcher.close(); + dir.close(); + } + + public void testFlushWithNoMerging() throws IOException { + Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter( + dir, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). + setMaxBufferedDocs(2). + setMergePolicy(newLogMergePolicy(10)) + ); + Document doc = new Document(); + doc.add(newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); + for(int i=0;i<19;i++) + writer.addDocument(doc); + writer.flush(false, true); + writer.close(); + SegmentInfos sis = new SegmentInfos(); + sis.read(dir); + // Since we flushed w/o allowing merging we should now + // have 10 segments + assertEquals(10, sis.size()); + dir.close(); + } + + // Make sure we can flush segment w/ norms, then add + // empty doc (no norms) and flush + public void testEmptyDocAfterFlushingRealDoc() throws IOException { + Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); + writer.setInfoStream(VERBOSE ? System.out : null); + Document doc = new Document(); + doc.add(newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); + writer.addDocument(doc); + writer.commit(); + if (VERBOSE) { + System.out.println("\nTEST: now add empty doc"); + } + writer.addDocument(new Document()); + writer.close(); + IndexReader reader = IndexReader.open(dir, true); + assertEquals(2, reader.numDocs()); + reader.close(); + dir.close(); + } + + /** + * Test that no NullPointerException will be raised, + * when adding one document with a single, empty field + * and term vectors enabled. 
+ * @throws IOException + * + */ + public void testBadSegment() throws IOException { + Directory dir = newDirectory(); + IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random))); + + Document document = new Document(); + document.add(newField("tvtest", "", Store.NO, Index.ANALYZED, TermVector.YES)); + iw.addDocument(document); + iw.close(); + dir.close(); + } + + // LUCENE-1036 + public void testMaxThreadPriority() throws IOException { + int pri = Thread.currentThread().getPriority(); + try { + Directory dir = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random)) + .setMaxBufferedDocs(2).setMergePolicy(newLogMergePolicy()); + ((LogMergePolicy) conf.getMergePolicy()).setMergeFactor(2); + IndexWriter iw = new IndexWriter(dir, conf); + Document document = new Document(); + document.add(newField("tvtest", "a b c", Field.Store.NO, Field.Index.ANALYZED, + Field.TermVector.YES)); + Thread.currentThread().setPriority(Thread.MAX_PRIORITY); + for(int i=0;i<4;i++) + iw.addDocument(document); + iw.close(); + dir.close(); + } finally { + Thread.currentThread().setPriority(pri); + } + } + + // Just intercepts all merges & verifies that we are never + // merging a segment with >= 20 (maxMergeDocs) docs + private class MyMergeScheduler extends MergeScheduler { + @Override + synchronized public void merge(IndexWriter writer) + throws CorruptIndexException, IOException { + + while(true) { + MergePolicy.OneMerge merge = writer.getNextMerge(); + if (merge == null) { + break; + } + for(int i=0;i failure = new ArrayList(); + Thread t1 = new Thread() { + @Override + public void run() { + boolean done = false; + while(!done) { + for(int i=0;i<100;i++) { + try { + finalWriter.addDocument(doc); + } catch (AlreadyClosedException e) { + done = true; + break; + } catch (NullPointerException e) { + done = true; + break; + } catch (Throwable e) { + e.printStackTrace(System.out); + failure.add(e); + done = true; + break; + } + } + Thread.yield(); + } + + } + }; + + if (failure.size() > 0) { + throw failure.get(0); + } + + t1.start(); + + writer.close(false); + t1.join(); + + // Make sure reader can read + IndexReader reader = IndexReader.open(directory, true); + reader.close(); + + // Reopen + writer = new IndexWriter(directory, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND).setMergePolicy(newLogMergePolicy())); + writer.setInfoStream(VERBOSE ? 
System.out : null);
+ }
+ writer.close();
+ }
+
+ directory.close();
+ }
+
+ // LUCENE-1084: test unlimited field length
+ public void testUnlimitedMaxFieldLength() throws IOException {
+ Directory dir = newDirectory();
+
+ IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)));
+
+ Document doc = new Document();
+ StringBuilder b = new StringBuilder();
+ for(int i=0;i<10000;i++)
+ b.append(" a");
+ b.append(" x");
+ doc.add(newField("field", b.toString(), Field.Store.NO, Field.Index.ANALYZED));
+ writer.addDocument(doc);
+ writer.close();
+
+ IndexReader reader = IndexReader.open(dir, true);
+ Term t = new Term("field", "x");
+ assertEquals(1, reader.docFreq(t));
+ reader.close();
+ dir.close();
+ }
+
+ // LUCENE-1084: test user-specified field length
+ public void testUserSpecifiedMaxFieldLength() throws IOException {
+ Directory dir = newDirectory();
+
+ IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
+ TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)));
+ writer.setMaxFieldLength(100000);
+
+ Document doc = new Document();
+ StringBuilder b = new StringBuilder();
+ for(int i=0;i<10000;i++)
+ b.append(" a");
+ b.append(" x");
+ doc.add(newField("field", b.toString(), Field.Store.NO, Field.Index.ANALYZED));
+ writer.addDocument(doc);
+ writer.close();
+
+ IndexReader reader = IndexReader.open(dir, true);
+ Term t = new Term("field", "x");
+ assertEquals(1, reader.docFreq(t));
+ reader.close();
+ dir.close();
+ }
+
+ // LUCENE-1179
+ public void testEmptyFieldName() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)));
+ Document doc = new Document();
+ doc.add(newField("", "a b c", Field.Store.NO, Field.Index.ANALYZED));
+ writer.addDocument(doc);
+ writer.close();
+ dir.close();
+ }
+
+
+
+ private static final class MockIndexWriter extends IndexWriter {
+
+ public MockIndexWriter(Directory dir, IndexWriterConfig conf) throws IOException {
+ super(dir, conf);
+ }
+
+ boolean afterWasCalled;
+ boolean beforeWasCalled;
+
+ @Override
+ public void doAfterFlush() {
+ afterWasCalled = true;
+ }
+
+ @Override
+ protected void doBeforeFlush() throws IOException {
+ beforeWasCalled = true;
+ }
+ }
+
+
+ // LUCENE-1222
+ public void testDoBeforeAfterFlush() throws IOException {
+ Directory dir = newDirectory();
+ MockIndexWriter w = new MockIndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)));
+ Document doc = new Document();
+ doc.add(newField("field", "a field", Field.Store.YES,
+ Field.Index.ANALYZED));
+ w.addDocument(doc);
+ w.commit();
+ assertTrue(w.beforeWasCalled);
+ assertTrue(w.afterWasCalled);
+ w.beforeWasCalled = false;
+ w.afterWasCalled = false;
+ w.deleteDocuments(new Term("field", "field"));
+ w.commit();
+ assertTrue(w.beforeWasCalled);
+ assertTrue(w.afterWasCalled);
+ w.close();
+
+ IndexReader ir = IndexReader.open(dir, true);
+ assertEquals(0, ir.numDocs());
+ ir.close();
+
+ dir.close();
+ }
+
+ // LUCENE-1255
+ public void testNegativePositions() throws Throwable {
+ final TokenStream tokens = new TokenStream() {
+ final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+
+ final Iterator<String> terms = Arrays.asList("a","b","c").iterator();
+ boolean first = true;
+
+ @Override
+ public boolean incrementToken() {
+ if
(!terms.hasNext()) return false; + clearAttributes(); + termAtt.append(terms.next()); + posIncrAtt.setPositionIncrement(first ? 0 : 1); + first = false; + return true; + } + }; + + Directory dir = newDirectory(); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); + Document doc = new Document(); + doc.add(new Field("field", tokens)); + w.addDocument(doc); + w.commit(); + + IndexSearcher s = new IndexSearcher(dir, false); + PhraseQuery pq = new PhraseQuery(); + pq.add(new Term("field", "a")); + pq.add(new Term("field", "b")); + pq.add(new Term("field", "c")); + ScoreDoc[] hits = s.search(pq, null, 1000).scoreDocs; + assertEquals(1, hits.length); + + Query q = new SpanTermQuery(new Term("field", "a")); + hits = s.search(q, null, 1000).scoreDocs; + assertEquals(1, hits.length); + TermPositions tps = s.getIndexReader().termPositions(new Term("field", "a")); + assertTrue(tps.next()); + assertEquals(1, tps.freq()); + assertEquals(0, tps.nextPosition()); + w.close(); + + s.close(); + dir.close(); + } + + // LUCENE-1219 + public void testBinaryFieldOffsetLength() throws IOException { + Directory dir = newDirectory(); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); + byte[] b = new byte[50]; + for(int i=0;i<50;i++) + b[i] = (byte) (i+77); + + Document doc = new Document(); + Field f = new Field("binary", b, 10, 17); + byte[] bx = f.getBinaryValue(); + assertTrue(bx != null); + assertEquals(50, bx.length); + assertEquals(10, f.getBinaryOffset()); + assertEquals(17, f.getBinaryLength()); + doc.add(f); + w.addDocument(doc); + w.close(); + + IndexReader ir = IndexReader.open(dir, true); + doc = ir.document(0); + f = doc.getField("binary"); + b = f.getBinaryValue(); + assertTrue(b != null); + assertEquals(17, b.length, 17); + assertEquals(87, b[0]); + ir.close(); + dir.close(); + } + + // LUCENE-2529 + public void testPositionIncrementGapEmptyField() throws Exception { + Directory dir = newDirectory(); + Analyzer analyzer = new Analyzer(){ + Analyzer a = new WhitespaceAnalyzer( TEST_VERSION_CURRENT ); + @Override + public TokenStream tokenStream(String fieldName, Reader reader){ + return a.tokenStream(fieldName, reader); + } + @Override + public int getPositionIncrementGap(String fieldName) { + return 100; + } + }; + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, analyzer)); + Document doc = new Document(); + Field f = newField("field", "", Field.Store.NO, + Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS); + Field f2 = newField("field", "crunch man", Field.Store.NO, + Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS); + doc.add(f); + doc.add(f2); + w.addDocument(doc); + w.close(); + + IndexReader r = IndexReader.open(dir, true); + TermPositionVector tpv = ((TermPositionVector) r.getTermFreqVector(0, "field")); + int[] poss = tpv.getTermPositions(0); + assertEquals(1, poss.length); + assertEquals(100, poss[0]); + poss = tpv.getTermPositions(1); + assertEquals(1, poss.length); + assertEquals(101, poss[0]); + r.close(); + dir.close(); + } + + + // LUCENE-1468 -- make sure opening an IndexWriter with + // create=true does not remove non-index files + + public void testOtherFiles() throws Throwable { + Directory dir = newDirectory(); + try { + // Create my own random file: + IndexOutput out = dir.createOutput("myrandomfile"); + out.writeByte((byte) 42); + out.close(); + + new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new 
MockAnalyzer(random))).close(); + + assertTrue(dir.fileExists("myrandomfile")); + + // Make sure this does not copy myrandomfile: + Directory dir2 = new MockDirectoryWrapper(random, new RAMDirectory(dir)); + assertTrue(!dir2.fileExists("myrandomfile")); + dir2.close(); + } finally { + dir.close(); + } + } + + public void testDeadlock() throws Exception { + Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2)); + Document doc = new Document(); + doc.add(newField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, + Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); + writer.addDocument(doc); + writer.addDocument(doc); + writer.addDocument(doc); + writer.commit(); + // index has 2 segments + + Directory dir2 = newDirectory(); + IndexWriter writer2 = new IndexWriter(dir2, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); + writer2.addDocument(doc); + writer2.close(); + + IndexReader r1 = IndexReader.open(dir2, true); + IndexReader r2 = (IndexReader) r1.clone(); + writer.addIndexes(new IndexReader[] {r1, r2}); + writer.close(); + + IndexReader r3 = IndexReader.open(dir, true); + assertEquals(5, r3.numDocs()); + r3.close(); + + r1.close(); + r2.close(); + + dir2.close(); + dir.close(); + } + + private class IndexerThreadInterrupt extends Thread { + volatile boolean failed; + volatile boolean finish; + + volatile boolean allowInterrupt = false; + + @Override + public void run() { + // LUCENE-2239: won't work with NIOFS/MMAP + Directory dir = new MockDirectoryWrapper(random, new RAMDirectory()); + IndexWriter w = null; + while(!finish) { + try { + + while(!finish) { + if (w != null) { + w.close(); + w = null; + } + IndexWriterConfig conf = newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2); + w = new IndexWriter(dir, conf); + + Document doc = new Document(); + doc.add(newField("field", "some text contents", Field.Store.YES, Field.Index.ANALYZED)); + for(int i=0;i<100;i++) { + w.addDocument(doc); + if (i%10 == 0) { + w.commit(); + } + } + w.close(); + w = null; + _TestUtil.checkIndex(dir); + IndexReader.open(dir, true).close(); + + // Strangely, if we interrupt a thread before + // all classes are loaded, the class loader + // seems to do scary things with the interrupt + // status. In java 1.5, it'll throw an + // incorrect ClassNotFoundException. In java + // 1.6, it'll silently clear the interrupt. + // So, on first iteration through here we + // don't open ourselves up for interrupts + // until we've done the above loop. 
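+ // (A sketch of the pattern this test exercises, mirroring the catch
+ // clause below. Lucene wraps the low-level interrupt:
+ //
+ //   try { w.addDocument(doc); }
+ //   catch (ThreadInterruptedException tie) {
+ //     InterruptedException cause = (InterruptedException) tie.getCause();
+ //   }
+ //
+ // The thread must later call Thread.interrupted() itself to clear
+ // the interrupt flag, as done near the bottom of run().)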
+ allowInterrupt = true; + } + } catch (ThreadInterruptedException re) { + if (VERBOSE) { + System.out.println("TEST: got interrupt"); + re.printStackTrace(System.out); + } + Throwable e = re.getCause(); + assertTrue(e instanceof InterruptedException); + if (finish) { + break; + } + } catch (Throwable t) { + System.out.println("FAILED; unexpected exception"); + t.printStackTrace(System.out); + failed = true; + break; + } + } + + if (!failed) { + // clear interrupt state: + Thread.interrupted(); + if (w != null) { + try { + w.rollback(); + } catch (IOException ioe) { + throw new RuntimeException(ioe); + } + } + + try { + _TestUtil.checkIndex(dir); + } catch (Exception e) { + failed = true; + System.out.println("CheckIndex FAILED: unexpected exception"); + e.printStackTrace(System.out); + } + try { + IndexReader r = IndexReader.open(dir, true); + //System.out.println("doc count=" + r.numDocs()); + r.close(); + } catch (Exception e) { + failed = true; + System.out.println("IndexReader.open FAILED: unexpected exception"); + e.printStackTrace(System.out); + } + } + try { + dir.close(); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + } + + public void testThreadInterruptDeadlock() throws Exception { + IndexerThreadInterrupt t = new IndexerThreadInterrupt(); + t.setDaemon(true); + t.start(); + + // Force class loader to load ThreadInterruptedException + // up front... else we can see a false failure if 2nd + // interrupt arrives while class loader is trying to + // init this class (in servicing a first interrupt): + assertTrue(new ThreadInterruptedException(new InterruptedException()).getCause() instanceof InterruptedException); + + // issue 100 interrupts to child thread + int i = 0; + while(i < 100) { + Thread.sleep(10); + if (t.allowInterrupt) { + i++; + t.interrupt(); + } + if (!t.isAlive()) { + break; + } + } + t.finish = true; + t.join(); + assertFalse(t.failed); + } + + + public void testIndexStoreCombos() throws Exception { + Directory dir = newDirectory(); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); + byte[] b = new byte[50]; + for(int i=0;i<50;i++) + b[i] = (byte) (i+77); + + Document doc = new Document(); + Field f = new Field("binary", b, 10, 17); + f.setTokenStream(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("doc1field1"))); + Field f2 = newField("string", "value", Field.Store.YES,Field.Index.ANALYZED); + f2.setTokenStream(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("doc1field2"))); + doc.add(f); + doc.add(f2); + w.addDocument(doc); + + // add 2 docs to test in-memory merging + f.setTokenStream(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("doc2field1"))); + f2.setTokenStream(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("doc2field2"))); + w.addDocument(doc); + + // force segment flush so we can force a segment merge with doc3 later. + w.commit(); + + f.setTokenStream(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("doc3field1"))); + f2.setTokenStream(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("doc3field2"))); + + w.addDocument(doc); + w.commit(); + w.optimize(); // force segment merge. 
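+ // (Recap of the combos: each doc pairs a stored binary field with a
+ // stored string field, both made searchable by attaching a pre-analyzed
+ // stream via Field.setTokenStream(new WhitespaceTokenizer(...)); the
+ // three docs cover an in-memory merge, a committed segment, and this
+ // optimize() merge.)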
+ w.close(); + + IndexReader ir = IndexReader.open(dir, true); + doc = ir.document(0); + f = doc.getField("binary"); + b = f.getBinaryValue(); + assertTrue(b != null); + assertEquals(17, b.length, 17); + assertEquals(87, b[0]); + + assertTrue(ir.document(0).getFieldable("binary").isBinary()); + assertTrue(ir.document(1).getFieldable("binary").isBinary()); + assertTrue(ir.document(2).getFieldable("binary").isBinary()); + + assertEquals("value", ir.document(0).get("string")); + assertEquals("value", ir.document(1).get("string")); + assertEquals("value", ir.document(2).get("string")); + + + // test that the terms were indexed. + assertTrue(ir.termDocs(new Term("binary","doc1field1")).next()); + assertTrue(ir.termDocs(new Term("binary","doc2field1")).next()); + assertTrue(ir.termDocs(new Term("binary","doc3field1")).next()); + assertTrue(ir.termDocs(new Term("string","doc1field2")).next()); + assertTrue(ir.termDocs(new Term("string","doc2field2")).next()); + assertTrue(ir.termDocs(new Term("string","doc3field2")).next()); + + ir.close(); + dir.close(); + + } + + // LUCENE-1727: make sure doc fields are stored in order + public void testStoredFieldsOrder() throws Throwable { + Directory d = newDirectory(); + IndexWriter w = new IndexWriter(d, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); + Document doc = new Document(); + doc.add(newField("zzz", "a b c", Field.Store.YES, Field.Index.NO)); + doc.add(newField("aaa", "a b c", Field.Store.YES, Field.Index.NO)); + doc.add(newField("zzz", "1 2 3", Field.Store.YES, Field.Index.NO)); + w.addDocument(doc); + IndexReader r = w.getReader(); + doc = r.document(0); + Iterator it = doc.getFields().iterator(); + assertTrue(it.hasNext()); + Field f = (Field) it.next(); + assertEquals(f.name(), "zzz"); + assertEquals(f.stringValue(), "a b c"); + + assertTrue(it.hasNext()); + f = (Field) it.next(); + assertEquals(f.name(), "aaa"); + assertEquals(f.stringValue(), "a b c"); + + assertTrue(it.hasNext()); + f = (Field) it.next(); + assertEquals(f.name(), "zzz"); + assertEquals(f.stringValue(), "1 2 3"); + assertFalse(it.hasNext()); + r.close(); + w.close(); + d.close(); + } + + public void testNoDocsIndex() throws Throwable { + Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random))); + ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); + writer.setInfoStream(new PrintStream(bos)); + writer.addDocument(new Document()); + writer.close(); + + dir.close(); + } + + public void testDeleteUnusedFiles() throws Exception { + + for(int iter=0;iter<2;iter++) { + Directory dir = newDirectory(); + + LogMergePolicy mergePolicy = newLogMergePolicy(true); + mergePolicy.setNoCFSRatio(1); // This test expects all of its segments to be in CFS + + IndexWriter w = new IndexWriter( + dir, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). 
+ setMergePolicy(mergePolicy) + ); + Document doc = new Document(); + doc.add(newField("field", "go", Field.Store.NO, Field.Index.ANALYZED)); + w.addDocument(doc); + IndexReader r; + if (iter == 0) { + // use NRT + r = w.getReader(); + } else { + // don't use NRT + w.commit(); + r = IndexReader.open(dir); + } + + List files = Arrays.asList(dir.listAll()); + assertTrue(files.contains("_0.cfs")); + w.addDocument(doc); + w.optimize(); + if (iter == 1) { + w.commit(); + } + IndexReader r2 = r.reopen(); + assertTrue(r != r2); + files = Arrays.asList(dir.listAll()); + + // NOTE: here we rely on "Windows" behavior, ie, even + // though IW wanted to delete _0.cfs since it was + // optimized away, because we have a reader open + // against this file, it should still be here: + assertTrue(files.contains("_0.cfs")); + // optimize created this + //assertTrue(files.contains("_2.cfs")); + w.deleteUnusedFiles(); + + files = Arrays.asList(dir.listAll()); + // r still holds this file open + assertTrue(files.contains("_0.cfs")); + //assertTrue(files.contains("_2.cfs")); + + r.close(); + if (iter == 0) { + // on closing NRT reader, it calls writer.deleteUnusedFiles + files = Arrays.asList(dir.listAll()); + assertFalse(files.contains("_0.cfs")); + } else { + // now writer can remove it + w.deleteUnusedFiles(); + files = Arrays.asList(dir.listAll()); + assertFalse(files.contains("_0.cfs")); + } + //assertTrue(files.contains("_2.cfs")); + + w.close(); + r2.close(); + + dir.close(); + } + } + + public void testDeleteUnsedFiles2() throws Exception { + // Validates that iw.deleteUnusedFiles() also deletes unused index commits + // in case a deletion policy which holds onto commits is used. + Directory dir = newDirectory(); + SnapshotDeletionPolicy sdp = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy()); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random)) + .setIndexDeletionPolicy(sdp)); + + // First commit + Document doc = new Document(); + doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); + writer.addDocument(doc); + writer.commit(); + assertEquals(1, IndexReader.listCommits(dir).size()); + + // Keep that commit + sdp.snapshot("id"); + + // Second commit - now KeepOnlyLastCommit cannot delete the prev commit. + doc = new Document(); + doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); + writer.addDocument(doc); + writer.commit(); + assertEquals(2, IndexReader.listCommits(dir).size()); + + // Should delete the unreferenced commit + sdp.release("id"); + writer.deleteUnusedFiles(); + assertEquals(1, IndexReader.listCommits(dir).size()); + + writer.close(); + dir.close(); + } + + private static class FlushCountingIndexWriter extends IndexWriter { + int flushCount; + public FlushCountingIndexWriter(Directory dir, IndexWriterConfig iwc) throws IOException { + super(dir, iwc); + } + @Override + public void doAfterFlush() { + flushCount++; + } + } + + public void testEmptyFSDirWithNoLock() throws Exception { + // Tests that if FSDir is opened w/ a NoLockFactory (or SingleInstanceLF), + // then IndexWriter ctor succeeds. Previously (LUCENE-2386) it failed + // when listAll() was called in IndexFileDeleter. 
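+ // (NoLockFactory.getNoLockFactory() returns a singleton whose locks
+ // always succeed on obtain() and never touch the file system, so the
+ // directory stays empty; e.g., assuming some temp-dir File path:
+ //
+ //   Directory d = newFSDirectory(path, NoLockFactory.getNoLockFactory());
+ //
+ // which is exactly what the next line does.)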
+ Directory dir = newFSDirectory(_TestUtil.getTempDir("emptyFSDirNoLock"), NoLockFactory.getNoLockFactory());
+ new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))).close();
+ dir.close();
+ }
+
+ public void testEmptyDirRollback() throws Exception {
+ // Tests that if IW is created over an empty Directory, some documents are
+ // indexed, flushed (but not committed) and then IW rolls back, then no
+ // files are left in the Directory.
+ Directory dir = newDirectory();
+ IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
+ TEST_VERSION_CURRENT, new MockAnalyzer(random))
+ .setMaxBufferedDocs(2).setMergePolicy(newLogMergePolicy()));
+ String[] files = dir.listAll();
+
+ writer.setInfoStream(VERBOSE ? System.out : null);
+
+ // Creating over empty dir should not create any files,
+ // or, at most the write.lock file
+ final int extraFileCount;
+ if (files.length == 1) {
+ assertTrue(files[0].endsWith("write.lock"));
+ extraFileCount = 1;
+ } else {
+ assertEquals(0, files.length);
+ extraFileCount = 0;
+ }
+
+ Document doc = new Document();
+ // create as many files as possible
+ doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
+ writer.addDocument(doc);
+ // Adding just one document does not call flush yet.
+ assertEquals("only the stored and term vector files should exist in the directory", 5 + extraFileCount, dir.listAll().length);
+
+ doc = new Document();
+ doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
+ writer.addDocument(doc);
+
+ // The second document should cause a flush.
+ assertTrue("flush should have occurred and files should have been created", dir.listAll().length > 5 + extraFileCount);
+
+ // After rollback, IW should remove all files
+ writer.rollback();
+ assertEquals("no files should exist in the directory after rollback", 0, dir.listAll().length);
+
+ // Since we rolled-back above, that close should be a no-op
+ writer.close();
+ assertEquals("expected a no-op close after IW.rollback()", 0, dir.listAll().length);
+ dir.close();
+ }
+
+ public void testNoSegmentFile() throws IOException {
+ Directory dir = newDirectory();
+ dir.setLockFactory(NoLockFactory.getNoLockFactory());
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
+ TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2));
+
+ Document doc = new Document();
+ doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
+ w.addDocument(doc);
+ w.addDocument(doc);
+ IndexWriter w2 = new IndexWriter(dir, newIndexWriterConfig(
+ TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2)
+ .setOpenMode(OpenMode.CREATE));
+
+ w2.close();
+ w.rollback();
+ dir.close();
+ }
+
+ public void testRandomStoredFields() throws IOException {
+ Directory dir = newDirectory();
+ Random rand = random;
+ RandomIndexWriter w = new RandomIndexWriter(rand, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(_TestUtil.nextInt(rand, 5, 20)));
+ //w.w.setInfoStream(System.out);
+ //w.w.setUseCompoundFile(false);
+ if (VERBOSE) {
+ w.w.setInfoStream(System.out);
+ }
+ final int docCount = atLeast(200);
+ final int fieldCount = _TestUtil.nextInt(rand, 1, 5);
+
+ final List<Integer> fieldIDs = new ArrayList<Integer>();
+
+ Field idField = newField("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED);
+
+ for(int i=0;i<fieldCount;i++) {
+ fieldIDs.add(i);
+ }
+
+ final Map<String,Document> docs = new HashMap<String,Document>();
+
+ if (VERBOSE) {
+ System.out.println("TEST: build index docCount="
+ docCount); + } + + for(int i=0;i Number every so often + Collections.shuffle(fieldIDs); + } + if (rand.nextInt(5) == 3 && i > 0) { + final String delID = ""+rand.nextInt(i); + if (VERBOSE) { + System.out.println("TEST: delete doc " + delID); + } + w.deleteDocuments(new Term("id", delID)); + docs.remove(delID); + } + } + + if (VERBOSE) { + System.out.println("TEST: " + docs.size() + " docs in index; now load fields"); + } + if (docs.size() > 0) { + String[] idsList = docs.keySet().toArray(new String[docs.size()]); + + for(int x=0;x<2;x++) { + IndexReader r = w.getReader(); + IndexSearcher s = newSearcher(r); + + if (VERBOSE) { + System.out.println("TEST: cycle x=" + x + " r=" + r); + } + + int num = atLeast(1000); + for(int iter=0;iter commitData = new HashMap(); + commitData.put("tag", "first"); + w.commit(commitData); + + // commit to "second" + w.addDocument(doc); + commitData.put("tag", "second"); + w.commit(commitData); + w.close(); + + // open "first" with IndexWriter + IndexCommit commit = null; + for(IndexCommit c : IndexReader.listCommits(dir)) { + if (c.getUserData().get("tag").equals("first")) { + commit = c; + break; + } + } + + assertNotNull(commit); + + w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE).setIndexCommit(commit)); + + assertEquals(1, w.numDocs()); + + // commit IndexWriter to "third" + w.addDocument(doc); + commitData.put("tag", "third"); + w.commit(commitData); + w.close(); + + // make sure "second" commit is still there + commit = null; + for(IndexCommit c : IndexReader.listCommits(dir)) { + if (c.getUserData().get("tag").equals("second")) { + commit = c; + break; + } + } + + assertNotNull(commit); + + IndexReader r = IndexReader.open(commit, true); + assertEquals(2, r.numDocs()); + r.close(); + + // open "second", w/ writeable IndexReader & commit + r = IndexReader.open(commit, NoDeletionPolicy.INSTANCE, false); + assertEquals(2, r.numDocs()); + r.deleteDocument(0); + r.deleteDocument(1); + commitData.put("tag", "fourth"); + r.commit(commitData); + r.close(); + + // make sure "third" commit is still there + commit = null; + for(IndexCommit c : IndexReader.listCommits(dir)) { + if (c.getUserData().get("tag").equals("third")) { + commit = c; + break; + } + } + assertNotNull(commit); + + dir.close(); + } + + public void testNoCommits() throws Exception { + // Tests that if we don't call commit(), the directory has 0 commits. This has + // changed since LUCENE-2386, where before IW would always commit on a fresh + // new index. + Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); + try { + IndexReader.listCommits(dir); + fail("listCommits should have thrown an exception over empty index"); + } catch (IndexNotFoundException e) { + // that's expected ! + } + // No changes still should generate a commit, because it's a new index. + writer.close(); + assertEquals("expected 1 commits!", 1, IndexReader.listCommits(dir).size()); + dir.close(); + } + + // LUCENE-1274: test writer.prepareCommit() + public void testPrepareCommit() throws IOException { + Directory dir = newDirectory(); + + IndexWriter writer = new IndexWriter( + dir, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). + setMaxBufferedDocs(2). 
+ setMergePolicy(newLogMergePolicy(5)) + ); + writer.commit(); + + for (int i = 0; i < 23; i++) + TestIndexWriter.addDoc(writer); + + IndexReader reader = IndexReader.open(dir, true); + assertEquals(0, reader.numDocs()); + + writer.prepareCommit(); + + IndexReader reader2 = IndexReader.open(dir, true); + assertEquals(0, reader2.numDocs()); + + writer.commit(); + + IndexReader reader3 = reader.reopen(); + assertEquals(0, reader.numDocs()); + assertEquals(0, reader2.numDocs()); + assertEquals(23, reader3.numDocs()); + reader.close(); + reader2.close(); + + for (int i = 0; i < 17; i++) + TestIndexWriter.addDoc(writer); + + assertEquals(23, reader3.numDocs()); + reader3.close(); + reader = IndexReader.open(dir, true); + assertEquals(23, reader.numDocs()); + reader.close(); + + writer.prepareCommit(); + + reader = IndexReader.open(dir, true); + assertEquals(23, reader.numDocs()); + reader.close(); + + writer.commit(); + reader = IndexReader.open(dir, true); + assertEquals(40, reader.numDocs()); + reader.close(); + writer.close(); + dir.close(); + } + + // LUCENE-1274: test writer.prepareCommit() + public void testPrepareCommitRollback() throws IOException { + MockDirectoryWrapper dir = newDirectory(); + dir.setPreventDoubleWrite(false); + + IndexWriter writer = new IndexWriter( + dir, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). + setMaxBufferedDocs(2). + setMergePolicy(newLogMergePolicy(5)) + ); + writer.commit(); + + for (int i = 0; i < 23; i++) + TestIndexWriter.addDoc(writer); + + IndexReader reader = IndexReader.open(dir, true); + assertEquals(0, reader.numDocs()); + + writer.prepareCommit(); + + IndexReader reader2 = IndexReader.open(dir, true); + assertEquals(0, reader2.numDocs()); + + writer.rollback(); + + IndexReader reader3 = reader.reopen(); + assertEquals(0, reader.numDocs()); + assertEquals(0, reader2.numDocs()); + assertEquals(0, reader3.numDocs()); + reader.close(); + reader2.close(); + + writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); + for (int i = 0; i < 17; i++) + TestIndexWriter.addDoc(writer); + + assertEquals(0, reader3.numDocs()); + reader3.close(); + reader = IndexReader.open(dir, true); + assertEquals(0, reader.numDocs()); + reader.close(); + + writer.prepareCommit(); + + reader = IndexReader.open(dir, true); + assertEquals(0, reader.numDocs()); + reader.close(); + + writer.commit(); + reader = IndexReader.open(dir, true); + assertEquals(17, reader.numDocs()); + reader.close(); + writer.close(); + dir.close(); + } + + // LUCENE-1274 + public void testPrepareCommitNoChanges() throws IOException { + Directory dir = newDirectory(); + + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); + writer.prepareCommit(); + writer.commit(); + writer.close(); + + IndexReader reader = IndexReader.open(dir, true); + assertEquals(0, reader.numDocs()); + reader.close(); + dir.close(); + } + + // LUCENE-1382 + public void testCommitUserData() throws IOException { + Directory dir = newDirectory(); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2)); + for(int j=0;j<17;j++) + TestIndexWriter.addDoc(w); + w.close(); + + assertEquals(0, IndexReader.getCommitUserData(dir).size()); + + IndexReader r = IndexReader.open(dir, true); + // commit(Map) never called for this index + assertEquals(0, r.getCommitUserData().size()); + r.close(); + + w = new IndexWriter(dir, 
newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2)); + for(int j=0;j<17;j++) + TestIndexWriter.addDoc(w); + Map<String,String> data = new HashMap<String,String>(); + data.put("label", "test1"); + w.commit(data); + w.close(); + + assertEquals("test1", IndexReader.getCommitUserData(dir).get("label")); + + r = IndexReader.open(dir, true); + assertEquals("test1", r.getCommitUserData().get("label")); + r.close(); + + w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); + w.optimize(); + w.close(); + + assertEquals("test1", IndexReader.getCommitUserData(dir).get("label")); + + dir.close(); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestIndexWriterConfig.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestIndexWriterConfig.java new file mode 100644 index 0000000..3412b3d --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestIndexWriterConfig.java @@ -0,0 +1,309 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.lang.reflect.Field; +import java.lang.reflect.Method; +import java.lang.reflect.Modifier; +import java.util.HashSet; +import java.util.Set; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.index.DocumentsWriter.IndexingChain; +import org.apache.lucene.index.IndexWriter.IndexReaderWarmer; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.search.DefaultSimilarity; +import org.apache.lucene.search.Similarity; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; +import org.junit.Test; + +public class TestIndexWriterConfig extends LuceneTestCase { + + private static final class MySimilarity extends DefaultSimilarity { + // Does not implement anything - used only for type checking on IndexWriterConfig. + } + + private static final class MyIndexingChain extends IndexingChain { + // Does not implement anything - used only for type checking on IndexWriterConfig. + + @Override + DocConsumer getChain(DocumentsWriter documentsWriter) { + return null; + } + + } + + private static final class MyWarmer extends IndexReaderWarmer { + // Does not implement anything - used only for type checking on IndexWriterConfig.
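
MyWarmer above, like MySimilarity and MyIndexingChain, exists only so the config tests can type-check; a real IndexWriter.IndexReaderWarmer is the hook Lucene invokes on a newly merged segment before near-real-time readers start serving it. A rough sketch of a functional warmer, assuming an analyzer in scope (illustrative, not part of this test):

    IndexWriter.IndexReaderWarmer warmer = new IndexWriter.IndexReaderWarmer() {
      @Override
      public void warm(IndexReader reader) throws IOException {
        // Touch the freshly merged segment with a cheap query so its files
        // are warm before an NRT reader serves from it.
        new IndexSearcher(reader).search(new TermQuery(new Term("id", "0")), 10);
      }
    };
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_34, analyzer)
        .setMergedSegmentWarmer(warmer);
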
+ + @Override + public void warm(IndexReader reader) throws IOException { + } + + } + + @Test + public void testDefaults() throws Exception { + IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)); + assertEquals(MockAnalyzer.class, conf.getAnalyzer().getClass()); + assertNull(conf.getIndexCommit()); + assertEquals(KeepOnlyLastCommitDeletionPolicy.class, conf.getIndexDeletionPolicy().getClass()); + assertEquals(ConcurrentMergeScheduler.class, conf.getMergeScheduler().getClass()); + assertEquals(OpenMode.CREATE_OR_APPEND, conf.getOpenMode()); + assertTrue(Similarity.getDefault() == conf.getSimilarity()); + assertEquals(IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, conf.getTermIndexInterval()); + assertEquals(IndexWriterConfig.getDefaultWriteLockTimeout(), conf.getWriteLockTimeout()); + assertEquals(IndexWriterConfig.WRITE_LOCK_TIMEOUT, IndexWriterConfig.getDefaultWriteLockTimeout()); + assertEquals(IndexWriterConfig.DEFAULT_MAX_BUFFERED_DELETE_TERMS, conf.getMaxBufferedDeleteTerms()); + assertEquals(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB, conf.getRAMBufferSizeMB(), 0.0); + assertEquals(IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS, conf.getMaxBufferedDocs()); + assertEquals(IndexWriterConfig.DEFAULT_READER_POOLING, conf.getReaderPooling()); + assertTrue(DocumentsWriter.defaultIndexingChain == conf.getIndexingChain()); + assertNull(conf.getMergedSegmentWarmer()); + assertEquals(IndexWriterConfig.DEFAULT_MAX_THREAD_STATES, conf.getMaxThreadStates()); + assertEquals(IndexWriterConfig.DEFAULT_READER_TERMS_INDEX_DIVISOR, conf.getReaderTermsIndexDivisor()); + assertEquals(TieredMergePolicy.class, conf.getMergePolicy().getClass()); + + // Sanity check - validate that all getters are covered. + Set<String> getters = new HashSet<String>(); + getters.add("getAnalyzer"); + getters.add("getIndexCommit"); + getters.add("getIndexDeletionPolicy"); + getters.add("getMergeScheduler"); + getters.add("getOpenMode"); + getters.add("getSimilarity"); + getters.add("getTermIndexInterval"); + getters.add("getWriteLockTimeout"); + getters.add("getDefaultWriteLockTimeout"); + getters.add("getMaxBufferedDeleteTerms"); + getters.add("getRAMBufferSizeMB"); + getters.add("getMaxBufferedDocs"); + getters.add("getIndexingChain"); + getters.add("getMergedSegmentWarmer"); + getters.add("getMergePolicy"); + getters.add("getMaxThreadStates"); + getters.add("getReaderPooling"); + getters.add("getReaderTermsIndexDivisor"); + for (Method m : IndexWriterConfig.class.getDeclaredMethods()) { + if (m.getDeclaringClass() == IndexWriterConfig.class && m.getName().startsWith("get")) { + assertTrue("method " + m.getName() + " is not tested for defaults", getters.contains(m.getName())); + } + } + } + + @Test + public void testSettersChaining() throws Exception { + // Ensures that every setter returns IndexWriterConfig to enable easy + // chaining.
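
Because every setter hands back the same IndexWriterConfig instance, a writer configuration can be assembled in one fluent expression. A minimal sketch, assuming dir and analyzer are in scope:

    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_34, analyzer)
        .setOpenMode(OpenMode.CREATE_OR_APPEND)
        .setMaxBufferedDocs(1000)
        .setRAMBufferSizeMB(32.0)
        .setMergePolicy(new LogByteSizeMergePolicy());
    IndexWriter writer = new IndexWriter(dir, conf);
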
+ for (Method m : IndexWriterConfig.class.getDeclaredMethods()) { + if (m.getDeclaringClass() == IndexWriterConfig.class + && m.getName().startsWith("set") + && !Modifier.isStatic(m.getModifiers())) { + assertEquals("method " + m.getName() + " does not return IndexWriterConfig", + IndexWriterConfig.class, m.getReturnType()); + } + } + } + + @Test + public void testConstants() throws Exception { + // Tests that the values of the constants does not change + assertEquals(1000, IndexWriterConfig.WRITE_LOCK_TIMEOUT); + assertEquals(128, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL); + assertEquals(-1, IndexWriterConfig.DISABLE_AUTO_FLUSH); + assertEquals(IndexWriterConfig.DISABLE_AUTO_FLUSH, IndexWriterConfig.DEFAULT_MAX_BUFFERED_DELETE_TERMS); + assertEquals(IndexWriterConfig.DISABLE_AUTO_FLUSH, IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS); + assertEquals(16.0, IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB, 0.0); + assertEquals(false, IndexWriterConfig.DEFAULT_READER_POOLING); + assertEquals(8, IndexWriterConfig.DEFAULT_MAX_THREAD_STATES); + assertEquals(IndexReader.DEFAULT_TERMS_INDEX_DIVISOR, IndexWriterConfig.DEFAULT_READER_TERMS_INDEX_DIVISOR); + } + + @Test + public void testToString() throws Exception { + String str = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).toString(); + for (Field f : IndexWriterConfig.class.getDeclaredFields()) { + int modifiers = f.getModifiers(); + if (Modifier.isStatic(modifiers) && Modifier.isFinal(modifiers)) { + // Skip static final fields, they are only constants + continue; + } else if ("indexingChain".equals(f.getName())) { + // indexingChain is a package-private setting and thus is not output by + // toString. + continue; + } + assertTrue(f.getName() + " not found in toString", str.indexOf(f.getName()) != -1); + } + } + + @Test + public void testClone() throws Exception { + IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)); + IndexWriterConfig clone = (IndexWriterConfig) conf.clone(); + + // Clone is shallow since not all parameters are cloneable. 
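
Concretely, the shallow clone copies the config's fields but not the objects they reference, so re-setting a component on the original leaves the clone holding the instance both configs started with; that is what the assertions below verify. Roughly, assuming an analyzer in scope:

    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_34, analyzer);
    IndexWriterConfig clone = (IndexWriterConfig) conf.clone();
    conf.setMergeScheduler(new SerialMergeScheduler());
    // The clone still references the original ConcurrentMergeScheduler.
    assert clone.getMergeScheduler() instanceof ConcurrentMergeScheduler;
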
+ assertTrue(conf.getIndexDeletionPolicy() == clone.getIndexDeletionPolicy()); + + conf.setMergeScheduler(new SerialMergeScheduler()); + assertEquals(ConcurrentMergeScheduler.class, clone.getMergeScheduler().getClass()); + } + + @Test + public void testInvalidValues() throws Exception { + IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)); + + // Test IndexDeletionPolicy + assertEquals(KeepOnlyLastCommitDeletionPolicy.class, conf.getIndexDeletionPolicy().getClass()); + conf.setIndexDeletionPolicy(new SnapshotDeletionPolicy(null)); + assertEquals(SnapshotDeletionPolicy.class, conf.getIndexDeletionPolicy().getClass()); + conf.setIndexDeletionPolicy(null); + assertEquals(KeepOnlyLastCommitDeletionPolicy.class, conf.getIndexDeletionPolicy().getClass()); + + // Test MergeScheduler + assertEquals(ConcurrentMergeScheduler.class, conf.getMergeScheduler().getClass()); + conf.setMergeScheduler(new SerialMergeScheduler()); + assertEquals(SerialMergeScheduler.class, conf.getMergeScheduler().getClass()); + conf.setMergeScheduler(null); + assertEquals(ConcurrentMergeScheduler.class, conf.getMergeScheduler().getClass()); + + // Test Similarity + assertTrue(Similarity.getDefault() == conf.getSimilarity()); + conf.setSimilarity(new MySimilarity()); + assertEquals(MySimilarity.class, conf.getSimilarity().getClass()); + conf.setSimilarity(null); + assertTrue(Similarity.getDefault() == conf.getSimilarity()); + + // Test IndexingChain + assertTrue(DocumentsWriter.defaultIndexingChain == conf.getIndexingChain()); + conf.setIndexingChain(new MyIndexingChain()); + assertEquals(MyIndexingChain.class, conf.getIndexingChain().getClass()); + conf.setIndexingChain(null); + assertTrue(DocumentsWriter.defaultIndexingChain == conf.getIndexingChain()); + + try { + conf.setMaxBufferedDeleteTerms(0); + fail("should not have succeeded to set maxBufferedDeleteTerms to 0"); + } catch (IllegalArgumentException e) { + // this is expected + } + + try { + conf.setMaxBufferedDocs(1); + fail("should not have succeeded to set maxBufferedDocs to 1"); + } catch (IllegalArgumentException e) { + // this is expected + } + + try { + // Disable both MAX_BUF_DOCS and RAM_SIZE_MB + conf.setMaxBufferedDocs(4); + conf.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH); + conf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); + fail("should not have succeeded to disable maxBufferedDocs when ramBufferSizeMB is disabled as well"); + } catch (IllegalArgumentException e) { + // this is expected + } + + conf.setRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB); + conf.setMaxBufferedDocs(IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS); + try { + conf.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH); + fail("should not have succeeded to disable ramBufferSizeMB when maxBufferedDocs is disabled as well"); + } catch (IllegalArgumentException e) { + // this is expected + } + + // Test setReaderTermsIndexDivisor + try { + conf.setReaderTermsIndexDivisor(0); + fail("should not have succeeded to set termsIndexDivisor to 0"); + } catch (IllegalArgumentException e) { + // this is expected + } + + // Setting to -1 is ok + conf.setReaderTermsIndexDivisor(-1); + try { + conf.setReaderTermsIndexDivisor(-2); + fail("should not have succeeded to set termsIndexDivisor to < -1"); + } catch (IllegalArgumentException e) { + // this is expected + } + + assertEquals(IndexWriterConfig.DEFAULT_MAX_THREAD_STATES, conf.getMaxThreadStates()); + conf.setMaxThreadStates(5); + assertEquals(5, 
conf.getMaxThreadStates()); + conf.setMaxThreadStates(0); + assertEquals(IndexWriterConfig.DEFAULT_MAX_THREAD_STATES, conf.getMaxThreadStates()); + + // Test MergePolicy + assertEquals(TieredMergePolicy.class, conf.getMergePolicy().getClass()); + conf.setMergePolicy(new LogDocMergePolicy()); + assertEquals(LogDocMergePolicy.class, conf.getMergePolicy().getClass()); + conf.setMergePolicy(null); + assertEquals(LogByteSizeMergePolicy.class, conf.getMergePolicy().getClass()); + } + + /** + * @deprecated should be removed once all the deprecated setters are removed + * from IndexWriter. + */ + @Test @Deprecated + public void testIndexWriterSetters() throws Exception { + // This test intentionally tests deprecated methods. The purpose is to pass + // whatever the user set on IW to IWC, so that if the user calls + // iw.getConfig().getXYZ(), he'll get the same value he passed to + // iw.setXYZ(). + IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)); + Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, conf); + + writer.setSimilarity(new MySimilarity()); + assertEquals(MySimilarity.class, writer.getConfig().getSimilarity().getClass()); + + writer.setMaxBufferedDeleteTerms(4); + assertEquals(4, writer.getConfig().getMaxBufferedDeleteTerms()); + + writer.setMaxBufferedDocs(10); + assertEquals(10, writer.getConfig().getMaxBufferedDocs()); + + writer.setMergeScheduler(new SerialMergeScheduler()); + assertEquals(SerialMergeScheduler.class, writer.getConfig().getMergeScheduler().getClass()); + + writer.setRAMBufferSizeMB(1.5); + assertEquals(1.5, writer.getConfig().getRAMBufferSizeMB(), 0.0); + + writer.setTermIndexInterval(40); + assertEquals(40, writer.getConfig().getTermIndexInterval()); + + writer.setWriteLockTimeout(100); + assertEquals(100, writer.getConfig().getWriteLockTimeout()); + + writer.setMergedSegmentWarmer(new MyWarmer()); + assertEquals(MyWarmer.class, writer.getConfig().getMergedSegmentWarmer().getClass()); + + writer.setMergePolicy(new LogDocMergePolicy()); + assertEquals(LogDocMergePolicy.class, writer.getConfig().getMergePolicy().getClass()); + writer.close(); + dir.close(); + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestIndexWriterDelete.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestIndexWriterDelete.java new file mode 100644 index 0000000..5c92ae4 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestIndexWriterDelete.java @@ -0,0 +1,937 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.io.Reader; +import java.util.Arrays; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Random; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.Field.Index; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.Field.TermVector; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.MockDirectoryWrapper; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; + +public class TestIndexWriterDelete extends LuceneTestCase { + + // test the simple case + public void testSimpleCase() throws IOException { + String[] keywords = { "1", "2" }; + String[] unindexed = { "Netherlands", "Italy" }; + String[] unstored = { "Amsterdam has lots of bridges", + "Venice has lots of canals" }; + String[] text = { "Amsterdam", "Venice" }; + + Directory dir = newDirectory(); + IndexWriter modifier = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setMaxBufferedDeleteTerms(1)); + + for (int i = 0; i < keywords.length; i++) { + Document doc = new Document(); + doc.add(newField("id", keywords[i], Field.Store.YES, + Field.Index.NOT_ANALYZED)); + doc.add(newField("country", unindexed[i], Field.Store.YES, + Field.Index.NO)); + doc.add(newField("contents", unstored[i], Field.Store.NO, + Field.Index.ANALYZED)); + doc + .add(newField("city", text[i], Field.Store.YES, + Field.Index.ANALYZED)); + modifier.addDocument(doc); + } + modifier.optimize(); + modifier.commit(); + + Term term = new Term("city", "Amsterdam"); + int hitCount = getHitCount(dir, term); + assertEquals(1, hitCount); + modifier.deleteDocuments(term); + modifier.commit(); + hitCount = getHitCount(dir, term); + assertEquals(0, hitCount); + + modifier.close(); + dir.close(); + } + + // test when delete terms only apply to disk segments + public void testNonRAMDelete() throws IOException { + + Directory dir = newDirectory(); + IndexWriter modifier = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setMaxBufferedDocs(2) + .setMaxBufferedDeleteTerms(2)); + modifier.setInfoStream(VERBOSE ? 
System.out : null); + int id = 0; + int value = 100; + + for (int i = 0; i < 7; i++) { + addDoc(modifier, ++id, value); + } + modifier.commit(); + + assertEquals(0, modifier.getNumBufferedDocuments()); + assertTrue(0 < modifier.getSegmentCount()); + + modifier.commit(); + + IndexReader reader = IndexReader.open(dir, true); + assertEquals(7, reader.numDocs()); + reader.close(); + + modifier.deleteDocuments(new Term("value", String.valueOf(value))); + + modifier.commit(); + + reader = IndexReader.open(dir, true); + assertEquals(0, reader.numDocs()); + reader.close(); + modifier.close(); + dir.close(); + } + + public void testMaxBufferedDeletes() throws IOException { + Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setMaxBufferedDeleteTerms(1)); + + writer.setInfoStream(VERBOSE ? System.out : null); + writer.addDocument(new Document()); + writer.deleteDocuments(new Term("foobar", "1")); + writer.deleteDocuments(new Term("foobar", "1")); + writer.deleteDocuments(new Term("foobar", "1")); + assertEquals(3, writer.getFlushDeletesCount()); + writer.close(); + dir.close(); + } + + // test when delete terms only apply to ram segments + public void testRAMDeletes() throws IOException { + for(int t=0;t<2;t++) { + if (VERBOSE) { + System.out.println("TEST: t=" + t); + } + Directory dir = newDirectory(); + IndexWriter modifier = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setMaxBufferedDocs(4) + .setMaxBufferedDeleteTerms(4)); + modifier.setInfoStream(VERBOSE ? System.out : null); + int id = 0; + int value = 100; + + addDoc(modifier, ++id, value); + if (0 == t) + modifier.deleteDocuments(new Term("value", String.valueOf(value))); + else + modifier.deleteDocuments(new TermQuery(new Term("value", String.valueOf(value)))); + addDoc(modifier, ++id, value); + if (0 == t) { + modifier.deleteDocuments(new Term("value", String.valueOf(value))); + assertEquals(2, modifier.getNumBufferedDeleteTerms()); + assertEquals(1, modifier.getBufferedDeleteTermsSize()); + } + else + modifier.deleteDocuments(new TermQuery(new Term("value", String.valueOf(value)))); + + addDoc(modifier, ++id, value); + assertEquals(0, modifier.getSegmentCount()); + modifier.commit(); + + IndexReader reader = IndexReader.open(dir, true); + assertEquals(1, reader.numDocs()); + + int hitCount = getHitCount(dir, new Term("id", String.valueOf(id))); + assertEquals(1, hitCount); + reader.close(); + modifier.close(); + dir.close(); + } + } + + // test when delete terms apply to both disk and ram segments + public void testBothDeletes() throws IOException { + Directory dir = newDirectory(); + IndexWriter modifier = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setMaxBufferedDocs(100) + .setMaxBufferedDeleteTerms(100)); + + int id = 0; + int value = 100; + + for (int i = 0; i < 5; i++) { + addDoc(modifier, ++id, value); + } + + value = 200; + for (int i = 0; i < 5; i++) { + addDoc(modifier, ++id, value); + } + modifier.commit(); + + for (int i = 0; i < 5; i++) { + addDoc(modifier, ++id, value); + } + modifier.deleteDocuments(new Term("value", String.valueOf(value))); + + modifier.commit(); + + IndexReader reader = IndexReader.open(dir, true); + assertEquals(5, reader.numDocs()); + modifier.close(); + reader.close(); + dir.close(); + } + + // test that batched 
delete terms are flushed together + public void testBatchDeletes() throws IOException { + Directory dir = newDirectory(); + IndexWriter modifier = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setMaxBufferedDocs(2) + .setMaxBufferedDeleteTerms(2)); + + int id = 0; + int value = 100; + + for (int i = 0; i < 7; i++) { + addDoc(modifier, ++id, value); + } + modifier.commit(); + + IndexReader reader = IndexReader.open(dir, true); + assertEquals(7, reader.numDocs()); + reader.close(); + + id = 0; + modifier.deleteDocuments(new Term("id", String.valueOf(++id))); + modifier.deleteDocuments(new Term("id", String.valueOf(++id))); + + modifier.commit(); + + reader = IndexReader.open(dir, true); + assertEquals(5, reader.numDocs()); + reader.close(); + + Term[] terms = new Term[3]; + for (int i = 0; i < terms.length; i++) { + terms[i] = new Term("id", String.valueOf(++id)); + } + modifier.deleteDocuments(terms); + modifier.commit(); + reader = IndexReader.open(dir, true); + assertEquals(2, reader.numDocs()); + reader.close(); + + modifier.close(); + dir.close(); + } + + // test deleteAll() + public void testDeleteAll() throws IOException { + Directory dir = newDirectory(); + IndexWriter modifier = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setMaxBufferedDocs(2) + .setMaxBufferedDeleteTerms(2)); + + int id = 0; + int value = 100; + + for (int i = 0; i < 7; i++) { + addDoc(modifier, ++id, value); + } + modifier.commit(); + + IndexReader reader = IndexReader.open(dir, true); + assertEquals(7, reader.numDocs()); + reader.close(); + + // Add 1 doc (so we will have something buffered) + addDoc(modifier, 99, value); + + // Delete all + modifier.deleteAll(); + + // Delete all shouldn't be on disk yet + reader = IndexReader.open(dir, true); + assertEquals(7, reader.numDocs()); + reader.close(); + + // Add a doc and update a doc (after the deleteAll, before the commit) + addDoc(modifier, 101, value); + updateDoc(modifier, 102, value); + + // commit the delete all + modifier.commit(); + + // Validate there are no docs left + reader = IndexReader.open(dir, true); + assertEquals(2, reader.numDocs()); + reader.close(); + + modifier.close(); + dir.close(); + } + + // test rollback of deleteAll() + public void testDeleteAllRollback() throws IOException { + Directory dir = newDirectory(); + IndexWriter modifier = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setMaxBufferedDocs(2) + .setMaxBufferedDeleteTerms(2)); + + int id = 0; + int value = 100; + + for (int i = 0; i < 7; i++) { + addDoc(modifier, ++id, value); + } + modifier.commit(); + + addDoc(modifier, ++id, value); + + IndexReader reader = IndexReader.open(dir, true); + assertEquals(7, reader.numDocs()); + reader.close(); + + // Delete all + modifier.deleteAll(); + + // Roll it back + modifier.rollback(); + modifier.close(); + + // Validate that the docs are still there + reader = IndexReader.open(dir, true); + assertEquals(7, reader.numDocs()); + reader.close(); + + dir.close(); + } + + + // test deleteAll() w/ near real-time reader + public void testDeleteAllNRT() throws IOException { + Directory dir = newDirectory(); + IndexWriter modifier = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setMaxBufferedDocs(2) + .setMaxBufferedDeleteTerms(2)); 
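
The delete tests in this file all reduce to the same application-level contract: deletes are buffered inside the writer, become durable and visible only on commit(), and everything buffered since the last commit can be discarded with rollback(), which also closes the writer. In outline (a sketch; writer stands for any open IndexWriter):

    writer.deleteDocuments(new Term("id", "42"));  // buffered; readers do not see it yet
    writer.commit();                               // buffered adds and deletes become durable
    // ...or, to abandon everything since the last commit:
    writer.rollback();                             // drops buffered changes and closes the writer
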
+ + int id = 0; + int value = 100; + + for (int i = 0; i < 7; i++) { + addDoc(modifier, ++id, value); + } + modifier.commit(); + + IndexReader reader = modifier.getReader(); + assertEquals(7, reader.numDocs()); + reader.close(); + + addDoc(modifier, ++id, value); + addDoc(modifier, ++id, value); + + // Delete all + modifier.deleteAll(); + + reader = modifier.getReader(); + assertEquals(0, reader.numDocs()); + reader.close(); + + + // Roll it back + modifier.rollback(); + modifier.close(); + + // Validate that the docs are still there + reader = IndexReader.open(dir, true); + assertEquals(7, reader.numDocs()); + reader.close(); + + dir.close(); + } + + + private void updateDoc(IndexWriter modifier, int id, int value) + throws IOException { + Document doc = new Document(); + doc.add(newField("content", "aaa", Field.Store.NO, Field.Index.ANALYZED)); + doc.add(newField("id", String.valueOf(id), Field.Store.YES, + Field.Index.NOT_ANALYZED)); + doc.add(newField("value", String.valueOf(value), Field.Store.NO, + Field.Index.NOT_ANALYZED)); + modifier.updateDocument(new Term("id", String.valueOf(id)), doc); + } + + + private void addDoc(IndexWriter modifier, int id, int value) + throws IOException { + Document doc = new Document(); + doc.add(newField("content", "aaa", Field.Store.NO, Field.Index.ANALYZED)); + doc.add(newField("id", String.valueOf(id), Field.Store.YES, + Field.Index.NOT_ANALYZED)); + doc.add(newField("value", String.valueOf(value), Field.Store.NO, + Field.Index.NOT_ANALYZED)); + modifier.addDocument(doc); + } + + private int getHitCount(Directory dir, Term term) throws IOException { + IndexSearcher searcher = new IndexSearcher(dir, true); + int hitCount = searcher.search(new TermQuery(term), null, 1000).totalHits; + searcher.close(); + return hitCount; + } + + public void testDeletesOnDiskFull() throws IOException { + doTestOperationsOnDiskFull(false); + } + + public void testUpdatesOnDiskFull() throws IOException { + doTestOperationsOnDiskFull(true); + } + + /** + * Make sure if modifier tries to commit but hits disk full that modifier + * remains consistent and usable. Similar to TestIndexReader.testDiskFull(). + */ + private void doTestOperationsOnDiskFull(boolean updates) throws IOException { + + Term searchTerm = new Term("content", "aaa"); + int START_COUNT = 157; + int END_COUNT = 144; + + // First build up a starting index: + MockDirectoryWrapper startDir = newDirectory(); + // TODO: find the resource leak that only occurs sometimes here. 
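
Each pass of the loop below clones the starting index into a fresh MockDirectoryWrapper and then constrains it; the two settings doing the real work are the byte quota and the random fault rate. Roughly (values illustrative):

    MockDirectoryWrapper dir = new MockDirectoryWrapper(random, new RAMDirectory(startDir));
    dir.setMaxSizeInBytes(diskFree);       // writes past the quota throw IOException (disk full)
    dir.setRandomIOExceptionRate(0.1);     // additionally fail about 10% of I/O calls at random
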
+ startDir.setNoDeleteOpenFile(false); + IndexWriter writer = new IndexWriter(startDir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false))); + for (int i = 0; i < 157; i++) { + Document d = new Document(); + d.add(newField("id", Integer.toString(i), Field.Store.YES, + Field.Index.NOT_ANALYZED)); + d.add(newField("content", "aaa " + i, Field.Store.NO, + Field.Index.ANALYZED)); + writer.addDocument(d); + } + writer.close(); + + long diskUsage = startDir.sizeInBytes(); + long diskFree = diskUsage + 10; + + IOException err = null; + + boolean done = false; + + // Iterate w/ ever increasing free disk space: + while (!done) { + if (VERBOSE) { + System.out.println("TEST: cycle"); + } + MockDirectoryWrapper dir = new MockDirectoryWrapper(random, new RAMDirectory(startDir)); + dir.setPreventDoubleWrite(false); + IndexWriter modifier = new IndexWriter(dir, + newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)) + .setMaxBufferedDocs(1000) + .setMaxBufferedDeleteTerms(1000) + .setMergeScheduler(new ConcurrentMergeScheduler())); + ((ConcurrentMergeScheduler) modifier.getConfig().getMergeScheduler()).setSuppressExceptions(); + modifier.setInfoStream(VERBOSE ? System.out : null); + + // For each disk size, first try to commit against + // dir that will hit random IOExceptions & disk + // full; after, give it infinite disk space & turn + // off random IOExceptions & retry w/ same reader: + boolean success = false; + + for (int x = 0; x < 2; x++) { + if (VERBOSE) { + System.out.println("TEST: x=" + x); + } + + double rate = 0.1; + double diskRatio = ((double)diskFree) / diskUsage; + long thisDiskFree; + String testName; + + if (0 == x) { + thisDiskFree = diskFree; + if (diskRatio >= 2.0) { + rate /= 2; + } + if (diskRatio >= 4.0) { + rate /= 2; + } + if (diskRatio >= 6.0) { + rate = 0.0; + } + if (VERBOSE) { + System.out.println("\ncycle: " + diskFree + " bytes"); + } + testName = "disk full during reader.close() @ " + thisDiskFree + + " bytes"; + } else { + thisDiskFree = 0; + rate = 0.0; + if (VERBOSE) { + System.out.println("\ncycle: same writer: unlimited disk space"); + } + testName = "reader re-use after disk full"; + } + + dir.setMaxSizeInBytes(thisDiskFree); + dir.setRandomIOExceptionRate(rate); + + try { + if (0 == x) { + int docId = 12; + for (int i = 0; i < 13; i++) { + if (updates) { + Document d = new Document(); + d.add(newField("id", Integer.toString(i), Field.Store.YES, + Field.Index.NOT_ANALYZED)); + d.add(newField("content", "bbb " + i, Field.Store.NO, + Field.Index.ANALYZED)); + modifier.updateDocument(new Term("id", Integer.toString(docId)), d); + } else { // deletes + modifier.deleteDocuments(new Term("id", Integer.toString(docId))); + // modifier.setNorm(docId, "contents", (float)2.0); + } + docId += 12; + } + } + modifier.close(); + success = true; + if (0 == x) { + done = true; + } + } + catch (IOException e) { + if (VERBOSE) { + System.out.println(" hit IOException: " + e); + e.printStackTrace(System.out); + } + err = e; + if (1 == x) { + e.printStackTrace(); + fail(testName + " hit IOException after disk space was freed up"); + } + } + + if (!success) { + // Must force the close else the writer can have + // open files which cause exc in MockRAMDir.close + modifier.rollback(); + } + + // If the close() succeeded, make sure there are + // no unreferenced files. 
+ if (success) { + _TestUtil.checkIndex(dir); + TestIndexWriter.assertNoUnreferencedFiles(dir, "after writer.close"); + } + + // Finally, verify index is not corrupt, and, if + // we succeeded, we see all docs changed, and if + // we failed, we see either all docs or no docs + // changed (transactional semantics): + IndexReader newReader = null; + try { + newReader = IndexReader.open(dir, true); + } + catch (IOException e) { + e.printStackTrace(); + fail(testName + + ":exception when creating IndexReader after disk full during close: " + + e); + } + + IndexSearcher searcher = newSearcher(newReader); + ScoreDoc[] hits = null; + try { + hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; + } + catch (IOException e) { + e.printStackTrace(); + fail(testName + ": exception when searching: " + e); + } + int result2 = hits.length; + if (success) { + if (x == 0 && result2 != END_COUNT) { + fail(testName + + ": method did not throw exception but hits.length for search on term 'aaa' is " + + result2 + " instead of expected " + END_COUNT); + } else if (x == 1 && result2 != START_COUNT && result2 != END_COUNT) { + // It's possible that the first exception was + // "recoverable" wrt pending deletes, in which + // case the pending deletes are retained and + // then re-flushing (with plenty of disk + // space) will succeed in flushing the + // deletes: + fail(testName + + ": method did not throw exception but hits.length for search on term 'aaa' is " + + result2 + " instead of expected " + START_COUNT + " or " + END_COUNT); + } + } else { + // On hitting exception we still may have added + // all docs: + if (result2 != START_COUNT && result2 != END_COUNT) { + err.printStackTrace(); + fail(testName + + ": method did throw exception but hits.length for search on term 'aaa' is " + + result2 + " instead of expected " + START_COUNT + " or " + END_COUNT); + } + } + + searcher.close(); + newReader.close(); + } + + modifier.close(); + dir.close(); + + // Try again with 10 more bytes of free space: + diskFree += 10; + } + startDir.close(); + } + + // This test tests that buffered deletes are cleared when + // an Exception is hit during flush. 
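
That behavior, like the disk-full checks above, rests on Lucene's transactional contract: a flush or commit that fails must leave the index at its last successful commit point, never in a half-updated state. From the reader side the contract looks roughly like this (a sketch; writer and dir are illustrative):

    IndexReader before = IndexReader.open(dir, true);
    try {
      writer.commit();                             // may fail, e.g. disk full
    } catch (IOException e) {
      IndexReader after = IndexReader.open(dir, true);
      assert after.numDocs() == before.numDocs();  // index is unchanged
      after.close();
    }
    before.close();
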
+ public void testErrorAfterApplyDeletes() throws IOException { + + MockDirectoryWrapper.Failure failure = new MockDirectoryWrapper.Failure() { + boolean sawMaybe = false; + boolean failed = false; + Thread thread; + @Override + public MockDirectoryWrapper.Failure reset() { + thread = Thread.currentThread(); + sawMaybe = false; + failed = false; + return this; + } + @Override + public void eval(MockDirectoryWrapper dir) throws IOException { + if (Thread.currentThread() != thread) { + // don't fail during merging + return; + } + if (sawMaybe && !failed) { + boolean seen = false; + StackTraceElement[] trace = new Exception().getStackTrace(); + for (int i = 0; i < trace.length; i++) { + if ("applyDeletes".equals(trace[i].getMethodName())) { + seen = true; + break; + } + } + if (!seen) { + // Only fail once we are no longer in applyDeletes + failed = true; + if (VERBOSE) { + System.out.println("TEST: mock failure: now fail"); + new Throwable().printStackTrace(System.out); + } + throw new IOException("fail after applyDeletes"); + } + } + if (!failed) { + StackTraceElement[] trace = new Exception().getStackTrace(); + for (int i = 0; i < trace.length; i++) { + if ("applyDeletes".equals(trace[i].getMethodName())) { + if (VERBOSE) { + System.out.println("TEST: mock failure: saw applyDeletes"); + new Throwable().printStackTrace(System.out); + } + sawMaybe = true; + break; + } + } + } + } + }; + + // create a couple of files + + String[] keywords = { "1", "2" }; + String[] unindexed = { "Netherlands", "Italy" }; + String[] unstored = { "Amsterdam has lots of bridges", + "Venice has lots of canals" }; + String[] text = { "Amsterdam", "Venice" }; + + MockDirectoryWrapper dir = newDirectory(); + IndexWriter modifier = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setMaxBufferedDeleteTerms(2).setReaderPooling(false).setMergePolicy(newLogMergePolicy())); + modifier.setInfoStream(VERBOSE ? 
System.out : null); + + LogMergePolicy lmp = (LogMergePolicy) modifier.getConfig().getMergePolicy(); + lmp.setUseCompoundFile(true); + + dir.failOn(failure.reset()); + + for (int i = 0; i < keywords.length; i++) { + Document doc = new Document(); + doc.add(newField("id", keywords[i], Field.Store.YES, + Field.Index.NOT_ANALYZED)); + doc.add(newField("country", unindexed[i], Field.Store.YES, + Field.Index.NO)); + doc.add(newField("contents", unstored[i], Field.Store.NO, + Field.Index.ANALYZED)); + doc.add(newField("city", text[i], Field.Store.YES, + Field.Index.ANALYZED)); + modifier.addDocument(doc); + } + // flush (and commit if ac) + + if (VERBOSE) { + System.out.println("TEST: now optimize"); + } + + modifier.optimize(); + if (VERBOSE) { + System.out.println("TEST: now commit"); + } + modifier.commit(); + + // one of the two files hits + + Term term = new Term("city", "Amsterdam"); + int hitCount = getHitCount(dir, term); + assertEquals(1, hitCount); + + // open the writer again (closed above) + + // delete the doc + // max buf del terms is two, so this is buffered + + if (VERBOSE) { + System.out.println("TEST: delete term=" + term); + } + + modifier.deleteDocuments(term); + + // add a doc (needed for the !ac case; see below) + // doc remains buffered + + if (VERBOSE) { + System.out.println("TEST: add empty doc"); + } + Document doc = new Document(); + modifier.addDocument(doc); + + // commit the changes, the buffered deletes, and the new doc + + // The failure object will fail on the first write after the del + // file gets created when processing the buffered delete + + // in the ac case, this will be when writing the new segments + // files so we really don't need the new doc, but it's harmless + + // a new segments file won't be created but in this + // case, creation of the cfs file happens next so we + // need the doc (to test that it's okay that we don't + // lose deletes if failing while creating the cfs file) + boolean failed = false; + try { + if (VERBOSE) { + System.out.println("TEST: now commit for failure"); + } + modifier.commit(); + } catch (IOException ioe) { + // expected + failed = true; + } + + assertTrue(failed); + + // The commit above failed, so we need to retry it (which will + // succeed, because the failure is a one-shot) + + modifier.commit(); + + hitCount = getHitCount(dir, term); + + // Make sure the delete was successfully flushed: + assertEquals(0, hitCount); + + modifier.close(); + dir.close(); + } + + // This test tests that the files created by the docs writer before + // a segment is written are cleaned up if there's an i/o error + + public void testErrorInDocsWriterAdd() throws IOException { + + MockDirectoryWrapper.Failure failure = new MockDirectoryWrapper.Failure() { + boolean failed = false; + @Override + public MockDirectoryWrapper.Failure reset() { + failed = false; + return this; + } + @Override + public void eval(MockDirectoryWrapper dir) throws IOException { + if (!failed) { + failed = true; + throw new IOException("fail in add doc"); + } + } + }; + + // create a couple of files + + String[] keywords = { "1", "2" }; + String[] unindexed = { "Netherlands", "Italy" }; + String[] unstored = { "Amsterdam has lots of bridges", + "Venice has lots of canals" }; + String[] text = { "Amsterdam", "Venice" }; + + MockDirectoryWrapper dir = newDirectory(); + IndexWriter modifier = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false))); + modifier.commit(); + dir.failOn(failure.reset()); 
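
Both failure classes in this file plug into the same seam: MockDirectoryWrapper calls Failure.eval() on every simulated file operation, and the hook decides when to throw. Stripped down, fault injection here follows this shape (a sketch; the message is arbitrary):

    MockDirectoryWrapper.Failure failOnce = new MockDirectoryWrapper.Failure() {
      boolean fired;
      @Override
      public void eval(MockDirectoryWrapper d) throws IOException {
        if (!fired) {
          fired = true;
          throw new IOException("injected fault");
        }
      }
    };
    dir.failOn(failOnce.reset());  // reset() re-arms the hook and returns it
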
+ + for (int i = 0; i < keywords.length; i++) { + Document doc = new Document(); + doc.add(newField("id", keywords[i], Field.Store.YES, + Field.Index.NOT_ANALYZED)); + doc.add(newField("country", unindexed[i], Field.Store.YES, + Field.Index.NO)); + doc.add(newField("contents", unstored[i], Field.Store.NO, + Field.Index.ANALYZED)); + doc.add(newField("city", text[i], Field.Store.YES, + Field.Index.ANALYZED)); + try { + modifier.addDocument(doc); + } catch (IOException io) { + if (VERBOSE) { + System.out.println("TEST: got expected exc:"); + io.printStackTrace(System.out); + } + break; + } + } + + modifier.close(); + TestIndexWriter.assertNoUnreferencedFiles(dir, "docswriter abort() failed to delete unreferenced files"); + dir.close(); + } + + private String arrayToString(String[] l) { + String s = ""; + for (int i = 0; i < l.length; i++) { + if (i > 0) { + s += "\n "; + } + s += l[i]; + } + return s; + } + + public void testDeleteAllSlowly() throws Exception { + final Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random, dir); + final int NUM_DOCS = atLeast(1000); + final List ids = new ArrayList(NUM_DOCS); + for(int id=0;id docs = new ArrayList(); + final int count = _TestUtil.nextInt(r, 1, 20); + for(int c=0;c doFail = new ThreadLocal(); + + private class MockIndexWriter extends IndexWriter { + Random r = new Random(random.nextLong()); + + public MockIndexWriter(Directory dir, IndexWriterConfig conf) throws IOException { + super(dir, conf); + } + + @Override + boolean testPoint(String name) { + if (doFail.get() != null && !name.equals("startDoFlush") && r.nextInt(40) == 17) { + if (VERBOSE) { + System.out.println(Thread.currentThread().getName() + ": NOW FAIL: " + name); + new Throwable().printStackTrace(System.out); + } + throw new RuntimeException(Thread.currentThread().getName() + ": intentionally failing at " + name); + } + return true; + } + } + + public void testRandomExceptions() throws Throwable { + if (VERBOSE) { + System.out.println("\nTEST: start testRandomExceptions"); + } + MockDirectoryWrapper dir = newDirectory(); + + MockAnalyzer analyzer = new MockAnalyzer(random); + analyzer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases. 
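
MockIndexWriter above drives this test through IndexWriter's package-private testPoint(String) callback: the writer invokes it at named internal checkpoints, and a test subclass can throw from it to simulate a failure at an exact spot in the indexing pipeline. The random thrower above fails roughly one checkpoint in forty; a deterministic variant keys on a single name, as MockIndexWriter3 does further below:

    @Override
    boolean testPoint(String name) {
      if ("startMergeInit".equals(name)) {
        throw new RuntimeException("intentionally failing at " + name);
      }
      return true;
    }
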
+ MockIndexWriter writer = new MockIndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer) + .setRAMBufferSizeMB(0.1).setMergeScheduler(new ConcurrentMergeScheduler())); + ((ConcurrentMergeScheduler) writer.getConfig().getMergeScheduler()).setSuppressExceptions(); + //writer.setMaxBufferedDocs(10); + if (VERBOSE) { + System.out.println("TEST: initial commit"); + } + writer.commit(); + + if (VERBOSE) { + writer.setInfoStream(System.out); + } + + IndexerThread thread = new IndexerThread(0, writer); + thread.run(); + if (thread.failure != null) { + thread.failure.printStackTrace(System.out); + fail("thread " + thread.getName() + ": hit unexpected failure"); + } + + if (VERBOSE) { + System.out.println("TEST: commit after thread start"); + } + writer.commit(); + + try { + writer.close(); + } catch (Throwable t) { + System.out.println("exception during close:"); + t.printStackTrace(System.out); + writer.rollback(); + } + + // Confirm that when doc hits exception partway through tokenization, it's deleted: + IndexReader r2 = IndexReader.open(dir, true); + final int count = r2.docFreq(new Term("content4", "aaa")); + final int count2 = r2.docFreq(new Term("content4", "ddd")); + assertEquals(count, count2); + r2.close(); + + dir.close(); + } + + public void testRandomExceptionsThreads() throws Throwable { + MockDirectoryWrapper dir = newDirectory(); + MockAnalyzer analyzer = new MockAnalyzer(random); + analyzer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases. + MockIndexWriter writer = new MockIndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer) + .setRAMBufferSizeMB(0.2).setMergeScheduler(new ConcurrentMergeScheduler())); + ((ConcurrentMergeScheduler) writer.getConfig().getMergeScheduler()).setSuppressExceptions(); + //writer.setMaxBufferedDocs(10); + writer.commit(); + + if (VERBOSE) { + writer.setInfoStream(System.out); + } + + final int NUM_THREADS = 4; + + final IndexerThread[] threads = new IndexerThread[NUM_THREADS]; + for(int i=0;i= 4) + throw new IOException(CRASH_FAIL_MESSAGE); + return input.incrementToken(); + } + + @Override + public void reset() throws IOException { + super.reset(); + count = 0; + } + } + + public void testExceptionDocumentsWriterInit() throws IOException { + Directory dir = newDirectory(); + MockIndexWriter2 w = new MockIndexWriter2(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); + w.setInfoStream(VERBOSE ? System.out : null); + Document doc = new Document(); + doc.add(newField("field", "a field", Field.Store.YES, + Field.Index.ANALYZED)); + w.addDocument(doc); + w.doFail = true; + try { + w.addDocument(doc); + fail("did not hit exception"); + } catch (RuntimeException re) { + // expected + } + w.close(); + dir.close(); + } + + // LUCENE-1208 + public void testExceptionJustBeforeFlush() throws IOException { + Directory dir = newDirectory(); + MockIndexWriter w = new MockIndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2)); + w.setInfoStream(VERBOSE ? 
System.out : null); + Document doc = new Document(); + doc.add(newField("field", "a field", Field.Store.YES, + Field.Index.ANALYZED)); + w.addDocument(doc); + + Analyzer analyzer = new Analyzer() { + @Override + public TokenStream tokenStream(String fieldName, Reader reader) { + MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); + tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases. + return new CrashingFilter(fieldName, tokenizer); + } + }; + + Document crashDoc = new Document(); + crashDoc.add(newField("crash", "do it on token 4", Field.Store.YES, + Field.Index.ANALYZED)); + try { + w.addDocument(crashDoc, analyzer); + fail("did not hit expected exception"); + } catch (IOException ioe) { + // expected + } + w.addDocument(doc); + w.close(); + dir.close(); + } + + private static final class MockIndexWriter3 extends IndexWriter { + + public MockIndexWriter3(Directory dir, IndexWriterConfig conf) throws IOException { + super(dir, conf); + } + + boolean doFail; + boolean failed; + + @Override + boolean testPoint(String name) { + if (doFail && name.equals("startMergeInit")) { + failed = true; + throw new RuntimeException("intentionally failing"); + } + return true; + } + } + + + // LUCENE-1210 + public void testExceptionOnMergeInit() throws IOException { + Directory dir = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)) + .setMaxBufferedDocs(2).setMergeScheduler(new ConcurrentMergeScheduler()).setMergePolicy(newLogMergePolicy()); + ((LogMergePolicy) conf.getMergePolicy()).setMergeFactor(2); + MockIndexWriter3 w = new MockIndexWriter3(dir, conf); + w.doFail = true; + Document doc = new Document(); + doc.add(newField("field", "a field", Field.Store.YES, + Field.Index.ANALYZED)); + for(int i=0;i<10;i++) + try { + w.addDocument(doc); + } catch (RuntimeException re) { + break; + } + + ((ConcurrentMergeScheduler) w.getConfig().getMergeScheduler()).sync(); + assertTrue(w.failed); + w.close(); + dir.close(); + } + + // LUCENE-1072 + public void testExceptionFromTokenStream() throws IOException { + Directory dir = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig( TEST_VERSION_CURRENT, new Analyzer() { + + @Override + public TokenStream tokenStream(String fieldName, Reader reader) { + MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true); + tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases. 
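
The anonymous filter returned next throws on the sixth token; the assertions afterwards establish that a mid-stream exception poisons only the offending document. In application terms (a sketch; poisonDoc and goodDoc are hypothetical documents, the first of which fails analysis):

    try {
      writer.addDocument(poisonDoc);  // the analyzer throws while inverting this doc
    } catch (IOException expected) {
      // the partially indexed document is marked deleted; the writer stays usable
    }
    writer.addDocument(goodDoc);      // subsequent adds succeed as normal
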
+ return new TokenFilter(tokenizer) { + private int count = 0; + + @Override + public boolean incrementToken() throws IOException { + if (count++ == 5) { + throw new IOException(); + } + return input.incrementToken(); + } + }; + } + + }); + conf.setMaxBufferedDocs(Math.max(3, conf.getMaxBufferedDocs())); + + IndexWriter writer = new IndexWriter(dir, conf); + + Document doc = new Document(); + String contents = "aa bb cc dd ee ff gg hh ii jj kk"; + doc.add(newField("content", contents, Field.Store.NO, + Field.Index.ANALYZED)); + try { + writer.addDocument(doc); + fail("did not hit expected exception"); + } catch (Exception e) { + } + + // Make sure we can add another normal document + doc = new Document(); + doc.add(newField("content", "aa bb cc dd", Field.Store.NO, + Field.Index.ANALYZED)); + writer.addDocument(doc); + + // Make sure we can add another normal document + doc = new Document(); + doc.add(newField("content", "aa bb cc dd", Field.Store.NO, + Field.Index.ANALYZED)); + writer.addDocument(doc); + + writer.close(); + IndexReader reader = IndexReader.open(dir, true); + final Term t = new Term("content", "aa"); + assertEquals(3, reader.docFreq(t)); + + // Make sure the doc that hit the exception was marked + // as deleted: + TermDocs tdocs = reader.termDocs(t); + int count = 0; + while(tdocs.next()) { + count++; + } + assertEquals(2, count); + + assertEquals(reader.docFreq(new Term("content", "gg")), 0); + reader.close(); + dir.close(); + } + + private static class FailOnlyOnFlush extends MockDirectoryWrapper.Failure { + boolean doFail = false; + int count; + + @Override + public void setDoFail() { + this.doFail = true; + } + @Override + public void clearDoFail() { + this.doFail = false; + } + + @Override + public void eval(MockDirectoryWrapper dir) throws IOException { + if (doFail) { + StackTraceElement[] trace = new Exception().getStackTrace(); + boolean sawAppend = false; + boolean sawFlush = false; + for (int i = 0; i < trace.length; i++) { + if ("org.apache.lucene.index.FreqProxTermsWriter".equals(trace[i].getClassName()) && "appendPostings".equals(trace[i].getMethodName())) + sawAppend = true; + if ("doFlush".equals(trace[i].getMethodName())) + sawFlush = true; + } + + if (sawAppend && sawFlush && count++ >= 30) { + doFail = false; + throw new IOException("now failing during flush"); + } + } + } + } + + // LUCENE-1072: make sure an errant exception on flushing + // one segment only takes out those docs in that one flush + public void testDocumentsWriterAbort() throws IOException { + MockDirectoryWrapper dir = newDirectory(); + FailOnlyOnFlush failure = new FailOnlyOnFlush(); + failure.setDoFail(); + dir.failOn(failure); + + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2)); + Document doc = new Document(); + String contents = "aa bb cc dd ee ff gg hh ii jj kk"; + doc.add(newField("content", contents, Field.Store.NO, + Field.Index.ANALYZED)); + boolean hitError = false; + for(int i=0;i<200;i++) { + try { + writer.addDocument(doc); + } catch (IOException ioe) { + // only one flush should fail: + assertFalse(hitError); + hitError = true; + } + } + assertTrue(hitError); + writer.close(); + IndexReader reader = IndexReader.open(dir, true); + assertEquals(198, reader.docFreq(new Term("content", "aa"))); + reader.close(); + dir.close(); + } + + public void testDocumentsWriterExceptions() throws IOException { + Analyzer analyzer = new Analyzer() { + @Override + public TokenStream tokenStream(String 
fieldName, Reader reader) { + MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); + tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases. + return new CrashingFilter(fieldName, tokenizer); + } + }; + + for(int i=0;i<2;i++) { + if (VERBOSE) { + System.out.println("TEST: cycle i=" + i); + } + MockDirectoryWrapper dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer).setMergePolicy(newLogMergePolicy())); + writer.setInfoStream(VERBOSE ? System.out : null); + + // don't allow a sudden merge to clean up the deleted + // doc below: + LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy(); + lmp.setMergeFactor(Math.max(lmp.getMergeFactor(), 5)); + + Document doc = new Document(); + doc.add(newField("contents", "here are some contents", Field.Store.YES, + Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); + writer.addDocument(doc); + writer.addDocument(doc); + doc.add(newField("crash", "this should crash after 4 terms", Field.Store.YES, + Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); + doc.add(newField("other", "this will not get indexed", Field.Store.YES, + Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); + try { + writer.addDocument(doc); + fail("did not hit expected exception"); + } catch (IOException ioe) { + if (VERBOSE) { + System.out.println("TEST: hit expected exception"); + ioe.printStackTrace(System.out); + } + } + + if (0 == i) { + doc = new Document(); + doc.add(newField("contents", "here are some contents", Field.Store.YES, + Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); + writer.addDocument(doc); + writer.addDocument(doc); + } + writer.close(); + + if (VERBOSE) { + System.out.println("TEST: open reader"); + } + IndexReader reader = IndexReader.open(dir, true); + if (i == 0) { + int expected = 5; + assertEquals(expected, reader.docFreq(new Term("contents", "here"))); + assertEquals(expected, reader.maxDoc()); + int numDel = 0; + for(int j=0;j thrown = new ArrayList(); + final Directory dir = newDirectory(); + final IndexWriter writer = new IndexWriter(dir, + newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))) { + @Override + public void message(final String message) { + if (message.startsWith("now flush at close") && 0 == thrown.size()) { + thrown.add(null); + throw new OutOfMemoryError("fake OOME at " + message); + } + } + }; + + // need to set an info stream so message is called + writer.setInfoStream(new PrintStream(new ByteArrayOutputStream())); + try { + writer.close(); + fail("OutOfMemoryError expected"); + } + catch (final OutOfMemoryError expected) {} + + // throws IllegalStateEx w/o bug fix + writer.close(); + dir.close(); + } + + // LUCENE-1347 + private static final class MockIndexWriter4 extends IndexWriter { + + public MockIndexWriter4(Directory dir, IndexWriterConfig conf) throws IOException { + super(dir, conf); + } + + boolean doFail; + + @Override + boolean testPoint(String name) { + if (doFail && name.equals("rollback before checkpoint")) + throw new RuntimeException("intentionally failing"); + return true; + } + } + + // LUCENE-1347 + public void testRollbackExceptionHang() throws Throwable { + Directory dir = newDirectory(); + MockIndexWriter4 w = new MockIndexWriter4(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); + + addDoc(w); + w.doFail = true; + try { + w.rollback(); + 
fail("did not hit intentional RuntimeException"); + } catch (RuntimeException re) { + // expected + } + + w.doFail = false; + w.rollback(); + dir.close(); + } + + // LUCENE-1044: Simulate checksum error in segments_N + public void testSegmentsChecksumError() throws IOException { + Directory dir = newDirectory(); + + IndexWriter writer = null; + + writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); + + // add 100 documents + for (int i = 0; i < 100; i++) { + addDoc(writer); + } + + // close + writer.close(); + + long gen = SegmentInfos.getCurrentSegmentGeneration(dir); + assertTrue("segment generation should be > 0 but got " + gen, gen > 0); + + final String segmentsFileName = SegmentInfos.getCurrentSegmentFileName(dir); + IndexInput in = dir.openInput(segmentsFileName); + IndexOutput out = dir.createOutput(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", 1+gen)); + out.copyBytes(in, in.length()-1); + byte b = in.readByte(); + out.writeByte((byte) (1+b)); + out.close(); + in.close(); + + IndexReader reader = null; + try { + reader = IndexReader.open(dir, true); + } catch (IOException e) { + e.printStackTrace(System.out); + fail("segmentInfos failed to retry fallback to correct segments_N file"); + } + reader.close(); + dir.close(); + } + + // Simulate a corrupt index by removing last byte of + // latest segments file and make sure we get an + // IOException trying to open the index: + public void testSimulatedCorruptIndex1() throws IOException { + MockDirectoryWrapper dir = newDirectory(); + dir.setCheckIndexOnClose(false); // we are corrupting it! + + IndexWriter writer = null; + + writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); + + // add 100 documents + for (int i = 0; i < 100; i++) { + addDoc(writer); + } + + // close + writer.close(); + + long gen = SegmentInfos.getCurrentSegmentGeneration(dir); + assertTrue("segment generation should be > 0 but got " + gen, gen > 0); + + String fileNameIn = SegmentInfos.getCurrentSegmentFileName(dir); + String fileNameOut = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, + "", + 1+gen); + IndexInput in = dir.openInput(fileNameIn); + IndexOutput out = dir.createOutput(fileNameOut); + long length = in.length(); + for(int i=0;i 0 but got " + gen, gen > 0); + + String[] files = dir.listAll(); + boolean corrupted = false; + for(int i=0;i 0 but got " + gen, gen > 0); + + // Make the next segments file, with last byte + // missing, to simulate a writer that crashed while + // writing segments file: + String fileNameIn = SegmentInfos.getCurrentSegmentFileName(dir); + String fileNameOut = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, + "", + 1+gen); + IndexInput in = dir.openInput(fileNameIn); + IndexOutput out = dir.createOutput(fileNameOut); + long length = in.length(); + for(int i=0;i docs = new ArrayList(); + for(int docCount=0;docCount<7;docCount++) { + Document doc = new Document(); + docs.add(doc); + doc.add(newField("id", docCount+"", Field.Index.NOT_ANALYZED)); + doc.add(newField("content", "silly content " + docCount, Field.Index.ANALYZED)); + if (docCount == 4) { + Field f = newField("crash", "", Field.Index.ANALYZED); + doc.add(f); + MockTokenizer tokenizer = new MockTokenizer(new StringReader("crash me on the 4th token"), MockTokenizer.WHITESPACE, false); + tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases. 
+ f.setTokenStream(new CrashingFilter("crash", tokenizer)); + } + } + try { + w.addDocuments(docs); + // BUG: CrashingFilter didn't + fail("did not hit expected exception"); + } catch (IOException ioe) { + // expected + assertEquals(CRASH_FAIL_MESSAGE, ioe.getMessage()); + } + + final int numDocs2 = random.nextInt(25); + for(int docCount=0;docCount docs = new ArrayList(); + final int numDocs2 = random.nextInt(25); + for(int docCount=0;docCount= 18) { + noOverMerge = true; + } + } + assertTrue(noOverMerge); + + writer.close(); + dir.close(); + } + + // Test the case where flush is forced after every addDoc + public void testForceFlush() throws IOException { + Directory dir = newDirectory(); + + LogDocMergePolicy mp = new LogDocMergePolicy(); + mp.setMinMergeDocs(100); + mp.setMergeFactor(10); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random)) + .setMaxBufferedDocs(10).setMergePolicy(mp)); + + for (int i = 0; i < 100; i++) { + addDoc(writer); + writer.close(); + + mp = new LogDocMergePolicy(); + mp.setMergeFactor(10); + writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, + new MockAnalyzer(random)).setOpenMode( + OpenMode.APPEND).setMaxBufferedDocs(10).setMergePolicy(mp)); + mp.setMinMergeDocs(100); + checkInvariants(writer); + } + + writer.close(); + dir.close(); + } + + // Test the case where mergeFactor changes + public void testMergeFactorChange() throws IOException { + Directory dir = newDirectory(); + + IndexWriter writer = new IndexWriter( + dir, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). + setMaxBufferedDocs(10). + setMergePolicy(newLogMergePolicy()). + setMergeScheduler(new SerialMergeScheduler()) + ); + + writer.setInfoStream(VERBOSE ? 
System.out : null); + + for (int i = 0; i < 250; i++) { + addDoc(writer); + checkInvariants(writer); + } + + ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(5); + + // merge policy only fixes segments on levels where merges + // have been triggered, so check invariants after all adds + for (int i = 0; i < 10; i++) { + addDoc(writer); + } + checkInvariants(writer); + + writer.close(); + dir.close(); + } + + // Test the case where both mergeFactor and maxBufferedDocs change + public void testMaxBufferedDocsChange() throws IOException { + Directory dir = newDirectory(); + + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random)) + .setMaxBufferedDocs(101).setMergePolicy(new LogDocMergePolicy()) + .setMergeScheduler(new SerialMergeScheduler())); + + // leftmost* segment has 1 doc + // rightmost* segment has 100 docs + for (int i = 1; i <= 100; i++) { + for (int j = 0; j < i; j++) { + addDoc(writer); + checkInvariants(writer); + } + writer.close(); + + writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, + new MockAnalyzer(random)).setOpenMode( + OpenMode.APPEND).setMaxBufferedDocs(101).setMergePolicy(new LogDocMergePolicy()) + .setMergeScheduler(new SerialMergeScheduler())); + } + + writer.close(); + LogDocMergePolicy ldmp = new LogDocMergePolicy(); + ldmp.setMergeFactor(10); + writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, + new MockAnalyzer(random)).setOpenMode( + OpenMode.APPEND).setMaxBufferedDocs(10).setMergePolicy(ldmp).setMergeScheduler(new SerialMergeScheduler())); + + // merge policy only fixes segments on levels where merges + // have been triggered, so check invariants after all adds + for (int i = 0; i < 100; i++) { + addDoc(writer); + } + checkInvariants(writer); + + for (int i = 100; i < 1000; i++) { + addDoc(writer); + } + writer.commit(); + writer.waitForMerges(); + writer.commit(); + checkInvariants(writer); + + writer.close(); + dir.close(); + } + + // Test the case where a merge results in no doc at all + public void testMergeDocCount0() throws IOException { + Directory dir = newDirectory(); + + LogDocMergePolicy ldmp = new LogDocMergePolicy(); + ldmp.setMergeFactor(100); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random)) + .setMaxBufferedDocs(10).setMergePolicy(ldmp)); + + for (int i = 0; i < 250; i++) { + addDoc(writer); + checkInvariants(writer); + } + writer.close(); + + IndexReader reader = IndexReader.open(dir, false); + reader.deleteDocuments(new Term("content", "aaa")); + reader.close(); + + ldmp = new LogDocMergePolicy(); + ldmp.setMergeFactor(5); + writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, + new MockAnalyzer(random)).setOpenMode( + OpenMode.APPEND).setMaxBufferedDocs(10).setMergePolicy(ldmp).setMergeScheduler(new ConcurrentMergeScheduler())); + + // merge factor is changed, so check invariants after all adds + for (int i = 0; i < 10; i++) { + addDoc(writer); + } + writer.commit(); + writer.waitForMerges(); + writer.commit(); + checkInvariants(writer); + assertEquals(10, writer.maxDoc()); + + writer.close(); + dir.close(); + } + + private void addDoc(IndexWriter writer) throws IOException { + Document doc = new Document(); + doc.add(newField("content", "aaa", Field.Store.NO, Field.Index.ANALYZED)); + writer.addDocument(doc); + } + + private void checkInvariants(IndexWriter writer) throws IOException { + writer.waitForMerges(); + int 
maxBufferedDocs = writer.getConfig().getMaxBufferedDocs(); + int mergeFactor = ((LogMergePolicy) writer.getConfig().getMergePolicy()).getMergeFactor(); + int maxMergeDocs = ((LogMergePolicy) writer.getConfig().getMergePolicy()).getMaxMergeDocs(); + + int ramSegmentCount = writer.getNumBufferedDocuments(); + assertTrue(ramSegmentCount < maxBufferedDocs); + + int lowerBound = -1; + int upperBound = maxBufferedDocs; + int numSegments = 0; + + int segmentCount = writer.getSegmentCount(); + for (int i = segmentCount - 1; i >= 0; i--) { + int docCount = writer.getDocCount(i); + assertTrue("docCount=" + docCount + " lowerBound=" + lowerBound + " upperBound=" + upperBound + " i=" + i + " segmentCount=" + segmentCount + " index=" + writer.segString() + " config=" + writer.getConfig(), docCount > lowerBound); + + if (docCount <= upperBound) { + numSegments++; + } else { + if (upperBound * mergeFactor <= maxMergeDocs) { + assertTrue("maxMergeDocs=" + maxMergeDocs + "; numSegments=" + numSegments + "; upperBound=" + upperBound + "; mergeFactor=" + mergeFactor + "; segs=" + writer.segString() + " config=" + writer.getConfig(), numSegments < mergeFactor); + } + + do { + lowerBound = upperBound; + upperBound *= mergeFactor; + } while (docCount > upperBound); + numSegments = 1; + } + } + if (upperBound * mergeFactor <= maxMergeDocs) { + assertTrue(numSegments < mergeFactor); + } + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestIndexWriterMerging.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestIndexWriterMerging.java new file mode 100644 index 0000000..e37019d --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestIndexWriterMerging.java @@ -0,0 +1,303 @@ +package org.apache.lucene.index; +/** + * Copyright 2006 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.store.Directory; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.Field.Index; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.Field.TermVector; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.util.LuceneTestCase; + +import java.io.IOException; +import java.util.Random; + + +public class TestIndexWriterMerging extends LuceneTestCase +{ + + /** + * Tests that index merging (specifically addIndexes(Directory...)) doesn't + * change the index order of documents. 
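+ * Two source indexes are filled with sequentially numbered "count"
+ * fields, merged into a third directory, and re-read to verify that
+ * document i still carries count == i.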
+ */ + public void testLucene() throws IOException { + int num=100; + + Directory indexA = newDirectory(); + Directory indexB = newDirectory(); + + fillIndex(random, indexA, 0, num); + boolean fail = verifyIndex(indexA, 0); + if (fail) + { + fail("Index a is invalid"); + } + + fillIndex(random, indexB, num, num); + fail = verifyIndex(indexB, num); + if (fail) + { + fail("Index b is invalid"); + } + + Directory merged = newDirectory(); + + IndexWriter writer = new IndexWriter( + merged, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). + setMergePolicy(newLogMergePolicy(2)) + ); + writer.setInfoStream(VERBOSE ? System.out : null); + writer.addIndexes(new Directory[]{indexA, indexB}); + writer.optimize(); + writer.close(); + + fail = verifyIndex(merged, 0); + + assertFalse("The merged index is invalid", fail); + indexA.close(); + indexB.close(); + merged.close(); + } + + private boolean verifyIndex(Directory directory, int startAt) throws IOException + { + boolean fail = false; + IndexReader reader = IndexReader.open(directory, true); + + int max = reader.maxDoc(); + for (int i = 0; i < max; i++) + { + Document temp = reader.document(i); + //System.out.println("doc "+i+"="+temp.getField("count").stringValue()); + //compare the index doc number to the value that it should be + if (!temp.getField("count").stringValue().equals((i + startAt) + "")) + { + fail = true; + System.out.println("Document " + (i + startAt) + " is returning document " + temp.getField("count").stringValue()); + } + } + reader.close(); + return fail; + } + + private void fillIndex(Random random, Directory dir, int start, int numDocs) throws IOException { + + IndexWriter writer = new IndexWriter( + dir, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). + setOpenMode(OpenMode.CREATE). + setMaxBufferedDocs(2). + setMergePolicy(newLogMergePolicy(2)) + ); + + for (int i = start; i < (start + numDocs); i++) + { + Document temp = new Document(); + temp.add(newField("count", (""+i), Field.Store.YES, Field.Index.NOT_ANALYZED)); + + writer.addDocument(temp); + } + writer.close(); + } + + // LUCENE-325: test expungeDeletes, when 2 singular merges + // are required + public void testExpungeDeletes() throws IOException { + Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random)) + .setMaxBufferedDocs(2).setRAMBufferSizeMB( + IndexWriterConfig.DISABLE_AUTO_FLUSH)); + writer.setInfoStream(VERBOSE ? 
System.out : null); + Document document = new Document(); + + document = new Document(); + Field storedField = newField("stored", "stored", Field.Store.YES, + Field.Index.NO); + document.add(storedField); + Field termVectorField = newField("termVector", "termVector", + Field.Store.NO, Field.Index.NOT_ANALYZED, + Field.TermVector.WITH_POSITIONS_OFFSETS); + document.add(termVectorField); + for(int i=0;i<10;i++) + writer.addDocument(document); + writer.close(); + + IndexReader ir = IndexReader.open(dir, false); + assertEquals(10, ir.maxDoc()); + assertEquals(10, ir.numDocs()); + ir.deleteDocument(0); + ir.deleteDocument(7); + assertEquals(8, ir.numDocs()); + ir.close(); + + writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); + assertEquals(8, writer.numDocs()); + assertEquals(10, writer.maxDoc()); + writer.expungeDeletes(); + assertEquals(8, writer.numDocs()); + writer.close(); + ir = IndexReader.open(dir, true); + assertEquals(8, ir.maxDoc()); + assertEquals(8, ir.numDocs()); + ir.close(); + dir.close(); + } + + // LUCENE-325: test expungeDeletes, when many adjacent merges are required + public void testExpungeDeletes2() throws IOException { + Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter( + dir, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). + setMaxBufferedDocs(2). + setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH). + setMergePolicy(newLogMergePolicy(50)) + ); + + Document document = new Document(); + + document = new Document(); + Field storedField = newField("stored", "stored", Store.YES, + Index.NO); + document.add(storedField); + Field termVectorField = newField("termVector", "termVector", + Store.NO, Index.NOT_ANALYZED, + TermVector.WITH_POSITIONS_OFFSETS); + document.add(termVectorField); + for(int i=0;i<98;i++) + writer.addDocument(document); + writer.close(); + + IndexReader ir = IndexReader.open(dir, false); + assertEquals(98, ir.maxDoc()); + assertEquals(98, ir.numDocs()); + for(int i=0;i<98;i+=2) + ir.deleteDocument(i); + assertEquals(49, ir.numDocs()); + ir.close(); + + writer = new IndexWriter( + dir, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). + setMergePolicy(newLogMergePolicy(3)) + ); + assertEquals(49, writer.numDocs()); + writer.expungeDeletes(); + writer.close(); + ir = IndexReader.open(dir, true); + assertEquals(49, ir.maxDoc()); + assertEquals(49, ir.numDocs()); + ir.close(); + dir.close(); + } + + // LUCENE-325: test expungeDeletes without waiting, when + // many adjacent merges are required + public void testExpungeDeletes3() throws IOException { + Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter( + dir, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). + setMaxBufferedDocs(2). + setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH). 
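+ // with the RAM buffer disabled, maxBufferedDocs=2 flushes a tiny
+ // segment every two documents, and the mergeFactor of 50 below keeps
+ // them from merging, leaving many small segments for expungeDeletes.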
+ setMergePolicy(newLogMergePolicy(50)) + ); + + Document document = new Document(); + + document = new Document(); + Field storedField = newField("stored", "stored", Field.Store.YES, + Field.Index.NO); + document.add(storedField); + Field termVectorField = newField("termVector", "termVector", + Field.Store.NO, Field.Index.NOT_ANALYZED, + Field.TermVector.WITH_POSITIONS_OFFSETS); + document.add(termVectorField); + for(int i=0;i<98;i++) + writer.addDocument(document); + writer.close(); + + IndexReader ir = IndexReader.open(dir, false); + assertEquals(98, ir.maxDoc()); + assertEquals(98, ir.numDocs()); + for(int i=0;i<98;i+=2) + ir.deleteDocument(i); + assertEquals(49, ir.numDocs()); + ir.close(); + + writer = new IndexWriter( + dir, + newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)). + setMergePolicy(newLogMergePolicy(3)) + ); + writer.expungeDeletes(false); + writer.close(); + ir = IndexReader.open(dir, true); + assertEquals(49, ir.maxDoc()); + assertEquals(49, ir.numDocs()); + ir.close(); + dir.close(); + } + + // Just intercepts all merges & verifies that we are never + // merging a segment with >= 20 (maxMergeDocs) docs + private class MyMergeScheduler extends MergeScheduler { + @Override + synchronized public void merge(IndexWriter writer) + throws CorruptIndexException, IOException { + + while(true) { + MergePolicy.OneMerge merge = writer.getNextMerge(); + if (merge == null) { + break; + } + for(int i=0;i= 2.0) { + rate /= 2; + } + if (diskRatio >= 4.0) { + rate /= 2; + } + if (diskRatio >= 6.0) { + rate = 0.0; + } + if (VERBOSE) + testName = "disk full test " + methodName + " with disk full at " + diskFree + " bytes"; + } else { + thisDiskFree = 0; + rate = 0.0; + if (VERBOSE) + testName = "disk full test " + methodName + " with unlimited disk space"; + } + + if (VERBOSE) + System.out.println("\ncycle: " + testName); + + dir.setTrackDiskUsage(true); + dir.setMaxSizeInBytes(thisDiskFree); + dir.setRandomIOExceptionRate(rate); + + try { + + if (0 == method) { + writer.addIndexes(dirs); + writer.optimize(); + } else if (1 == method) { + IndexReader readers[] = new IndexReader[dirs.length]; + for(int i=0;i cmd = new ArrayList(); + cmd.add(System.getProperty("java.home") + + System.getProperty("file.separator") + + "bin" + + System.getProperty("file.separator") + + "java"); + cmd.add("-Xmx512m"); + cmd.add("-Dtests.crashmode=true"); + // passing NIGHTLY to this test makes it run for much longer, easier to catch it in the act... + cmd.add("-Dtests.nightly=true"); + cmd.add("-DtempDir=" + tempDir.getPath()); + cmd.add("-Dtests.seed=" + random.nextLong() + ":" + random.nextLong()); + cmd.add("-ea"); + cmd.add("-cp"); + cmd.add(System.getProperty("java.class.path")); + cmd.add("org.junit.runner.JUnitCore"); + cmd.add(getClass().getName()); + ProcessBuilder pb = new ProcessBuilder(cmd); + pb.directory(tempDir); + pb.redirectErrorStream(true); + Process p = pb.start(); + InputStream is = p.getInputStream(); + BufferedInputStream isl = new BufferedInputStream(is); + byte buffer[] = new byte[1024]; + int len = 0; + if (VERBOSE) System.err.println(">>> Begin subprocess output"); + while ((len = isl.read(buffer)) != -1) { + if (VERBOSE) { + System.err.write(buffer, 0, len); + } + } + if (VERBOSE) System.err.println("<<< End subprocess output"); + p.waitFor(); + } + + /** + * Recursively looks for indexes underneath file, + * and runs checkindex on them. returns true if it found any indexes. 
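+ * Each directory is opened with checkIndexOnClose disabled so the index
+ * is not verified twice, and recursion stops at the first index found.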
+ */ + public boolean checkIndexes(File file) throws IOException { + if (file.isDirectory()) { + MockDirectoryWrapper dir = newFSDirectory(file); + dir.setCheckIndexOnClose(false); // don't double-checkindex + if (IndexReader.indexExists(dir)) { + if (VERBOSE) { + System.err.println("Checking index: " + file); + } + _TestUtil.checkIndex(dir); + dir.close(); + return true; + } + dir.close(); + for (File f : file.listFiles()) + if (checkIndexes(f)) + return true; + } + return false; + } + + /** + * currently, this only works/tested on Sun and IBM. + */ + public void crashJRE() { + try { + Class clazz = Class.forName("sun.misc.Unsafe"); + // we should use getUnsafe instead, harmony implements it, etc. + Field field = clazz.getDeclaredField("theUnsafe"); + field.setAccessible(true); + Object o = field.get(null); + Method m = clazz.getMethod("putAddress", long.class, long.class); + m.invoke(o, 0L, 0L); + } catch (Exception e) { e.printStackTrace(); } + fail(); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestIndexWriterOptimize.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestIndexWriterOptimize.java new file mode 100644 index 0000000..523e253 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestIndexWriterOptimize.java @@ -0,0 +1,215 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.Field.Index; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.Field.TermVector; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.MockDirectoryWrapper; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; + +public class TestIndexWriterOptimize extends LuceneTestCase { + public void testOptimizeMaxNumSegments() throws IOException { + + MockDirectoryWrapper dir = newDirectory(); + + final Document doc = new Document(); + doc.add(newField("content", "aaa", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS)); + final int incrMin = TEST_NIGHTLY ? 
15 : 40; + for(int numDocs=10;numDocs<500;numDocs += _TestUtil.nextInt(random, incrMin, 5*incrMin)) { + LogDocMergePolicy ldmp = new LogDocMergePolicy(); + ldmp.setMinMergeDocs(1); + ldmp.setMergeFactor(5); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random)) + .setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(2).setMergePolicy( + ldmp)); + for(int j=0;j failures = new ArrayList(); + IndexReader[] readers; + boolean didClose = false; + AtomicInteger count = new AtomicInteger(0); + AtomicInteger numaddIndexes = new AtomicInteger(0); + + public AddDirectoriesThreads(int numDirs, IndexWriter mainWriter) throws Throwable { + this.numDirs = numDirs; + this.mainWriter = mainWriter; + addDir = newDirectory(); + IndexWriter writer = new IndexWriter(addDir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2)); + for (int i = 0; i < NUM_INIT_DOCS; i++) { + Document doc = createDocument(i, "addindex", 4); + writer.addDocument(doc); + } + + writer.close(); + + readers = new IndexReader[numDirs]; + for (int i = 0; i < numDirs; i++) + readers[i] = IndexReader.open(addDir, false); + } + + void joinThreads() { + for (int i = 0; i < NUM_THREADS; i++) + try { + threads[i].join(); + } catch (InterruptedException ie) { + throw new ThreadInterruptedException(ie); + } + } + + void close(boolean doWait) throws Throwable { + didClose = true; + if (doWait) { + mainWriter.waitForMerges(); + } + mainWriter.close(doWait); + } + + void closeDir() throws Throwable { + for (int i = 0; i < numDirs; i++) + readers[i].close(); + addDir.close(); + } + + void handle(Throwable t) { + t.printStackTrace(System.out); + synchronized (failures) { + failures.add(t); + } + } + + void launchThreads(final int numIter) { + for (int i = 0; i < NUM_THREADS; i++) { + threads[i] = new Thread() { + @Override + public void run() { + try { + final Directory[] dirs = new Directory[numDirs]; + for (int k = 0; k < numDirs; k++) + dirs[k] = new MockDirectoryWrapper(random, new RAMDirectory(addDir)); + //int j = 0; + //while (true) { + // System.out.println(Thread.currentThread().getName() + ": iter + // j=" + j); + for (int x=0; x < numIter; x++) { + // only do addIndexes + doBody(x, dirs); + } + //if (numIter > 0 && j == numIter) + // break; + //doBody(j++, dirs); + //doBody(5, dirs); + //} + } catch (Throwable t) { + handle(t); + } + } + }; + } + for (int i = 0; i < NUM_THREADS; i++) + threads[i].start(); + } + + void doBody(int j, Directory[] dirs) throws Throwable { + switch (j % 4) { + case 0: + mainWriter.addIndexes(dirs); + mainWriter.optimize(); + break; + case 1: + mainWriter.addIndexes(dirs); + numaddIndexes.incrementAndGet(); + break; + case 2: + mainWriter.addIndexes(readers); + break; + case 3: + mainWriter.commit(); + } + count.addAndGet(dirs.length*NUM_INIT_DOCS); + } + } + + public void testIndexWriterReopenSegmentOptimize() throws Exception { + doTestIndexWriterReopenSegment(true); + } + + public void testIndexWriterReopenSegment() throws Exception { + doTestIndexWriterReopenSegment(false); + } + + /** + * Tests creating a segment, then check to insure the segment can be seen via + * IW.getReader + */ + public void doTestIndexWriterReopenSegment(boolean optimize) throws Exception { + Directory dir1 = newDirectory(); + IndexWriter writer = new IndexWriter(dir1, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); + writer.setInfoStream(infoStream); + IndexReader r1 = writer.getReader(); + assertEquals(0, 
r1.maxDoc()); + createIndexNoClose(false, "index1", writer); + writer.flush(!optimize, true); + + IndexReader iwr1 = writer.getReader(); + assertEquals(100, iwr1.maxDoc()); + + IndexReader r2 = writer.getReader(); + assertEquals(r2.maxDoc(), 100); + // add 100 documents + for (int x = 10000; x < 10000 + 100; x++) { + Document d = createDocument(x, "index1", 5); + writer.addDocument(d); + } + writer.flush(false, true); + // verify the reader was reopened internally + IndexReader iwr2 = writer.getReader(); + assertTrue(iwr2 != r1); + assertEquals(200, iwr2.maxDoc()); + // should have flushed out a segment + IndexReader r3 = writer.getReader(); + assertTrue(r2 != r3); + assertEquals(200, r3.maxDoc()); + + // dec ref the readers rather than close them because + // closing flushes changes to the writer + r1.close(); + iwr1.close(); + r2.close(); + r3.close(); + iwr2.close(); + writer.close(); + + // test whether the changes made it to the directory + writer = new IndexWriter(dir1, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); + IndexReader w2r1 = writer.getReader(); + // insure the deletes were actually flushed to the directory + assertEquals(200, w2r1.maxDoc()); + w2r1.close(); + writer.close(); + + dir1.close(); + } + + + public static Document createDocument(int n, String indexName, int numFields) { + StringBuilder sb = new StringBuilder(); + Document doc = new Document(); + doc.add(new Field("id", Integer.toString(n), Store.YES, Index.NOT_ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); + doc.add(new Field("indexname", indexName, Store.YES, Index.NOT_ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); + sb.append("a"); + sb.append(n); + doc.add(new Field("field1", sb.toString(), Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); + sb.append(" b"); + sb.append(n); + for (int i = 1; i < numFields; i++) { + doc.add(new Field("field" + (i + 1), sb.toString(), Store.YES, + Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); + } + return doc; + } + + /* + * Delete a document by term and return the doc id + * + * public static int deleteDocument(Term term, IndexWriter writer) throws + * IOException { IndexReader reader = writer.getReader(); TermDocs td = + * reader.termDocs(term); int doc = -1; //if (td.next()) { // doc = td.doc(); + * //} //writer.deleteDocuments(term); td.close(); return doc; } + */ + + public static void createIndex(Random random, Directory dir1, String indexName, + boolean multiSegment) throws IOException { + IndexWriter w = new IndexWriter(dir1, LuceneTestCase.newIndexWriterConfig(random, + TEST_VERSION_CURRENT, new MockAnalyzer(random)) + .setMergePolicy(new LogDocMergePolicy())); + for (int i = 0; i < 100; i++) { + w.addDocument(createDocument(i, indexName, 4)); + if (multiSegment && (i % 10) == 0) { + } + } + if (!multiSegment) { + w.optimize(); + } + w.close(); + } + + public static void createIndexNoClose(boolean multiSegment, String indexName, + IndexWriter w) throws IOException { + for (int i = 0; i < 100; i++) { + w.addDocument(createDocument(i, indexName, 4)); + } + if (!multiSegment) { + w.optimize(); + } + } + + private static class MyWarmer extends IndexWriter.IndexReaderWarmer { + int warmCount; + @Override + public void warm(IndexReader reader) throws IOException { + warmCount++; + } + } + + public void testMergeWarmer() throws Exception { + + Directory dir1 = newDirectory(); + // Enroll warmer + MyWarmer warmer = new MyWarmer(); + IndexWriter writer = new IndexWriter( + dir1, + newIndexWriterConfig(TEST_VERSION_CURRENT, new 
MockAnalyzer(random)). + setMaxBufferedDocs(2). + setMergedSegmentWarmer(warmer). + setMergeScheduler(new ConcurrentMergeScheduler()). + setMergePolicy(newLogMergePolicy()) + ); + writer.setInfoStream(infoStream); + + // create the index + createIndexNoClose(false, "test", writer); + + // get a reader to put writer into near real-time mode + IndexReader r1 = writer.getReader(); + + ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(2); + + int num = atLeast(100); + for (int i = 0; i < num; i++) { + writer.addDocument(createDocument(i, "test", 4)); + } + ((ConcurrentMergeScheduler) writer.getConfig().getMergeScheduler()).sync(); + + assertTrue(warmer.warmCount > 0); + final int count = warmer.warmCount; + + writer.addDocument(createDocument(17, "test", 4)); + writer.optimize(); + assertTrue(warmer.warmCount > count); + + writer.close(); + r1.close(); + dir1.close(); + } + + public void testAfterCommit() throws Exception { + Directory dir1 = newDirectory(); + IndexWriter writer = new IndexWriter(dir1, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergeScheduler(new ConcurrentMergeScheduler())); + writer.commit(); + writer.setInfoStream(infoStream); + + // create the index + createIndexNoClose(false, "test", writer); + + // get a reader to put writer into near real-time mode + IndexReader r1 = writer.getReader(); + _TestUtil.checkIndex(dir1); + writer.commit(); + _TestUtil.checkIndex(dir1); + assertEquals(100, r1.numDocs()); + + for (int i = 0; i < 10; i++) { + writer.addDocument(createDocument(i, "test", 4)); + } + ((ConcurrentMergeScheduler) writer.getConfig().getMergeScheduler()).sync(); + + IndexReader r2 = r1.reopen(); + if (r2 != r1) { + r1.close(); + r1 = r2; + } + assertEquals(110, r1.numDocs()); + writer.close(); + r1.close(); + dir1.close(); + } + + // Make sure reader remains usable even if IndexWriter closes + public void testAfterClose() throws Exception { + Directory dir1 = newDirectory(); + IndexWriter writer = new IndexWriter(dir1, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); + writer.setInfoStream(infoStream); + + // create the index + createIndexNoClose(false, "test", writer); + + IndexReader r = writer.getReader(); + writer.close(); + + _TestUtil.checkIndex(dir1); + + // reader should remain usable even after IndexWriter is closed: + assertEquals(100, r.numDocs()); + Query q = new TermQuery(new Term("indexname", "test")); + IndexSearcher searcher = newSearcher(r); + assertEquals(100, searcher.search(q, 10).totalHits); + searcher.close(); + try { + r.reopen(); + fail("failed to hit AlreadyClosedException"); + } catch (AlreadyClosedException ace) { + // expected + } + r.close(); + dir1.close(); + } + + // Stress test reopen during addIndexes + public void testDuringAddIndexes() throws Exception { + MockDirectoryWrapper dir1 = newDirectory(); + final IndexWriter writer = new IndexWriter( + dir1, + newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)). 
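+ // mergeFactor=2 (set below) keeps merges running continuously, so the
+ // reader reopens in the threads spawned later race against addIndexes().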
+ setMergePolicy(newLogMergePolicy(2)) + ); + writer.setInfoStream(infoStream); + ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(2); + + // create the index + createIndexNoClose(false, "test", writer); + writer.commit(); + + final Directory[] dirs = new Directory[10]; + for (int i=0;i<10;i++) { + dirs[i] = new MockDirectoryWrapper(random, new RAMDirectory(dir1)); + } + + IndexReader r = writer.getReader(); + + final int NUM_THREAD = 5; + final float SECONDS = 0.5f; + + final long endTime = (long) (System.currentTimeMillis() + 1000.*SECONDS); + final List excs = Collections.synchronizedList(new ArrayList()); + + final Thread[] threads = new Thread[NUM_THREAD]; + for(int i=0;i= lastCount); + lastCount = count; + } + + for(int i=0;i= lastCount); + + assertEquals(0, excs.size()); + r.close(); + assertEquals(0, dir1.getOpenDeletedFiles().size()); + + writer.close(); + + dir1.close(); + } + + // Stress test reopen during add/delete + public void testDuringAddDelete() throws Exception { + Directory dir1 = newDirectory(); + final IndexWriter writer = new IndexWriter( + dir1, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). + setMergePolicy(newLogMergePolicy(2)) + ); + writer.setInfoStream(infoStream); + ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(2); + + // create the index + createIndexNoClose(false, "test", writer); + writer.commit(); + + IndexReader r = writer.getReader(); + + final int NUM_THREAD = 5; + final float SECONDS = 0.5f; + + final long endTime = (long) (System.currentTimeMillis() + 1000.*SECONDS); + final List excs = Collections.synchronizedList(new ArrayList()); + + final Thread[] threads = new Thread[NUM_THREAD]; + for(int i=0;i 0); + + assertEquals(0, excs.size()); + writer.close(); + + r.close(); + dir1.close(); + } + + public void testExpungeDeletes() throws Throwable { + Directory dir = newDirectory(); + final IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); + Document doc = new Document(); + doc.add(newField("field", "a b c", Field.Store.NO, Field.Index.ANALYZED)); + Field id = newField("id", "", Field.Store.NO, Field.Index.NOT_ANALYZED); + doc.add(id); + id.setValue("0"); + w.addDocument(doc); + id.setValue("1"); + w.addDocument(doc); + w.deleteDocuments(new Term("id", "0")); + + IndexReader r = w.getReader(); + w.expungeDeletes(); + w.close(); + r.close(); + r = IndexReader.open(dir, true); + assertEquals(1, r.numDocs()); + assertFalse(r.hasDeletions()); + r.close(); + dir.close(); + } + + public void testDeletesNumDocs() throws Throwable { + Directory dir = newDirectory(); + final IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); + Document doc = new Document(); + doc.add(newField("field", "a b c", Field.Store.NO, Field.Index.ANALYZED)); + Field id = newField("id", "", Field.Store.NO, Field.Index.NOT_ANALYZED); + doc.add(id); + id.setValue("0"); + w.addDocument(doc); + id.setValue("1"); + w.addDocument(doc); + IndexReader r = w.getReader(); + assertEquals(2, r.numDocs()); + r.close(); + + w.deleteDocuments(new Term("id", "0")); + r = w.getReader(); + assertEquals(1, r.numDocs()); + r.close(); + + w.deleteDocuments(new Term("id", "1")); + r = w.getReader(); + assertEquals(0, r.numDocs()); + r.close(); + + w.close(); + dir.close(); + } + + public void testEmptyIndex() throws Exception { + // Ensures that getReader works on an empty index, which hasn't 
been committed yet. + Directory dir = newDirectory(); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); + IndexReader r = w.getReader(); + assertEquals(0, r.numDocs()); + r.close(); + w.close(); + dir.close(); + } + + public void testSegmentWarmer() throws Exception { + Directory dir = newDirectory(); + final AtomicBoolean didWarm = new AtomicBoolean(); + IndexWriter w = new IndexWriter( + dir, + newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)). + setMaxBufferedDocs(2). + setReaderPooling(true). + setMergedSegmentWarmer(new IndexWriter.IndexReaderWarmer() { + @Override + public void warm(IndexReader r) throws IOException { + IndexSearcher s = newSearcher(r); + TopDocs hits = s.search(new TermQuery(new Term("foo", "bar")), 10); + assertEquals(20, hits.totalHits); + didWarm.set(true); + s.close(); + } + }). + setMergePolicy(newLogMergePolicy(10)) + ); + + Document doc = new Document(); + doc.add(newField("foo", "bar", Field.Store.YES, Field.Index.NOT_ANALYZED)); + for(int i=0;i<20;i++) { + w.addDocument(doc); + } + w.waitForMerges(); + w.close(); + dir.close(); + assertTrue(didWarm.get()); + } + + public void testNoTermsIndex() throws Exception { + Directory dir = newDirectory(); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)) + .setReaderTermsIndexDivisor(-1)); + Document doc = new Document(); + doc.add(new Field("f", "val", Store.NO, Index.ANALYZED)); + w.addDocument(doc); + IndexReader r = IndexReader.open(w, true); + try { + r.termDocs(new Term("f", "val")); + fail("should have failed to seek since terms index was not loaded"); + } catch (IllegalStateException e) { + // expected - we didn't load the term index + } finally { + r.close(); + w.close(); + dir.close(); + } + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestIndexWriterUnicode.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestIndexWriterUnicode.java new file mode 100644 index 0000000..f6df136 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestIndexWriterUnicode.java @@ -0,0 +1,288 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Random; +import java.util.Set; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.UnicodeUtil; + +public class TestIndexWriterUnicode extends LuceneTestCase { + + final String[] utf8Data = new String[] { + // unpaired low surrogate + "ab\udc17cd", "ab\ufffdcd", + "\udc17abcd", "\ufffdabcd", + "\udc17", "\ufffd", + "ab\udc17\udc17cd", "ab\ufffd\ufffdcd", + "\udc17\udc17abcd", "\ufffd\ufffdabcd", + "\udc17\udc17", "\ufffd\ufffd", + + // unpaired high surrogate + "ab\ud917cd", "ab\ufffdcd", + "\ud917abcd", "\ufffdabcd", + "\ud917", "\ufffd", + "ab\ud917\ud917cd", "ab\ufffd\ufffdcd", + "\ud917\ud917abcd", "\ufffd\ufffdabcd", + "\ud917\ud917", "\ufffd\ufffd", + + // backwards surrogates + "ab\udc17\ud917cd", "ab\ufffd\ufffdcd", + "\udc17\ud917abcd", "\ufffd\ufffdabcd", + "\udc17\ud917", "\ufffd\ufffd", + "ab\udc17\ud917\udc17\ud917cd", "ab\ufffd\ud917\udc17\ufffdcd", + "\udc17\ud917\udc17\ud917abcd", "\ufffd\ud917\udc17\ufffdabcd", + "\udc17\ud917\udc17\ud917", "\ufffd\ud917\udc17\ufffd" + }; + + private int nextInt(int lim) { + return random.nextInt(lim); + } + + private int nextInt(int start, int end) { + return start + nextInt(end-start); + } + + private boolean fillUnicode(char[] buffer, char[] expected, int offset, int count) { + final int len = offset + count; + boolean hasIllegal = false; + + if (offset > 0 && buffer[offset] >= 0xdc00 && buffer[offset] < 0xe000) + // Don't start in the middle of a valid surrogate pair + offset--; + + for(int i=offset;i> 10) + UnicodeUtil.UNI_SUR_HIGH_START); + chars[len++] = (char) (((ch-0x0010000) & 0x3FFL) + UnicodeUtil.UNI_SUR_LOW_START); + } + + UnicodeUtil.UTF16toUTF8(chars, 0, len, utf8); + + String s1 = new String(chars, 0, len); + String s2 = new String(utf8.result, 0, utf8.length, "UTF-8"); + assertEquals("codepoint " + ch, s1, s2); + + UnicodeUtil.UTF8toUTF16(utf8.result, 0, utf8.length, utf16); + assertEquals("codepoint " + ch, s1, new String(utf16.result, 0, utf16.length)); + + byte[] b = s1.getBytes("UTF-8"); + assertEquals(utf8.length, b.length); + for(int j=0;j= 5) + break; + } else { + if (noErrors) { + System.out.println(Thread.currentThread().getName() + ": ERROR: unexpected IOException:"); + ioe.printStackTrace(System.out); + error = ioe; + } + break; + } + } catch (Throwable t) { + //t.printStackTrace(System.out); + if (noErrors) { + System.out.println(Thread.currentThread().getName() + ": ERROR: unexpected Throwable:"); + t.printStackTrace(System.out); + error = t; + } + break; + } + } while(System.currentTimeMillis() < stopTime); + } + } + + // LUCENE-1130: make sure immediate disk full on creating + // an IndexWriter (hit during DW.ThreadState.init()), with + // multiple threads, is OK: + public void testImmediateDiskFullWithThreads() throws Exception { + + int NUM_THREADS = 3; + + for(int iter=0;iter<10;iter++) { + if (VERBOSE) { + System.out.println("\nTEST: iter=" + iter); + } + MockDirectoryWrapper dir = newDirectory(); + IndexWriter writer = new IndexWriter( + dir, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). + setMaxBufferedDocs(2). + setMergeScheduler(new ConcurrentMergeScheduler()). 
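+ // the tiny disk quota set below (4KB + 20*iter via setMaxSizeInBytes)
+ // makes the very first flushes and merges hit "disk full" while several
+ // indexer threads are running concurrently.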
+ setMergePolicy(newLogMergePolicy(4)) + ); + ((ConcurrentMergeScheduler) writer.getConfig().getMergeScheduler()).setSuppressExceptions(); + dir.setMaxSizeInBytes(4*1024+20*iter); + writer.setInfoStream(VERBOSE ? System.out : null); + + IndexerThread[] threads = new IndexerThread[NUM_THREADS]; + + for(int i=0;i 0) { + done = true; + break; + } + } + + writer.close(false); + + // Make sure threads that are adding docs are not hung: + for(int i=0;i 0); + reader.close(); + + dir.close(); + } + } + + // Runs test, with multiple threads, using the specific + // failure to trigger an IOException + public void _testMultipleThreadsFailure(MockDirectoryWrapper.Failure failure) throws Exception { + + int NUM_THREADS = 3; + + for(int iter=0;iter<2;iter++) { + if (VERBOSE) { + System.out.println("TEST: iter=" + iter); + } + MockDirectoryWrapper dir = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, + new MockAnalyzer(random)).setMaxBufferedDocs(2) + .setMergeScheduler(new ConcurrentMergeScheduler()) + .setMergePolicy(newLogMergePolicy(4)); + // We expect disk full exceptions in the merge threads + ((ConcurrentMergeScheduler) conf.getMergeScheduler()).setSuppressExceptions(); + IndexWriter writer = new IndexWriter(dir, conf); + writer.setInfoStream(VERBOSE ? System.out : null); + + IndexerThread[] threads = new IndexerThread[NUM_THREADS]; + + for(int i=0;i dataset = new HashSet(Arrays.asList(data)); + + private static String MAGIC_FIELD = "f"+(NUM_FIELDS/3); + + private static Directory directory; + + @BeforeClass + public static void beforeClass() throws Exception { + directory = makeIndex(); + } + + @AfterClass + public static void afterClass() throws Exception { + directory.close(); + directory = null; + } + + private static FieldSelector SELECTOR = new FieldSelector() { + public FieldSelectorResult accept(String f) { + if (f.equals(MAGIC_FIELD)) { + return FieldSelectorResult.LOAD; + } + return FieldSelectorResult.LAZY_LOAD; + } + }; + + private static Directory makeIndex() throws Exception { + Directory dir = newDirectory(); + try { + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); + LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy(); + lmp.setUseCompoundFile(false); + + for (int d = 1; d <= NUM_DOCS; d++) { + Document doc = new Document(); + for (int f = 1; f <= NUM_FIELDS; f++ ) { + doc.add(newField("f"+f, + data[f % data.length] + + '#' + data[random.nextInt(data.length)], + Field.Store.NO, + Field.Index.ANALYZED)); + } + writer.addDocument(doc); + } + writer.close(); + } catch (Exception e) { + throw new RuntimeException(e); + } + return dir; + } + + public void doTest(int[] docs) throws Exception { + IndexReader reader = IndexReader.open(directory, true); + for (int i = 0; i < docs.length; i++) { + Document d = reader.document(docs[i], SELECTOR); + d.get(MAGIC_FIELD); + + List fields = d.getFields(); + for (Iterator fi = fields.iterator(); fi.hasNext(); ) { + Fieldable f=null; + try { + f = fi.next(); + String fname = f.name(); + String fval = f.stringValue(); + assertNotNull(docs[i]+" FIELD: "+fname, fval); + String[] vals = fval.split("#"); + if (!dataset.contains(vals[0]) || !dataset.contains(vals[1])) { + fail("FIELD:"+fname+",VAL:"+fval); + } + } catch (Exception e) { + throw new Exception(docs[i]+" WTF: "+f.name(), e); + } + } + } + reader.close(); + } + + public void testLazyWorks() throws Exception { + doTest(new int[] { 
NUM_DOCS-1 }); + } + + public void testLazyAlsoWorks() throws Exception { + doTest(new int[] { NUM_DOCS-1, NUM_DOCS/2 }); + } + + public void testLazyBroken() throws Exception { + doTest(new int[] { NUM_DOCS/2, NUM_DOCS-1 }); + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestLazyProxSkipping.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestLazyProxSkipping.java new file mode 100755 index 0000000..dcdce4b --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestLazyProxSkipping.java @@ -0,0 +1,209 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.Searcher; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.MockDirectoryWrapper; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.LuceneTestCase; + +/** + * Tests lazy skipping on the proximity file. + * + */ +public class TestLazyProxSkipping extends LuceneTestCase { + private Searcher searcher; + private int seeksCounter = 0; + + private String field = "tokens"; + private String term1 = "xx"; + private String term2 = "yy"; + private String term3 = "zz"; + + private class SeekCountingDirectory extends MockDirectoryWrapper { + public SeekCountingDirectory(Directory delegate) { + super(random, delegate); + } + + @Override + public IndexInput openInput(String name) throws IOException { + IndexInput ii = super.openInput(name); + if (name.endsWith(".prx")) { + // we decorate the proxStream with a wrapper class that allows to count the number of calls of seek() + ii = new SeeksCountingStream(ii); + } + return ii; + } + + } + + private void createIndex(int numHits) throws IOException { + int numDocs = 500; + + Directory directory = new SeekCountingDirectory(new RAMDirectory()); + // note: test explicitly disables payloads + IndexWriter writer = new IndexWriter( + directory, + newIndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)). + setMaxBufferedDocs(10). 
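+ // assumption: newLogMergePolicy(false) maps to useCompoundFile(false),
+ // keeping positions in a standalone .prx file that SeekCountingDirectory
+ // can recognize by name and wrap with the seek-counting stream.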
+ setMergePolicy(newLogMergePolicy(false)) + ); + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + String content; + if (i % (numDocs / numHits) == 0) { + // add a document that matches the query "term1 term2" + content = this.term1 + " " + this.term2; + } else if (i % 15 == 0) { + // add a document that only contains term1 + content = this.term1 + " " + this.term1; + } else { + // add a document that contains term2 but not term 1 + content = this.term3 + " " + this.term2; + } + + doc.add(newField(this.field, content, Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(doc); + } + + // make sure the index has only a single segment + writer.optimize(); + writer.close(); + + SegmentReader reader = SegmentReader.getOnlySegmentReader(directory); + + this.searcher = newSearcher(reader); + } + + private ScoreDoc[] search() throws IOException { + // create PhraseQuery "term1 term2" and search + PhraseQuery pq = new PhraseQuery(); + pq.add(new Term(this.field, this.term1)); + pq.add(new Term(this.field, this.term2)); + return this.searcher.search(pq, null, 1000).scoreDocs; + } + + private void performTest(int numHits) throws IOException { + createIndex(numHits); + this.seeksCounter = 0; + ScoreDoc[] hits = search(); + // verify that the right number of docs was found + assertEquals(numHits, hits.length); + + // check if the number of calls of seek() does not exceed the number of hits + assertTrue(this.seeksCounter > 0); + assertTrue(this.seeksCounter <= numHits + 1); + } + + public void testLazySkipping() throws IOException { + // test whether only the minimum amount of seeks() + // are performed + performTest(5); + searcher.close(); + performTest(10); + searcher.close(); + } + + public void testSeek() throws IOException { + Directory directory = newDirectory(); + IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); + for (int i = 0; i < 10; i++) { + Document doc = new Document(); + doc.add(newField(this.field, "a b", Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(doc); + } + + writer.close(); + IndexReader reader = IndexReader.open(directory, true); + TermPositions tp = reader.termPositions(); + tp.seek(new Term(this.field, "b")); + for (int i = 0; i < 10; i++) { + tp.next(); + assertEquals(tp.doc(), i); + assertEquals(tp.nextPosition(), 1); + } + tp.seek(new Term(this.field, "a")); + for (int i = 0; i < 10; i++) { + tp.next(); + assertEquals(tp.doc(), i); + assertEquals(tp.nextPosition(), 0); + } + reader.close(); + directory.close(); + + } + + + // Simply extends IndexInput in a way that we are able to count the number + // of invocations of seek() + class SeeksCountingStream extends IndexInput { + private IndexInput input; + + + SeeksCountingStream(IndexInput input) { + this.input = input; + } + + @Override + public byte readByte() throws IOException { + return this.input.readByte(); + } + + @Override + public void readBytes(byte[] b, int offset, int len) throws IOException { + this.input.readBytes(b, offset, len); + } + + @Override + public void close() throws IOException { + this.input.close(); + } + + @Override + public long getFilePointer() { + return this.input.getFilePointer(); + } + + @Override + public void seek(long pos) throws IOException { + TestLazyProxSkipping.this.seeksCounter++; + this.input.seek(pos); + } + + @Override + public long length() { + return this.input.length(); + } + + @Override + public Object clone() { + return new SeeksCountingStream((IndexInput) 
this.input.clone()); + } + + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestLongPostings.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestLongPostings.java new file mode 100644 index 0000000..fd473d1 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestLongPostings.java @@ -0,0 +1,261 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.File; +import java.io.IOException; +import java.io.StringReader; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.OpenBitSet; +import org.apache.lucene.util._TestUtil; + +public class TestLongPostings extends LuceneTestCase { + + // Produces a realistic unicode random string that + // survives MockAnalyzer unchanged: + private String getRandomTerm(String other) throws IOException { + Analyzer a = new MockAnalyzer(random); + while(true) { + String s = _TestUtil.randomRealisticUnicodeString(random); + if (other != null && s.equals(other)) { + continue; + } + final TokenStream ts = a.tokenStream("foo", new StringReader(s)); + final TermAttribute termAtt = ts.getAttribute(TermAttribute.class); + int count = 0; + ts.reset(); + while(ts.incrementToken()) { + if (count == 0 && !termAtt.term().equals(s)) { + break; + } + count++; + } + if (count == 1) { + return s; + } + } + } + + public void testLongPostings() throws Exception { + // Don't use _TestUtil.getTempDir so that we own the + // randomness (ie same seed will point to same dir): + Directory dir = newFSDirectory(_TestUtil.getTempDir("longpostings" + "." 
+ random.nextLong())); + + final int NUM_DOCS = atLeast(2000); + + if (VERBOSE) { + System.out.println("TEST: NUM_DOCS=" + NUM_DOCS); + } + + final String s1 = getRandomTerm(null); + final String s2 = getRandomTerm(s1); + + if (VERBOSE) { + System.out.println("\nTEST: s1=" + s1 + " s2=" + s2); + /* + for(int idx=0;idx 0); + assertTrue(r.docFreq(new Term("field", s2)) > 0); + + final byte[] payload = new byte[100]; + + int num = atLeast(1000); + for(int iter=0;iter expected = new ArrayList(); + + @Override + public void setUp() throws Exception { + super.setUp(); + dir = newDirectory(); + IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, + new MockAnalyzer(random, MockTokenizer.SIMPLE, true)).setMergePolicy(newLogMergePolicy()); + config.setSimilarity(new TestSimilarity()); + RandomIndexWriter writer = new RandomIndexWriter(random, dir, config); + Document doc = new Document(); + Field foo = newField("foo", "", Field.Store.NO, Field.Index.ANALYZED); + doc.add(foo); + for (int i = 0; i < 100; i++) { + foo.setValue(addValue()); + writer.addDocument(doc); + } + reader = writer.getReader(); + writer.close(); + } + + @Override + public void tearDown() throws Exception { + reader.close(); + dir.close(); + super.tearDown(); + } + + public void test() throws Exception { + byte fooNorms[] = reader.norms("foo"); + for (int i = 0; i < reader.maxDoc(); i++) + assertEquals(expected.get(i).intValue(), fooNorms[i] & 0xff); + } + + /** + * Makes a bunch of single-char tokens (the max freq will at most be 255). + * shuffles them around, and returns the whole list with Arrays.toString(). + * This works fine because we use lettertokenizer. + * puts the max-frequency term into expected, to be checked against the norm. + */ + private String addValue() { + List terms = new ArrayList(); + int maxCeiling = _TestUtil.nextInt(random, 0, 255); + int max = 0; + for (char ch = 'a'; ch <= 'z'; ch++) { + int num = _TestUtil.nextInt(random, 0, maxCeiling); + for (int i = 0; i < num; i++) + terms.add(Character.toString(ch)); + max = Math.max(max, num); + } + expected.add(max); + Collections.shuffle(terms, random); + return Arrays.toString(terms.toArray(new String[terms.size()])); + } + + /** + * Simple similarity that encodes maxTermFrequency directly as a byte + */ + class TestSimilarity extends DefaultSimilarity { + + @Override + public byte encodeNormValue(float f) { + return (byte) f; + } + + @Override + public float decodeNormValue(byte b) { + return (float) b; + } + + @Override + public float computeNorm(String field, FieldInvertState state) { + return (float) state.getMaxTermFrequency(); + } + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestMultiLevelSkipList.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestMultiLevelSkipList.java new file mode 100644 index 0000000..2f9ea31 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestMultiLevelSkipList.java @@ -0,0 +1,173 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.LowerCaseTokenizer;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field.Index;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.LuceneTestCase;
+
+/**
+ * This testcase tests whether multi-level skipping is being used
+ * to reduce I/O while skipping through posting lists.
+ *
+ * Skipping in general is already covered by several other
+ * testcases.
+ *
+ */
+public class TestMultiLevelSkipList extends LuceneTestCase {
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    PayloadFilter.count = 0;
+  }
+
+  public void testSimpleSkip() throws IOException {
+    RAMDirectory dir = new RAMDirectory();
+    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()).setMergePolicy(newLogMergePolicy()));
+    Term term = new Term("test", "a");
+    for (int i = 0; i < 5000; i++) {
+      Document d1 = new Document();
+      d1.add(newField(term.field(), term.text(), Store.NO, Index.ANALYZED));
+      writer.addDocument(d1);
+    }
+    writer.commit();
+    writer.optimize();
+    writer.close();
+
+    IndexReader reader = SegmentReader.getOnlySegmentReader(dir);
+    SegmentTermPositions tp = (SegmentTermPositions) reader.termPositions();
+    tp.freqStream = new CountingStream(tp.freqStream);
+
+    for (int i = 0; i < 2; i++) {
+      counter = 0;
+      tp.seek(term);
+
+      checkSkipTo(tp, 14, 185);  // no skips
+      checkSkipTo(tp, 17, 190);  // one skip on level 0
+      checkSkipTo(tp, 287, 200); // one skip on level 1, two on level 0
+
+      // this test would fail if we had only one skip level,
+      // because then more bytes would be read from the freqStream
+      checkSkipTo(tp, 4800, 250); // one skip on level 2
+    }
+  }
+
+  public void checkSkipTo(TermPositions tp, int target, int maxCounter) throws IOException {
+    tp.skipTo(target);
+    if (maxCounter < counter) {
+      fail("Too many bytes read: " + counter + " vs " + maxCounter);
+    }
+
+    assertEquals("Wrong document " + tp.doc() + " after skipTo target " + target, target, tp.doc());
+    assertEquals("Frequency is not 1: " + tp.freq(), 1, tp.freq());
+    tp.nextPosition();
+    byte[] b = new byte[1];
+    tp.getPayload(b, 0);
+    assertEquals("Wrong payload for the target " + target + ": " + b[0], (byte) target, b[0]);
+  }
+
+  private static class PayloadAnalyzer extends Analyzer {
+    @Override
+    public TokenStream tokenStream(String fieldName, Reader reader) {
+      return new PayloadFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, reader));
+    }
+
+  }
+
+  private static class PayloadFilter extends TokenFilter {
+    static int count = 0;
+
+    PayloadAttribute payloadAtt;
+
+    protected PayloadFilter(TokenStream input) {
+      super(input);
+      payloadAtt = addAttribute(PayloadAttribute.class);
+    }
+
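+    // Each token produced below carries a one-byte payload holding a running
+    // count; with one token per document, the payload byte of document N is
+    // (byte) N, which is exactly what checkSkipTo() asserts after skipTo(N).
+    // Reading a payload back through the 3.x TermPositions API looks roughly
+    // like this (a sketch, not part of the original test):
+    //
+    //   tp.skipTo(target);
+    //   tp.nextPosition();
+    //   if (tp.isPayloadAvailable()) {
+    //     byte[] data = tp.getPayload(new byte[tp.getPayloadLength()], 0);
+    //   }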
+    @Override
+    public boolean incrementToken() throws IOException {
+      boolean hasNext = input.incrementToken();
+      if (hasNext) {
+        payloadAtt.setPayload(new Payload(new byte[] { (byte) count++ }));
+      }
+      return hasNext;
+    }
+
+  }
+
+  private int counter = 0;
+
+  // Simply extends IndexInput so that we can count the number
+  // of bytes read
+  class CountingStream extends IndexInput {
+    private IndexInput input;
+
+    CountingStream(IndexInput input) {
+      this.input = input;
+    }
+
+    @Override
+    public byte readByte() throws IOException {
+      TestMultiLevelSkipList.this.counter++;
+      return this.input.readByte();
+    }
+
+    @Override
+    public void readBytes(byte[] b, int offset, int len) throws IOException {
+      TestMultiLevelSkipList.this.counter += len;
+      this.input.readBytes(b, offset, len);
+    }
+
+    @Override
+    public void close() throws IOException {
+      this.input.close();
+    }
+
+    @Override
+    public long getFilePointer() {
+      return this.input.getFilePointer();
+    }
+
+    @Override
+    public void seek(long pos) throws IOException {
+      this.input.seek(pos);
+    }
+
+    @Override
+    public long length() {
+      return this.input.length();
+    }
+
+    @Override
+    public Object clone() {
+      return new CountingStream((IndexInput) this.input.clone());
+    }
+
+  }
+}
diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestNRTReaderWithThreads.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestNRTReaderWithThreads.java
new file mode 100644
index 0000000..9308846
--- /dev/null
+++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestNRTReaderWithThreads.java
@@ -0,0 +1,116 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Random;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestNRTReaderWithThreads extends LuceneTestCase {
+  AtomicInteger seq = new AtomicInteger(1);
+
+  public void testIndexing() throws Exception {
+    Directory mainDir = newDirectory();
+    IndexWriter writer = new IndexWriter(
+        mainDir,
+        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).
+            setMaxBufferedDocs(10).
+            setMergePolicy(newLogMergePolicy(false, 2))
+    );
+    writer.setInfoStream(VERBOSE ?
System.out : null); + IndexReader reader = writer.getReader(); // start pooling readers + reader.close(); + RunThread[] indexThreads = new RunThread[4]; + for (int x=0; x < indexThreads.length; x++) { + indexThreads[x] = new RunThread(x % 2, writer); + indexThreads[x].setName("Thread " + x); + indexThreads[x].start(); + } + long startTime = System.currentTimeMillis(); + long duration = 1000; + while ((System.currentTimeMillis() - startTime) < duration) { + Thread.sleep(100); + } + int delCount = 0; + int addCount = 0; + for (int x=0; x < indexThreads.length; x++) { + indexThreads[x].run = false; + assertNull("Exception thrown: "+indexThreads[x].ex, indexThreads[x].ex); + addCount += indexThreads[x].addCount; + delCount += indexThreads[x].delCount; + } + for (int x=0; x < indexThreads.length; x++) { + indexThreads[x].join(); + } + for (int x=0; x < indexThreads.length; x++) { + assertNull("Exception thrown: "+indexThreads[x].ex, indexThreads[x].ex); + } + //System.out.println("addCount:"+addCount); + //System.out.println("delCount:"+delCount); + writer.close(); + mainDir.close(); + } + + public class RunThread extends Thread { + IndexWriter writer; + volatile boolean run = true; + volatile Throwable ex; + int delCount = 0; + int addCount = 0; + int type; + final Random r = new Random(random.nextLong()); + + public RunThread(int type, IndexWriter writer) { + this.type = type; + this.writer = writer; + } + + @Override + public void run() { + try { + while (run) { + //int n = random.nextInt(2); + if (type == 0) { + int i = seq.addAndGet(1); + Document doc = TestIndexWriterReader.createDocument(i, "index1", 10); + writer.addDocument(doc); + addCount++; + } else if (type == 1) { + // we may or may not delete because the term may not exist, + // however we're opening and closing the reader rapidly + IndexReader reader = writer.getReader(); + int id = r.nextInt(seq.intValue()); + Term term = new Term("id", Integer.toString(id)); + int count = TestIndexWriterReader.count(term, reader); + writer.deleteDocuments(term); + reader.close(); + delCount += count; + } + } + } catch (Throwable ex) { + ex.printStackTrace(System.out); + this.ex = ex; + run = false; + } + } + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestNRTThreads.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestNRTThreads.java new file mode 100644 index 0000000..9f30e05 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestNRTThreads.java @@ -0,0 +1,570 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.store.MockDirectoryWrapper;
+import org.apache.lucene.util.LineFileDocs;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
+import org.junit.Test;
+
+// TODO
+//   - mix in optimize, addIndexes
+//   - randomly mix in non-congruent docs
+
+public class TestNRTThreads extends LuceneTestCase {
+
+  private static class SubDocs {
+    public final String packID;
+    public final List<String> subIDs;
+    public boolean deleted;
+
+    public SubDocs(String packID, List<String> subIDs) {
+      this.packID = packID;
+      this.subIDs = subIDs;
+    }
+  }
+
+  // TODO: is there a pre-existing way to do this!!!
+  private Document cloneDoc(Document doc1) {
+    final Document doc2 = new Document();
+    for(Fieldable f : doc1.getFields()) {
+      Field field1 = (Field) f;
+
+      Field field2 = new Field(field1.name(),
+                               field1.stringValue(),
+                               field1.isStored() ? Field.Store.YES : Field.Store.NO,
+                               field1.isIndexed() ? (field1.isTokenized() ? Field.Index.ANALYZED : Field.Index.NOT_ANALYZED) : Field.Index.NO);
+      if (field1.getOmitNorms()) {
+        field2.setOmitNorms(true);
+      }
+      if (field1.getOmitTermFreqAndPositions()) {
+        field2.setOmitTermFreqAndPositions(true);
+      }
+      doc2.add(field2);
+    }
+
+    return doc2;
+  }
+
+  @Test
+  public void testNRTThreads() throws Exception {
+
+    final long t0 = System.currentTimeMillis();
+
+    final LineFileDocs docs = new LineFileDocs(random);
+    final File tempDir = _TestUtil.getTempDir("nrtopenfiles");
+    final MockDirectoryWrapper dir = newFSDirectory(tempDir);
+    dir.setCheckIndexOnClose(false); // don't double-checkIndex, we do it ourselves.
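+    // Automatic checking is off, so the index is verified explicitly at the
+    // end of the test instead; with the 3.x tooling a manual check looks
+    // roughly like this (a sketch, not part of the original test):
+    //
+    //   CheckIndex checker = new CheckIndex(dir);
+    //   CheckIndex.Status status = checker.checkIndex();
+    //   assertTrue("index is corrupt", status.clean);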
+ final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)); + conf.setMergedSegmentWarmer(new IndexWriter.IndexReaderWarmer() { + @Override + public void warm(IndexReader reader) throws IOException { + if (VERBOSE) { + System.out.println("TEST: now warm merged reader=" + reader); + } + final int maxDoc = reader.maxDoc(); + int sum = 0; + final int inc = Math.max(1, maxDoc/50); + for(int docID=0;docID delIDs = Collections.synchronizedSet(new HashSet()); + final List allSubDocs = Collections.synchronizedList(new ArrayList()); + + final long stopTime = System.currentTimeMillis() + RUN_TIME_SEC*1000; + Thread[] threads = new Thread[NUM_INDEX_THREADS]; + for(int thread=0;thread toDeleteIDs = new ArrayList(); + final List toDeleteSubDocs = new ArrayList(); + while(System.currentTimeMillis() < stopTime && !failed.get()) { + try { + Document doc = docs.nextDoc(); + if (doc == null) { + break; + } + final String addedField; + if (random.nextBoolean()) { + addedField = "extra" + random.nextInt(10); + doc.add(new Field(addedField, "a random field", Field.Store.NO, Field.Index.ANALYZED)); + } else { + addedField = null; + } + if (random.nextBoolean()) { + if (VERBOSE) { + System.out.println(Thread.currentThread().getName() + ": add doc id:" + doc.get("docid")); + } + + if (random.nextBoolean()) { + // Add a pack of adjacent sub-docs + final String packID; + final SubDocs delSubDocs; + if (toDeleteSubDocs.size() > 0 && random.nextBoolean()) { + delSubDocs = toDeleteSubDocs.get(random.nextInt(toDeleteSubDocs.size())); + assert !delSubDocs.deleted; + toDeleteSubDocs.remove(delSubDocs); + // reuse prior packID + packID = delSubDocs.packID; + } else { + delSubDocs = null; + // make new packID + packID = packCount.getAndIncrement() + ""; + } + + final Field packIDField = newField("packID", packID, Field.Store.YES, Field.Index.NOT_ANALYZED); + final List docIDs = new ArrayList(); + final SubDocs subDocs = new SubDocs(packID, docIDs); + final List docsList = new ArrayList(); + + allSubDocs.add(subDocs); + doc.add(packIDField); + docsList.add(cloneDoc(doc)); + docIDs.add(doc.get("docid")); + + final int maxDocCount = _TestUtil.nextInt(random, 1, 10); + while(docsList.size() < maxDocCount) { + doc = docs.nextDoc(); + if (doc == null) { + break; + } + docsList.add(cloneDoc(doc)); + docIDs.add(doc.get("docid")); + } + addCount.addAndGet(docsList.size()); + + if (delSubDocs != null) { + delSubDocs.deleted = true; + delIDs.addAll(delSubDocs.subIDs); + delCount.addAndGet(delSubDocs.subIDs.size()); + if (VERBOSE) { + System.out.println("TEST: update pack packID=" + delSubDocs.packID + " count=" + docsList.size() + " docs=" + docIDs); + } + writer.updateDocuments(new Term("packID", delSubDocs.packID), docsList); + /* + // non-atomic: + writer.deleteDocuments(new Term("packID", delSubDocs.packID)); + for(Document subDoc : docsList) { + writer.addDocument(subDoc); + } + */ + } else { + if (VERBOSE) { + System.out.println("TEST: add pack packID=" + packID + " count=" + docsList.size() + " docs=" + docIDs); + } + writer.addDocuments(docsList); + + /* + // non-atomic: + for(Document subDoc : docsList) { + writer.addDocument(subDoc); + } + */ + } + doc.removeField("packID"); + + if (random.nextInt(5) == 2) { + if (VERBOSE) { + //System.out.println(Thread.currentThread().getName() + ": buffer del id:" + packID); + } + toDeleteSubDocs.add(subDocs); + } + + } else { + writer.addDocument(doc); + addCount.getAndIncrement(); + + if (random.nextInt(5) == 3) { + if (VERBOSE) { + 
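+                // The id is only buffered at this point; the actual delete is
+                // applied in a later batch via
+                // writer.deleteDocuments(new Term("docid", id)). Buffering
+                // delete-by-term (rather than deleting by internal docID) is
+                // what keeps this safe while other threads concurrently add
+                // and update documents.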
//System.out.println(Thread.currentThread().getName() + ": buffer del id:" + doc.get("docid")); + } + toDeleteIDs.add(doc.get("docid")); + } + } + } else { + // we use update but it never replaces a + // prior doc + if (VERBOSE) { + System.out.println(Thread.currentThread().getName() + ": update doc id:" + doc.get("docid")); + } + writer.updateDocument(new Term("docid", doc.get("docid")), doc); + addCount.getAndIncrement(); + + if (random.nextInt(5) == 3) { + if (VERBOSE) { + //System.out.println(Thread.currentThread().getName() + ": buffer del id:" + doc.get("docid")); + } + toDeleteIDs.add(doc.get("docid")); + } + } + + if (random.nextInt(30) == 17) { + if (VERBOSE) { + System.out.println(Thread.currentThread().getName() + ": apply " + toDeleteIDs.size() + " deletes"); + } + for(String id : toDeleteIDs) { + if (VERBOSE) { + System.out.println(Thread.currentThread().getName() + ": del term=id:" + id); + } + writer.deleteDocuments(new Term("docid", id)); + } + final int count = delCount.addAndGet(toDeleteIDs.size()); + if (VERBOSE) { + System.out.println(Thread.currentThread().getName() + ": tot " + count + " deletes"); + } + delIDs.addAll(toDeleteIDs); + toDeleteIDs.clear(); + + for(SubDocs subDocs : toDeleteSubDocs) { + assert !subDocs.deleted; + writer.deleteDocuments(new Term("packID", subDocs.packID)); + subDocs.deleted = true; + if (VERBOSE) { + System.out.println(" del subs: " + subDocs.subIDs + " packID=" + subDocs.packID); + } + delIDs.addAll(subDocs.subIDs); + delCount.addAndGet(subDocs.subIDs.size()); + } + toDeleteSubDocs.clear(); + } + if (addedField != null) { + doc.removeField(addedField); + } + } catch (Throwable t) { + System.out.println(Thread.currentThread().getName() + ": hit exc"); + t.printStackTrace(); + failed.set(true); + throw new RuntimeException(t); + } + } + if (VERBOSE) { + System.out.println(Thread.currentThread().getName() + ": indexing done"); + } + } + }; + threads[thread].setDaemon(true); + threads[thread].start(); + } + + if (VERBOSE) { + System.out.println("TEST: DONE start indexing threads [" + (System.currentTimeMillis()-t0) + " ms]"); + } + + // let index build up a bit + Thread.sleep(100); + + IndexReader r = IndexReader.open(writer, true); + boolean any = false; + + // silly starting guess: + final AtomicInteger totTermCount = new AtomicInteger(100); + + final ExecutorService es = Executors.newCachedThreadPool(); + + while(System.currentTimeMillis() < stopTime && !failed.get()) { + if (random.nextBoolean()) { + if (VERBOSE) { + System.out.println("TEST: now reopen r=" + r); + } + final IndexReader r2 = r.reopen(); + if (r != r2) { + r.close(); + r = r2; + } + } else { + if (VERBOSE) { + System.out.println("TEST: now close reader=" + r); + } + r.close(); + writer.commit(); + final Set openDeletedFiles = dir.getOpenDeletedFiles(); + if (openDeletedFiles.size() > 0) { + System.out.println("OBD files: " + openDeletedFiles); + } + any |= openDeletedFiles.size() > 0; + //assertEquals("open but deleted: " + openDeletedFiles, 0, openDeletedFiles.size()); + if (VERBOSE) { + System.out.println("TEST: now open"); + } + r = IndexReader.open(writer, true); + } + if (VERBOSE) { + System.out.println("TEST: got new reader=" + r); + } + //System.out.println("numDocs=" + r.numDocs() + " + //openDelFileCount=" + dir.openDeleteFileCount()); + + smokeTestReader(r); + + if (r.numDocs() > 0) { + + final IndexSearcher s = new IndexSearcher(r, es); + + // run search threads + final long searchStopTime = System.currentTimeMillis() + 500; + final Thread[] searchThreads = new 
Thread[NUM_SEARCH_THREADS]; + final AtomicInteger totHits = new AtomicInteger(); + for(int thread=0;thread 0; + + assertFalse("saw non-zero open-but-deleted count", any); + if (VERBOSE) { + System.out.println("TEST: now join"); + } + for(int thread=0;thread[] ctors = NoDeletionPolicy.class.getDeclaredConstructors(); + assertEquals("expected 1 private ctor only: " + Arrays.toString(ctors), 1, ctors.length); + assertTrue("that 1 should be private: " + ctors[0], Modifier.isPrivate(ctors[0].getModifiers())); + } + + @Test + public void testMethodsOverridden() throws Exception { + // Ensures that all methods of IndexDeletionPolicy are + // overridden/implemented. That's important to ensure that NoDeletionPolicy + // overrides everything, so that no unexpected behavior/error occurs. + // NOTE: even though IndexDeletionPolicy is an interface today, and so all + // methods must be implemented by NoDeletionPolicy, this test is important + // in case one day IDP becomes an abstract class. + for (Method m : NoDeletionPolicy.class.getMethods()) { + // getDeclaredMethods() returns just those methods that are declared on + // NoDeletionPolicy. getMethods() returns those that are visible in that + // context, including ones from Object. So just filter out Object. If in + // the future IndexDeletionPolicy will become a class that extends a + // different class than Object, this will need to change. + if (m.getDeclaringClass() != Object.class) { + assertTrue(m + " is not overridden !", m.getDeclaringClass() == NoDeletionPolicy.class); + } + } + } + + @Test + public void testAllCommitsRemain() throws Exception { + Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random)) + .setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE)); + for (int i = 0; i < 10; i++) { + Document doc = new Document(); + doc.add(newField("c", "a" + i, Store.YES, Index.ANALYZED)); + writer.addDocument(doc); + writer.commit(); + assertEquals("wrong number of commits !", i + 1, IndexReader.listCommits(dir).size()); + } + writer.close(); + dir.close(); + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestNoMergePolicy.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestNoMergePolicy.java new file mode 100644 index 0000000..e73b0b3 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestNoMergePolicy.java @@ -0,0 +1,71 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.lang.reflect.Constructor; +import java.lang.reflect.Method; +import java.lang.reflect.Modifier; +import java.util.Arrays; + +import org.apache.lucene.util.LuceneTestCase; +import org.junit.Test; + +public class TestNoMergePolicy extends LuceneTestCase { + + @Test + public void testNoMergePolicy() throws Exception { + MergePolicy mp = NoMergePolicy.NO_COMPOUND_FILES; + assertNull(mp.findMerges(null)); + assertNull(mp.findMergesForOptimize(null, 0, null)); + assertNull(mp.findMergesToExpungeDeletes(null)); + assertFalse(mp.useCompoundFile(null, null)); + mp.close(); + } + + @Test + public void testCompoundFiles() throws Exception { + assertFalse(NoMergePolicy.NO_COMPOUND_FILES.useCompoundFile(null, null)); + assertTrue(NoMergePolicy.COMPOUND_FILES.useCompoundFile(null, null)); + } + + @Test + public void testFinalSingleton() throws Exception { + assertTrue(Modifier.isFinal(NoMergePolicy.class.getModifiers())); + Constructor[] ctors = NoMergePolicy.class.getDeclaredConstructors(); + assertEquals("expected 1 private ctor only: " + Arrays.toString(ctors), 1, ctors.length); + assertTrue("that 1 should be private: " + ctors[0], Modifier.isPrivate(ctors[0].getModifiers())); + } + + @Test + public void testMethodsOverridden() throws Exception { + // Ensures that all methods of MergePolicy are overridden. That's important + // to ensure that NoMergePolicy overrides everything, so that no unexpected + // behavior/error occurs + for (Method m : NoMergePolicy.class.getMethods()) { + // getDeclaredMethods() returns just those methods that are declared on + // NoMergePolicy. getMethods() returns those that are visible in that + // context, including ones from Object. So just filter out Object. If in + // the future MergePolicy will extend a different class than Object, this + // will need to change. + if (m.getDeclaringClass() != Object.class) { + assertTrue(m + " is not overridden !", m.getDeclaringClass() == NoMergePolicy.class); + } + } + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestNoMergeScheduler.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestNoMergeScheduler.java new file mode 100644 index 0000000..78fb4dc --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestNoMergeScheduler.java @@ -0,0 +1,62 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.lang.reflect.Constructor; +import java.lang.reflect.Method; +import java.lang.reflect.Modifier; +import java.util.Arrays; + +import org.apache.lucene.util.LuceneTestCase; +import org.junit.Test; + +public class TestNoMergeScheduler extends LuceneTestCase { + + @Test + public void testNoMergeScheduler() throws Exception { + MergeScheduler ms = NoMergeScheduler.INSTANCE; + ms.close(); + ms.merge(null); + } + + @Test + public void testFinalSingleton() throws Exception { + assertTrue(Modifier.isFinal(NoMergeScheduler.class.getModifiers())); + Constructor[] ctors = NoMergeScheduler.class.getDeclaredConstructors(); + assertEquals("expected 1 private ctor only: " + Arrays.toString(ctors), 1, ctors.length); + assertTrue("that 1 should be private: " + ctors[0], Modifier.isPrivate(ctors[0].getModifiers())); + } + + @Test + public void testMethodsOverridden() throws Exception { + // Ensures that all methods of MergeScheduler are overridden. That's + // important to ensure that NoMergeScheduler overrides everything, so that + // no unexpected behavior/error occurs + for (Method m : NoMergeScheduler.class.getMethods()) { + // getDeclaredMethods() returns just those methods that are declared on + // NoMergeScheduler. getMethods() returns those that are visible in that + // context, including ones from Object. So just filter out Object. If in + // the future MergeScheduler will extend a different class than Object, + // this will need to change. + if (m.getDeclaringClass() != Object.class) { + assertTrue(m + " is not overridden !", m.getDeclaringClass() == NoMergeScheduler.class); + } + } + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestNorms.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestNorms.java new file mode 100755 index 0000000..9d8f412 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestNorms.java @@ -0,0 +1,280 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Random;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Index;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.search.DefaultSimilarity;
+import org.apache.lucene.search.Similarity;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+
+/**
+ * Test that norms info is preserved during index life - including
+ * separate norms, addDocument, addIndexes, optimize.
+ */
+public class TestNorms extends LuceneTestCase {
+
+  private class SimilarityOne extends DefaultSimilarity {
+    @Override
+    public float computeNorm(String fieldName, FieldInvertState state) {
+      // Disable length norm
+      return state.getBoost();
+    }
+  }
+
+  private static final int NUM_FIELDS = 10;
+
+  private Similarity similarityOne;
+  private Analyzer anlzr;
+  private int numDocNorms;
+  private ArrayList<Float> norms;
+  private ArrayList<Float> modifiedNorms;
+  private float lastNorm = 0;
+  private float normDelta = (float) 0.001;
+
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    similarityOne = new SimilarityOne();
+    anlzr = new MockAnalyzer(random);
+  }
+
+  /**
+   * Test that norms values are preserved as the index is maintained.
+   * Including separate norms.
+   * Including merging indexes with separate norms.
+   * Including optimize.
+   */
+  public void testNorms() throws IOException {
+    Directory dir1 = newDirectory();
+
+    norms = new ArrayList<Float>();
+    modifiedNorms = new ArrayList<Float>();
+
+    createIndex(random, dir1);
+    doTestNorms(random, dir1);
+
+    // test with a single index: index2
+    ArrayList<Float> norms1 = norms;
+    ArrayList<Float> modifiedNorms1 = modifiedNorms;
+    int numDocNorms1 = numDocNorms;
+
+    norms = new ArrayList<Float>();
+    modifiedNorms = new ArrayList<Float>();
+    numDocNorms = 0;
+
+    Directory dir2 = newDirectory();
+
+    createIndex(random, dir2);
+    doTestNorms(random, dir2);
+
+    // add index1 and index2 to a third index: index3
+    Directory dir3 = newDirectory();
+
+    createIndex(random, dir3);
+    IndexWriter iw = new IndexWriter(dir3, newIndexWriterConfig(
+        TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.APPEND)
+        .setMaxBufferedDocs(5).setMergePolicy(newLogMergePolicy(3)));
+    iw.addIndexes(new Directory[]{dir1,dir2});
+    iw.optimize();
+    iw.close();
+
+    norms1.addAll(norms);
+    norms = norms1;
+    modifiedNorms1.addAll(modifiedNorms);
+    modifiedNorms = modifiedNorms1;
+    numDocNorms += numDocNorms1;
+
+    // test with index3
+    verifyIndex(dir3);
+    doTestNorms(random, dir3);
+
+    // now with optimize
+    iw = new IndexWriter(dir3, newIndexWriterConfig( TEST_VERSION_CURRENT,
+        anlzr).setOpenMode(OpenMode.APPEND).setMaxBufferedDocs(5).setMergePolicy(newLogMergePolicy(3)));
+    iw.optimize();
+    iw.close();
+    verifyIndex(dir3);
+
+    dir1.close();
+    dir2.close();
+    dir3.close();
+  }
+
+  private void doTestNorms(Random random, Directory dir) throws IOException {
+    int num = atLeast(1);
+    for (int i=0; i storedNorms = (i==1 ? modifiedNorms : norms);
+      for (int j = 0; j < b.length; j++) {
+        float norm = similarityOne.decodeNormValue(b[j]);
+        float norm1 = storedNorms.get(j).floatValue();
+        assertEquals("stored norm value of "+field+" for doc "+j+" is "+norm+" - a mismatch!", norm, norm1, 0.000001);
+      }
+    }
+    ir.close();
+  }
+
+  private void addDocs(Random random, Directory dir, int ndocs, boolean compound) throws IOException {
+    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(
+        TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.APPEND)
+        .setMaxBufferedDocs(5).setSimilarity(similarityOne).setMergePolicy(newLogMergePolicy()));
+    LogMergePolicy lmp = (LogMergePolicy) iw.getConfig().getMergePolicy();
+    lmp.setMergeFactor(3);
+    lmp.setUseCompoundFile(compound);
+    for (int i = 0; i < ndocs; i++) {
+      iw.addDocument(newDoc());
+    }
+    iw.close();
+  }
+
+  // create the next document
+  private Document newDoc() {
+    Document d = new Document();
+    float boost = nextNorm();
+    for (int i = 0; i < 10; i++) {
+      Field f = newField("f"+i,"v"+i,Store.NO,Index.NOT_ANALYZED);
+      f.setBoost(boost);
+      d.add(f);
+    }
+    return d;
+  }
+
+  // return unique norm values that are unchanged by encoding/decoding
+  private float nextNorm() {
+    float norm = lastNorm + normDelta;
+    do {
+      float norm1 = similarityOne.decodeNormValue(similarityOne.encodeNormValue(norm));
+      if (norm1 > lastNorm) {
+        //System.out.println(norm1+" > "+lastNorm);
+        norm = norm1;
+        break;
+      }
+      norm += normDelta;
+    } while (true);
+    norms.add(numDocNorms, Float.valueOf(norm));
+    modifiedNorms.add(numDocNorms, Float.valueOf(norm));
+    //System.out.println("creating norm("+numDocNorms+"): "+norm);
+    numDocNorms++;
+    lastNorm = (norm>10 ? 0 : norm); // there's a limit to how many distinct values can be stored in a single byte
+    return norm;
+  }
+
+  class CustomNormEncodingSimilarity extends DefaultSimilarity {
+    @Override
+    public byte encodeNormValue(float f) {
+      return (byte) f;
+    }
+
+    @Override
+    public float decodeNormValue(byte b) {
+      return (float) b;
+    }
+
+    @Override
+    public float computeNorm(String field, FieldInvertState state) {
+      return (float) state.getLength();
+    }
+  }
+
+  // LUCENE-1260
+  public void testCustomEncoder() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
+    config.setSimilarity(new CustomNormEncodingSimilarity());
+    RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
+    Document doc = new Document();
+    Field foo = newField("foo", "", Field.Store.NO, Field.Index.ANALYZED);
+    Field bar = newField("bar", "", Field.Store.NO, Field.Index.ANALYZED);
+    doc.add(foo);
+    doc.add(bar);
+
+    for (int i = 0; i < 100; i++) {
+      bar.setValue("singleton");
+      writer.addDocument(doc);
+    }
+
+    IndexReader reader = writer.getReader();
+    writer.close();
+
+    byte fooNorms[] = reader.norms("foo");
+    for (int i = 0; i < reader.maxDoc(); i++)
+      assertEquals(0, fooNorms[i]);
+
+    byte barNorms[] = reader.norms("bar");
+    for (int i = 0; i < reader.maxDoc(); i++)
+      assertEquals(1, barNorms[i]);
+
+    reader.close();
+    dir.close();
+  }
+}
diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestParallelReader.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestParallelReader.java
new file mode 100644
index 0000000..c7bcefe
--- /dev/null
+++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestParallelReader.java
@@ -0,0 +1,329 @@
+package org.apache.lucene.index;
+
+/**
+ *
Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import java.util.Random; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.MapFieldSelector; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; + +public class TestParallelReader extends LuceneTestCase { + + private IndexSearcher parallel; + private IndexSearcher single; + private Directory dir, dir1, dir2; + + @Override + public void setUp() throws Exception { + super.setUp(); + single = single(random); + parallel = parallel(random); + } + + @Override + public void tearDown() throws Exception { + single.getIndexReader().close(); + single.close(); + parallel.getIndexReader().close(); + parallel.close(); + dir.close(); + dir1.close(); + dir2.close(); + super.tearDown(); + } + + public void testQueries() throws Exception { + queryTest(new TermQuery(new Term("f1", "v1"))); + queryTest(new TermQuery(new Term("f1", "v2"))); + queryTest(new TermQuery(new Term("f2", "v1"))); + queryTest(new TermQuery(new Term("f2", "v2"))); + queryTest(new TermQuery(new Term("f3", "v1"))); + queryTest(new TermQuery(new Term("f3", "v2"))); + queryTest(new TermQuery(new Term("f4", "v1"))); + queryTest(new TermQuery(new Term("f4", "v2"))); + + BooleanQuery bq1 = new BooleanQuery(); + bq1.add(new TermQuery(new Term("f1", "v1")), Occur.MUST); + bq1.add(new TermQuery(new Term("f4", "v1")), Occur.MUST); + queryTest(bq1); + } + + public void testFieldNames() throws Exception { + Directory dir1 = getDir1(random); + Directory dir2 = getDir2(random); + ParallelReader pr = new ParallelReader(); + pr.add(IndexReader.open(dir1, false)); + pr.add(IndexReader.open(dir2, false)); + Collection fieldNames = pr.getFieldNames(IndexReader.FieldOption.ALL); + assertEquals(4, fieldNames.size()); + assertTrue(fieldNames.contains("f1")); + assertTrue(fieldNames.contains("f2")); + assertTrue(fieldNames.contains("f3")); + assertTrue(fieldNames.contains("f4")); + pr.close(); + dir1.close(); + dir2.close(); + } + + public void testDocument() throws IOException { + Directory dir1 = getDir1(random); + Directory dir2 = getDir2(random); + ParallelReader pr = new ParallelReader(); + pr.add(IndexReader.open(dir1, false)); + pr.add(IndexReader.open(dir2, false)); + + Document doc11 = 
pr.document(0, new MapFieldSelector(new String[] {"f1"}));
+    Document doc24 = pr.document(1, new MapFieldSelector(Arrays.asList(new String[] {"f4"})));
+    Document doc223 = pr.document(1, new MapFieldSelector(new String[] {"f2", "f3"}));
+
+    assertEquals(1, doc11.getFields().size());
+    assertEquals(1, doc24.getFields().size());
+    assertEquals(2, doc223.getFields().size());
+
+    assertEquals("v1", doc11.get("f1"));
+    assertEquals("v2", doc24.get("f4"));
+    assertEquals("v2", doc223.get("f2"));
+    assertEquals("v2", doc223.get("f3"));
+    pr.close();
+    dir1.close();
+    dir2.close();
+  }
+
+  public void testIncompatibleIndexes() throws IOException {
+    // two documents:
+    Directory dir1 = getDir1(random);
+
+    // one document only:
+    Directory dir2 = newDirectory();
+    IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)));
+    Document d3 = new Document();
+    d3.add(newField("f3", "v1", Field.Store.YES, Field.Index.ANALYZED));
+    w2.addDocument(d3);
+    w2.close();
+
+    ParallelReader pr = new ParallelReader();
+    pr.add(IndexReader.open(dir1, false));
+    IndexReader ir = IndexReader.open(dir2, false);
+    try {
+      pr.add(ir);
+      fail("didn't get expected exception: indexes don't have the same number of documents");
+    } catch (IllegalArgumentException e) {
+      // expected exception
+    }
+    pr.close();
+    ir.close();
+    dir1.close();
+    dir2.close();
+  }
+
+  public void testIsCurrent() throws IOException {
+    Directory dir1 = getDir1(random);
+    Directory dir2 = getDir2(random);
+    ParallelReader pr = new ParallelReader();
+    pr.add(IndexReader.open(dir1, false));
+    pr.add(IndexReader.open(dir2, false));
+
+    assertTrue(pr.isCurrent());
+    IndexReader modifier = IndexReader.open(dir1, false);
+    modifier.setNorm(0, "f1", 100);
+    modifier.close();
+
+    // one of the two IndexReaders which ParallelReader is using
+    // is not current anymore
+    assertFalse(pr.isCurrent());
+
+    modifier = IndexReader.open(dir2, false);
+    modifier.setNorm(0, "f3", 100);
+    modifier.close();
+
+    // now both are not current anymore
+    assertFalse(pr.isCurrent());
+    pr.close();
+    dir1.close();
+    dir2.close();
+  }
+
+  public void testIsOptimized() throws IOException {
+    Directory dir1 = getDir1(random);
+    Directory dir2 = getDir2(random);
+
+    // add another document to ensure that the indexes are not optimized
+    IndexWriter modifier = new IndexWriter(
+        dir1,
+        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).
+            setMergePolicy(newLogMergePolicy(10))
+    );
+    Document d = new Document();
+    d.add(newField("f1", "v1", Field.Store.YES, Field.Index.ANALYZED));
+    modifier.addDocument(d);
+    modifier.close();
+
+    modifier = new IndexWriter(
+        dir2,
+        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).
+ setMergePolicy(newLogMergePolicy(10)) + ); + d = new Document(); + d.add(newField("f2", "v2", Field.Store.YES, Field.Index.ANALYZED)); + modifier.addDocument(d); + modifier.close(); + + + ParallelReader pr = new ParallelReader(); + pr.add(IndexReader.open(dir1, false)); + pr.add(IndexReader.open(dir2, false)); + assertFalse(pr.isOptimized()); + pr.close(); + + modifier = new IndexWriter(dir1, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); + modifier.optimize(); + modifier.close(); + + pr = new ParallelReader(); + pr.add(IndexReader.open(dir1, false)); + pr.add(IndexReader.open(dir2, false)); + // just one of the two indexes are optimized + assertFalse(pr.isOptimized()); + pr.close(); + + + modifier = new IndexWriter(dir2, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); + modifier.optimize(); + modifier.close(); + + pr = new ParallelReader(); + pr.add(IndexReader.open(dir1, false)); + pr.add(IndexReader.open(dir2, false)); + // now both indexes are optimized + assertTrue(pr.isOptimized()); + pr.close(); + dir1.close(); + dir2.close(); + } + + public void testAllTermDocs() throws IOException { + Directory dir1 = getDir1(random); + Directory dir2 = getDir2(random); + ParallelReader pr = new ParallelReader(); + pr.add(IndexReader.open(dir1, false)); + pr.add(IndexReader.open(dir2, false)); + int NUM_DOCS = 2; + TermDocs td = pr.termDocs(null); + for(int i=0;i processors; + + public PerDirPayloadProcessor(Map processors) { + this.processors = processors; + } + + @Override + public DirPayloadProcessor getDirProcessor(Directory dir) throws IOException { + return processors.get(dir); + } + + } + + private static final class PerTermPayloadProcessor extends DirPayloadProcessor { + + @Override + public PayloadProcessor getProcessor(Term term) throws IOException { + // don't process payloads of terms other than "p:p1" + if (!term.field().equals("p") || !term.text().equals("p1")) { + return null; + } + + // All other terms are processed the same way + return new DeletePayloadProcessor(); + } + + } + + /** deletes the incoming payload */ + private static final class DeletePayloadProcessor extends PayloadProcessor { + + @Override + public int payloadLength() throws IOException { + return 0; + } + + @Override + public byte[] processPayload(byte[] payload, int start, int length) throws IOException { + return payload; + } + + } + + private static final class PayloadTokenStream extends TokenStream { + + private final PayloadAttribute payload = addAttribute(PayloadAttribute.class); + private final CharTermAttribute term = addAttribute(CharTermAttribute.class); + + private boolean called = false; + private String t; + + public PayloadTokenStream(String t) { + this.t = t; + } + + @Override + public boolean incrementToken() throws IOException { + if (called) { + return false; + } + + called = true; + byte[] p = new byte[] { 1 }; + payload.setPayload(new Payload(p)); + term.append(t); + return true; + } + + @Override + public void reset() throws IOException { + super.reset(); + called = false; + term.setEmpty(); + } + } + + private static final int NUM_DOCS = 10; + + private IndexWriterConfig getConfig(Random random) { + return newIndexWriterConfig(random, TEST_VERSION_CURRENT, new WhitespaceAnalyzer( + TEST_VERSION_CURRENT)); + } + + private void populateDirs(Random random, Directory[] dirs, boolean multipleCommits) + throws IOException { + for (int i = 0; i < dirs.length; i++) { + dirs[i] = newDirectory(); + populateDocs(random, dirs[i], multipleCommits); + 
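+      // Both terms must carry payloads before any processor runs; the
+      // PerTermPayloadProcessor above is later installed on the destination
+      // writer so that only "p:p1" payloads are stripped during merging.
+      // The wiring follows this pattern (a sketch; 'procs' is a name assumed
+      // here, the real test builds the map inline further below):
+      //
+      //   Map<Directory, DirPayloadProcessor> procs =
+      //       new HashMap<Directory, DirPayloadProcessor>();
+      //   procs.put(dir, new PerTermPayloadProcessor());
+      //   writer.setPayloadProcessorProvider(new PerDirPayloadProcessor(procs));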
verifyPayloadExists(dirs[i], new Term("p", "p1"), NUM_DOCS); + verifyPayloadExists(dirs[i], new Term("p", "p2"), NUM_DOCS); + } + } + + private void populateDocs(Random random, Directory dir, boolean multipleCommits) + throws IOException { + IndexWriter writer = new IndexWriter( + dir, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)). + setMergePolicy(newLogMergePolicy(10)) + ); + TokenStream payloadTS1 = new PayloadTokenStream("p1"); + TokenStream payloadTS2 = new PayloadTokenStream("p2"); + for (int i = 0; i < NUM_DOCS; i++) { + Document doc = new Document(); + doc.add(newField("id", "doc" + i, Store.NO, Index.NOT_ANALYZED_NO_NORMS)); + doc.add(newField("content", "doc content " + i, Store.NO, Index.ANALYZED)); + doc.add(new Field("p", payloadTS1)); + doc.add(new Field("p", payloadTS2)); + writer.addDocument(doc); + if (multipleCommits && (i % 4 == 0)) { + writer.commit(); + } + } + writer.close(); + } + + private void verifyPayloadExists(Directory dir, Term term, int numExpected) + throws IOException { + IndexReader reader = IndexReader.open(dir); + try { + int numPayloads = 0; + TermPositions tp = reader.termPositions(term); + while (tp.next()) { + tp.nextPosition(); + if (tp.isPayloadAvailable()) { + assertEquals(1, tp.getPayloadLength()); + byte[] p = new byte[tp.getPayloadLength()]; + tp.getPayload(p, 0); + assertEquals(1, p[0]); + ++numPayloads; + } + } + assertEquals(numExpected, numPayloads); + } finally { + reader.close(); + } + } + + private void doTest(Random random, boolean addToEmptyIndex, + int numExpectedPayloads, boolean multipleCommits) throws IOException { + Directory[] dirs = new Directory[2]; + populateDirs(random, dirs, multipleCommits); + + Directory dir = newDirectory(); + if (!addToEmptyIndex) { + populateDocs(random, dir, multipleCommits); + verifyPayloadExists(dir, new Term("p", "p1"), NUM_DOCS); + verifyPayloadExists(dir, new Term("p", "p2"), NUM_DOCS); + } + + // Add two source dirs. By not adding the dest dir, we ensure its payloads + // won't get processed. + Map processors = new HashMap(); + for (Directory d : dirs) { + processors.put(d, new PerTermPayloadProcessor()); + } + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false))); + writer.setPayloadProcessorProvider(new PerDirPayloadProcessor(processors)); + + IndexReader[] readers = new IndexReader[dirs.length]; + for (int i = 0; i < readers.length; i++) { + readers[i] = IndexReader.open(dirs[i]); + } + try { + writer.addIndexes(readers); + } finally { + for (IndexReader r : readers) { + r.close(); + } + } + writer.close(); + verifyPayloadExists(dir, new Term("p", "p1"), numExpectedPayloads); + // the second term should always have all payloads + numExpectedPayloads = NUM_DOCS * dirs.length + + (addToEmptyIndex ? 
0 : NUM_DOCS); + verifyPayloadExists(dir, new Term("p", "p2"), numExpectedPayloads); + for (Directory d : dirs) + d.close(); + dir.close(); + } + + @Test + public void testAddIndexes() throws Exception { + // addIndexes - single commit in each + doTest(random, true, 0, false); + + // addIndexes - multiple commits in each + doTest(random, true, 0, true); + } + + @Test + public void testAddIndexesIntoExisting() throws Exception { + // addIndexes - single commit in each + doTest(random, false, NUM_DOCS, false); + + // addIndexes - multiple commits in each + doTest(random, false, NUM_DOCS, true); + } + + @Test + public void testRegularMerges() throws Exception { + Directory dir = newDirectory(); + populateDocs(random, dir, true); + verifyPayloadExists(dir, new Term("p", "p1"), NUM_DOCS); + verifyPayloadExists(dir, new Term("p", "p2"), NUM_DOCS); + + // Add two source dirs. By not adding the dest dir, we ensure its payloads + // won't get processed. + Map processors = new HashMap(); + processors.put(dir, new PerTermPayloadProcessor()); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false))); + writer.setPayloadProcessorProvider(new PerDirPayloadProcessor(processors)); + writer.optimize(); + writer.close(); + + verifyPayloadExists(dir, new Term("p", "p1"), 0); + verifyPayloadExists(dir, new Term("p", "p2"), NUM_DOCS); + dir.close(); + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestPayloads.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestPayloads.java new file mode 100644 index 0000000..5ca0c6a --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestPayloads.java @@ -0,0 +1,608 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.File; +import java.io.IOException; +import java.io.Reader; +import java.io.UnsupportedEncodingException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.WhitespaceTokenizer; +import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.UnicodeUtil; +import org.apache.lucene.util._TestUtil; + + +public class TestPayloads extends LuceneTestCase { + + // Simple tests to test the Payload class + public void testPayload() throws Exception { + byte[] testData = "This is a test!".getBytes(); + Payload payload = new Payload(testData); + assertEquals("Wrong payload length.", testData.length, payload.length()); + + // test copyTo() + byte[] target = new byte[testData.length - 1]; + try { + payload.copyTo(target, 0); + fail("Expected exception not thrown"); + } catch (Exception expected) { + // expected exception + } + + target = new byte[testData.length + 3]; + payload.copyTo(target, 3); + + for (int i = 0; i < testData.length; i++) { + assertEquals(testData[i], target[i + 3]); + } + + + // test toByteArray() + target = payload.toByteArray(); + assertByteArrayEquals(testData, target); + + // test byteAt() + for (int i = 0; i < testData.length; i++) { + assertEquals(payload.byteAt(i), testData[i]); + } + + try { + payload.byteAt(testData.length + 1); + fail("Expected exception not thrown"); + } catch (Exception expected) { + // expected exception + } + + Payload clone = (Payload) payload.clone(); + assertEquals(payload.length(), clone.length()); + for (int i = 0; i < payload.length(); i++) { + assertEquals(payload.byteAt(i), clone.byteAt(i)); + } + + } + + // Tests whether the DocumentWriter and SegmentMerger correctly enable the + // payload bit in the FieldInfo + public void testPayloadFieldBit() throws Exception { + Directory ram = newDirectory(); + PayloadAnalyzer analyzer = new PayloadAnalyzer(); + IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer)); + Document d = new Document(); + // this field won't have any payloads + d.add(newField("f1", "This field has no payloads", Field.Store.NO, Field.Index.ANALYZED)); + // this field will have payloads in all docs, however not for all term positions, + // so this field is used to check if the DocumentWriter correctly enables the payloads bit + // even if only some term positions have payloads + d.add(newField("f2", "This field has payloads in all docs", Field.Store.NO, Field.Index.ANALYZED)); + d.add(newField("f2", "This field has payloads in all docs", Field.Store.NO, Field.Index.ANALYZED)); + // this field is used to verify if the SegmentMerger enables payloads for a field if it has payloads + // enabled in only some documents + d.add(newField("f3", "This field has payloads in some docs", Field.Store.NO, Field.Index.ANALYZED)); + // only add payload data for field f2 + analyzer.setPayloadData("f2", 1, 
"somedata".getBytes(), 0, 1); + writer.addDocument(d); + // flush + writer.close(); + + SegmentReader reader = SegmentReader.getOnlySegmentReader(ram); + FieldInfos fi = reader.fieldInfos(); + assertFalse("Payload field bit should not be set.", fi.fieldInfo("f1").storePayloads); + assertTrue("Payload field bit should be set.", fi.fieldInfo("f2").storePayloads); + assertFalse("Payload field bit should not be set.", fi.fieldInfo("f3").storePayloads); + reader.close(); + + // now we add another document which has payloads for field f3 and verify if the SegmentMerger + // enabled payloads for that field + writer = new IndexWriter(ram, newIndexWriterConfig( TEST_VERSION_CURRENT, + analyzer).setOpenMode(OpenMode.CREATE)); + d = new Document(); + d.add(newField("f1", "This field has no payloads", Field.Store.NO, Field.Index.ANALYZED)); + d.add(newField("f2", "This field has payloads in all docs", Field.Store.NO, Field.Index.ANALYZED)); + d.add(newField("f2", "This field has payloads in all docs", Field.Store.NO, Field.Index.ANALYZED)); + d.add(newField("f3", "This field has payloads in some docs", Field.Store.NO, Field.Index.ANALYZED)); + // add payload data for field f2 and f3 + analyzer.setPayloadData("f2", "somedata".getBytes(), 0, 1); + analyzer.setPayloadData("f3", "somedata".getBytes(), 0, 3); + writer.addDocument(d); + // force merge + writer.optimize(); + // flush + writer.close(); + + reader = SegmentReader.getOnlySegmentReader(ram); + fi = reader.fieldInfos(); + assertFalse("Payload field bit should not be set.", fi.fieldInfo("f1").storePayloads); + assertTrue("Payload field bit should be set.", fi.fieldInfo("f2").storePayloads); + assertTrue("Payload field bit should be set.", fi.fieldInfo("f3").storePayloads); + reader.close(); + ram.close(); + } + + // Tests if payloads are correctly stored and loaded using both RamDirectory and FSDirectory + public void testPayloadsEncoding() throws Exception { + // first perform the test using a RAMDirectory + Directory dir = newDirectory(); + performTest(dir); + dir.close(); + // now use a FSDirectory and repeat same test + File dirName = _TestUtil.getTempDir("test_payloads"); + dir = newFSDirectory(dirName); + performTest(dir); + _TestUtil.rmDir(dirName); + dir.close(); + } + + // builds an index with payloads in the given Directory and performs + // different tests to verify the payload encoding + private void performTest(Directory dir) throws Exception { + PayloadAnalyzer analyzer = new PayloadAnalyzer(); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, analyzer) + .setOpenMode(OpenMode.CREATE) + .setMergePolicy(newLogMergePolicy())); + + // should be in sync with value in TermInfosWriter + final int skipInterval = 16; + + final int numTerms = 5; + final String fieldName = "f1"; + + int numDocs = skipInterval + 1; + // create content for the test documents with just a few terms + Term[] terms = generateTerms(fieldName, numTerms); + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < terms.length; i++) { + sb.append(terms[i].text); + sb.append(" "); + } + String content = sb.toString(); + + + int payloadDataLength = numTerms * numDocs * 2 + numTerms * numDocs * (numDocs - 1) / 2; + byte[] payloadData = generateRandomData(payloadDataLength); + + Document d = new Document(); + d.add(newField(fieldName, content, Field.Store.NO, Field.Index.ANALYZED)); + // add the same document multiple times to have the same payload lengths for all + // occurrences within two consecutive skip intervals + int 
offset = 0; + for (int i = 0; i < 2 * numDocs; i++) { + analyzer.setPayloadData(fieldName, payloadData, offset, 1); + offset += numTerms; + writer.addDocument(d); + } + + // make sure we create more than one segment to test merging + writer.commit(); + + // now we make sure to have different payload lengths next at the next skip point + for (int i = 0; i < numDocs; i++) { + analyzer.setPayloadData(fieldName, payloadData, offset, i); + offset += i * numTerms; + writer.addDocument(d); + } + + writer.optimize(); + // flush + writer.close(); + + + /* + * Verify the index + * first we test if all payloads are stored correctly + */ + IndexReader reader = IndexReader.open(dir, true); + + byte[] verifyPayloadData = new byte[payloadDataLength]; + offset = 0; + TermPositions[] tps = new TermPositions[numTerms]; + for (int i = 0; i < numTerms; i++) { + tps[i] = reader.termPositions(terms[i]); + } + + while (tps[0].next()) { + for (int i = 1; i < numTerms; i++) { + tps[i].next(); + } + int freq = tps[0].freq(); + + for (int i = 0; i < freq; i++) { + for (int j = 0; j < numTerms; j++) { + tps[j].nextPosition(); + if (tps[j].isPayloadAvailable()) { + tps[j].getPayload(verifyPayloadData, offset); + offset += tps[j].getPayloadLength(); + } + } + } + } + + for (int i = 0; i < numTerms; i++) { + tps[i].close(); + } + + assertByteArrayEquals(payloadData, verifyPayloadData); + + /* + * test lazy skipping + */ + TermPositions tp = reader.termPositions(terms[0]); + tp.next(); + tp.nextPosition(); + // now we don't read this payload + tp.nextPosition(); + assertEquals("Wrong payload length.", 1, tp.getPayloadLength()); + byte[] payload = tp.getPayload(null, 0); + assertEquals(payload[0], payloadData[numTerms]); + tp.nextPosition(); + + // we don't read this payload and skip to a different document + tp.skipTo(5); + tp.nextPosition(); + assertEquals("Wrong payload length.", 1, tp.getPayloadLength()); + payload = tp.getPayload(null, 0); + assertEquals(payload[0], payloadData[5 * numTerms]); + + + /* + * Test different lengths at skip points + */ + tp.seek(terms[1]); + tp.next(); + tp.nextPosition(); + assertEquals("Wrong payload length.", 1, tp.getPayloadLength()); + tp.skipTo(skipInterval - 1); + tp.nextPosition(); + assertEquals("Wrong payload length.", 1, tp.getPayloadLength()); + tp.skipTo(2 * skipInterval - 1); + tp.nextPosition(); + assertEquals("Wrong payload length.", 1, tp.getPayloadLength()); + tp.skipTo(3 * skipInterval - 1); + tp.nextPosition(); + assertEquals("Wrong payload length.", 3 * skipInterval - 2 * numDocs - 1, tp.getPayloadLength()); + + /* + * Test multiple call of getPayload() + */ + tp.getPayload(null, 0); + try { + // it is forbidden to call getPayload() more than once + // without calling nextPosition() + tp.getPayload(null, 0); + fail("Expected exception not thrown"); + } catch (Exception expected) { + // expected exception + } + + reader.close(); + + // test long payload + analyzer = new PayloadAnalyzer(); + writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, + analyzer).setOpenMode(OpenMode.CREATE)); + String singleTerm = "lucene"; + + d = new Document(); + d.add(newField(fieldName, singleTerm, Field.Store.NO, Field.Index.ANALYZED)); + // add a payload whose length is greater than the buffer size of BufferedIndexOutput + payloadData = generateRandomData(2000); + analyzer.setPayloadData(fieldName, payloadData, 100, 1500); + writer.addDocument(d); + + + writer.optimize(); + // flush + writer.close(); + + reader = IndexReader.open(dir, true); + tp = 
reader.termPositions(new Term(fieldName, singleTerm)); + tp.next(); + tp.nextPosition(); + + verifyPayloadData = new byte[tp.getPayloadLength()]; + tp.getPayload(verifyPayloadData, 0); + byte[] portion = new byte[1500]; + System.arraycopy(payloadData, 100, portion, 0, 1500); + + assertByteArrayEquals(portion, verifyPayloadData); + reader.close(); + + } + + private void generateRandomData(byte[] data) { + random.nextBytes(data); + } + + private byte[] generateRandomData(int n) { + byte[] data = new byte[n]; + generateRandomData(data); + return data; + } + + private Term[] generateTerms(String fieldName, int n) { + int maxDigits = (int) (Math.log(n) / Math.log(10)); + Term[] terms = new Term[n]; + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < n; i++) { + sb.setLength(0); + sb.append("t"); + int zeros = maxDigits - (int) (Math.log(i) / Math.log(10)); + for (int j = 0; j < zeros; j++) { + sb.append("0"); + } + sb.append(i); + terms[i] = new Term(fieldName, sb.toString()); + } + return terms; + } + + + void assertByteArrayEquals(byte[] b1, byte[] b2) { + if (b1.length != b2.length) { + fail("Byte arrays have different lengths: " + b1.length + ", " + b2.length); + } + + for (int i = 0; i < b1.length; i++) { + if (b1[i] != b2[i]) { + fail("Byte arrays different at index " + i + ": " + b1[i] + ", " + b2[i]); + } + } + } + + + /** + * This Analyzer uses a WhitespaceTokenizer and PayloadFilter. + */ + private static class PayloadAnalyzer extends Analyzer { + Map<String, PayloadData> fieldToData = new HashMap<String, PayloadData>(); + + void setPayloadData(String field, byte[] data, int offset, int length) { + fieldToData.put(field, new PayloadData(0, data, offset, length)); + } + + void setPayloadData(String field, int numFieldInstancesToSkip, byte[] data, int offset, int length) { + fieldToData.put(field, new PayloadData(numFieldInstancesToSkip, data, offset, length)); + } + + @Override + public TokenStream tokenStream(String fieldName, Reader reader) { + PayloadData payload = fieldToData.get(fieldName); + TokenStream ts = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader); + if (payload != null) { + if (payload.numFieldInstancesToSkip == 0) { + ts = new PayloadFilter(ts, payload.data, payload.offset, payload.length); + } else { + payload.numFieldInstancesToSkip--; + } + } + return ts; + } + + private static class PayloadData { + byte[] data; + int offset; + int length; + int numFieldInstancesToSkip; + + PayloadData(int skip, byte[] data, int offset, int length) { + numFieldInstancesToSkip = skip; + this.data = data; + this.offset = offset; + this.length = length; + } + } + } + + + /** + * This Filter adds payloads to the tokens. 
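+ * Each token receives the next {@code length} bytes of the supplied data
+ * array as its payload; once the array is exhausted, subsequent tokens are
+ * given no payload (see incrementToken() below).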
+ */ + private static class PayloadFilter extends TokenFilter { + private byte[] data; + private int length; + private int offset; + Payload payload = new Payload(); + PayloadAttribute payloadAtt; + + public PayloadFilter(TokenStream in, byte[] data, int offset, int length) { + super(in); + this.data = data; + this.length = length; + this.offset = offset; + payloadAtt = addAttribute(PayloadAttribute.class); + } + + @Override + public boolean incrementToken() throws IOException { + boolean hasNext = input.incrementToken(); + if (hasNext) { + if (offset + length <= data.length) { + Payload p = new Payload(); + payloadAtt.setPayload(p); + p.setData(data, offset, length); + offset += length; + } else { + payloadAtt.setPayload(null); + } + } + + return hasNext; + } + } + + public void testThreadSafety() throws Exception { + final int numThreads = 5; + final int numDocs = atLeast(50); + final ByteArrayPool pool = new ByteArrayPool(numThreads, 5); + + Directory dir = newDirectory(); + final IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random))); + final String field = "test"; + + Thread[] ingesters = new Thread[numThreads]; + for (int i = 0; i < numThreads; i++) { + ingesters[i] = new Thread() { + @Override + public void run() { + try { + for (int j = 0; j < numDocs; j++) { + Document d = new Document(); + d.add(new Field(field, new PoolingPayloadTokenStream(pool))); + writer.addDocument(d); + } + } catch (Exception e) { + e.printStackTrace(); + fail(e.toString()); + } + } + }; + ingesters[i].start(); + } + + for (int i = 0; i < numThreads; i++) { + ingesters[i].join(); + } + writer.close(); + IndexReader reader = IndexReader.open(dir, true); + TermEnum terms = reader.terms(); + while (terms.next()) { + TermPositions tp = reader.termPositions(terms.term()); + while(tp.next()) { + int freq = tp.freq(); + for (int i = 0; i < freq; i++) { + tp.nextPosition(); + assertEquals(pool.bytesToString(tp.getPayload(new byte[5], 0)), terms.term().text); + } + } + tp.close(); + } + terms.close(); + reader.close(); + dir.close(); + assertEquals(pool.size(), numThreads); + } + + private class PoolingPayloadTokenStream extends TokenStream { + private byte[] payload; + private boolean first; + private ByteArrayPool pool; + private String term; + + CharTermAttribute termAtt; + PayloadAttribute payloadAtt; + + PoolingPayloadTokenStream(ByteArrayPool pool) { + this.pool = pool; + payload = pool.get(); + generateRandomData(payload); + term = pool.bytesToString(payload); + first = true; + payloadAtt = addAttribute(PayloadAttribute.class); + termAtt = addAttribute(CharTermAttribute.class); + } + + @Override + public boolean incrementToken() throws IOException { + if (!first) return false; + first = false; + clearAttributes(); + termAtt.append(term); + payloadAtt.setPayload(new Payload(payload)); + return true; + } + + @Override + public void close() throws IOException { + pool.release(payload); + } + + } + + private static class ByteArrayPool { + private List<byte[]> pool; + + ByteArrayPool(int capacity, int size) { + pool = new ArrayList<byte[]>(); + for (int i = 0; i < capacity; i++) { + pool.add(new byte[size]); + } + } + + private UnicodeUtil.UTF8Result utf8Result = new UnicodeUtil.UTF8Result(); + + synchronized String bytesToString(byte[] bytes) { + String s = new String(bytes); + UnicodeUtil.UTF16toUTF8(s, 0, s.length(), utf8Result); + try { + return new String(utf8Result.result, 0, utf8Result.length, "UTF-8"); + } catch (UnsupportedEncodingException uee) { + return null; + } + } + + synchronized byte[] get() { + return pool.remove(0); + } + + synchronized void release(byte[] b) { + pool.add(b); + } + + synchronized int size() { + return pool.size(); + } + } + + public void testAcrossFields() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, dir, + new MockAnalyzer(random, MockTokenizer.WHITESPACE, true)); + Document doc = new Document(); + doc.add(new Field("hasMaybepayload", "here we go", Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(doc); + writer.close(); + + writer = new RandomIndexWriter(random, dir, + new MockAnalyzer(random, MockTokenizer.WHITESPACE, true)); + doc = new Document(); + doc.add(new Field("hasMaybepayload2", "here we go", Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(doc); + writer.addDocument(doc); + writer.optimize(); + writer.close(); + + dir.close(); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestPerSegmentDeletes.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestPerSegmentDeletes.java new file mode 100644 index 0000000..767f19f --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestPerSegmentDeletes.java @@ -0,0 +1,278 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; +import java.util.Map; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.MockDirectoryWrapper; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.Version; + +public class TestPerSegmentDeletes extends LuceneTestCase { + public void testDeletes1() throws Exception { + //IndexWriter.debug2 = System.out; + Directory dir = new MockDirectoryWrapper(new Random(), new RAMDirectory()); + IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_CURRENT, + new MockAnalyzer(random)); + iwc.setMergeScheduler(new SerialMergeScheduler()); + iwc.setMaxBufferedDocs(5000); + iwc.setRAMBufferSizeMB(100); + RangeMergePolicy fsmp = new RangeMergePolicy(false); + iwc.setMergePolicy(fsmp); + IndexWriter writer = new IndexWriter(dir, iwc); + writer.setInfoStream(VERBOSE ? 
System.out : null); + for (int x = 0; x < 5; x++) { + writer.addDocument(TestIndexWriterReader.createDocument(x, "1", 2)); + //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs()); + } + //System.out.println("commit1"); + writer.commit(); + assertEquals(1, writer.segmentInfos.size()); + for (int x = 5; x < 10; x++) { + writer.addDocument(TestIndexWriterReader.createDocument(x, "2", 2)); + //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs()); + } + //System.out.println("commit2"); + writer.commit(); + assertEquals(2, writer.segmentInfos.size()); + + for (int x = 10; x < 15; x++) { + writer.addDocument(TestIndexWriterReader.createDocument(x, "3", 2)); + //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs()); + } + + writer.deleteDocuments(new Term("id", "1")); + + writer.deleteDocuments(new Term("id", "11")); + + // flushing without applying deletes means + // there will still be deletes in the segment infos + writer.flush(false, false); + assertTrue(writer.bufferedDeletesStream.any()); + + // get reader flushes pending deletes + // so there should not be anymore + IndexReader r1 = writer.getReader(); + assertFalse(writer.bufferedDeletesStream.any()); + r1.close(); + + // delete id:2 from the first segment + // merge segments 0 and 1 + // which should apply the delete id:2 + writer.deleteDocuments(new Term("id", "2")); + writer.flush(false, false); + fsmp.doMerge = true; + fsmp.start = 0; + fsmp.length = 2; + writer.maybeMerge(); + + assertEquals(2, writer.segmentInfos.size()); + + // id:2 shouldn't exist anymore because + // it's been applied in the merge and now it's gone + IndexReader r2 = writer.getReader(); + int[] id2docs = toDocsArray(new Term("id", "2"), r2); + assertTrue(id2docs == null); + r2.close(); + + /** + // added docs are in the ram buffer + for (int x = 15; x < 20; x++) { + writer.addDocument(TestIndexWriterReader.createDocument(x, "4", 2)); + System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs()); + } + assertTrue(writer.numRamDocs() > 0); + // delete from the ram buffer + writer.deleteDocuments(new Term("id", Integer.toString(13))); + + Term id3 = new Term("id", Integer.toString(3)); + + // delete from the 1st segment + writer.deleteDocuments(id3); + + assertTrue(writer.numRamDocs() > 0); + + //System.out + // .println("segdels1:" + writer.docWriter.deletesToString()); + + //assertTrue(writer.docWriter.segmentDeletes.size() > 0); + + // we cause a merge to happen + fsmp.doMerge = true; + fsmp.start = 0; + fsmp.length = 2; + System.out.println("maybeMerge "+writer.segmentInfos); + + SegmentInfo info0 = writer.segmentInfos.info(0); + SegmentInfo info1 = writer.segmentInfos.info(1); + + writer.maybeMerge(); + System.out.println("maybeMerge after "+writer.segmentInfos); + // there should be docs in RAM + assertTrue(writer.numRamDocs() > 0); + + // assert we've merged the 1 and 2 segments + // and still have a segment leftover == 2 + assertEquals(2, writer.segmentInfos.size()); + assertFalse(segThere(info0, writer.segmentInfos)); + assertFalse(segThere(info1, writer.segmentInfos)); + + //System.out.println("segdels2:" + writer.docWriter.deletesToString()); + + //assertTrue(writer.docWriter.segmentDeletes.size() > 0); + + IndexReader r = writer.getReader(); + IndexReader r1 = r.getSequentialSubReaders()[0]; + printDelDocs(r1.getDeletedDocs()); + int[] docs = toDocsArray(id3, null, r); + System.out.println("id3 docs:"+Arrays.toString(docs)); + // there shouldn't be any docs for id:3 + assertTrue(docs == null); + r.close(); + + 
part2(writer, fsmp); + **/ + // System.out.println("segdels2:"+writer.docWriter.segmentDeletes.toString()); + //System.out.println("close"); + writer.close(); + dir.close(); + } + + /** + static boolean hasPendingDeletes(SegmentInfos infos) { + for (SegmentInfo info : infos) { + if (info.deletes.any()) { + return true; + } + } + return false; + } + **/ + void part2(IndexWriter writer, RangeMergePolicy fsmp) throws Exception { + for (int x = 20; x < 25; x++) { + writer.addDocument(TestIndexWriterReader.createDocument(x, "5", 2)); + //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs()); + } + writer.flush(false, false); + for (int x = 25; x < 30; x++) { + writer.addDocument(TestIndexWriterReader.createDocument(x, "5", 2)); + //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs()); + } + writer.flush(false, false); + + //System.out.println("infos3:"+writer.segmentInfos); + + Term delterm = new Term("id", "8"); + writer.deleteDocuments(delterm); + //System.out.println("segdels3:" + writer.docWriter.deletesToString()); + + fsmp.doMerge = true; + fsmp.start = 1; + fsmp.length = 2; + writer.maybeMerge(); + + // deletes for info1, the newly created segment from the + // merge should have no deletes because they were applied in + // the merge + //SegmentInfo info1 = writer.segmentInfos.info(1); + //assertFalse(exists(info1, writer.docWriter.segmentDeletes)); + + //System.out.println("infos4:"+writer.segmentInfos); + //System.out.println("segdels4:" + writer.docWriter.deletesToString()); + } + + boolean segThere(SegmentInfo info, SegmentInfos infos) { + for (SegmentInfo si : infos) { + if (si.name.equals(info.name)) return true; + } + return false; + } + + public static int[] toDocsArray(Term term, IndexReader reader) + throws IOException { + TermDocs termDocs = reader.termDocs(); + termDocs.seek(term); + return toArray(termDocs); + } + + public static int[] toArray(TermDocs termDocs) throws IOException { + List docs = new ArrayList(); + while (termDocs.next()) { + docs.add(termDocs.doc()); + } + if (docs.size() == 0) { + return null; + } else { + return ArrayUtil.toIntArray(docs); + } + } + + public class RangeMergePolicy extends MergePolicy { + boolean doMerge = false; + int start; + int length; + + private final boolean useCompoundFile; + + private RangeMergePolicy(boolean useCompoundFile) { + this.useCompoundFile = useCompoundFile; + } + + @Override + public void close() {} + + @Override + public MergeSpecification findMerges(SegmentInfos segmentInfos) + throws CorruptIndexException, IOException { + MergeSpecification ms = new MergeSpecification(); + if (doMerge) { + OneMerge om = new OneMerge(segmentInfos.asList().subList(start, start + length)); + ms.add(om); + doMerge = false; + return ms; + } + return null; + } + + @Override + public MergeSpecification findMergesForOptimize(SegmentInfos segmentInfos, + int maxSegmentCount, Map segmentsToOptimize) + throws CorruptIndexException, IOException { + return null; + } + + @Override + public MergeSpecification findMergesToExpungeDeletes( + SegmentInfos segmentInfos) throws CorruptIndexException, IOException { + return null; + } + + @Override + public boolean useCompoundFile(SegmentInfos segments, SegmentInfo newSegment) { + return useCompoundFile; + } + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestPersistentSnapshotDeletionPolicy.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestPersistentSnapshotDeletionPolicy.java new file mode 100644 index 
0000000..b18acf2 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestPersistentSnapshotDeletionPolicy.java @@ -0,0 +1,191 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +import java.io.IOException; +import java.util.Map; +import java.util.Map.Entry; + +import org.apache.lucene.document.Document; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.LockObtainFailedException; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +public class TestPersistentSnapshotDeletionPolicy extends TestSnapshotDeletionPolicy { + + // Keep it a class member so that getDeletionPolicy can use it + private Directory snapshotDir; + + // so we can close it if called by SDP tests + private PersistentSnapshotDeletionPolicy psdp; + + @Before + @Override + public void setUp() throws Exception { + super.setUp(); + snapshotDir = newDirectory(); + } + + @After + @Override + public void tearDown() throws Exception { + if (psdp != null) psdp.close(); + snapshotDir.close(); + super.tearDown(); + } + + @Override + protected SnapshotDeletionPolicy getDeletionPolicy() throws IOException { + if (psdp != null) psdp.close(); + snapshotDir.close(); + snapshotDir = newDirectory(); + return psdp = new PersistentSnapshotDeletionPolicy( + new KeepOnlyLastCommitDeletionPolicy(), snapshotDir, OpenMode.CREATE, + TEST_VERSION_CURRENT); + } + + @Override + protected SnapshotDeletionPolicy getDeletionPolicy(Map snapshots) throws IOException { + SnapshotDeletionPolicy sdp = getDeletionPolicy(); + if (snapshots != null) { + for (Entry e: snapshots.entrySet()) { + sdp.registerSnapshotInfo(e.getKey(), e.getValue(), null); + } + } + return sdp; + } + + @Override + @Test + public void testExistingSnapshots() throws Exception { + int numSnapshots = 3; + Directory dir = newDirectory(); + PersistentSnapshotDeletionPolicy psdp = (PersistentSnapshotDeletionPolicy) getDeletionPolicy(); + IndexWriter writer = new IndexWriter(dir, getConfig(random, psdp)); + prepareIndexAndSnapshots(psdp, writer, numSnapshots, "snapshot"); + writer.close(); + psdp.close(); + + // Re-initialize and verify snapshots were persisted + psdp = new PersistentSnapshotDeletionPolicy( + new KeepOnlyLastCommitDeletionPolicy(), snapshotDir, OpenMode.APPEND, + TEST_VERSION_CURRENT); + new IndexWriter(dir, getConfig(random, psdp)).close(); + + assertSnapshotExists(dir, psdp, numSnapshots); + assertEquals(numSnapshots, psdp.getSnapshots().size()); + psdp.close(); + dir.close(); + } + + @Test(expected=IllegalArgumentException.class) + public void testIllegalSnapshotId() throws Exception { + getDeletionPolicy().snapshot("$SNAPSHOTS_DOC$"); + } + + 
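+
+  // A hedged usage sketch, not part of the upstream test: it spells out the
+  // snapshot lifecycle the tests in this class exercise. The "backup" id and
+  // the copy step are illustrative assumptions; the policy persists its
+  // id -> commit mapping in the snapshots Directory passed to its constructor.
+  private void snapshotLifecycleSketch(Directory indexDir, Directory snapshotsDir) throws Exception {
+    PersistentSnapshotDeletionPolicy policy = new PersistentSnapshotDeletionPolicy(
+        new KeepOnlyLastCommitDeletionPolicy(), snapshotsDir, OpenMode.CREATE, TEST_VERSION_CURRENT);
+    IndexWriter w = new IndexWriter(indexDir, getConfig(random, policy));
+    w.addDocument(new Document());
+    w.commit();
+    IndexCommit commit = policy.snapshot("backup"); // files of this commit are now protected
+    try {
+      for (String fileName : commit.getFileNames()) {
+        assertTrue(fileName.length() > 0); // a backup tool would copy fileName out of indexDir here
+      }
+    } finally {
+      policy.release("backup"); // id freed; the commit may be deleted again
+    }
+    w.close();
+    policy.close();
+  }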
@Test + public void testInvalidSnapshotInfos() throws Exception { + // Add the correct number of documents (1), but without snapshot information + IndexWriter writer = new IndexWriter(snapshotDir, getConfig(random, null)); + writer.addDocument(new Document()); + writer.close(); + try { + new PersistentSnapshotDeletionPolicy( + new KeepOnlyLastCommitDeletionPolicy(), snapshotDir, OpenMode.APPEND, + TEST_VERSION_CURRENT); + fail("should not have succeeded to read from an invalid Directory"); + } catch (IllegalStateException e) { + } + } + + @Test + public void testNoSnapshotInfos() throws Exception { + // Initialize an empty index in snapshotDir - PSDP should initialize successfully. + new IndexWriter(snapshotDir, getConfig(random, null)).close(); + new PersistentSnapshotDeletionPolicy( + new KeepOnlyLastCommitDeletionPolicy(), snapshotDir, OpenMode.APPEND, + TEST_VERSION_CURRENT).close(); + } + + @Test(expected=IllegalStateException.class) + public void testTooManySnapshotInfos() throws Exception { + // Write two documents to the snapshots directory - illegal. + IndexWriter writer = new IndexWriter(snapshotDir, getConfig(random, null)); + writer.addDocument(new Document()); + writer.addDocument(new Document()); + writer.close(); + + new PersistentSnapshotDeletionPolicy( + new KeepOnlyLastCommitDeletionPolicy(), snapshotDir, OpenMode.APPEND, + TEST_VERSION_CURRENT).close(); + fail("should not have succeeded to open an invalid directory"); + } + + @Test + public void testSnapshotRelease() throws Exception { + Directory dir = newDirectory(); + PersistentSnapshotDeletionPolicy psdp = (PersistentSnapshotDeletionPolicy) getDeletionPolicy(); + IndexWriter writer = new IndexWriter(dir, getConfig(random, psdp)); + prepareIndexAndSnapshots(psdp, writer, 1, "snapshot"); + writer.close(); + + psdp.release("snapshot0"); + psdp.close(); + + psdp = new PersistentSnapshotDeletionPolicy( + new KeepOnlyLastCommitDeletionPolicy(), snapshotDir, OpenMode.APPEND, + TEST_VERSION_CURRENT); + assertEquals("Should have no snapshots !", 0, psdp.getSnapshots().size()); + psdp.close(); + dir.close(); + } + + @Test + public void testStaticRead() throws Exception { + // While PSDP is open, it keeps a lock on the snapshots directory and thus + // prevents reading the snapshots information. This test checks that the + // static read method works. + int numSnapshots = 1; + Directory dir = newDirectory(); + PersistentSnapshotDeletionPolicy psdp = (PersistentSnapshotDeletionPolicy) getDeletionPolicy(); + IndexWriter writer = new IndexWriter(dir, getConfig(random, psdp)); + prepareIndexAndSnapshots(psdp, writer, numSnapshots, "snapshot"); + writer.close(); + dir.close(); + + try { + // This should fail, since the snapshots directory is locked - we didn't close it ! 
+ new PersistentSnapshotDeletionPolicy( + new KeepOnlyLastCommitDeletionPolicy(), snapshotDir, OpenMode.APPEND, + TEST_VERSION_CURRENT); + fail("should not have reached here - the snapshots directory should be locked!"); + } catch (LockObtainFailedException e) { + // expected + } finally { + psdp.close(); + } + + // Reading the snapshots info should succeed though + Map<String, String> snapshots = PersistentSnapshotDeletionPolicy.readSnapshotsInfo(snapshotDir); + assertEquals("expected " + numSnapshots + " snapshots, got " + snapshots.size(), numSnapshots, snapshots.size()); + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestPositionBasedTermVectorMapper.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestPositionBasedTermVectorMapper.java new file mode 100644 index 0000000..814f4eb --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestPositionBasedTermVectorMapper.java @@ -0,0 +1,99 @@ +package org.apache.lucene.index; +/** + * Copyright 2005 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util.LuceneTestCase; + +import java.io.IOException; +import java.util.BitSet; +import java.util.Map; + +public class TestPositionBasedTermVectorMapper extends LuceneTestCase { + protected String[] tokens; + protected int[][] thePositions; + protected TermVectorOffsetInfo[][] offsets; + protected int numPositions; + + @Override + public void setUp() throws Exception { + super.setUp(); + tokens = new String[]{"here", "is", "some", "text", "to", "test", "extra"}; + thePositions = new int[tokens.length][]; + offsets = new TermVectorOffsetInfo[tokens.length][]; + numPositions = 0; + //save off the last one so we can add it with the same positions as some of the others, but in a predictable way + for (int i = 0; i < tokens.length - 1; i++) + { + thePositions[i] = new int[2 * i + 1];//give 'em all some positions + for (int j = 0; j < thePositions[i].length; j++) + { + thePositions[i][j] = numPositions++; + } + offsets[i] = new TermVectorOffsetInfo[thePositions[i].length]; + for (int j = 0; j < offsets[i].length; j++) { + offsets[i][j] = new TermVectorOffsetInfo(j, j + 1);//the actual value here doesn't much matter + } + } + thePositions[tokens.length - 1] = new int[1]; + thePositions[tokens.length - 1][0] = 0;//put this at the same position as "here" + offsets[tokens.length - 1] = new TermVectorOffsetInfo[1]; + offsets[tokens.length - 1][0] = new TermVectorOffsetInfo(0, 1); + } + + public void test() throws IOException { + PositionBasedTermVectorMapper mapper = new PositionBasedTermVectorMapper(); + + mapper.setExpectations("test", tokens.length, true, true); + //Test single position + for (int i = 0; i < tokens.length; i++) { + String token = tokens[i]; + mapper.map(token, 1, null, thePositions[i]); + + } + Map<String, Map<Integer, PositionBasedTermVectorMapper.TVPositionInfo>> map = mapper.getFieldToTerms(); + assertTrue("map is null and it shouldn't be", map != null); + assertTrue("map Size: " + map.size() + " is not: " + 1, map.size() == 1); + Map<Integer, PositionBasedTermVectorMapper.TVPositionInfo> positions = map.get("test"); + assertTrue("thePositions is null and it shouldn't be", positions != null); + + assertTrue("thePositions Size: " + positions.size() + " is not: " + numPositions, positions.size() == numPositions); + BitSet bits = new BitSet(numPositions); + for (Map.Entry<Integer, PositionBasedTermVectorMapper.TVPositionInfo> entry : positions.entrySet()) { + + PositionBasedTermVectorMapper.TVPositionInfo info = entry.getValue(); + assertTrue("info is null and it shouldn't be", info != null); + int pos = entry.getKey().intValue(); + bits.set(pos); + assertTrue(info.getPosition() + " does not equal: " + pos, info.getPosition() == pos); + assertTrue("info.getOffsets() is null and it shouldn't be", info.getOffsets() != null); + if (pos == 0) + { + assertTrue("info.getTerms() Size: " + info.getTerms().size() + " is not: " + 2, info.getTerms().size() == 2);//need a test for multiple terms at one pos + assertTrue("info.getOffsets() Size: " + info.getOffsets().size() + " is not: " + 2, info.getOffsets().size() == 2); + } + else + { + assertTrue("info.getTerms() Size: " + info.getTerms().size() + " is not: " + 1, info.getTerms().size() == 1);//need a test for multiple terms at one pos + assertTrue("info.getOffsets() Size: " + info.getOffsets().size() + " is not: " + 1, info.getOffsets().size() == 1); + } + } + assertTrue("Bits are not all on", bits.cardinality() == numPositions); + } + + + + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestRollback.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestRollback.java new file mode 100644 index 0000000..eb3ea10 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestRollback.java @@ -0,0 +1,61 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.WhitespaceAnalyzer; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field.Index; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; + +public class TestRollback extends LuceneTestCase { + + // LUCENE-2536 + public void testRollbackIntegrityWithBufferFlush() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter rw = new RandomIndexWriter(random, dir); + + for (int i = 0; i < 5; i++) { + Document doc = new Document(); + doc.add(newField("pk", Integer.toString(i), Store.YES, Index.ANALYZED_NO_NORMS)); + rw.addDocument(doc); + } + rw.close(); + + // If buffer size is small enough to cause a flush, errors ensue... 
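+  // Five docs are committed above; with maxBufferedDocs=2 the three updates
+  // below force at least one flush before rollback(), so rollback() must
+  // discard both flushed segments and still-buffered docs, leaving exactly
+  // the five committed documents (asserted at the end).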
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2).setOpenMode(IndexWriterConfig.OpenMode.APPEND)); + + Term pkTerm = new Term("pk", ""); + for (int i = 0; i < 3; i++) { + Document doc = new Document(); + String value = Integer.toString(i); + doc.add(newField("pk", value, Store.YES, Index.ANALYZED_NO_NORMS)); + doc.add(newField("text", "foo", Store.YES, Index.ANALYZED_NO_NORMS)); + w.updateDocument(pkTerm.createTerm(value), doc); + } + w.rollback(); + + IndexReader r = IndexReader.open(dir, true); + assertEquals("index should contain same number of docs post rollback", 5, r.numDocs()); + r.close(); + dir.close(); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestRollingUpdates.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestRollingUpdates.java new file mode 100644 index 0000000..69e9ddb --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestRollingUpdates.java @@ -0,0 +1,145 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.*; +import org.apache.lucene.store.*; +import org.apache.lucene.util.*; +import org.junit.Test; + +public class TestRollingUpdates extends LuceneTestCase { + + // Just updates the same set of N docs over and over, to + // stress out deletions + + @Test + public void testRollingUpdates() throws Exception { + final Directory dir = newDirectory(); + + final LineFileDocs docs = new LineFileDocs(random); + + final IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); + final int SIZE = atLeast(20); + int id = 0; + IndexReader r = null; + final int numUpdates = (int) (SIZE * (2+random.nextDouble())); + for(int docIter=0;docIter<numUpdates;docIter++) { + final Document doc = docs.nextDoc(); + final String myID = ""+id; + if (id == SIZE-1) { + id = 0; + } else { + id++; + } + doc.getField("docid").setValue(myID); + w.updateDocument(new Term("docid", myID), doc); + + if (docIter >= SIZE && random.nextInt(50) == 17) { + if (r != null) { + r.close(); + } + final boolean applyDeletions = random.nextBoolean(); + r = w.getReader(applyDeletions); + assertTrue("applyDeletions=" + applyDeletions + " r.numDocs()=" + r.numDocs() + " vs SIZE=" + SIZE, !applyDeletions || r.numDocs() == SIZE); + } + } + + if (r != null) { + r.close(); + } + + w.commit(); + assertEquals(SIZE, w.numDocs()); + + w.close(); + docs.close(); + + dir.close(); + } + + + public void testUpdateSameDoc() throws Exception { + final Directory dir = newDirectory(); + + final LineFileDocs docs = new LineFileDocs(random); + for (int r = 0; r < 3; r++) { + final IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2)); + final int numUpdates = atLeast(20); + int numThreads = _TestUtil.nextInt(random, 2, 6); + IndexingThread[] threads = new IndexingThread[numThreads]; + for (int i = 0; i < numThreads; i++) { + threads[i] = new IndexingThread(docs, w, numUpdates); + threads[i].start(); + } + + for (int i = 0; i < numThreads; i++) { + threads[i].join(); + } + + w.close(); + } + + IndexReader open = IndexReader.open(dir); + assertEquals(1, open.numDocs()); + open.close(); + docs.close(); + dir.close(); + } + + static class IndexingThread extends Thread { + final LineFileDocs docs; + final IndexWriter writer; + final int num; + + public IndexingThread(LineFileDocs docs, IndexWriter writer, int num) { + super(); + this.docs = docs; + this.writer = writer; + this.num = num; + } + + public void run() { + try { + IndexReader open = null; + for (int i = 0; i < num; i++) { + Document doc = new Document();// docs.nextDoc(); + doc.add(newField("id", "test", Field.Index.NOT_ANALYZED)); + writer.updateDocument(new Term("id", "test"), doc); + if (random.nextInt(3) == 0) { + if (open == null) { + open = IndexReader.open(writer, true); + } + IndexReader reader = open.reopen(); + if (reader != open) { + open.close(); + open = reader; + } + assertEquals("iter: " + i + " numDocs: "+ open.numDocs() + " del: " + open.numDeletedDocs() + " max: " + open.maxDoc(), 1, open.numDocs()); + } + } + if (open != null) { + open.close(); + } + } catch (Exception e) { + throw new RuntimeException(e); + } + } + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestSameTokenSamePosition.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestSameTokenSamePosition.java new file mode 100644 index 0000000..b46c37d --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestSameTokenSamePosition.java @@ -0,0 +1,82 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * 
contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.io.Reader; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; + +public class TestSameTokenSamePosition extends LuceneTestCase { + + /** + * Attempt to reproduce an assertion error that happens + * only with the trunk version around April 2011. + * @param args + */ + public void test() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter riw = new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new BugReproAnalyzer())); + Document doc = new Document(); + doc.add(new Field("eng", "Six drunken" /*This shouldn't matter. 
*/, + Field.Store.YES, Field.Index.ANALYZED)); + riw.addDocument(doc); + riw.close(); + dir.close(); + } +} + +final class BugReproAnalyzer extends Analyzer{ + @Override + public TokenStream tokenStream(String arg0, Reader arg1) { + return new BugReproAnalyzerTokenizer(); + } +} + +final class BugReproAnalyzerTokenizer extends TokenStream { + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); + private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class); + int tokenCount = 4; + int nextTokenIndex = 0; + String terms[] = new String[]{"six", "six", "drunken", "drunken"}; + int starts[] = new int[]{0, 0, 4, 4}; + int ends[] = new int[]{3, 3, 11, 11}; + int incs[] = new int[]{1, 0, 1, 0}; + + @Override + public boolean incrementToken() throws IOException { + if (nextTokenIndex < tokenCount) { + termAtt.setEmpty().append(terms[nextTokenIndex]); + offsetAtt.setOffset(starts[nextTokenIndex], ends[nextTokenIndex]); + posIncAtt.setPositionIncrement(incs[nextTokenIndex]); + nextTokenIndex++; + return true; + } else { + return false; + } + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestSegmentInfo.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestSegmentInfo.java new file mode 100644 index 0000000..13cf539 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestSegmentInfo.java @@ -0,0 +1,90 @@ +package org.apache.lucene.index; + +import java.io.IOException; +import java.util.Iterator; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.Field.Index; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.Field.TermVector; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +public class TestSegmentInfo extends LuceneTestCase { + + public void testSizeInBytesCache() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()); + IndexWriter writer = new IndexWriter(dir, conf); + Document doc = new Document(); + doc.add(new Field("a", "value", Store.YES, Index.ANALYZED)); + writer.addDocument(doc); + writer.close(); + + SegmentInfos sis = new SegmentInfos(); + sis.read(dir); + SegmentInfo si = sis.info(0); + long sizeInBytesNoStore = si.sizeInBytes(false); + long sizeInBytesWithStore = si.sizeInBytes(true); + assertTrue("sizeInBytesNoStore=" + sizeInBytesNoStore + " sizeInBytesWithStore=" + sizeInBytesWithStore, sizeInBytesWithStore > sizeInBytesNoStore); + dir.close(); + } + + // LUCENE-2584: calling files() by multiple threads could lead to ConcurrentModificationException + public void testFilesConcurrency() throws Exception { + Directory dir = newDirectory(); + // Create many files + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)); + IndexWriter writer = new IndexWriter(dir, conf); + Document doc = new Document(); + doc.add(new Field("a", "b", Store.YES, Index.ANALYZED, TermVector.YES)); + writer.addDocument(doc); + writer.close(); + + SegmentInfos sis = new SegmentInfos(); + sis.read(dir); + final SegmentInfo si = sis.info(0); + Thread[] threads = new Thread[_TestUtil.nextInt(random, 2, 5)]; + for (int i = 0; i < threads.length; i++) { + threads[i] = new Thread() { + @Override + public void run() { + try { + // Verify that files() does not throw an exception and that the + // iteration afterwards succeeds. + Iterator iter = si.files().iterator(); + while (iter.hasNext()) iter.next(); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + }; + } + + for (Thread t : threads) t.start(); + for (Thread t : threads) t.join(); + + dir.close(); + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestSegmentTermEnum.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestSegmentTermEnum.java new file mode 100644 index 0000000..f5468e8 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestSegmentTermEnum.java @@ -0,0 +1,132 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; + +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.store.Directory; + + +public class TestSegmentTermEnum extends LuceneTestCase { + + Directory dir; + + @Override + public void setUp() throws Exception { + super.setUp(); + dir = newDirectory(); + } + + @Override + public void tearDown() throws Exception { + dir.close(); + super.tearDown(); + } + + public void testTermEnum() throws IOException { + IndexWriter writer = null; + + writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); + + // ADD 100 documents with term : aaa + // add 100 documents with terms: aaa bbb + // Therefore, term 'aaa' has document frequency of 200 and term 'bbb' 100 + for (int i = 0; i < 100; i++) { + addDoc(writer, "aaa"); + addDoc(writer, "aaa bbb"); + } + + writer.close(); + + // verify document frequency of terms in an unoptimized index + verifyDocFreq(); + + // merge segments by optimizing the index + writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); + writer.optimize(); + writer.close(); + + // verify document frequency of terms in an optimized index + verifyDocFreq(); + } + + public void testPrevTermAtEnd() throws IOException + { + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); + addDoc(writer, "aaa bbb"); + writer.close(); + SegmentReader reader = SegmentReader.getOnlySegmentReader(dir); + SegmentTermEnum termEnum = (SegmentTermEnum) reader.terms(); + assertTrue(termEnum.next()); + assertEquals("aaa", termEnum.term().text()); + assertTrue(termEnum.next()); + assertEquals("aaa", termEnum.prev().text()); + assertEquals("bbb", termEnum.term().text()); + assertFalse(termEnum.next()); + assertEquals("bbb", termEnum.prev().text()); + reader.close(); + } + + private void verifyDocFreq() + throws IOException + { + IndexReader reader = IndexReader.open(dir, true); + TermEnum termEnum = null; + + // create enumeration of all terms + termEnum = reader.terms(); + // go to the first term (aaa) + termEnum.next(); + // assert that term is 'aaa' + assertEquals("aaa", termEnum.term().text()); + assertEquals(200, termEnum.docFreq()); + // go to the second term (bbb) + termEnum.next(); + // assert that term is 'bbb' + assertEquals("bbb", termEnum.term().text()); + assertEquals(100, termEnum.docFreq()); + + termEnum.close(); + + + // create enumeration of terms after term 'aaa', including 'aaa' + termEnum = reader.terms(new Term("content", "aaa")); + // assert that term is 'aaa' + assertEquals("aaa", termEnum.term().text()); + assertEquals(200, termEnum.docFreq()); + // go to term 'bbb' + termEnum.next(); + // assert that term is 'bbb' + assertEquals("bbb", termEnum.term().text()); + assertEquals(100, termEnum.docFreq()); + termEnum.close(); + reader.close(); + } + + private void addDoc(IndexWriter writer, String value) throws IOException + { + Document doc = new Document(); + doc.add(newField("content", value, Field.Store.NO, Field.Index.ANALYZED)); + writer.addDocument(doc); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestSizeBoundedOptimize.java 
b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestSizeBoundedOptimize.java new file mode 100644 index 0000000..5de97fa --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestSizeBoundedOptimize.java @@ -0,0 +1,368 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.document.Document; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.LuceneTestCase; + +public class TestSizeBoundedOptimize extends LuceneTestCase { + + private void addDocs(IndexWriter writer, int numDocs) throws IOException { + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + writer.addDocument(doc); + } + writer.commit(); + } + + private static IndexWriterConfig newWriterConfig() throws IOException { + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); + conf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); + conf.setRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB); + // prevent any merges by default. + conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES); + return conf; + } + + public void testByteSizeLimit() throws Exception { + // tests that the max merge size constraint is applied during optimize. + Directory dir = new RAMDirectory(); + + // Prepare an index w/ several small segments and a large one. + IndexWriterConfig conf = newWriterConfig(); + IndexWriter writer = new IndexWriter(dir, conf); + final int numSegments = 15; + for (int i = 0; i < numSegments; i++) { + int numDocs = i == 7 ? 30 : 1; + addDocs(writer, numDocs); + } + writer.close(); + + SegmentInfos sis = new SegmentInfos(); + sis.read(dir); + double min = sis.info(0).sizeInBytes(true); + + conf = newWriterConfig(); + LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy(); + lmp.setMaxMergeMBForOptimize((min + 1) / (1 << 20)); + conf.setMergePolicy(lmp); + + writer = new IndexWriter(dir, conf); + writer.optimize(); + writer.close(); + + // Should only be 3 segments in the index, because one of them exceeds the size limit + sis = new SegmentInfos(); + sis.read(dir); + assertEquals(3, sis.size()); + } + + public void testNumDocsLimit() throws Exception { + // tests that the max merge docs constraint is applied during optimize. + Directory dir = new RAMDirectory(); + + // Prepare an index w/ several small segments and a large one. 
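+    // As in testByteSizeLimit above, a size cap is handed to the merge policy
+    // before optimize(). A minimal sketch of that bytes-to-MB conversion
+    // (values illustrative, commented out; 1 MB = 1 << 20 bytes):
+    /*
+    double minBytes = sis.info(0).sizeInBytes(true);          // smallest segment, in bytes
+    LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy();
+    lmp.setMaxMergeMBForOptimize((minBytes + 1) / (1 << 20)); // larger segments are skipped
+    */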
+ IndexWriterConfig conf = newWriterConfig(); + IndexWriter writer = new IndexWriter(dir, conf); + + addDocs(writer, 3); + addDocs(writer, 3); + addDocs(writer, 5); + addDocs(writer, 3); + addDocs(writer, 3); + addDocs(writer, 3); + addDocs(writer, 3); + + writer.close(); + + conf = newWriterConfig(); + LogMergePolicy lmp = new LogDocMergePolicy(); + lmp.setMaxMergeDocs(3); + conf.setMergePolicy(lmp); + + writer = new IndexWriter(dir, conf); + writer.optimize(); + writer.close(); + + // Should only be 3 segments in the index, because one of them exceeds the size limit + SegmentInfos sis = new SegmentInfos(); + sis.read(dir); + assertEquals(3, sis.size()); + } + + public void testLastSegmentTooLarge() throws Exception { + Directory dir = new RAMDirectory(); + + IndexWriterConfig conf = newWriterConfig(); + IndexWriter writer = new IndexWriter(dir, conf); + + addDocs(writer, 3); + addDocs(writer, 3); + addDocs(writer, 3); + addDocs(writer, 5); + + writer.close(); + + conf = newWriterConfig(); + LogMergePolicy lmp = new LogDocMergePolicy(); + lmp.setMaxMergeDocs(3); + conf.setMergePolicy(lmp); + + writer = new IndexWriter(dir, conf); + writer.optimize(); + writer.close(); + + SegmentInfos sis = new SegmentInfos(); + sis.read(dir); + assertEquals(2, sis.size()); + } + + public void testFirstSegmentTooLarge() throws Exception { + Directory dir = new RAMDirectory(); + + IndexWriterConfig conf = newWriterConfig(); + IndexWriter writer = new IndexWriter(dir, conf); + + addDocs(writer, 5); + addDocs(writer, 3); + addDocs(writer, 3); + addDocs(writer, 3); + + writer.close(); + + conf = newWriterConfig(); + LogMergePolicy lmp = new LogDocMergePolicy(); + lmp.setMaxMergeDocs(3); + conf.setMergePolicy(lmp); + + writer = new IndexWriter(dir, conf); + writer.optimize(); + writer.close(); + + SegmentInfos sis = new SegmentInfos(); + sis.read(dir); + assertEquals(2, sis.size()); + } + + public void testAllSegmentsSmall() throws Exception { + Directory dir = new RAMDirectory(); + + IndexWriterConfig conf = newWriterConfig(); + IndexWriter writer = new IndexWriter(dir, conf); + + addDocs(writer, 3); + addDocs(writer, 3); + addDocs(writer, 3); + addDocs(writer, 3); + + writer.close(); + + conf = newWriterConfig(); + LogMergePolicy lmp = new LogDocMergePolicy(); + lmp.setMaxMergeDocs(3); + conf.setMergePolicy(lmp); + + writer = new IndexWriter(dir, conf); + writer.optimize(); + writer.close(); + + SegmentInfos sis = new SegmentInfos(); + sis.read(dir); + assertEquals(1, sis.size()); + } + + public void testAllSegmentsLarge() throws Exception { + Directory dir = new RAMDirectory(); + + IndexWriterConfig conf = newWriterConfig(); + IndexWriter writer = new IndexWriter(dir, conf); + + addDocs(writer, 3); + addDocs(writer, 3); + addDocs(writer, 3); + + writer.close(); + + conf = newWriterConfig(); + LogMergePolicy lmp = new LogDocMergePolicy(); + lmp.setMaxMergeDocs(2); + conf.setMergePolicy(lmp); + + writer = new IndexWriter(dir, conf); + writer.optimize(); + writer.close(); + + SegmentInfos sis = new SegmentInfos(); + sis.read(dir); + assertEquals(3, sis.size()); + } + + public void testOneLargeOneSmall() throws Exception { + Directory dir = new RAMDirectory(); + + IndexWriterConfig conf = newWriterConfig(); + IndexWriter writer = new IndexWriter(dir, conf); + + addDocs(writer, 3); + addDocs(writer, 5); + addDocs(writer, 3); + addDocs(writer, 5); + + writer.close(); + + conf = newWriterConfig(); + LogMergePolicy lmp = new LogDocMergePolicy(); + lmp.setMaxMergeDocs(3); + conf.setMergePolicy(lmp); + + writer = new 
IndexWriter(dir, conf); + writer.optimize(); + writer.close(); + + SegmentInfos sis = new SegmentInfos(); + sis.read(dir); + assertEquals(4, sis.size()); + } + + public void testMergeFactor() throws Exception { + Directory dir = new RAMDirectory(); + + IndexWriterConfig conf = newWriterConfig(); + IndexWriter writer = new IndexWriter(dir, conf); + + addDocs(writer, 3); + addDocs(writer, 3); + addDocs(writer, 3); + addDocs(writer, 3); + addDocs(writer, 5); + addDocs(writer, 3); + addDocs(writer, 3); + + writer.close(); + + conf = newWriterConfig(); + LogMergePolicy lmp = new LogDocMergePolicy(); + lmp.setMaxMergeDocs(3); + lmp.setMergeFactor(2); + conf.setMergePolicy(lmp); + + writer = new IndexWriter(dir, conf); + writer.optimize(); + writer.close(); + + // Should only be 4 segments in the index, because of the merge factor and + // max merge docs settings. + SegmentInfos sis = new SegmentInfos(); + sis.read(dir); + assertEquals(4, sis.size()); + } + + public void testSingleNonOptimizedSegment() throws Exception { + Directory dir = new RAMDirectory(); + + IndexWriterConfig conf = newWriterConfig(); + IndexWriter writer = new IndexWriter(dir, conf); + + addDocs(writer, 3); + addDocs(writer, 5); + addDocs(writer, 3); + + writer.close(); + + // delete the last document, so that the last segment is optimized. + IndexReader r = IndexReader.open(dir, false); + r.deleteDocument(r.numDocs() - 1); + r.close(); + + conf = newWriterConfig(); + LogMergePolicy lmp = new LogDocMergePolicy(); + lmp.setMaxMergeDocs(3); + conf.setMergePolicy(lmp); + + writer = new IndexWriter(dir, conf); + writer.optimize(); + writer.close(); + + // Verify that the last segment does not have deletions. + SegmentInfos sis = new SegmentInfos(); + sis.read(dir); + assertEquals(3, sis.size()); + assertFalse(sis.info(2).hasDeletions()); + } + + public void testSingleOptimizedSegment() throws Exception { + Directory dir = new RAMDirectory(); + + IndexWriterConfig conf = newWriterConfig(); + IndexWriter writer = new IndexWriter(dir, conf); + + addDocs(writer, 3); + + writer.close(); + + conf = newWriterConfig(); + LogMergePolicy lmp = new LogDocMergePolicy(); + lmp.setMaxMergeDocs(3); + conf.setMergePolicy(lmp); + + writer = new IndexWriter(dir, conf); + writer.optimize(); + writer.close(); + + // Verify that the last segment does not have deletions. + SegmentInfos sis = new SegmentInfos(); + sis.read(dir); + assertEquals(1, sis.size()); + } + + public void testSingleNonOptimizedTooLargeSegment() throws Exception { + Directory dir = new RAMDirectory(); + + IndexWriterConfig conf = newWriterConfig(); + IndexWriter writer = new IndexWriter(dir, conf); + + addDocs(writer, 5); + + writer.close(); + + // delete the last document + IndexReader r = IndexReader.open(dir, false); + r.deleteDocument(r.numDocs() - 1); + r.close(); + + conf = newWriterConfig(); + LogMergePolicy lmp = new LogDocMergePolicy(); + lmp.setMaxMergeDocs(2); + conf.setMergePolicy(lmp); + + writer = new IndexWriter(dir, conf); + writer.optimize(); + writer.close(); + + // Verify that the last segment does not have deletions. 
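+    // Unlike testSingleNonOptimizedSegment above, this lone segment exceeds
+    // maxMergeDocs, so optimize() must leave it untouched; here the deletion
+    // is expected to survive, which is what the hasDeletions() check asserts.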
+ SegmentInfos sis = new SegmentInfos(); + sis.read(dir); + assertEquals(1, sis.size()); + assertTrue(sis.info(0).hasDeletions()); + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestSnapshotDeletionPolicy.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestSnapshotDeletionPolicy.java new file mode 100644 index 0000000..37bdf23 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestSnapshotDeletionPolicy.java @@ -0,0 +1,445 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.Collection; +import java.util.Map; +import java.util.Random; +import java.io.IOException; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.analysis.KeywordAnalyzer; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.index.IndexCommit; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.KeepOnlyLastCommitDeletionPolicy; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.SnapshotDeletionPolicy; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.ThreadInterruptedException; +import org.junit.Test; + +// +// This was developed for Lucene In Action, +// http://lucenebook.com +// + +public class TestSnapshotDeletionPolicy extends LuceneTestCase { + public static final String INDEX_PATH = "test.snapshots"; + + protected IndexWriterConfig getConfig(Random random, IndexDeletionPolicy dp) { + IndexWriterConfig conf = newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)); + if (dp != null) { + conf.setIndexDeletionPolicy(dp); + } + return conf; + } + + protected void checkSnapshotExists(Directory dir, IndexCommit c) throws Exception { + String segFileName = c.getSegmentsFileName(); + assertTrue("segments file not found in directory: " + segFileName, dir.fileExists(segFileName)); + } + + protected void checkMaxDoc(IndexCommit commit, int expectedMaxDoc) throws Exception { + IndexReader reader = IndexReader.open(commit, true); + try { + assertEquals(expectedMaxDoc, reader.maxDoc()); + } finally { + reader.close(); + } + } + + protected void prepareIndexAndSnapshots(SnapshotDeletionPolicy sdp, + IndexWriter writer, int numSnapshots, String snapshotPrefix) + throws RuntimeException, IOException { + for (int i = 0; i < numSnapshots; i++) { + // create dummy document to trigger commit. 
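+      // The commit/snapshot pairing below is the core SnapshotDeletionPolicy
+      // idiom: each commit stays pinned under its id until released. A
+      // minimal sketch of one round trip (id illustrative, commented out):
+      /*
+      writer.commit();                             // create a commit point
+      IndexCommit pinned = sdp.snapshot("backup"); // pin it under an id
+      // ... copy pinned.getFileNames() to a safe location ...
+      sdp.release("backup");                       // allow the files to be reclaimed
+      */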
+ writer.addDocument(new Document()); + writer.commit(); + sdp.snapshot(snapshotPrefix + i); + } + } + + protected SnapshotDeletionPolicy getDeletionPolicy() throws IOException { + return getDeletionPolicy(null); + } + + protected SnapshotDeletionPolicy getDeletionPolicy(Map snapshots) throws IOException { + return new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy(), snapshots); + } + + protected void assertSnapshotExists(Directory dir, SnapshotDeletionPolicy sdp, int numSnapshots) throws Exception { + for (int i = 0; i < numSnapshots; i++) { + IndexCommit snapshot = sdp.getSnapshot("snapshot" + i); + checkMaxDoc(snapshot, i + 1); + checkSnapshotExists(dir, snapshot); + } + } + + @Test + public void testSnapshotDeletionPolicy() throws Exception { + Directory fsDir = newDirectory(); + runTest(random, fsDir); + fsDir.close(); + } + + private void runTest(Random random, Directory dir) throws Exception { + // Run for ~1 seconds + final long stopTime = System.currentTimeMillis() + 1000; + + SnapshotDeletionPolicy dp = getDeletionPolicy(); + final IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random)).setIndexDeletionPolicy(dp) + .setMaxBufferedDocs(2)); + writer.commit(); + + final Thread t = new Thread() { + @Override + public void run() { + Document doc = new Document(); + doc.add(newField("content", "aaa", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); + do { + for(int i=0;i<27;i++) { + try { + writer.addDocument(doc); + } catch (Throwable t) { + t.printStackTrace(System.out); + fail("addDocument failed"); + } + if (i%2 == 0) { + try { + writer.commit(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + } + try { + Thread.sleep(1); + } catch (InterruptedException ie) { + throw new ThreadInterruptedException(ie); + } + } while(System.currentTimeMillis() < stopTime); + } + }; + + t.start(); + + // While the above indexing thread is running, take many + // backups: + do { + backupIndex(dir, dp); + Thread.sleep(20); + } while(t.isAlive()); + + t.join(); + + // Add one more document to force writer to commit a + // final segment, so deletion policy has a chance to + // delete again: + Document doc = new Document(); + doc.add(newField("content", "aaa", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); + writer.addDocument(doc); + + // Make sure we don't have any leftover files in the + // directory: + writer.close(); + TestIndexWriter.assertNoUnreferencedFiles(dir, "some files were not deleted but should have been"); + } + + /** + * Example showing how to use the SnapshotDeletionPolicy to take a backup. + * This method does not really do a backup; instead, it reads every byte of + * every file just to test that the files indeed exist and are readable even + * while the index is changing. 
+ */ + public void backupIndex(Directory dir, SnapshotDeletionPolicy dp) throws Exception { + // To back up an index we first take a snapshot: + try { + copyFiles(dir, dp.snapshot("id")); + } finally { + // Make sure to release the snapshot, otherwise these + // files will never be deleted during this IndexWriter + // session: + dp.release("id"); + } + } + + private void copyFiles(Directory dir, IndexCommit cp) throws Exception { + + // While we hold the snapshot, and no matter how long + // we take to do the backup, the IndexWriter will + // never delete the files in the snapshot: + Collection<String> files = cp.getFileNames(); + for (final String fileName : files) { + // NOTE: in a real backup you would not use + // readFile; you would need to use something else + // that copies the file to a backup location. This + // could even be a spawned shell process (eg "tar", + // "zip") that takes the list of files and builds a + // backup. + readFile(dir, fileName); + } + } + + byte[] buffer = new byte[4096]; + + private void readFile(Directory dir, String name) throws Exception { + IndexInput input = dir.openInput(name); + try { + long size = dir.fileLength(name); + long bytesLeft = size; + while (bytesLeft > 0) { + final int numToRead; + if (bytesLeft < buffer.length) + numToRead = (int) bytesLeft; + else + numToRead = buffer.length; + input.readBytes(buffer, 0, numToRead, false); + bytesLeft -= numToRead; + } + // Don't do this in your real backups! This is just + // to force a backup to take a somewhat long time, to + // make sure we are exercising the fact that the + // IndexWriter should not delete this file even when I + // take my time reading it. + Thread.sleep(1); + } finally { + input.close(); + } + } + + + @Test + public void testBasicSnapshots() throws Exception { + int numSnapshots = 3; + SnapshotDeletionPolicy sdp = getDeletionPolicy(); + + // Create 3 snapshots: snapshot0, snapshot1, snapshot2 + Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, getConfig(random, sdp)); + prepareIndexAndSnapshots(sdp, writer, numSnapshots, "snapshot"); + writer.close(); + + assertSnapshotExists(dir, sdp, numSnapshots); + + // open a reader on a snapshot - should succeed. + IndexReader.open(sdp.getSnapshot("snapshot0"), true).close(); + + // open a new IndexWriter w/ no snapshots to keep and assert that all snapshots are gone.
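+    // A fresh policy knows nothing about the old ids, so once the writer
+    // below runs deleteUnusedFiles() only the latest commit survives. A
+    // minimal sketch of inspecting the surviving commits (commented out;
+    // it mirrors the listCommits() assertion that follows):
+    /*
+    for (IndexCommit commit : IndexReader.listCommits(dir)) {
+      System.out.println("surviving commit: " + commit.getSegmentsFileName());
+    }
+    */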
+ sdp = getDeletionPolicy(); + writer = new IndexWriter(dir, getConfig(random, sdp)); + writer.deleteUnusedFiles(); + writer.close(); + assertEquals("no snapshots should exist", 1, IndexReader.listCommits(dir).size()); + + for (int i = 0; i < numSnapshots; i++) { + try { + sdp.getSnapshot("snapshot" + i); + fail("snapshot shouldn't have existed, but did: snapshot" + i); + } catch (IllegalStateException e) { + // expected - snapshot should not exist + } + } + dir.close(); + } + + @Test + public void testMultiThreadedSnapshotting() throws Exception { + Directory dir = newDirectory(); + final SnapshotDeletionPolicy sdp = getDeletionPolicy(); + final IndexWriter writer = new IndexWriter(dir, getConfig(random, sdp)); + + Thread[] threads = new Thread[10]; + for (int i = 0; i < threads.length; i++) { + threads[i] = new Thread() { + @Override + public void run() { + try { + writer.addDocument(new Document()); + writer.commit(); + sdp.snapshot(getName()); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + }; + threads[i].setName("t" + i); + } + + for (Thread t : threads) { + t.start(); + } + + for (Thread t : threads) { + t.join(); + } + + // Do one last commit, so that after we release all snapshots, we stay w/ one commit + writer.addDocument(new Document()); + writer.commit(); + + for (Thread t : threads) { + sdp.release(t.getName()); + writer.deleteUnusedFiles(); + } + assertEquals(1, IndexReader.listCommits(dir).size()); + writer.close(); + dir.close(); + } + + @Test + public void testRollbackToOldSnapshot() throws Exception { + int numSnapshots = 2; + Directory dir = newDirectory(); + SnapshotDeletionPolicy sdp = getDeletionPolicy(); + IndexWriter writer = new IndexWriter(dir, getConfig(random, sdp)); + prepareIndexAndSnapshots(sdp, writer, numSnapshots, "snapshot"); + writer.close(); + + // now open the writer on "snapshot0" - make sure it succeeds + writer = new IndexWriter(dir, getConfig(random, sdp).setIndexCommit(sdp.getSnapshot("snapshot0"))); + // this does the actual rollback + writer.commit(); + writer.deleteUnusedFiles(); + assertSnapshotExists(dir, sdp, numSnapshots - 1); + writer.close(); + + // but 'snapshot1' files will still exist (need to release snapshot before they can be deleted). + String segFileName = sdp.getSnapshot("snapshot1").getSegmentsFileName(); + assertTrue("snapshot files should exist in the directory: " + segFileName, dir.fileExists(segFileName)); + dir.close(); + } + + @Test + public void testReleaseSnapshot() throws Exception { + Directory dir = newDirectory(); + SnapshotDeletionPolicy sdp = getDeletionPolicy(); + IndexWriter writer = new IndexWriter(dir, getConfig(random, sdp)); + prepareIndexAndSnapshots(sdp, writer, 1, "snapshot"); + + // Create another commit - we must do that, because otherwise the "snapshot" + // files will still remain in the index, since it's the last commit. 
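+    // Lucene never deletes the files of the most recent commit, so releasing
+    // a snapshot only frees its files once a newer commit exists; that is
+    // why this extra document and commit are added first.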
+ writer.addDocument(new Document()); + writer.commit(); + + // Release + String snapId = "snapshot0"; + String segFileName = sdp.getSnapshot(snapId).getSegmentsFileName(); + sdp.release(snapId); + try { + sdp.getSnapshot(snapId); + fail("should not have succeeded to get an unsnapshotted id"); + } catch (IllegalStateException e) { + // expected + } + assertNull(sdp.getSnapshots().get(snapId)); + writer.deleteUnusedFiles(); + writer.close(); + assertFalse("segments file should not be found in directory: " + segFileName, dir.fileExists(segFileName)); + dir.close(); + } + + @Test + public void testExistingSnapshots() throws Exception { + // Tests the ability to construct a SDP from existing snapshots, and + // asserts that those snapshots/commit points are protected. + int numSnapshots = 3; + Directory dir = newDirectory(); + SnapshotDeletionPolicy sdp = getDeletionPolicy(); + IndexWriter writer = new IndexWriter(dir, getConfig(random, sdp)); + prepareIndexAndSnapshots(sdp, writer, numSnapshots, "snapshot"); + writer.close(); + + // Make a new policy and initialize with snapshots. + sdp = getDeletionPolicy(sdp.getSnapshots()); + writer = new IndexWriter(dir, getConfig(random, sdp)); + // attempt to delete unused files - the snapshotted files should not be deleted + writer.deleteUnusedFiles(); + writer.close(); + assertSnapshotExists(dir, sdp, numSnapshots); + dir.close(); + } + + @Test + public void testSnapshotLastCommitTwice() throws Exception { + Directory dir = newDirectory(); + SnapshotDeletionPolicy sdp = getDeletionPolicy(); + IndexWriter writer = new IndexWriter(dir, getConfig(random, sdp)); + writer.addDocument(new Document()); + writer.commit(); + + String s1 = "s1"; + String s2 = "s2"; + IndexCommit ic1 = sdp.snapshot(s1); + IndexCommit ic2 = sdp.snapshot(s2); + assertTrue(ic1 == ic2); // should be the same instance + + // create another commit + writer.addDocument(new Document()); + writer.commit(); + + // releasing "s1" should not delete "s2" + sdp.release(s1); + writer.deleteUnusedFiles(); + checkSnapshotExists(dir, ic2); + + writer.close(); + dir.close(); + } + + @Test + public void testMissingCommits() throws Exception { + // Tests the behavior of SDP when commits that are given at ctor are missing + // on onInit(). + Directory dir = newDirectory(); + SnapshotDeletionPolicy sdp = getDeletionPolicy(); + IndexWriter writer = new IndexWriter(dir, getConfig(random, sdp)); + writer.addDocument(new Document()); + writer.commit(); + IndexCommit ic = sdp.snapshot("s1"); + + // create another commit, not snapshotted. + writer.addDocument(new Document()); + writer.close(); + + // open a new writer w/ KeepOnlyLastCommit policy, so it will delete "s1" + // commit. + new IndexWriter(dir, getConfig(random, null)).close(); + + assertFalse("snapshotted commit should not exist", dir.fileExists(ic.getSegmentsFileName())); + + // Now reinit SDP from the commits in the index - the snapshot id should not + // exist anymore. + sdp = getDeletionPolicy(sdp.getSnapshots()); + new IndexWriter(dir, getConfig(random, sdp)).close(); + + try { + sdp.getSnapshot("s1"); + fail("snapshot s1 should not exist"); + } catch (IllegalStateException e) { + // expected.
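+      // getSnapshot(id) signals an unknown or released id with
+      // IllegalStateException, so the fail()-then-catch pattern above is how
+      // these tests assert that the id is really gone.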
+ } + dir.close(); + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestStressAdvance.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestStressAdvance.java new file mode 100644 index 0000000..08b8d76 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestStressAdvance.java @@ -0,0 +1,137 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.apache.lucene.util.*; +import org.apache.lucene.store.*; +import org.apache.lucene.document.*; + +public class TestStressAdvance extends LuceneTestCase { + + public void testStressAdvance() throws Exception { + for(int iter=0;iter<3;iter++) { + if (VERBOSE) { + System.out.println("\nTEST: iter=" + iter); + } + Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random, dir); + final Set aDocs = new HashSet(); + final Document doc = new Document(); + final Field f = newField("field", "", Field.Index.NOT_ANALYZED_NO_NORMS); + doc.add(f); + final Field idField = newField("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); + doc.add(idField); + int num = atLeast(5000); + for(int id=0;id aDocIDs = new ArrayList(); + final List bDocIDs = new ArrayList(); + + final IndexReader r = w.getReader(); + final int[] idToDocID = new int[r.maxDoc()]; + for(int docID=0;docID expected) throws Exception { + if (VERBOSE) { + System.out.println("test"); + } + int upto = -1; + while(upto < expected.size()) { + if (VERBOSE) { + System.out.println(" cycle upto=" + upto + " of " + expected.size()); + } + final int docID; + if (random.nextInt(4) == 1 || upto == expected.size()-1) { + // test nextDoc() + if (VERBOSE) { + System.out.println(" do nextDoc"); + } + upto++; + if (docs.next()) { + docID = docs.doc(); + } else { + docID = -1; + } + } else { + // test advance() + final int inc = _TestUtil.nextInt(random, 1, expected.size()-1-upto); + if (VERBOSE) { + System.out.println(" do advance inc=" + inc); + } + upto += inc; + if (docs.skipTo(expected.get(upto))) { + docID = docs.doc(); + } else { + docID = -1; + } + } + if (upto == expected.size()) { + if (VERBOSE) { + System.out.println(" expect docID=" + -1 + " actual=" + docID); + } + assertEquals(-1, docID); + } else { + if (VERBOSE) { + System.out.println(" expect docID=" + expected.get(upto) + " actual=" + docID); + } + assertTrue(docID != -1); + assertEquals(expected.get(upto).intValue(), docID); + } + } + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestStressIndexing.java 
b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestStressIndexing.java new file mode 100644 index 0000000..539554e --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestStressIndexing.java @@ -0,0 +1,168 @@ +package org.apache.lucene.index; + +/** + * Copyright 2004 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util.*; +import org.apache.lucene.store.*; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.*; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.search.*; + +public class TestStressIndexing extends LuceneTestCase { + private static abstract class TimedThread extends Thread { + volatile boolean failed; + int count; + private static int RUN_TIME_MSEC = atLeast(1000); + private TimedThread[] allThreads; + + abstract public void doWork() throws Throwable; + + TimedThread(TimedThread[] threads) { + this.allThreads = threads; + } + + @Override + public void run() { + final long stopTime = System.currentTimeMillis() + RUN_TIME_MSEC; + + count = 0; + + try { + do { + if (anyErrors()) break; + doWork(); + count++; + } while(System.currentTimeMillis() < stopTime); + } catch (Throwable e) { + System.out.println(Thread.currentThread() + ": exc"); + e.printStackTrace(System.out); + failed = true; + } + } + + private boolean anyErrors() { + for(int i=0;i docs = indexRandom(5, 3, 100, dir1, maxThreadStates, doReaderPooling); + indexSerial(random, docs, dir2); + + // verifying verify + // verifyEquals(dir1, dir1, "id"); + // verifyEquals(dir2, dir2, "id"); + + verifyEquals(dir1, dir2, "id"); + dir1.close(); + dir2.close(); + } + + public void testMultiConfig() throws Throwable { + // test lots of smaller different params together + int num = atLeast(3); + for (int i = 0; i < num; i++) { // increase iterations for better testing + if (VERBOSE) { + System.out.println("\n\nTEST: top iter=" + i); + } + sameFieldOrder=random.nextBoolean(); + mergeFactor=random.nextInt(3)+2; + maxBufferedDocs=random.nextInt(3)+2; + int maxThreadStates = 1+random.nextInt(10); + boolean doReaderPooling = random.nextBoolean(); + seed++; + + int nThreads=random.nextInt(5)+1; + int iter=random.nextInt(5)+1; + int range=random.nextInt(20)+1; + Directory dir1 = newDirectory(); + Directory dir2 = newDirectory(); + if (VERBOSE) { + System.out.println(" nThreads=" + nThreads + " iter=" + iter + " range=" + range + " doPooling=" + doReaderPooling + " maxThreadStates=" + maxThreadStates + " sameFieldOrder=" + sameFieldOrder + " mergeFactor=" + mergeFactor); + } + Map docs = indexRandom(nThreads, iter, range, dir1, maxThreadStates, doReaderPooling); + if (VERBOSE) { + System.out.println("TEST: index serial"); + } + indexSerial(random, docs, dir2); + if (VERBOSE) { + System.out.println("TEST: verify"); + } + verifyEquals(dir1, dir2, "id"); + dir1.close(); + dir2.close(); + } + } + + + static Term idTerm = new Term("id",""); + 
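+  // The tests in this class cross-check IndexWriter's synchronization by
+  // building the same logical index twice, once with concurrent
+  // IndexingThreads and once serially, then comparing them term by term. A
+  // minimal sketch of that pattern (parameters illustrative, commented out):
+  /*
+  public void sketchCrossCheck() throws Throwable { // illustrative only
+    Directory dir1 = newDirectory(), dir2 = newDirectory();
+    Map docs = indexRandom(2, 3, 100, dir1, 1, false); // threaded writer
+    indexSerial(random, docs, dir2);                   // single-threaded reference
+    verifyEquals(dir1, dir2, "id");                    // indexes must match per id
+    dir1.close();
+    dir2.close();
+  }
+  */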
IndexingThread[] threads; + static Comparator fieldNameComparator = new Comparator() { + public int compare(Fieldable o1, Fieldable o2) { + return o1.name().compareTo(o2.name()); + } + }; + + // This test avoids using any extra synchronization in the multiple + // indexing threads to test that IndexWriter does correctly synchronize + // everything. + + public static class DocsAndWriter { + Map docs; + IndexWriter writer; + } + + public DocsAndWriter indexRandomIWReader(int nThreads, int iterations, int range, Directory dir) throws IOException, InterruptedException { + Map docs = new HashMap(); + IndexWriter w = new MockIndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.CREATE).setRAMBufferSizeMB( + 0.1).setMaxBufferedDocs(maxBufferedDocs).setMergePolicy(newLogMergePolicy())); + w.setInfoStream(VERBOSE ? System.out : null); + w.commit(); + setUseCompoundFile(w.getConfig().getMergePolicy(), false); + setMergeFactor(w.getConfig().getMergePolicy(), mergeFactor); + /*** + w.setMaxMergeDocs(Integer.MAX_VALUE); + w.setMaxFieldLength(10000); + w.setRAMBufferSizeMB(1); + w.setMergeFactor(10); + ***/ + + threads = new IndexingThread[nThreads]; + for (int i=0; i indexRandom(int nThreads, int iterations, int range, Directory dir, int maxThreadStates, + boolean doReaderPooling) throws IOException, InterruptedException { + Map docs = new HashMap(); + for(int iter=0;iter<3;iter++) { + if (VERBOSE) { + System.out.println("TEST: iter=" + iter); + } + IndexWriter w = new MockIndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.CREATE) + .setRAMBufferSizeMB(0.1).setMaxBufferedDocs(maxBufferedDocs).setMaxThreadStates(maxThreadStates) + .setReaderPooling(doReaderPooling).setMergePolicy(newLogMergePolicy())); + w.setInfoStream(VERBOSE ? 
System.out : null); + setUseCompoundFile(w.getConfig().getMergePolicy(), false); + setMergeFactor(w.getConfig().getMergePolicy(), mergeFactor); + + threads = new IndexingThread[nThreads]; + for (int i=0; i docs, Directory dir) throws IOException { + IndexWriter w = new IndexWriter(dir, LuceneTestCase.newIndexWriterConfig(random, TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setMergePolicy(newLogMergePolicy())); + + // index all docs in a single thread + Iterator iter = docs.values().iterator(); + while (iter.hasNext()) { + Document d = iter.next(); + ArrayList fields = new ArrayList(); + fields.addAll(d.getFields()); + // put fields in same order each time + Collections.sort(fields, fieldNameComparator); + + Document d1 = new Document(); + d1.setBoost(d.getBoost()); + for (int i=0; i0) break; + if (!termEnum1.next()) break; + } + + // iterate until we get some docs + int len2; + for(;;) { + len2=0; + term2 = termEnum2.term(); + if (term2==null) break; + termDocs2.seek(termEnum2); + while (termDocs2.next()) { + int d2 = termDocs2.doc(); + int f2 = termDocs2.freq(); + info2[len2] = (((long)r2r1[d2])<<32) | f2; + len2++; + } + if (len2>0) break; + if (!termEnum2.next()) break; + } + + if (!hasDeletes) + assertEquals(termEnum1.docFreq(), termEnum2.docFreq()); + + assertEquals(len1, len2); + if (len1==0) break; // no more terms + + assertEquals(term1, term2); + + // sort info2 to get it into ascending docid + Arrays.sort(info2, 0, len2); + + // now compare + for (int i=0; i ff1 = d1.getFields(); + List ff2 = d2.getFields(); + + Collections.sort(ff1, fieldNameComparator); + Collections.sort(ff2, fieldNameComparator); + + assertEquals(ff1 + " : " + ff2, ff1.size(), ff2.size()); + + for (int i=0; i docs = new HashMap(); + Random r; + + public int nextInt(int lim) { + return r.nextInt(lim); + } + + // start is inclusive and end is exclusive + public int nextInt(int start, int end) { + return start + r.nextInt(end-start); + } + + char[] buffer = new char[100]; + + private int addUTF8Token(int start) { + final int end = start + nextInt(20); + if (buffer.length < 1+end) { + char[] newBuffer = new char[(int) ((1+end)*1.25)]; + System.arraycopy(buffer, 0, newBuffer, 0, buffer.length); + buffer = newBuffer; + } + + for(int i=start;i fields = new ArrayList(); + String idString = getIdString(); + Field idField = newField(idTerm.field(), idString, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); + fields.add(idField); + + int nFields = nextInt(maxFields); + for (int i=0; i { + String text; + int pos; + int startOffset; + int endOffset; + public int compareTo(TestToken other) { + return pos - other.pos; + } + } + + TestToken[] tokens = new TestToken[testTerms.length * TERM_FREQ]; + + @Override + public void setUp() throws Exception { + super.setUp(); + /* + for (int i = 0; i < testFields.length; i++) { + fieldInfos.add(testFields[i], true, true, testFieldsStorePos[i], testFieldsStoreOff[i]); + } + */ + + Arrays.sort(testTerms); + int tokenUpto = 0; + for (int i = 0; i < testTerms.length; i++) { + positions[i] = new int[TERM_FREQ]; + offsets[i] = new TermVectorOffsetInfo[TERM_FREQ]; + // first position must be 0 + for (int j = 0; j < TERM_FREQ; j++) { + // positions are always sorted in increasing order + positions[i][j] = (int) (j * 10 + Math.random() * 10); + // offsets are always sorted in increasing order + offsets[i][j] = new TermVectorOffsetInfo(j * 10, j * 10 + testTerms[i].length()); + TestToken token = tokens[tokenUpto++] = new TestToken(); + token.text = testTerms[i]; + 
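+        // MyTokenStream below re-derives these absolute positions as deltas:
+        // the first token emits an increment of pos+1, each later token emits
+        // pos[i] - pos[i-1], so the positions recorded here round-trip intact.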
token.pos = positions[i][j]; + token.startOffset = offsets[i][j].getStartOffset(); + token.endOffset = offsets[i][j].getEndOffset(); + } + } + Arrays.sort(tokens); + + dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MyAnalyzer()).setMaxBufferedDocs(-1).setMergePolicy(newLogMergePolicy(false, 10))); + + Document doc = new Document(); + for(int i=0;i= tokens.length) + return false; + else { + final TestToken testToken = tokens[tokenUpto++]; + clearAttributes(); + termAtt.append(testToken.text); + offsetAtt.setOffset(testToken.startOffset, testToken.endOffset); + if (tokenUpto > 1) { + posIncrAtt.setPositionIncrement(testToken.pos - tokens[tokenUpto-2].pos); + } else { + posIncrAtt.setPositionIncrement(testToken.pos+1); + } + return true; + } + } + } + + private class MyAnalyzer extends Analyzer { + @Override + public TokenStream tokenStream(String fieldName, Reader reader) { + return new MyTokenStream(); + } + } + + public void test() throws IOException { + //Check to see the files were created properly in setup + assertTrue(dir.fileExists(IndexFileNames.segmentFileName(seg, IndexFileNames.VECTORS_DOCUMENTS_EXTENSION))); + assertTrue(dir.fileExists(IndexFileNames.segmentFileName(seg, IndexFileNames.VECTORS_INDEX_EXTENSION))); + } + + public void testReader() throws IOException { + TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos); + for (int j = 0; j < 5; j++) { + TermFreqVector vector = reader.get(j, testFields[0]); + assertTrue(vector != null); + String[] terms = vector.getTerms(); + assertTrue(terms != null); + assertTrue(terms.length == testTerms.length); + for (int i = 0; i < terms.length; i++) { + String term = terms[i]; + //System.out.println("Term: " + term); + assertTrue(term.equals(testTerms[i])); + } + } + reader.close(); + } + + public void testPositionReader() throws IOException { + TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos); + TermPositionVector vector; + String[] terms; + vector = (TermPositionVector) reader.get(0, testFields[0]); + assertTrue(vector != null); + terms = vector.getTerms(); + assertTrue(terms != null); + assertTrue(terms.length == testTerms.length); + for (int i = 0; i < terms.length; i++) { + String term = terms[i]; + //System.out.println("Term: " + term); + assertTrue(term.equals(testTerms[i])); + int[] positions = vector.getTermPositions(i); + assertTrue(positions != null); + assertTrue(positions.length == this.positions[i].length); + for (int j = 0; j < positions.length; j++) { + int position = positions[j]; + assertTrue(position == this.positions[i][j]); + } + TermVectorOffsetInfo[] offset = vector.getOffsets(i); + assertTrue(offset != null); + assertTrue(offset.length == this.offsets[i].length); + for (int j = 0; j < offset.length; j++) { + TermVectorOffsetInfo termVectorOffsetInfo = offset[j]; + assertTrue(termVectorOffsetInfo.equals(offsets[i][j])); + } + } + + TermFreqVector freqVector = reader.get(0, testFields[1]); //no pos, no offset + assertTrue(freqVector != null); + assertTrue(freqVector instanceof TermPositionVector == false); + terms = freqVector.getTerms(); + assertTrue(terms != null); + assertTrue(terms.length == testTerms.length); + for (int i = 0; i < terms.length; i++) { + String term = terms[i]; + //System.out.println("Term: " + term); + assertTrue(term.equals(testTerms[i])); + } + reader.close(); + } + + public void testOffsetReader() throws IOException { + TermVectorsReader reader = new TermVectorsReader(dir, seg, 
fieldInfos); + TermPositionVector vector = (TermPositionVector) reader.get(0, testFields[0]); + assertTrue(vector != null); + String[] terms = vector.getTerms(); + assertTrue(terms != null); + assertTrue(terms.length == testTerms.length); + for (int i = 0; i < terms.length; i++) { + String term = terms[i]; + //System.out.println("Term: " + term); + assertTrue(term.equals(testTerms[i])); + int[] positions = vector.getTermPositions(i); + assertTrue(positions != null); + assertTrue(positions.length == this.positions[i].length); + for (int j = 0; j < positions.length; j++) { + int position = positions[j]; + assertTrue(position == this.positions[i][j]); + } + TermVectorOffsetInfo[] offset = vector.getOffsets(i); + assertTrue(offset != null); + assertTrue(offset.length == this.offsets[i].length); + for (int j = 0; j < offset.length; j++) { + TermVectorOffsetInfo termVectorOffsetInfo = offset[j]; + assertTrue(termVectorOffsetInfo.equals(offsets[i][j])); + } + } + reader.close(); + } + + public void testMapper() throws IOException { + TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos); + SortedTermVectorMapper mapper = new SortedTermVectorMapper(new TermVectorEntryFreqSortedComparator()); + reader.get(0, mapper); + SortedSet set = mapper.getTermVectorEntrySet(); + assertTrue("set is null and it shouldn't be", set != null); + //three fields, 4 terms, all terms are the same + assertTrue("set Size: " + set.size() + " is not: " + 4, set.size() == 4); + //Check offsets and positions + for (Iterator iterator = set.iterator(); iterator.hasNext();) { + TermVectorEntry tve = iterator.next(); + assertTrue("tve is null and it shouldn't be", tve != null); + assertTrue("tve.getOffsets() is null and it shouldn't be", tve.getOffsets() != null); + assertTrue("tve.getPositions() is null and it shouldn't be", tve.getPositions() != null); + + } + + mapper = new SortedTermVectorMapper(new TermVectorEntryFreqSortedComparator()); + reader.get(1, mapper); + set = mapper.getTermVectorEntrySet(); + assertTrue("set is null and it shouldn't be", set != null); + //three fields, 4 terms, all terms are the same + assertTrue("set Size: " + set.size() + " is not: " + 4, set.size() == 4); + //Should have offsets and positions b/c we are munging all the fields together + for (Iterator iterator = set.iterator(); iterator.hasNext();) { + TermVectorEntry tve = iterator.next(); + assertTrue("tve is null and it shouldn't be", tve != null); + assertTrue("tve.getOffsets() is null and it shouldn't be", tve.getOffsets() != null); + assertTrue("tve.getPositions() is null and it shouldn't be", tve.getPositions() != null); + + } + + + FieldSortedTermVectorMapper fsMapper = new FieldSortedTermVectorMapper(new TermVectorEntryFreqSortedComparator()); + reader.get(0, fsMapper); + Map> map = fsMapper.getFieldToTerms(); + assertTrue("map Size: " + map.size() + " is not: " + testFields.length, map.size() == testFields.length); + for (Map.Entry> entry : map.entrySet()) { + SortedSet sortedSet = entry.getValue(); + assertTrue("sortedSet Size: " + sortedSet.size() + " is not: " + 4, sortedSet.size() == 4); + for (final TermVectorEntry tve : sortedSet) { + assertTrue("tve is null and it shouldn't be", tve != null); + //Check offsets and positions. 
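+        // FieldSortedTermVectorMapper keys entries by field, so expectations
+        // differ per field below: the field indexed with positions/offsets
+        // must report them, while the one indexed without must report null.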
+ assertTrue("tve is null and it shouldn't be", tve != null); + String field = tve.getField(); + if (field.equals(testFields[0])) { + //should have offsets + + assertTrue("tve.getOffsets() is null and it shouldn't be", tve.getOffsets() != null); + assertTrue("tve.getPositions() is null and it shouldn't be", tve.getPositions() != null); + } + else if (field.equals(testFields[1])) { + //should not have offsets + + assertTrue("tve.getOffsets() is not null and it shouldn't be", tve.getOffsets() == null); + assertTrue("tve.getPositions() is not null and it shouldn't be", tve.getPositions() == null); + } + } + } + //Try mapper that ignores offs and positions + fsMapper = new FieldSortedTermVectorMapper(true, true, new TermVectorEntryFreqSortedComparator()); + reader.get(0, fsMapper); + map = fsMapper.getFieldToTerms(); + assertTrue("map Size: " + map.size() + " is not: " + testFields.length, map.size() == testFields.length); + for (final Map.Entry> entry : map.entrySet()) { + SortedSet sortedSet = entry.getValue(); + assertTrue("sortedSet Size: " + sortedSet.size() + " is not: " + 4, sortedSet.size() == 4); + for (final TermVectorEntry tve : sortedSet) { + assertTrue("tve is null and it shouldn't be", tve != null); + //Check offsets and positions. + assertTrue("tve is null and it shouldn't be", tve != null); + String field = tve.getField(); + if (field.equals(testFields[0])) { + //should have offsets + + assertTrue("tve.getOffsets() is null and it shouldn't be", tve.getOffsets() == null); + assertTrue("tve.getPositions() is null and it shouldn't be", tve.getPositions() == null); + } + else if (field.equals(testFields[1])) { + //should not have offsets + + assertTrue("tve.getOffsets() is not null and it shouldn't be", tve.getOffsets() == null); + assertTrue("tve.getPositions() is not null and it shouldn't be", tve.getPositions() == null); + } + } + } + + // test setDocumentNumber() + IndexReader ir = IndexReader.open(dir, true); + DocNumAwareMapper docNumAwareMapper = new DocNumAwareMapper(); + assertEquals(-1, docNumAwareMapper.getDocumentNumber()); + + ir.getTermFreqVector(0, docNumAwareMapper); + assertEquals(0, docNumAwareMapper.getDocumentNumber()); + docNumAwareMapper.setDocumentNumber(-1); + + ir.getTermFreqVector(1, docNumAwareMapper); + assertEquals(1, docNumAwareMapper.getDocumentNumber()); + docNumAwareMapper.setDocumentNumber(-1); + + ir.getTermFreqVector(0, "f1", docNumAwareMapper); + assertEquals(0, docNumAwareMapper.getDocumentNumber()); + docNumAwareMapper.setDocumentNumber(-1); + + ir.getTermFreqVector(1, "f2", docNumAwareMapper); + assertEquals(1, docNumAwareMapper.getDocumentNumber()); + docNumAwareMapper.setDocumentNumber(-1); + + ir.getTermFreqVector(0, "f1", docNumAwareMapper); + assertEquals(0, docNumAwareMapper.getDocumentNumber()); + + ir.close(); + reader.close(); + } + + + /** + * Make sure exceptions and bad params are handled appropriately + */ + public void testBadParams() throws IOException { + TermVectorsReader reader = null; + try { + reader = new TermVectorsReader(dir, seg, fieldInfos); + //Bad document number, good field number + reader.get(50, testFields[0]); + fail(); + } catch (IOException e) { + // expected exception + } finally { + reader.close(); + } + try { + reader = new TermVectorsReader(dir, seg, fieldInfos); + //Bad document number, no field + reader.get(50); + fail(); + } catch (IOException e) { + // expected exception + } finally { + reader.close(); + } + try { + reader = new TermVectorsReader(dir, seg, fieldInfos); + //good document number, bad 
field number + TermFreqVector vector = reader.get(0, "f50"); + assertTrue(vector == null); + reader.close(); + } catch (IOException e) { + fail(); + } finally { + reader.close(); + } + } + + + public static class DocNumAwareMapper extends TermVectorMapper { + + public DocNumAwareMapper() { + } + + private int documentNumber = -1; + + @Override + public void setExpectations(String field, int numTerms, boolean storeOffsets, boolean storePositions) { + if (documentNumber == -1) { + throw new RuntimeException("Documentnumber should be set at this point!"); + } + } + + @Override + public void map(String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) { + if (documentNumber == -1) { + throw new RuntimeException("Documentnumber should be set at this point!"); + } + } + + public int getDocumentNumber() { + return documentNumber; + } + + @Override + public void setDocumentNumber(int documentNumber) { + this.documentNumber = documentNumber; + } + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestTermVectorsWriter.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestTermVectorsWriter.java new file mode 100644 index 0000000..b6a250c --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestTermVectorsWriter.java @@ -0,0 +1,475 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.io.StringReader; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.CachingTokenFilter; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.TeeSinkTokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.MockDirectoryWrapper; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.LuceneTestCase; + +/** tests for writing term vectors */ +public class TestTermVectorsWriter extends LuceneTestCase { + // LUCENE-1442 + public void testDoubleOffsetCounting() throws Exception { + Directory dir = newDirectory(); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random))); + Document doc = new Document(); + Field f = newField("field", "abcd", Field.Store.NO, Field.Index.NOT_ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); + doc.add(f); + doc.add(f); + Field f2 = newField("field", "", Field.Store.NO, Field.Index.NOT_ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); + doc.add(f2); + doc.add(f); + w.addDocument(doc); + w.close(); + + IndexReader r = IndexReader.open(dir, true); + TermVectorOffsetInfo[] termOffsets = ((TermPositionVector) r.getTermFreqVector(0, "field")).getOffsets(0); + + // Token "" occurred once + assertEquals(1, termOffsets.length); + assertEquals(8, termOffsets[0].getStartOffset()); + assertEquals(8, termOffsets[0].getEndOffset()); + + // Token "abcd" occurred three times + termOffsets = ((TermPositionVector) r.getTermFreqVector(0, "field")).getOffsets(1); + assertEquals(3, termOffsets.length); + assertEquals(0, termOffsets[0].getStartOffset()); + assertEquals(4, termOffsets[0].getEndOffset()); + assertEquals(4, termOffsets[1].getStartOffset()); + assertEquals(8, termOffsets[1].getEndOffset()); + assertEquals(8, termOffsets[2].getStartOffset()); + assertEquals(12, termOffsets[2].getEndOffset()); + r.close(); + dir.close(); + } + + // LUCENE-1442 + public void testDoubleOffsetCounting2() throws Exception { + Directory dir = newDirectory(); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); + Document doc = new Document(); + Field f = newField("field", "abcd", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); + doc.add(f); + doc.add(f); + w.addDocument(doc); + w.close(); + + IndexReader r = IndexReader.open(dir, true); + TermVectorOffsetInfo[] termOffsets = ((TermPositionVector) r.getTermFreqVector(0, "field")).getOffsets(0); + assertEquals(2, termOffsets.length); + assertEquals(0, termOffsets[0].getStartOffset()); + assertEquals(4, termOffsets[0].getEndOffset()); + assertEquals(5, termOffsets[1].getStartOffset()); + assertEquals(9, termOffsets[1].getEndOffset()); + r.close(); + dir.close(); + } + + // LUCENE-1448 + public void testEndOffsetPositionCharAnalyzer() throws Exception { + Directory dir = newDirectory(); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); + Document doc = new Document(); + Field f = newField("field", "abcd ", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); + doc.add(f); + doc.add(f); + w.addDocument(doc); + 
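+    // LUCENE-1448: the same Field instance is added twice, and the offsets of
+    // the second value must continue from the end of the first instead of
+    // restarting at 0; the assertions below expect the second "abcd" at 8..12.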
w.close(); + + IndexReader r = IndexReader.open(dir, true); + TermVectorOffsetInfo[] termOffsets = ((TermPositionVector) r.getTermFreqVector(0, "field")).getOffsets(0); + assertEquals(2, termOffsets.length); + assertEquals(0, termOffsets[0].getStartOffset()); + assertEquals(4, termOffsets[0].getEndOffset()); + assertEquals(8, termOffsets[1].getStartOffset()); + assertEquals(12, termOffsets[1].getEndOffset()); + r.close(); + dir.close(); + } + + // LUCENE-1448 + public void testEndOffsetPositionWithCachingTokenFilter() throws Exception { + Directory dir = newDirectory(); + Analyzer analyzer = new MockAnalyzer(random); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer)); + Document doc = new Document(); + TokenStream stream = analyzer.tokenStream("field", new StringReader("abcd ")); + stream.reset(); // TODO: weird to reset before wrapping with CachingTokenFilter... correct? + stream = new CachingTokenFilter(stream); + Field f = new Field("field", stream, Field.TermVector.WITH_POSITIONS_OFFSETS); + doc.add(f); + doc.add(f); + w.addDocument(doc); + w.close(); + + IndexReader r = IndexReader.open(dir, true); + TermVectorOffsetInfo[] termOffsets = ((TermPositionVector) r.getTermFreqVector(0, "field")).getOffsets(0); + assertEquals(2, termOffsets.length); + assertEquals(0, termOffsets[0].getStartOffset()); + assertEquals(4, termOffsets[0].getEndOffset()); + assertEquals(8, termOffsets[1].getStartOffset()); + assertEquals(12, termOffsets[1].getEndOffset()); + r.close(); + dir.close(); + } + + // LUCENE-1448 + public void testEndOffsetPositionWithTeeSinkTokenFilter() throws Exception { + MockDirectoryWrapper dir = newDirectory(); + Analyzer analyzer = new WhitespaceAnalyzer(TEST_VERSION_CURRENT); + IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); + Document doc = new Document(); + TeeSinkTokenFilter tee = new TeeSinkTokenFilter(analyzer.tokenStream("field", new StringReader("abcd "))); + TokenStream sink = tee.newSinkTokenStream(); + Field f1 = new Field("field", tee, Field.TermVector.WITH_POSITIONS_OFFSETS); + Field f2 = new Field("field", sink, Field.TermVector.WITH_POSITIONS_OFFSETS); + doc.add(f1); + doc.add(f2); + w.addDocument(doc); + w.close(); + + IndexReader r = IndexReader.open(dir, true); + TermVectorOffsetInfo[] termOffsets = ((TermPositionVector) r.getTermFreqVector(0, "field")).getOffsets(0); + assertEquals(2, termOffsets.length); + assertEquals(0, termOffsets[0].getStartOffset()); + assertEquals(4, termOffsets[0].getEndOffset()); + assertEquals(8, termOffsets[1].getStartOffset()); + assertEquals(12, termOffsets[1].getEndOffset()); + r.close(); + dir.close(); + } + + // LUCENE-1448 + public void testEndOffsetPositionStopFilter() throws Exception { + Directory dir = newDirectory(); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT))); + Document doc = new Document(); + Field f = newField("field", "abcd the", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); + doc.add(f); + doc.add(f); + w.addDocument(doc); + w.close(); + + IndexReader r = IndexReader.open(dir, true); + TermVectorOffsetInfo[] termOffsets = ((TermPositionVector) r.getTermFreqVector(0, "field")).getOffsets(0); + assertEquals(2, termOffsets.length); + assertEquals(0, termOffsets[0].getStartOffset()); + assertEquals(4, termOffsets[0].getEndOffset()); + assertEquals(9, termOffsets[1].getStartOffset()); + assertEquals(13,
termOffsets[1].getEndOffset()); + r.close(); + dir.close(); + } + + // LUCENE-1448 + public void testEndOffsetPositionStandard() throws Exception { + Directory dir = newDirectory(); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random))); + Document doc = new Document(); + Field f = newField("field", "abcd the ", Field.Store.NO, + Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); + Field f2 = newField("field", "crunch man", Field.Store.NO, + Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); + doc.add(f); + doc.add(f2); + w.addDocument(doc); + w.close(); + + IndexReader r = IndexReader.open(dir, true); + TermPositionVector tpv = ((TermPositionVector) r.getTermFreqVector(0, "field")); + TermVectorOffsetInfo[] termOffsets = tpv.getOffsets(0); + assertEquals(1, termOffsets.length); + assertEquals(0, termOffsets[0].getStartOffset()); + assertEquals(4, termOffsets[0].getEndOffset()); + termOffsets = tpv.getOffsets(1); + assertEquals(11, termOffsets[0].getStartOffset()); + assertEquals(17, termOffsets[0].getEndOffset()); + termOffsets = tpv.getOffsets(2); + assertEquals(18, termOffsets[0].getStartOffset()); + assertEquals(21, termOffsets[0].getEndOffset()); + r.close(); + dir.close(); + } + + // LUCENE-1448 + public void testEndOffsetPositionStandardEmptyField() throws Exception { + Directory dir = newDirectory(); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random))); + Document doc = new Document(); + Field f = newField("field", "", Field.Store.NO, + Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); + Field f2 = newField("field", "crunch man", Field.Store.NO, + Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); + doc.add(f); + doc.add(f2); + w.addDocument(doc); + w.close(); + + IndexReader r = IndexReader.open(dir, true); + TermPositionVector tpv = ((TermPositionVector) r.getTermFreqVector(0, "field")); + TermVectorOffsetInfo[] termOffsets = tpv.getOffsets(0); + assertEquals(1, termOffsets.length); + assertEquals(1, termOffsets[0].getStartOffset()); + assertEquals(7, termOffsets[0].getEndOffset()); + termOffsets = tpv.getOffsets(1); + assertEquals(8, termOffsets[0].getStartOffset()); + assertEquals(11, termOffsets[0].getEndOffset()); + r.close(); + dir.close(); + } + + // LUCENE-1448 + public void testEndOffsetPositionStandardEmptyField2() throws Exception { + Directory dir = newDirectory(); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random))); + Document doc = new Document(); + + Field f = newField("field", "abcd", Field.Store.NO, + Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); + doc.add(f); + doc.add(newField("field", "", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); + + Field f2 = newField("field", "crunch", Field.Store.NO, + Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); + doc.add(f2); + + w.addDocument(doc); + w.close(); + + IndexReader r = IndexReader.open(dir, true); + TermPositionVector tpv = ((TermPositionVector) r.getTermFreqVector(0, "field")); + TermVectorOffsetInfo[] termOffsets = tpv.getOffsets(0); + assertEquals(1, termOffsets.length); + assertEquals(0, termOffsets[0].getStartOffset()); + assertEquals(4, termOffsets[0].getEndOffset()); + termOffsets = tpv.getOffsets(1); + assertEquals(6, termOffsets[0].getStartOffset()); + assertEquals(12, termOffsets[0].getEndOffset()); + r.close(); + 
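+    // Note that even the empty middle value advances the offset gap: "crunch"
+    // starts at 6 rather than 5, one extra position past the empty instance.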
dir.close(); + } + + // LUCENE-1168 + public void testTermVectorCorruption() throws IOException { + + Directory dir = newDirectory(); + for(int iter=0;iter<2;iter++) { + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random)) + .setMaxBufferedDocs(2).setRAMBufferSizeMB( + IndexWriterConfig.DISABLE_AUTO_FLUSH).setMergeScheduler( + new SerialMergeScheduler()).setMergePolicy( + new LogDocMergePolicy())); + + Document document = new Document(); + + Field storedField = newField("stored", "stored", Field.Store.YES, + Field.Index.NO); + document.add(storedField); + writer.addDocument(document); + writer.addDocument(document); + + document = new Document(); + document.add(storedField); + Field termVectorField = newField("termVector", "termVector", + Field.Store.NO, Field.Index.NOT_ANALYZED, + Field.TermVector.WITH_POSITIONS_OFFSETS); + + document.add(termVectorField); + writer.addDocument(document); + writer.optimize(); + writer.close(); + + IndexReader reader = IndexReader.open(dir, true); + for(int i=0;i= 0) { + clearAttributes(); + termAtt.append(value); + return true; + } + return false; + } +} + + +public class TestTermdocPerf extends LuceneTestCase { + + void addDocs(final Random random, Directory dir, final int ndocs, String field, final String val, final int maxTF, final float percentDocs) throws IOException { + final RepeatingTokenStream ts = new RepeatingTokenStream(val); + + Analyzer analyzer = new Analyzer() { + @Override + public TokenStream tokenStream(String fieldName, Reader reader) { + if (random.nextFloat() < percentDocs) ts.num = random.nextInt(maxTF)+1; + else ts.num=0; + return ts; + } + }; + + Document doc = new Document(); + doc.add(newField(field,val, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS)); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, analyzer) + .setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(100)); + ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(100); + + for (int i=0; i commits = IndexReader.listCommits(dir); + for (Iterator iterator = commits.iterator(); iterator.hasNext();) { + IndexCommit commit = iterator.next(); + Map ud=commit.getUserData(); + if (ud.size() > 0) + if (ud.get("index").endsWith(ids)) + last=commit; + } + + if (last==null) + throw new RuntimeException("Couldn't find commit point "+id); + + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random)).setIndexDeletionPolicy( + new RollbackDeletionPolicy(id)).setIndexCommit(last)); + Map data = new HashMap(); + data.put("index", "Rolled back to 1-"+id); + w.commit(data); + w.close(); + } + + public void testRepeatedRollBacks() throws Exception { + + int expectedLastRecordId=100; + while (expectedLastRecordId>10) { + expectedLastRecordId -=10; + rollBackLast(expectedLastRecordId); + + BitSet expecteds = new BitSet(100); + expecteds.set(1,(expectedLastRecordId+1),true); + checkExpecteds(expecteds); + } + } + + private void checkExpecteds(BitSet expecteds) throws Exception { + IndexReader r = IndexReader.open(dir, true); + + //Perhaps not the most efficient approach but meets our needs here. 
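+ // Every live doc's record id must be in 'expecteds'; each id is cleared as it is seen, so an unexpected survivor or a duplicate fails the assertions below.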
+ for (int i = 0; i < r.maxDoc(); i++) { + if(!r.isDeleted(i)) { + String sval=r.document(i).get(FIELD_RECORD_ID); + if(sval!=null) { + int val=Integer.parseInt(sval); + assertTrue("Did not expect document #"+val, expecteds.get(val)); + expecteds.set(val,false); + } + } + } + r.close(); + assertEquals("Should have 0 docs remaining ", 0 ,expecteds.cardinality()); + } + + /* + private void showAvailableCommitPoints() throws Exception { + Collection commits = IndexReader.listCommits(dir); + for (Iterator iterator = commits.iterator(); iterator.hasNext();) { + IndexCommit comm = (IndexCommit) iterator.next(); + System.out.print("\t Available commit point:["+comm.getUserData()+"] files="); + Collection files = comm.getFileNames(); + for (Iterator iterator2 = files.iterator(); iterator2.hasNext();) { + String filename = (String) iterator2.next(); + System.out.print(filename+", "); + } + System.out.println(); + } + } + */ + + @Override + public void setUp() throws Exception { + super.setUp(); + dir = newDirectory(); + //Build index, of records 1 to 100, committing after each batch of 10 + IndexDeletionPolicy sdp=new KeepAllDeletionPolicy(); + IndexWriter w=new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setIndexDeletionPolicy(sdp)); + for(int currentRecordId=1;currentRecordId<=100;currentRecordId++) { + Document doc=new Document(); + doc.add(newField(FIELD_RECORD_ID,""+currentRecordId,Field.Store.YES,Field.Index.ANALYZED)); + w.addDocument(doc); + + if (currentRecordId%10 == 0) { + Map data = new HashMap(); + data.put("index", "records 1-"+currentRecordId); + w.commit(data); + } + } + + w.close(); + } + + @Override + public void tearDown() throws Exception { + dir.close(); + super.tearDown(); + } + + // Rolls back to previous commit point + class RollbackDeletionPolicy implements IndexDeletionPolicy { + private int rollbackPoint; + + public RollbackDeletionPolicy(int rollbackPoint) { + this.rollbackPoint = rollbackPoint; + } + + public void onCommit(List commits) throws IOException { + } + + public void onInit(List commits) throws IOException { + for (final IndexCommit commit : commits) { + Map userData=commit.getUserData(); + if (userData.size() > 0) { + // Label for a commit point is "Records 1-30" + // This code reads the last id ("30" in this example) and deletes it + // if it is after the desired rollback point + String x = userData.get("index"); + String lastVal = x.substring(x.lastIndexOf("-")+1); + int last = Integer.parseInt(lastVal); + if (last>rollbackPoint) { + /* + System.out.print("\tRolling back commit point:" + + " UserData="+commit.getUserData() +") ("+(commits.size()-1)+" commit points left) files="); + Collection files = commit.getFileNames(); + for (Iterator iterator2 = files.iterator(); iterator2.hasNext();) { + System.out.print(" "+iterator2.next()); + } + System.out.println(); + */ + + commit.delete(); + } + } + } + } + } + + class DeleteLastCommitPolicy implements IndexDeletionPolicy { + + public void onCommit(List commits) throws IOException {} + + public void onInit(List commits) throws IOException { + commits.get(commits.size()-1).delete(); + } + } + + public void testRollbackDeletionPolicy() throws Exception { + for(int i=0;i<2;i++) { + // Unless you specify a prior commit point, rollback + // should not work: + new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)) + .setIndexDeletionPolicy(new DeleteLastCommitPolicy())).close(); + IndexReader r = IndexReader.open(dir, true); + 
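+ // With no prior commit point selected, DeleteLastCommitPolicy must not roll anything back: all 100 docs remain.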
assertEquals(100, r.numDocs()); + r.close(); + } + } + + // Keeps all commit points (used to build index) + class KeepAllDeletionPolicy implements IndexDeletionPolicy { + public void onCommit(List commits) throws IOException {} + public void onInit(List commits) throws IOException {} + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestTransactions.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestTransactions.java new file mode 100644 index 0000000..6e5419f --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/TestTransactions.java @@ -0,0 +1,237 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.MockDirectoryWrapper; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.English; +import org.apache.lucene.util.LuceneTestCase; + +public class TestTransactions extends LuceneTestCase { + + private static volatile boolean doFail; + + private class RandomFailure extends MockDirectoryWrapper.Failure { + @Override + public void eval(MockDirectoryWrapper dir) throws IOException { + if (TestTransactions.doFail && random.nextInt() % 10 <= 3) + throw new IOException("now failing randomly but on purpose"); + } + } + + private static abstract class TimedThread extends Thread { + volatile boolean failed; + private static float RUN_TIME_MSEC = atLeast(500); + private TimedThread[] allThreads; + + abstract public void doWork() throws Throwable; + + TimedThread(TimedThread[] threads) { + this.allThreads = threads; + } + + @Override + public void run() { + final long stopTime = System.currentTimeMillis() + (long) (RUN_TIME_MSEC); + + try { + do { + if (anyErrors()) break; + doWork(); + } while (System.currentTimeMillis() < stopTime); + } catch (Throwable e) { + System.out.println(Thread.currentThread() + ": exc"); + e.printStackTrace(System.out); + failed = true; + } + } + + private boolean anyErrors() { + for(int i=0;i wordSet1 = WordlistLoader.getWordSet(new StringReader(s)); + checkSet(wordSet1); + HashSet wordSet2 = WordlistLoader.getWordSet(new BufferedReader(new StringReader(s))); + checkSet(wordSet2); + } + + public void testComments() throws Exception { + String s = "ONE\n two \nthree\n#comment"; + HashSet wordSet1 = WordlistLoader.getWordSet(new StringReader(s), "#"); + checkSet(wordSet1); + assertFalse(wordSet1.contains("#comment")); + assertFalse(wordSet1.contains("comment")); + } + + + private void checkSet(HashSet wordset) { + assertEquals(3, 
wordset.size()); + assertTrue(wordset.contains("ONE")); // case is not modified + assertTrue(wordset.contains("two")); // surrounding whitespace is removed + assertTrue(wordset.contains("three")); + assertFalse(wordset.contains("four")); + } + + /** + * Test stopwords in snowball format + */ + public void testSnowballListLoading() throws IOException { + String s = + "|comment\n" + // commented line + " |comment\n" + // commented line with leading whitespace + "\n" + // blank line + " \t\n" + // line with only whitespace + " |comment | comment\n" + // commented line with comment + "ONE\n" + // stopword, in uppercase + " two \n" + // stopword with leading/trailing space + " three four five \n" + // multiple stopwords + "six seven | comment\n"; //multiple stopwords + comment + Set wordset = WordlistLoader.getSnowballWordSet(new StringReader(s)); + assertEquals(7, wordset.size()); + assertTrue(wordset.contains("ONE")); + assertTrue(wordset.contains("two")); + assertTrue(wordset.contains("three")); + assertTrue(wordset.contains("four")); + assertTrue(wordset.contains("five")); + assertTrue(wordset.contains("six")); + assertTrue(wordset.contains("seven")); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.19.cfs.zip b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.19.cfs.zip new file mode 100644 index 0000000..4fd9b32 Binary files /dev/null and b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.19.cfs.zip differ diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.19.nocfs.zip b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.19.nocfs.zip new file mode 100644 index 0000000..e0d9142 Binary files /dev/null and b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.19.nocfs.zip differ diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.20.cfs.zip b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.20.cfs.zip new file mode 100644 index 0000000..4b931ae Binary files /dev/null and b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.20.cfs.zip differ diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.20.nocfs.zip b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.20.nocfs.zip new file mode 100644 index 0000000..1275cdf Binary files /dev/null and b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.20.nocfs.zip differ diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.21.cfs.zip b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.21.cfs.zip new file mode 100644 index 0000000..473c138 Binary files /dev/null and b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.21.cfs.zip differ diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.21.nocfs.zip b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.21.nocfs.zip new file mode 100644 index 0000000..d0582d0 Binary files /dev/null and b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.21.nocfs.zip differ diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.22.cfs.zip b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.22.cfs.zip new file mode 100644 index 0000000..1236307 Binary files /dev/null and 
b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.22.cfs.zip differ diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.22.nocfs.zip b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.22.nocfs.zip new file mode 100644 index 0000000..216ddf3 Binary files /dev/null and b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.22.nocfs.zip differ diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.23.cfs.zip b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.23.cfs.zip new file mode 100644 index 0000000..b5fdeef Binary files /dev/null and b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.23.cfs.zip differ diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.23.nocfs.zip b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.23.nocfs.zip new file mode 100644 index 0000000..9137ae6 Binary files /dev/null and b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.23.nocfs.zip differ diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.24.cfs.zip b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.24.cfs.zip new file mode 100644 index 0000000..2c666a9 Binary files /dev/null and b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.24.cfs.zip differ diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.24.nocfs.zip b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.24.nocfs.zip new file mode 100644 index 0000000..c223875 Binary files /dev/null and b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.24.nocfs.zip differ diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.29.cfs.zip b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.29.cfs.zip new file mode 100644 index 0000000..c694c78 Binary files /dev/null and b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.29.cfs.zip differ diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.29.nocfs.zip b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.29.nocfs.zip new file mode 100644 index 0000000..298cab7 Binary files /dev/null and b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.29.nocfs.zip differ diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.30.cfs.zip b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.30.cfs.zip new file mode 100644 index 0000000..d5978c8 Binary files /dev/null and b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.30.cfs.zip differ diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.30.nocfs.zip b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.30.nocfs.zip new file mode 100644 index 0000000..28cd83b Binary files /dev/null and b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.30.nocfs.zip differ diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.31.cfs.zip b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.31.cfs.zip new file mode 100644 index 0000000..8f123a7 Binary files /dev/null and 
b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.31.cfs.zip differ diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.31.nocfs.zip b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.31.nocfs.zip new file mode 100644 index 0000000..21434e1 Binary files /dev/null and b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.31.nocfs.zip differ diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.31.optimized.cfs.zip b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.31.optimized.cfs.zip new file mode 100644 index 0000000..200c710 Binary files /dev/null and b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.31.optimized.cfs.zip differ diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.31.optimized.nocfs.zip b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.31.optimized.nocfs.zip new file mode 100644 index 0000000..9a158f1 Binary files /dev/null and b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/index.31.optimized.nocfs.zip differ diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/wordliststopwords.txt b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/wordliststopwords.txt new file mode 100644 index 0000000..7d35507 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/wordliststopwords.txt @@ -0,0 +1,5 @@ +#comment +ONE +two +#comment +three diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/wordliststopwords_nocomment.txt b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/wordliststopwords_nocomment.txt new file mode 100644 index 0000000..59cb04e --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/index/wordliststopwords_nocomment.txt @@ -0,0 +1,3 @@ +ONE +two +three diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/messages/MessagesTestBundle.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/messages/MessagesTestBundle.java new file mode 100644 index 0000000..d12b6d5 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/messages/MessagesTestBundle.java @@ -0,0 +1,40 @@ +package org.apache.lucene.messages; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +public class MessagesTestBundle extends NLS { + + private static final String BUNDLE_NAME = MessagesTestBundle.class.getName(); + + private MessagesTestBundle() { + // should never be instantiated + } + + static { + // register all string ids with NLS class and initialize static string + // values + NLS.initializeMessages(BUNDLE_NAME, MessagesTestBundle.class); + } + + // static string must match the strings in the property files. + public static String Q0001E_INVALID_SYNTAX; + public static String Q0004E_INVALID_SYNTAX_ESCAPE_UNICODE_TRUNCATION; + + // this message is missing from the properties file + public static String Q0005E_MESSAGE_NOT_IN_BUNDLE; +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/messages/MessagesTestBundle.properties b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/messages/MessagesTestBundle.properties new file mode 100644 index 0000000..870ff73 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/messages/MessagesTestBundle.properties @@ -0,0 +1,3 @@ +Q0001E_INVALID_SYNTAX = Syntax Error: {0} + +Q0004E_INVALID_SYNTAX_ESCAPE_UNICODE_TRUNCATION = Truncated unicode escape sequence. diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/messages/MessagesTestBundle_ja.properties b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/messages/MessagesTestBundle_ja.properties new file mode 100644 index 0000000..2235294 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/messages/MessagesTestBundle_ja.properties @@ -0,0 +1,3 @@ +Q0001E_INVALID_SYNTAX = \u69cb\u6587\u30a8\u30e9\u30fc: {0} + +Q0004E_INVALID_SYNTAX_ESCAPE_UNICODE_TRUNCATION = \u5207\u308a\u6368\u3066\u3089\u308c\u305f\u30e6\u30cb\u30b3\u30fc\u30c9\u30fb\u30a8\u30b9\u30b1\u30fc\u30d7\u30fb\u30b7\u30fc\u30b1\u30f3\u30b9\u3002 diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/messages/TestNLS.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/messages/TestNLS.java new file mode 100644 index 0000000..c9ea9c1 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/messages/TestNLS.java @@ -0,0 +1,106 @@ +package org.apache.lucene.messages; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.util.Locale; + +import org.apache.lucene.util.LuceneTestCase; + +/** + */ +public class TestNLS extends LuceneTestCase { + public void testMessageLoading() { + Message invalidSyntax = new MessageImpl( + MessagesTestBundle.Q0001E_INVALID_SYNTAX, "XXX"); + /* + * if the default locale is ja, you get ja as a fallback: + * see ResourceBundle.html#getBundle(java.lang.String, java.util.Locale, java.lang.ClassLoader) + */ + if (!Locale.getDefault().getLanguage().equals("ja")) + assertEquals("Syntax Error: XXX", invalidSyntax.getLocalizedMessage(Locale.ENGLISH)); + } + + public void testMessageLoading_ja() { + Message invalidSyntax = new MessageImpl( + MessagesTestBundle.Q0001E_INVALID_SYNTAX, "XXX"); + assertEquals("構文エラー: XXX", invalidSyntax + .getLocalizedMessage(Locale.JAPANESE)); + } + + public void testNLSLoading() { + String message = NLS + .getLocalizedMessage(MessagesTestBundle.Q0004E_INVALID_SYNTAX_ESCAPE_UNICODE_TRUNCATION, Locale.ENGLISH); + /* + * if the default locale is ja, you get ja as a fallback: + * see ResourceBundle.html#getBundle(java.lang.String, java.util.Locale, java.lang.ClassLoader) + */ + if (!Locale.getDefault().getLanguage().equals("ja")) + assertEquals("Truncated unicode escape sequence.", message); + + message = NLS.getLocalizedMessage(MessagesTestBundle.Q0001E_INVALID_SYNTAX, Locale.ENGLISH, + "XXX"); + /* + * if the default locale is ja, you get ja as a fallback: + * see ResourceBundle.html#getBundle(java.lang.String, java.util.Locale, java.lang.ClassLoader) + */ + if (!Locale.getDefault().getLanguage().equals("ja")) + assertEquals("Syntax Error: XXX", message); + } + + public void testNLSLoading_ja() { + String message = NLS.getLocalizedMessage( + MessagesTestBundle.Q0004E_INVALID_SYNTAX_ESCAPE_UNICODE_TRUNCATION, + Locale.JAPANESE); + assertEquals("切り捨てられたユニコード・エスケープ・シーケンス。", message); + + message = NLS.getLocalizedMessage(MessagesTestBundle.Q0001E_INVALID_SYNTAX, + Locale.JAPANESE, "XXX"); + assertEquals("構文エラー: XXX", message); + } + + public void testNLSLoading_xx_XX() { + Locale locale = new Locale("xx", "XX", ""); + String message = NLS.getLocalizedMessage( + MessagesTestBundle.Q0004E_INVALID_SYNTAX_ESCAPE_UNICODE_TRUNCATION, + locale); + /* + * if the default locale is ja, you get ja as a fallback: + * see ResourceBundle.html#getBundle(java.lang.String, java.util.Locale, java.lang.ClassLoader) + */ + if (!Locale.getDefault().getLanguage().equals("ja")) + assertEquals("Truncated unicode escape sequence.", message); + + message = NLS.getLocalizedMessage(MessagesTestBundle.Q0001E_INVALID_SYNTAX, + locale, "XXX"); + /* + * if the default locale is ja, you get ja as a fallback: + * see ResourceBundle.html#getBundle(java.lang.String, java.util.Locale, java.lang.ClassLoader) + */ + if (!Locale.getDefault().getLanguage().equals("ja")) + assertEquals("Syntax Error: XXX", message); + } + + public void testMissingMessage() { + Locale locale = Locale.ENGLISH; + String message = NLS.getLocalizedMessage( + MessagesTestBundle.Q0005E_MESSAGE_NOT_IN_BUNDLE, locale); + + assertEquals("Message with key:Q0005E_MESSAGE_NOT_IN_BUNDLE and locale: " + + locale.toString() + " not found.", message); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/queryParser/TestMultiAnalyzer.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/queryParser/TestMultiAnalyzer.java new file mode 100644 index 0000000..c7b641a --- /dev/null +++ 
b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/queryParser/TestMultiAnalyzer.java @@ -0,0 +1,278 @@ +package org.apache.lucene.queryParser; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.Reader; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.LowerCaseFilter; +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.standard.StandardTokenizer; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.TypeAttribute; +import org.apache.lucene.search.Query; +import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.util.Version; + +/** + * Test QueryParser's ability to deal with Analyzers that return more + * than one token per position or that return tokens with a position + * increment > 1. 
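+ * For example (as asserted below), with MultiAnalyzer the query "multi foo"
+ * parses to "(multi multi2) foo", both expansions sharing one position.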
+ * + */ +public class TestMultiAnalyzer extends BaseTokenStreamTestCase { + + private static int multiToken = 0; + + public void testMultiAnalyzer() throws ParseException { + + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "", new MultiAnalyzer()); + + // trivial, no multiple tokens: + assertEquals("foo", qp.parse("foo").toString()); + assertEquals("foo", qp.parse("\"foo\"").toString()); + assertEquals("foo foobar", qp.parse("foo foobar").toString()); + assertEquals("\"foo foobar\"", qp.parse("\"foo foobar\"").toString()); + assertEquals("\"foo foobar blah\"", qp.parse("\"foo foobar blah\"").toString()); + + // two tokens at the same position: + assertEquals("(multi multi2) foo", qp.parse("multi foo").toString()); + assertEquals("foo (multi multi2)", qp.parse("foo multi").toString()); + assertEquals("(multi multi2) (multi multi2)", qp.parse("multi multi").toString()); + assertEquals("+(foo (multi multi2)) +(bar (multi multi2))", + qp.parse("+(foo multi) +(bar multi)").toString()); + assertEquals("+(foo (multi multi2)) field:\"bar (multi multi2)\"", + qp.parse("+(foo multi) field:\"bar multi\"").toString()); + + // phrases: + assertEquals("\"(multi multi2) foo\"", qp.parse("\"multi foo\"").toString()); + assertEquals("\"foo (multi multi2)\"", qp.parse("\"foo multi\"").toString()); + assertEquals("\"foo (multi multi2) foobar (multi multi2)\"", + qp.parse("\"foo multi foobar multi\"").toString()); + + // fields: + assertEquals("(field:multi field:multi2) field:foo", qp.parse("field:multi field:foo").toString()); + assertEquals("field:\"(multi multi2) foo\"", qp.parse("field:\"multi foo\"").toString()); + + // three tokens at one position: + assertEquals("triplemulti multi3 multi2", qp.parse("triplemulti").toString()); + assertEquals("foo (triplemulti multi3 multi2) foobar", + qp.parse("foo triplemulti foobar").toString()); + + // phrase with non-default slop: + assertEquals("\"(multi multi2) foo\"~10", qp.parse("\"multi foo\"~10").toString()); + + // phrase with non-default boost: + assertEquals("\"(multi multi2) foo\"^2.0", qp.parse("\"multi foo\"^2").toString()); + + // phrase after changing default slop: + qp.setPhraseSlop(99); + assertEquals("\"(multi multi2) foo\"~99 bar", + qp.parse("\"multi foo\" bar").toString()); + assertEquals("\"(multi multi2) foo\"~99 \"foo bar\"~2", + qp.parse("\"multi foo\" \"foo bar\"~2").toString()); + qp.setPhraseSlop(0); + + // non-default operator: + qp.setDefaultOperator(QueryParser.AND_OPERATOR); + assertEquals("+(multi multi2) +foo", qp.parse("multi foo").toString()); + + } + + public void testMultiAnalyzerWithSubclassOfQueryParser() throws ParseException { + + DumbQueryParser qp = new DumbQueryParser("", new MultiAnalyzer()); + qp.setPhraseSlop(99); // modified default slop + + // direct call to (super's) getFieldQuery to demonstrate difference + // between phrase and multiphrase with modified default slop + assertEquals("\"foo bar\"~99", + qp.getSuperFieldQuery("","foo bar", true).toString()); + assertEquals("\"(multi multi2) bar\"~99", + qp.getSuperFieldQuery("","multi bar", true).toString()); + + + // ask subclass to parse phrase with modified default slop + assertEquals("\"(multi multi2) foo\"~99 bar", + qp.parse("\"multi foo\" bar").toString()); + + } + + public void testPosIncrementAnalyzer() throws ParseException { + QueryParser qp = new QueryParser(Version.LUCENE_24, "", new PosIncrementAnalyzer()); + assertEquals("quick brown", qp.parse("the quick brown").toString()); + assertEquals("\"quick brown\"", qp.parse("\"the quick
brown\"").toString()); + assertEquals("quick brown fox", qp.parse("the quick brown fox").toString()); + assertEquals("\"quick brown fox\"", qp.parse("\"the quick brown fox\"").toString()); + } + + /** + * Expands "multi" to "multi" and "multi2", both at the same position, + * and expands "triplemulti" to "triplemulti", "multi3", and "multi2". + */ + private class MultiAnalyzer extends Analyzer { + + public MultiAnalyzer() { + } + + @Override + public TokenStream tokenStream(String fieldName, Reader reader) { + TokenStream result = new StandardTokenizer(TEST_VERSION_CURRENT, reader); + result = new TestFilter(result); + result = new LowerCaseFilter(TEST_VERSION_CURRENT, result); + return result; + } + } + + private final class TestFilter extends TokenFilter { + + private String prevType; + private int prevStartOffset; + private int prevEndOffset; + + CharTermAttribute termAtt; + PositionIncrementAttribute posIncrAtt; + OffsetAttribute offsetAtt; + TypeAttribute typeAtt; + + public TestFilter(TokenStream in) { + super(in); + termAtt = addAttribute(CharTermAttribute.class); + posIncrAtt = addAttribute(PositionIncrementAttribute.class); + offsetAtt = addAttribute(OffsetAttribute.class); + typeAtt = addAttribute(TypeAttribute.class); + } + + @Override + public final boolean incrementToken() throws java.io.IOException { + if (multiToken > 0) { + termAtt.setEmpty().append("multi"+(multiToken+1)); + offsetAtt.setOffset(prevStartOffset, prevEndOffset); + typeAtt.setType(prevType); + posIncrAtt.setPositionIncrement(0); + multiToken--; + return true; + } else { + boolean next = input.incrementToken(); + if (next == false) { + return false; + } + prevType = typeAtt.type(); + prevStartOffset = offsetAtt.startOffset(); + prevEndOffset = offsetAtt.endOffset(); + String text = termAtt.toString(); + if (text.equals("triplemulti")) { + multiToken = 2; + return true; + } else if (text.equals("multi")) { + multiToken = 1; + return true; + } else { + return true; + } + } + } + } + + /** + * Analyzes "the quick brown" as: quick(incr=2) brown(incr=1). + * Does not work correctly for input other than "the quick brown ...". 
+ */ + private class PosIncrementAnalyzer extends Analyzer { + + public PosIncrementAnalyzer() { + } + + @Override + public TokenStream tokenStream(String fieldName, Reader reader) { + TokenStream result = new StandardTokenizer(TEST_VERSION_CURRENT, reader); + result = new TestPosIncrementFilter(result); + result = new LowerCaseFilter(TEST_VERSION_CURRENT, result); + return result; + } + } + + private final class TestPosIncrementFilter extends TokenFilter { + + CharTermAttribute termAtt; + PositionIncrementAttribute posIncrAtt; + + public TestPosIncrementFilter(TokenStream in) { + super(in); + termAtt = addAttribute(CharTermAttribute.class); + posIncrAtt = addAttribute(PositionIncrementAttribute.class); + } + + @Override + public final boolean incrementToken () throws java.io.IOException { + while(input.incrementToken()) { + if (termAtt.toString().equals("the")) { + // stopword, do nothing + } else if (termAtt.toString().equals("quick")) { + posIncrAtt.setPositionIncrement(2); + return true; + } else { + posIncrAtt.setPositionIncrement(1); + return true; + } + } + return false; + } + } + + /** a very simple subclass of QueryParser */ + private final static class DumbQueryParser extends QueryParser { + + public DumbQueryParser(String f, Analyzer a) { + super(TEST_VERSION_CURRENT, f, a); + } + + /** expose super's version */ + public Query getSuperFieldQuery(String f, String t, boolean quoted) + throws ParseException { + return super.getFieldQuery(f,t,quoted); + } + /** wrap super's version */ + @Override + protected Query getFieldQuery(String f, String t, boolean quoted) + throws ParseException { + return new DumbQueryWrapper(getSuperFieldQuery(f,t,quoted)); + } + } + + /** + * A very simple wrapper to prevent instanceof checks but uses + * the toString of the query it wraps. + */ + private final static class DumbQueryWrapper extends Query { + + private Query q; + public DumbQueryWrapper(Query q) { + super(); + this.q = q; + } + @Override + public String toString(String f) { + return q.toString(f); + } + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/queryParser/TestMultiFieldQueryParser.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/queryParser/TestMultiFieldQueryParser.java new file mode 100644 index 0000000..9aea4db --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/queryParser/TestMultiFieldQueryParser.java @@ -0,0 +1,329 @@ +package org.apache.lucene.queryParser; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.io.Reader; +import java.util.HashMap; +import java.util.Map; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; + +/** + * Tests QueryParser. + */ +public class TestMultiFieldQueryParser extends LuceneTestCase { + + /** test stop words parsing for both the non static form, and for the + * corresponding static form (qtxt, fields[]). */ + public void testStopwordsParsing() throws Exception { + assertStopQueryEquals("one", "b:one t:one"); + assertStopQueryEquals("one stop", "b:one t:one"); + assertStopQueryEquals("one (stop)", "b:one t:one"); + assertStopQueryEquals("one ((stop))", "b:one t:one"); + assertStopQueryEquals("stop", ""); + assertStopQueryEquals("(stop)", ""); + assertStopQueryEquals("((stop))", ""); + } + + // verify parsing of query using a stopping analyzer + private void assertStopQueryEquals (String qtxt, String expectedRes) throws Exception { + String[] fields = {"b", "t"}; + Occur occur[] = {Occur.SHOULD, Occur.SHOULD}; + TestQueryParser.QPTestAnalyzer a = new TestQueryParser.QPTestAnalyzer(); + MultiFieldQueryParser mfqp = new MultiFieldQueryParser(TEST_VERSION_CURRENT, fields, a); + + Query q = mfqp.parse(qtxt); + assertEquals(expectedRes, q.toString()); + + q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, qtxt, fields, occur, a); + assertEquals(expectedRes, q.toString()); + } + + public void testSimple() throws Exception { + String[] fields = {"b", "t"}; + MultiFieldQueryParser mfqp = new MultiFieldQueryParser(TEST_VERSION_CURRENT, fields, new MockAnalyzer(random)); + + Query q = mfqp.parse("one"); + assertEquals("b:one t:one", q.toString()); + + q = mfqp.parse("one two"); + assertEquals("(b:one t:one) (b:two t:two)", q.toString()); + + q = mfqp.parse("+one +two"); + assertEquals("+(b:one t:one) +(b:two t:two)", q.toString()); + + q = mfqp.parse("+one -two -three"); + assertEquals("+(b:one t:one) -(b:two t:two) -(b:three t:three)", q.toString()); + + q = mfqp.parse("one^2 two"); + assertEquals("((b:one t:one)^2.0) (b:two t:two)", q.toString()); + + q = mfqp.parse("one~ two"); + assertEquals("(b:one~0.5 t:one~0.5) (b:two t:two)", q.toString()); + + q = mfqp.parse("one~0.8 two^2"); + assertEquals("(b:one~0.8 t:one~0.8) ((b:two t:two)^2.0)", q.toString()); + + q = mfqp.parse("one* two*"); + assertEquals("(b:one* t:one*) (b:two* t:two*)", q.toString()); + + q = mfqp.parse("[a TO c] two"); + assertEquals("(b:[a TO c] t:[a TO c]) (b:two t:two)", q.toString()); + + q = mfqp.parse("w?ldcard"); + assertEquals("b:w?ldcard t:w?ldcard", q.toString()); + + q = mfqp.parse("\"foo bar\""); + assertEquals("b:\"foo bar\" t:\"foo bar\"", q.toString()); + + q = mfqp.parse("\"aa bb cc\" \"dd ee\""); + assertEquals("(b:\"aa bb cc\" t:\"aa bb cc\") (b:\"dd ee\" t:\"dd ee\")", q.toString()); + + q = mfqp.parse("\"foo bar\"~4"); + assertEquals("b:\"foo bar\"~4 t:\"foo bar\"~4", q.toString()); + + // LUCENE-1213: MultiFieldQueryParser was 
ignoring slop when phrase had a field. + q = mfqp.parse("b:\"foo bar\"~4"); + assertEquals("b:\"foo bar\"~4", q.toString()); + + // make sure that terms which have a field are not touched: + q = mfqp.parse("one f:two"); + assertEquals("(b:one t:one) f:two", q.toString()); + + // AND mode: + mfqp.setDefaultOperator(QueryParser.AND_OPERATOR); + q = mfqp.parse("one two"); + assertEquals("+(b:one t:one) +(b:two t:two)", q.toString()); + q = mfqp.parse("\"aa bb cc\" \"dd ee\""); + assertEquals("+(b:\"aa bb cc\" t:\"aa bb cc\") +(b:\"dd ee\" t:\"dd ee\")", q.toString()); + + } + + public void testBoostsSimple() throws Exception { + Map boosts = new HashMap(); + boosts.put("b", Float.valueOf(5)); + boosts.put("t", Float.valueOf(10)); + String[] fields = {"b", "t"}; + MultiFieldQueryParser mfqp = new MultiFieldQueryParser(TEST_VERSION_CURRENT, fields, new MockAnalyzer(random), boosts); + + + //Check for simple + Query q = mfqp.parse("one"); + assertEquals("b:one^5.0 t:one^10.0", q.toString()); + + //Check for AND + q = mfqp.parse("one AND two"); + assertEquals("+(b:one^5.0 t:one^10.0) +(b:two^5.0 t:two^10.0)", q.toString()); + + //Check for OR + q = mfqp.parse("one OR two"); + assertEquals("(b:one^5.0 t:one^10.0) (b:two^5.0 t:two^10.0)", q.toString()); + + //Check for AND and a field + q = mfqp.parse("one AND two AND foo:test"); + assertEquals("+(b:one^5.0 t:one^10.0) +(b:two^5.0 t:two^10.0) +foo:test", q.toString()); + + q = mfqp.parse("one^3 AND two^4"); + assertEquals("+((b:one^5.0 t:one^10.0)^3.0) +((b:two^5.0 t:two^10.0)^4.0)", q.toString()); + } + + public void testStaticMethod1() throws ParseException { + String[] fields = {"b", "t"}; + String[] queries = {"one", "two"}; + Query q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries, fields, new MockAnalyzer(random)); + assertEquals("b:one t:two", q.toString()); + + String[] queries2 = {"+one", "+two"}; + q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries2, fields, new MockAnalyzer(random)); + assertEquals("(+b:one) (+t:two)", q.toString()); + + String[] queries3 = {"one", "+two"}; + q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries3, fields, new MockAnalyzer(random)); + assertEquals("b:one (+t:two)", q.toString()); + + String[] queries4 = {"one +more", "+two"}; + q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries4, fields, new MockAnalyzer(random)); + assertEquals("(b:one +b:more) (+t:two)", q.toString()); + + String[] queries5 = {"blah"}; + try { + q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries5, fields, new MockAnalyzer(random)); + fail(); + } catch(IllegalArgumentException e) { + // expected exception, array length differs + } + + // check also with stop words for this static form (qtxts[], fields[]). 
+ TestQueryParser.QPTestAnalyzer stopA = new TestQueryParser.QPTestAnalyzer(); + + String[] queries6 = {"((+stop))", "+((stop))"}; + q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries6, fields, stopA); + assertEquals("", q.toString()); + + String[] queries7 = {"one ((+stop)) +more", "+((stop)) +two"}; + q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries7, fields, stopA); + assertEquals("(b:one +b:more) (+t:two)", q.toString()); + + } + + public void testStaticMethod2() throws ParseException { + String[] fields = {"b", "t"}; + BooleanClause.Occur[] flags = {BooleanClause.Occur.MUST, BooleanClause.Occur.MUST_NOT}; + Query q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, "one", fields, flags, new MockAnalyzer(random)); + assertEquals("+b:one -t:one", q.toString()); + + q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, "one two", fields, flags, new MockAnalyzer(random)); + assertEquals("+(b:one b:two) -(t:one t:two)", q.toString()); + + try { + BooleanClause.Occur[] flags2 = {BooleanClause.Occur.MUST}; + q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, "blah", fields, flags2, new MockAnalyzer(random)); + fail(); + } catch(IllegalArgumentException e) { + // expected exception, array length differs + } + } + + public void testStaticMethod2Old() throws ParseException { + String[] fields = {"b", "t"}; + //int[] flags = {MultiFieldQueryParser.REQUIRED_FIELD, MultiFieldQueryParser.PROHIBITED_FIELD}; + BooleanClause.Occur[] flags = {BooleanClause.Occur.MUST, BooleanClause.Occur.MUST_NOT}; + + Query q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, "one", fields, flags, new MockAnalyzer(random));//, fields, flags, new MockAnalyzer(random)); + assertEquals("+b:one -t:one", q.toString()); + + q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, "one two", fields, flags, new MockAnalyzer(random)); + assertEquals("+(b:one b:two) -(t:one t:two)", q.toString()); + + try { + BooleanClause.Occur[] flags2 = {BooleanClause.Occur.MUST}; + q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, "blah", fields, flags2, new MockAnalyzer(random)); + fail(); + } catch(IllegalArgumentException e) { + // expected exception, array length differs + } + } + + public void testStaticMethod3() throws ParseException { + String[] queries = {"one", "two", "three"}; + String[] fields = {"f1", "f2", "f3"}; + BooleanClause.Occur[] flags = {BooleanClause.Occur.MUST, + BooleanClause.Occur.MUST_NOT, BooleanClause.Occur.SHOULD}; + Query q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries, fields, flags, new MockAnalyzer(random)); + assertEquals("+f1:one -f2:two f3:three", q.toString()); + + try { + BooleanClause.Occur[] flags2 = {BooleanClause.Occur.MUST}; + q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries, fields, flags2, new MockAnalyzer(random)); + fail(); + } catch(IllegalArgumentException e) { + // expected exception, array length differs + } + } + + public void testStaticMethod3Old() throws ParseException { + String[] queries = {"one", "two"}; + String[] fields = {"b", "t"}; + BooleanClause.Occur[] flags = {BooleanClause.Occur.MUST, BooleanClause.Occur.MUST_NOT}; + Query q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries, fields, flags, new MockAnalyzer(random)); + assertEquals("+b:one -t:two", q.toString()); + + try { + BooleanClause.Occur[] flags2 = {BooleanClause.Occur.MUST}; + q = MultiFieldQueryParser.parse(TEST_VERSION_CURRENT, queries, fields, flags2, new MockAnalyzer(random)); + fail(); + } catch(IllegalArgumentException e) { + // expected 
exception, array length differs + } + } + + public void testAnalyzerReturningNull() throws ParseException { + String[] fields = new String[] { "f1", "f2", "f3" }; + MultiFieldQueryParser parser = new MultiFieldQueryParser(TEST_VERSION_CURRENT, fields, new AnalyzerReturningNull()); + Query q = parser.parse("bla AND blo"); + assertEquals("+(f2:bla f3:bla) +(f2:blo f3:blo)", q.toString()); + // the following queries are not affected as their terms are not analyzed anyway: + q = parser.parse("bla*"); + assertEquals("f1:bla* f2:bla* f3:bla*", q.toString()); + q = parser.parse("bla~"); + assertEquals("f1:bla~0.5 f2:bla~0.5 f3:bla~0.5", q.toString()); + q = parser.parse("[a TO c]"); + assertEquals("f1:[a TO c] f2:[a TO c] f3:[a TO c]", q.toString()); + } + + public void testStopWordSearching() throws Exception { + Analyzer analyzer = new MockAnalyzer(random); + Directory ramDir = newDirectory(); + IndexWriter iw = new IndexWriter(ramDir, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); + Document doc = new Document(); + doc.add(newField("body", "blah the footest blah", Field.Store.NO, Field.Index.ANALYZED)); + iw.addDocument(doc); + iw.close(); + + MultiFieldQueryParser mfqp = + new MultiFieldQueryParser(TEST_VERSION_CURRENT, new String[] {"body"}, analyzer); + mfqp.setDefaultOperator(QueryParser.Operator.AND); + Query q = mfqp.parse("the footest"); + IndexSearcher is = new IndexSearcher(ramDir, true); + ScoreDoc[] hits = is.search(q, null, 1000).scoreDocs; + assertEquals(1, hits.length); + is.close(); + ramDir.close(); + } + + /** + * Return empty tokens for field "f1". + */ + private static class AnalyzerReturningNull extends Analyzer { + MockAnalyzer stdAnalyzer = new MockAnalyzer(random); + + public AnalyzerReturningNull() { + } + + @Override + public TokenStream tokenStream(String fieldName, Reader reader) { + if ("f1".equals(fieldName)) { + return new EmptyTokenStream(); + } else { + return stdAnalyzer.tokenStream(fieldName, reader); + } + } + + private static class EmptyTokenStream extends TokenStream { + @Override + public boolean incrementToken() throws IOException { + return false; + } + } + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/queryParser/TestQueryParser.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/queryParser/TestQueryParser.java new file mode 100644 index 0000000..93e08f5 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/queryParser/TestQueryParser.java @@ -0,0 +1,1134 @@ +package org.apache.lucene.queryParser; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.io.Reader; +import java.text.Collator; +import java.text.DateFormat; +import java.util.Calendar; +import java.util.Date; +import java.util.GregorianCalendar; +import java.util.HashSet; +import java.util.Locale; +import java.util.Set; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.KeywordAnalyzer; +import org.apache.lucene.analysis.LowerCaseTokenizer; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.analysis.SimpleAnalyzer; +import org.apache.lucene.analysis.StopAnalyzer; +import org.apache.lucene.analysis.StopFilter; +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.document.DateField; +import org.apache.lucene.document.DateTools; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.MultiTermQuery; +import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.PrefixQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermRangeQuery; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.WildcardQuery; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; + +/** + * Tests QueryParser. + */ +public class TestQueryParser extends LuceneTestCase { + + public static Analyzer qpAnalyzer = new QPTestAnalyzer(); + + public static final class QPTestFilter extends TokenFilter { + CharTermAttribute termAtt; + OffsetAttribute offsetAtt; + + /** + * Filter which discards the token 'stop' and which expands the + * token 'phrase' into 'phrase1 phrase2' + */ + public QPTestFilter(TokenStream in) { + super(in); + termAtt = addAttribute(CharTermAttribute.class); + offsetAtt = addAttribute(OffsetAttribute.class); + } + + boolean inPhrase = false; + int savedStart = 0, savedEnd = 0; + + @Override + public boolean incrementToken() throws IOException { + if (inPhrase) { + inPhrase = false; + clearAttributes(); + termAtt.append("phrase2"); + offsetAtt.setOffset(savedStart, savedEnd); + return true; + } else + while (input.incrementToken()) { + if (termAtt.toString().equals("phrase")) { + inPhrase = true; + savedStart = offsetAtt.startOffset(); + savedEnd = offsetAtt.endOffset(); + termAtt.setEmpty().append("phrase1"); + offsetAtt.setOffset(savedStart, savedEnd); + return true; + } else if (!termAtt.toString().equals("stop")) + return true; + } + return false; + } + } + + + public static final class QPTestAnalyzer extends Analyzer { + + /** Filters LowerCaseTokenizer with StopFilter. 
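+ * Via QPTestFilter, this also discards the token 'stop' and expands the token 'phrase' into 'phrase1 phrase2'.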
*/ + @Override + public final TokenStream tokenStream(String fieldName, Reader reader) { + return new QPTestFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, reader)); + } + } + + public static class QPTestParser extends QueryParser { + public QPTestParser(String f, Analyzer a) { + super(TEST_VERSION_CURRENT, f, a); + } + + @Override + protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException { + throw new ParseException("Fuzzy queries not allowed"); + } + + @Override + protected Query getWildcardQuery(String field, String termStr) throws ParseException { + throw new ParseException("Wildcard queries not allowed"); + } + } + + private int originalMaxClauses; + + @Override + public void setUp() throws Exception { + super.setUp(); + originalMaxClauses = BooleanQuery.getMaxClauseCount(); + } + + public QueryParser getParser(Analyzer a) throws Exception { + if (a == null) + a = new MockAnalyzer(random, MockTokenizer.SIMPLE, true); + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", a); + qp.setDefaultOperator(QueryParser.OR_OPERATOR); + return qp; + } + + public Query getQuery(String query, Analyzer a) throws Exception { + return getParser(a).parse(query); + } + + public void assertQueryEquals(String query, Analyzer a, String result) + throws Exception { + Query q = getQuery(query, a); + String s = q.toString("field"); + if (!s.equals(result)) { + fail("Query /" + query + "/ yielded /" + s + + "/, expecting /" + result + "/"); + } + } + + public void assertQueryEquals(QueryParser qp, String field, String query, String result) + throws Exception { + Query q = qp.parse(query); + String s = q.toString(field); + if (!s.equals(result)) { + fail("Query /" + query + "/ yielded /" + s + + "/, expecting /" + result + "/"); + } + } + + public void assertEscapedQueryEquals(String query, Analyzer a, String result) + throws Exception { + String escapedQuery = QueryParser.escape(query); + if (!escapedQuery.equals(result)) { + fail("Query /" + query + "/ yielded /" + escapedQuery + + "/, expecting /" + result + "/"); + } + } + + public void assertWildcardQueryEquals(String query, boolean lowercase, String result, boolean allowLeadingWildcard) + throws Exception { + QueryParser qp = getParser(null); + qp.setLowercaseExpandedTerms(lowercase); + qp.setAllowLeadingWildcard(allowLeadingWildcard); + Query q = qp.parse(query); + String s = q.toString("field"); + if (!s.equals(result)) { + fail("WildcardQuery /" + query + "/ yielded /" + s + + "/, expecting /" + result + "/"); + } + } + + public void assertWildcardQueryEquals(String query, boolean lowercase, String result) + throws Exception { + assertWildcardQueryEquals(query, lowercase, result, false); + } + + public void assertWildcardQueryEquals(String query, String result) throws Exception { + QueryParser qp = getParser(null); + Query q = qp.parse(query); + String s = q.toString("field"); + if (!s.equals(result)) { + fail("WildcardQuery /" + query + "/ yielded /" + s + "/, expecting /" + + result + "/"); + } + } + + public Query getQueryDOA(String query, Analyzer a) + throws Exception { + if (a == null) + a = new MockAnalyzer(random, MockTokenizer.SIMPLE, true); + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", a); + qp.setDefaultOperator(QueryParser.AND_OPERATOR); + return qp.parse(query); + } + + public void assertQueryEqualsDOA(String query, Analyzer a, String result) + throws Exception { + Query q = getQueryDOA(query, a); + String s = q.toString("field"); + if (!s.equals(result)) { 
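+ // Report both the parsed rendering and the expected one so mismatches are easy to diagnose.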
+ fail("Query /" + query + "/ yielded /" + s + + "/, expecting /" + result + "/"); + } + } + + public void testCJK() throws Exception { + // Test Ideographic Space - As wide as a CJK character cell (fullwidth) + // used google to translate the word "term" to japanese -> 用語 + assertQueryEquals("term\u3000term\u3000term", null, "term\u0020term\u0020term"); + assertQueryEquals("用語\u3000用語\u3000用語", null, "用語\u0020用語\u0020用語"); + } + + public void testCJKTerm() throws Exception { + // individual CJK chars as terms + StandardAnalyzer analyzer = new StandardAnalyzer(TEST_VERSION_CURRENT); + + BooleanQuery expected = new BooleanQuery(); + expected.add(new TermQuery(new Term("field", "中")), BooleanClause.Occur.SHOULD); + expected.add(new TermQuery(new Term("field", "国")), BooleanClause.Occur.SHOULD); + + assertEquals(expected, getQuery("中国", analyzer)); + } + + public void testCJKBoostedTerm() throws Exception { + // individual CJK chars as terms + StandardAnalyzer analyzer = new StandardAnalyzer(TEST_VERSION_CURRENT); + + BooleanQuery expected = new BooleanQuery(); + expected.setBoost(0.5f); + expected.add(new TermQuery(new Term("field", "中")), BooleanClause.Occur.SHOULD); + expected.add(new TermQuery(new Term("field", "国")), BooleanClause.Occur.SHOULD); + + assertEquals(expected, getQuery("中国^0.5", analyzer)); + } + + public void testCJKPhrase() throws Exception { + // individual CJK chars as terms + StandardAnalyzer analyzer = new StandardAnalyzer(TEST_VERSION_CURRENT); + + PhraseQuery expected = new PhraseQuery(); + expected.add(new Term("field", "中")); + expected.add(new Term("field", "国")); + + assertEquals(expected, getQuery("\"中国\"", analyzer)); + } + + public void testCJKBoostedPhrase() throws Exception { + // individual CJK chars as terms + StandardAnalyzer analyzer = new StandardAnalyzer(TEST_VERSION_CURRENT); + + PhraseQuery expected = new PhraseQuery(); + expected.setBoost(0.5f); + expected.add(new Term("field", "中")); + expected.add(new Term("field", "国")); + + assertEquals(expected, getQuery("\"中国\"^0.5", analyzer)); + } + + public void testCJKSloppyPhrase() throws Exception { + // individual CJK chars as terms + StandardAnalyzer analyzer = new StandardAnalyzer(TEST_VERSION_CURRENT); + + PhraseQuery expected = new PhraseQuery(); + expected.setSlop(3); + expected.add(new Term("field", "中")); + expected.add(new Term("field", "国")); + + assertEquals(expected, getQuery("\"中国\"~3", analyzer)); + } + + public void testAutoGeneratePhraseQueriesOn() throws Exception { + // individual CJK chars as terms + StandardAnalyzer analyzer = new StandardAnalyzer(TEST_VERSION_CURRENT); + + PhraseQuery expected = new PhraseQuery(); + expected.add(new Term("field", "中")); + expected.add(new Term("field", "国")); + QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, "field", analyzer); + parser.setAutoGeneratePhraseQueries(true); + assertEquals(expected, parser.parse("中国")); + } + + public void testSimple() throws Exception { + assertQueryEquals("term term term", null, "term term term"); + assertQueryEquals("türm term term", new MockAnalyzer(random), "türm term term"); + assertQueryEquals("ümlaut", new MockAnalyzer(random), "ümlaut"); + + assertQueryEquals("\"\"", new KeywordAnalyzer(), ""); + assertQueryEquals("foo:\"\"", new KeywordAnalyzer(), "foo:"); + + assertQueryEquals("a AND b", null, "+a +b"); + assertQueryEquals("(a AND b)", null, "+a +b"); + assertQueryEquals("c OR (a AND b)", null, "c (+a +b)"); + assertQueryEquals("a AND NOT b", null, "+a -b"); + assertQueryEquals("a AND -b", null, "+a 
-b"); + assertQueryEquals("a AND !b", null, "+a -b"); + assertQueryEquals("a && b", null, "+a +b"); + assertQueryEquals("a && ! b", null, "+a -b"); + + assertQueryEquals("a OR b", null, "a b"); + assertQueryEquals("a || b", null, "a b"); + assertQueryEquals("a OR !b", null, "a -b"); + assertQueryEquals("a OR ! b", null, "a -b"); + assertQueryEquals("a OR -b", null, "a -b"); + + assertQueryEquals("+term -term term", null, "+term -term term"); + assertQueryEquals("foo:term AND field:anotherTerm", null, + "+foo:term +anotherterm"); + assertQueryEquals("term AND \"phrase phrase\"", null, + "+term +\"phrase phrase\""); + assertQueryEquals("\"hello there\"", null, "\"hello there\""); + assertTrue(getQuery("a AND b", null) instanceof BooleanQuery); + assertTrue(getQuery("hello", null) instanceof TermQuery); + assertTrue(getQuery("\"hello there\"", null) instanceof PhraseQuery); + + assertQueryEquals("germ term^2.0", null, "germ term^2.0"); + assertQueryEquals("(term)^2.0", null, "term^2.0"); + assertQueryEquals("(germ term)^2.0", null, "(germ term)^2.0"); + assertQueryEquals("term^2.0", null, "term^2.0"); + assertQueryEquals("term^2", null, "term^2.0"); + assertQueryEquals("\"germ term\"^2.0", null, "\"germ term\"^2.0"); + assertQueryEquals("\"term germ\"^2", null, "\"term germ\"^2.0"); + + assertQueryEquals("(foo OR bar) AND (baz OR boo)", null, + "+(foo bar) +(baz boo)"); + assertQueryEquals("((a OR b) AND NOT c) OR d", null, + "(+(a b) -c) d"); + assertQueryEquals("+(apple \"steve jobs\") -(foo bar baz)", null, + "+(apple \"steve jobs\") -(foo bar baz)"); + assertQueryEquals("+title:(dog OR cat) -author:\"bob dole\"", null, + "+(title:dog title:cat) -author:\"bob dole\""); + + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(random)); + // make sure OR is the default: + assertEquals(QueryParser.OR_OPERATOR, qp.getDefaultOperator()); + qp.setDefaultOperator(QueryParser.AND_OPERATOR); + assertEquals(QueryParser.AND_OPERATOR, qp.getDefaultOperator()); + qp.setDefaultOperator(QueryParser.OR_OPERATOR); + assertEquals(QueryParser.OR_OPERATOR, qp.getDefaultOperator()); + } + + public void testPunct() throws Exception { + Analyzer a = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false); + assertQueryEquals("a&b", a, "a&b"); + assertQueryEquals("a&&b", a, "a&&b"); + assertQueryEquals(".NET", a, ".NET"); + } + + public void testSlop() throws Exception { + assertQueryEquals("\"term germ\"~2", null, "\"term germ\"~2"); + assertQueryEquals("\"term germ\"~2 flork", null, "\"term germ\"~2 flork"); + assertQueryEquals("\"term\"~2", null, "term"); + assertQueryEquals("\" \"~2 germ", null, "germ"); + assertQueryEquals("\"term germ\"~2^2", null, "\"term germ\"~2^2.0"); + } + + public void testNumber() throws Exception { +// The numbers go away because SimpleAnalyzer ignores them + assertQueryEquals("3", null, ""); + assertQueryEquals("term 1.0 1 2", null, "term"); + assertQueryEquals("term term1 term2", null, "term term term"); + + Analyzer a = new MockAnalyzer(random, MockTokenizer.WHITESPACE, true); + assertQueryEquals("3", a, "3"); + assertQueryEquals("term 1.0 1 2", a, "term 1.0 1 2"); + assertQueryEquals("term term1 term2", a, "term term1 term2"); + } + + public void testWildcard() throws Exception { + assertQueryEquals("term*", null, "term*"); + assertQueryEquals("term*^2", null, "term*^2.0"); + assertQueryEquals("term~", null, "term~0.5"); + assertQueryEquals("term~0.7", null, "term~0.7"); + assertQueryEquals("term~^2", null, "term~0.5^2.0"); +
assertQueryEquals("term^2~", null, "term~0.5^2.0"); + assertQueryEquals("term*germ", null, "term*germ"); + assertQueryEquals("term*germ^3", null, "term*germ^3.0"); + + assertTrue(getQuery("term*", null) instanceof PrefixQuery); + assertTrue(getQuery("term*^2", null) instanceof PrefixQuery); + assertTrue(getQuery("term~", null) instanceof FuzzyQuery); + assertTrue(getQuery("term~0.7", null) instanceof FuzzyQuery); + FuzzyQuery fq = (FuzzyQuery)getQuery("term~0.7", null); + assertEquals(0.7f, fq.getMinSimilarity(), 0.1f); + assertEquals(FuzzyQuery.defaultPrefixLength, fq.getPrefixLength()); + fq = (FuzzyQuery)getQuery("term~", null); + assertEquals(0.5f, fq.getMinSimilarity(), 0.1f); + assertEquals(FuzzyQuery.defaultPrefixLength, fq.getPrefixLength()); + + assertParseException("term~1.1"); // value > 1, throws exception + + assertTrue(getQuery("term*germ", null) instanceof WildcardQuery); + +/* Tests to see that wild card terms are (or are not) properly + * lower-cased with proper parser configuration + */ +// First prefix queries: + // by default, convert to lowercase: + assertWildcardQueryEquals("Term*", true, "term*"); + // explicitly set lowercase: + assertWildcardQueryEquals("term*", true, "term*"); + assertWildcardQueryEquals("Term*", true, "term*"); + assertWildcardQueryEquals("TERM*", true, "term*"); + // explicitly disable lowercase conversion: + assertWildcardQueryEquals("term*", false, "term*"); + assertWildcardQueryEquals("Term*", false, "Term*"); + assertWildcardQueryEquals("TERM*", false, "TERM*"); +// Then 'full' wildcard queries: + // by default, convert to lowercase: + assertWildcardQueryEquals("Te?m", "te?m"); + // explicitly set lowercase: + assertWildcardQueryEquals("te?m", true, "te?m"); + assertWildcardQueryEquals("Te?m", true, "te?m"); + assertWildcardQueryEquals("TE?M", true, "te?m"); + assertWildcardQueryEquals("Te?m*gerM", true, "te?m*germ"); + // explicitly disable lowercase conversion: + assertWildcardQueryEquals("te?m", false, "te?m"); + assertWildcardQueryEquals("Te?m", false, "Te?m"); + assertWildcardQueryEquals("TE?M", false, "TE?M"); + assertWildcardQueryEquals("Te?m*gerM", false, "Te?m*gerM"); +// Fuzzy queries: + assertWildcardQueryEquals("Term~", "term~0.5"); + assertWildcardQueryEquals("Term~", true, "term~0.5"); + assertWildcardQueryEquals("Term~", false, "Term~0.5"); +// Range queries: + assertWildcardQueryEquals("[A TO C]", "[a TO c]"); + assertWildcardQueryEquals("[A TO C]", true, "[a TO c]"); + assertWildcardQueryEquals("[A TO C]", false, "[A TO C]"); + // Test suffix queries: first disallow + try { + assertWildcardQueryEquals("*Term", true, "*term"); + fail(); + } catch(ParseException pe) { + // expected exception + } + try { + assertWildcardQueryEquals("?Term", true, "?term"); + fail(); + } catch(ParseException pe) { + // expected exception + } + // Test suffix queries: then allow + assertWildcardQueryEquals("*Term", true, "*term", true); + assertWildcardQueryEquals("?Term", true, "?term", true); + } + + public void testLeadingWildcardType() throws Exception { + QueryParser qp = getParser(null); + qp.setAllowLeadingWildcard(true); + assertEquals(WildcardQuery.class, qp.parse("t*erm*").getClass()); + assertEquals(WildcardQuery.class, qp.parse("?term*").getClass()); + assertEquals(WildcardQuery.class, qp.parse("*term*").getClass()); + } + + public void testQPA() throws Exception { + assertQueryEquals("term term^3.0 term", qpAnalyzer, "term term^3.0 term"); + assertQueryEquals("term stop^3.0 term", qpAnalyzer, "term term"); + +
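// (Editor's note, not part of the original test: qpAnalyzer is backed by + // the QPTestFilter above, which drops "stop" tokens and expands "phrase" + // into "phrase1 phrase2" at the same position; the expected strings in + // the surrounding assertions encode exactly that behavior.) +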
assertQueryEquals("term term term", qpAnalyzer, "term term term"); + assertQueryEquals("term +stop term", qpAnalyzer, "term term"); + assertQueryEquals("term -stop term", qpAnalyzer, "term term"); + + assertQueryEquals("drop AND (stop) AND roll", qpAnalyzer, "+drop +roll"); + assertQueryEquals("term +(stop) term", qpAnalyzer, "term term"); + assertQueryEquals("term -(stop) term", qpAnalyzer, "term term"); + + assertQueryEquals("drop AND stop AND roll", qpAnalyzer, "+drop +roll"); + assertQueryEquals("term phrase term", qpAnalyzer, + "term (phrase1 phrase2) term"); + assertQueryEquals("term AND NOT phrase term", qpAnalyzer, + "+term -(phrase1 phrase2) term"); + assertQueryEquals("stop^3", qpAnalyzer, ""); + assertQueryEquals("stop", qpAnalyzer, ""); + assertQueryEquals("(stop)^3", qpAnalyzer, ""); + assertQueryEquals("((stop))^3", qpAnalyzer, ""); + assertQueryEquals("(stop^3)", qpAnalyzer, ""); + assertQueryEquals("((stop)^3)", qpAnalyzer, ""); + assertQueryEquals("(stop)", qpAnalyzer, ""); + assertQueryEquals("((stop))", qpAnalyzer, ""); + assertTrue(getQuery("term term term", qpAnalyzer) instanceof BooleanQuery); + assertTrue(getQuery("term +stop", qpAnalyzer) instanceof TermQuery); + } + + public void testRange() throws Exception { + assertQueryEquals("[ a TO z]", null, "[a TO z]"); + assertEquals(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT, ((TermRangeQuery)getQuery("[ a TO z]", null)).getRewriteMethod()); + + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(random, MockTokenizer.SIMPLE, true)); + qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); + assertEquals(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE,((TermRangeQuery)qp.parse("[ a TO z]")).getRewriteMethod()); + + assertQueryEquals("[ a TO z ]", null, "[a TO z]"); + assertQueryEquals("{ a TO z}", null, "{a TO z}"); + assertQueryEquals("{ a TO z }", null, "{a TO z}"); + assertQueryEquals("{ a TO z }^2.0", null, "{a TO z}^2.0"); + assertQueryEquals("[ a TO z] OR bar", null, "[a TO z] bar"); + assertQueryEquals("[ a TO z] AND bar", null, "+[a TO z] +bar"); + assertQueryEquals("( bar blar { a TO z}) ", null, "bar blar {a TO z}"); + assertQueryEquals("gack ( bar blar { a TO z}) ", null, "gack (bar blar {a TO z})"); + } + + public void testFarsiRangeCollating() throws Exception { + Directory ramDir = newDirectory(); + IndexWriter iw = new IndexWriter(ramDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT))); + Document doc = new Document(); + doc.add(newField("content","\u0633\u0627\u0628", + Field.Store.YES, Field.Index.NOT_ANALYZED)); + iw.addDocument(doc); + iw.close(); + IndexSearcher is = new IndexSearcher(ramDir, true); + + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "content", new WhitespaceAnalyzer(TEST_VERSION_CURRENT)); + + // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in + // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi + // characters properly. + Collator c = Collator.getInstance(new Locale("ar")); + qp.setRangeCollator(c); + + // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi + // orders the U+0698 character before the U+0633 character, so the single + // index Term below should NOT be returned by a ConstantScoreRangeQuery + // with a Farsi Collator (or an Arabic one for the case when Farsi is not + // supported). 
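+ // + // (Illustrative sketch, not part of the original test: a collated range + // check compares terms with the Collator c configured above instead of + // by Unicode code point; for the single indexed term \u0633, + //   c.compare("\u0633", "\u062F") >= 0 && c.compare("\u0633", "\u0698") <= 0 + // evaluates to false under the Arabic collator, which is why the + // [ \u062F TO \u0698 ] searches below are expected to return no hits.)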
+ + // Test ConstantScoreRangeQuery + qp.setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); + ScoreDoc[] result = is.search(qp.parse("[ \u062F TO \u0698 ]"), null, 1000).scoreDocs; + assertEquals("The index Term should not be included.", 0, result.length); + + result = is.search(qp.parse("[ \u0633 TO \u0638 ]"), null, 1000).scoreDocs; + assertEquals("The index Term should be included.", 1, result.length); + + // Test TermRangeQuery + qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); + result = is.search(qp.parse("[ \u062F TO \u0698 ]"), null, 1000).scoreDocs; + assertEquals("The index Term should not be included.", 0, result.length); + + result = is.search(qp.parse("[ \u0633 TO \u0638 ]"), null, 1000).scoreDocs; + assertEquals("The index Term should be included.", 1, result.length); + + is.close(); + ramDir.close(); + } + + private String escapeDateString(String s) { + if (s.indexOf(" ") > -1) { + return "\"" + s + "\""; + } else { + return s; + } + } + + /** for testing legacy DateField support */ + private String getLegacyDate(String s) throws Exception { + DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT); + return DateField.dateToString(df.parse(s)); + } + + /** for testing DateTools support */ + private String getDate(String s, DateTools.Resolution resolution) throws Exception { + DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT); + return getDate(df.parse(s), resolution); + } + + /** for testing DateTools support */ + private String getDate(Date d, DateTools.Resolution resolution) throws Exception { + if (resolution == null) { + return DateField.dateToString(d); + } else { + return DateTools.dateToString(d, resolution); + } + } + + private String getLocalizedDate(int year, int month, int day) { + DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT); + Calendar calendar = new GregorianCalendar(); + calendar.clear(); + calendar.set(year, month, day); + calendar.set(Calendar.HOUR_OF_DAY, 23); + calendar.set(Calendar.MINUTE, 59); + calendar.set(Calendar.SECOND, 59); + calendar.set(Calendar.MILLISECOND, 999); + return df.format(calendar.getTime()); + } + + /** for testing legacy DateField support */ + public void testLegacyDateRange() throws Exception { + String startDate = getLocalizedDate(2002, 1, 1); + String endDate = getLocalizedDate(2002, 1, 4); + Calendar endDateExpected = new GregorianCalendar(); + endDateExpected.clear(); + endDateExpected.set(2002, 1, 4, 23, 59, 59); + endDateExpected.set(Calendar.MILLISECOND, 999); + assertQueryEquals("[ " + escapeDateString(startDate) + " TO " + escapeDateString(endDate) + "]", null, + "[" + getLegacyDate(startDate) + " TO " + DateField.dateToString(endDateExpected.getTime()) + "]"); + assertQueryEquals("{ " + escapeDateString(startDate) + " " + escapeDateString(endDate) + " }", null, + "{" + getLegacyDate(startDate) + " TO " + getLegacyDate(endDate) + "}"); + } + + public void testDateRange() throws Exception { + String startDate = getLocalizedDate(2002, 1, 1); + String endDate = getLocalizedDate(2002, 1, 4); + Calendar endDateExpected = new GregorianCalendar(); + endDateExpected.clear(); + endDateExpected.set(2002, 1, 4, 23, 59, 59); + endDateExpected.set(Calendar.MILLISECOND, 999); + final String defaultField = "default"; + final String monthField = "month"; + final String hourField = "hour"; + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(random, MockTokenizer.SIMPLE, true)); + + // Don't set any date resolution and verify if 
DateField is used + assertDateRangeQueryEquals(qp, defaultField, startDate, endDate, + endDateExpected.getTime(), null); + + // set a field specific date resolution + qp.setDateResolution(monthField, DateTools.Resolution.MONTH); + + // DateField should still be used for defaultField + assertDateRangeQueryEquals(qp, defaultField, startDate, endDate, + endDateExpected.getTime(), null); + + // set default date resolution to MILLISECOND + qp.setDateResolution(DateTools.Resolution.MILLISECOND); + + // set second field specific date resolution + qp.setDateResolution(hourField, DateTools.Resolution.HOUR); + + // for this field no field specific date resolution has been set, + // so verify if the default resolution is used + assertDateRangeQueryEquals(qp, defaultField, startDate, endDate, + endDateExpected.getTime(), DateTools.Resolution.MILLISECOND); + + // verify if field specific date resolutions are used for these two fields + assertDateRangeQueryEquals(qp, monthField, startDate, endDate, + endDateExpected.getTime(), DateTools.Resolution.MONTH); + + assertDateRangeQueryEquals(qp, hourField, startDate, endDate, + endDateExpected.getTime(), DateTools.Resolution.HOUR); + } + + public void assertDateRangeQueryEquals(QueryParser qp, String field, String startDate, String endDate, + Date endDateInclusive, DateTools.Resolution resolution) throws Exception { + assertQueryEquals(qp, field, field + ":[" + escapeDateString(startDate) + " TO " + escapeDateString(endDate) + "]", + "[" + getDate(startDate, resolution) + " TO " + getDate(endDateInclusive, resolution) + "]"); + assertQueryEquals(qp, field, field + ":{" + escapeDateString(startDate) + " TO " + escapeDateString(endDate) + "}", + "{" + getDate(startDate, resolution) + " TO " + getDate(endDate, resolution) + "}"); + } + + public void testEscaped() throws Exception { + Analyzer a = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false); + + /*assertQueryEquals("\\[brackets", a, "\\[brackets"); + assertQueryEquals("\\[brackets", null, "brackets"); + assertQueryEquals("\\\\", a, "\\\\"); + assertQueryEquals("\\+blah", a, "\\+blah"); + assertQueryEquals("\\(blah", a, "\\(blah"); + + assertQueryEquals("\\-blah", a, "\\-blah"); + assertQueryEquals("\\!blah", a, "\\!blah"); + assertQueryEquals("\\{blah", a, "\\{blah"); + assertQueryEquals("\\}blah", a, "\\}blah"); + assertQueryEquals("\\:blah", a, "\\:blah"); + assertQueryEquals("\\^blah", a, "\\^blah"); + assertQueryEquals("\\[blah", a, "\\[blah"); + assertQueryEquals("\\]blah", a, "\\]blah"); + assertQueryEquals("\\\"blah", a, "\\\"blah"); + assertQueryEquals("\\(blah", a, "\\(blah"); + assertQueryEquals("\\)blah", a, "\\)blah"); + assertQueryEquals("\\~blah", a, "\\~blah"); + assertQueryEquals("\\*blah", a, "\\*blah"); + assertQueryEquals("\\?blah", a, "\\?blah"); + //assertQueryEquals("foo \\&\\& bar", a, "foo \\&\\& bar"); + //assertQueryEquals("foo \\|| bar", a, "foo \\|| bar"); + //assertQueryEquals("foo \\AND bar", a, "foo \\AND bar");*/ + + assertQueryEquals("\\a", a, "a"); + + assertQueryEquals("a\\-b:c", a, "a-b:c"); + assertQueryEquals("a\\+b:c", a, "a+b:c"); + assertQueryEquals("a\\:b:c", a, "a:b:c"); + assertQueryEquals("a\\\\b:c", a, "a\\b:c"); + + assertQueryEquals("a:b\\-c", a, "a:b-c"); + assertQueryEquals("a:b\\+c", a, "a:b+c"); + assertQueryEquals("a:b\\:c", a, "a:b:c"); + assertQueryEquals("a:b\\\\c", a, "a:b\\c"); + + assertQueryEquals("a:b\\-c*", a, "a:b-c*"); + assertQueryEquals("a:b\\+c*", a, "a:b+c*"); + assertQueryEquals("a:b\\:c*", a, "a:b:c*"); + + 
assertQueryEquals("a:b\\\\c*", a, "a:b\\c*"); + + assertQueryEquals("a:b\\-?c", a, "a:b-?c"); + assertQueryEquals("a:b\\+?c", a, "a:b+?c"); + assertQueryEquals("a:b\\:?c", a, "a:b:?c"); + + assertQueryEquals("a:b\\\\?c", a, "a:b\\?c"); + + assertQueryEquals("a:b\\-c~", a, "a:b-c~0.5"); + assertQueryEquals("a:b\\+c~", a, "a:b+c~0.5"); + assertQueryEquals("a:b\\:c~", a, "a:b:c~0.5"); + assertQueryEquals("a:b\\\\c~", a, "a:b\\c~0.5"); + + assertQueryEquals("[ a\\- TO a\\+ ]", null, "[a- TO a+]"); + assertQueryEquals("[ a\\: TO a\\~ ]", null, "[a: TO a~]"); + assertQueryEquals("[ a\\\\ TO a\\* ]", null, "[a\\ TO a*]"); + + assertQueryEquals("[\"c\\:\\\\temp\\\\\\~foo0.txt\" TO \"c\\:\\\\temp\\\\\\~foo9.txt\"]", a, + "[c:\\temp\\~foo0.txt TO c:\\temp\\~foo9.txt]"); + + assertQueryEquals("a\\\\\\+b", a, "a\\+b"); + + assertQueryEquals("a \\\"b c\\\" d", a, "a \"b c\" d"); + assertQueryEquals("\"a \\\"b c\\\" d\"", a, "\"a \"b c\" d\""); + assertQueryEquals("\"a \\+b c d\"", a, "\"a +b c d\""); + + assertQueryEquals("c\\:\\\\temp\\\\\\~foo.txt", a, "c:\\temp\\~foo.txt"); + + assertParseException("XY\\"); // there must be a character after the escape char + + // test unicode escaping + assertQueryEquals("a\\u0062c", a, "abc"); + assertQueryEquals("XY\\u005a", a, "XYZ"); + assertQueryEquals("XY\\u005A", a, "XYZ"); + assertQueryEquals("\"a \\\\\\u0028\\u0062\\\" c\"", a, "\"a \\(b\" c\""); + + assertParseException("XY\\u005G"); // test non-hex character in escaped unicode sequence + assertParseException("XY\\u005"); // test incomplete escaped unicode sequence + + // Tests bug LUCENE-800 + assertQueryEquals("(item:\\\\ item:ABCD\\\\)", a, "item:\\ item:ABCD\\"); + assertParseException("(item:\\\\ item:ABCD\\\\))"); // unmatched closing parenthesis + assertQueryEquals("\\*", a, "*"); + assertQueryEquals("\\\\", a, "\\"); // escaped backslash + + assertParseException("\\"); // a backslash must always be escaped + + // LUCENE-1189 + assertQueryEquals("(\"a\\\\\") or (\"b\")", a, "a\\ or b"); + } + + public void testQueryStringEscaping() throws Exception { + Analyzer a = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false); + + assertEscapedQueryEquals("a-b:c", a, "a\\-b\\:c"); + assertEscapedQueryEquals("a+b:c", a, "a\\+b\\:c"); + assertEscapedQueryEquals("a:b:c", a, "a\\:b\\:c"); + assertEscapedQueryEquals("a\\b:c", a, "a\\\\b\\:c"); + + assertEscapedQueryEquals("a:b-c", a, "a\\:b\\-c"); + assertEscapedQueryEquals("a:b+c", a, "a\\:b\\+c"); + assertEscapedQueryEquals("a:b:c", a, "a\\:b\\:c"); + assertEscapedQueryEquals("a:b\\c", a, "a\\:b\\\\c"); + + assertEscapedQueryEquals("a:b-c*", a, "a\\:b\\-c\\*"); + assertEscapedQueryEquals("a:b+c*", a, "a\\:b\\+c\\*"); + assertEscapedQueryEquals("a:b:c*", a, "a\\:b\\:c\\*"); + + assertEscapedQueryEquals("a:b\\\\c*", a, "a\\:b\\\\\\\\c\\*"); + + assertEscapedQueryEquals("a:b-?c", a, "a\\:b\\-\\?c"); + assertEscapedQueryEquals("a:b+?c", a, "a\\:b\\+\\?c"); + assertEscapedQueryEquals("a:b:?c", a, "a\\:b\\:\\?c"); + + assertEscapedQueryEquals("a:b?c", a, "a\\:b\\?c"); + + assertEscapedQueryEquals("a:b-c~", a, "a\\:b\\-c\\~"); + assertEscapedQueryEquals("a:b+c~", a, "a\\:b\\+c\\~"); + assertEscapedQueryEquals("a:b:c~", a, "a\\:b\\:c\\~"); + assertEscapedQueryEquals("a:b\\c~", a, "a\\:b\\\\c\\~"); + + assertEscapedQueryEquals("[ a - TO a+ ]", null, "\\[ a \\- TO a\\+ \\]"); + assertEscapedQueryEquals("[ a : TO a~ ]", null, "\\[ a \\: TO a\\~ \\]"); + assertEscapedQueryEquals("[ a\\ TO a* ]", null, "\\[ a\\\\ TO a\\* \\]"); + + // LUCENE-881 +
assertEscapedQueryEquals("|| abc ||", a, "\\|\\| abc \\|\\|"); + assertEscapedQueryEquals("&& abc &&", a, "\\&\\& abc \\&\\&"); + } + + public void testTabNewlineCarriageReturn() + throws Exception { + assertQueryEqualsDOA("+weltbank +worlbank", null, + "+weltbank +worlbank"); + + assertQueryEqualsDOA("+weltbank\n+worlbank", null, + "+weltbank +worlbank"); + assertQueryEqualsDOA("weltbank \n+worlbank", null, + "+weltbank +worlbank"); + assertQueryEqualsDOA("weltbank \n +worlbank", null, + "+weltbank +worlbank"); + + assertQueryEqualsDOA("+weltbank\r+worlbank", null, + "+weltbank +worlbank"); + assertQueryEqualsDOA("weltbank \r+worlbank", null, + "+weltbank +worlbank"); + assertQueryEqualsDOA("weltbank \r +worlbank", null, + "+weltbank +worlbank"); + + assertQueryEqualsDOA("+weltbank\r\n+worlbank", null, + "+weltbank +worlbank"); + assertQueryEqualsDOA("weltbank \r\n+worlbank", null, + "+weltbank +worlbank"); + assertQueryEqualsDOA("weltbank \r\n +worlbank", null, + "+weltbank +worlbank"); + assertQueryEqualsDOA("weltbank \r \n +worlbank", null, + "+weltbank +worlbank"); + + assertQueryEqualsDOA("+weltbank\t+worlbank", null, + "+weltbank +worlbank"); + assertQueryEqualsDOA("weltbank \t+worlbank", null, + "+weltbank +worlbank"); + assertQueryEqualsDOA("weltbank \t +worlbank", null, + "+weltbank +worlbank"); + } + + public void testSimpleDAO() + throws Exception { + assertQueryEqualsDOA("term term term", null, "+term +term +term"); + assertQueryEqualsDOA("term +term term", null, "+term +term +term"); + assertQueryEqualsDOA("term term +term", null, "+term +term +term"); + assertQueryEqualsDOA("term +term +term", null, "+term +term +term"); + assertQueryEqualsDOA("-term term term", null, "-term +term +term"); + } + + public void testBoost() + throws Exception { + Set stopWords = new HashSet(1); + stopWords.add("on"); + StandardAnalyzer oneStopAnalyzer = new StandardAnalyzer(TEST_VERSION_CURRENT, stopWords); + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", oneStopAnalyzer); + Query q = qp.parse("on^1.0"); + assertNotNull(q); + q = qp.parse("\"hello\"^2.0"); + assertNotNull(q); + assertEquals(q.getBoost(), (float) 2.0, (float) 0.5); + q = qp.parse("hello^2.0"); + assertNotNull(q); + assertEquals(q.getBoost(), (float) 2.0, (float) 0.5); + q = qp.parse("\"on\"^1.0"); + assertNotNull(q); + + QueryParser qp2 = new QueryParser(TEST_VERSION_CURRENT, "field", new StandardAnalyzer(TEST_VERSION_CURRENT)); + q = qp2.parse("the^3"); + // "the" is a stop word so the result is an empty query: + assertNotNull(q); + assertEquals("", q.toString()); + assertEquals(1.0f, q.getBoost(), 0.01f); + } + + public void assertParseException(String queryString) throws Exception { + try { + getQuery(queryString, null); + } catch (ParseException expected) { + return; + } + fail("ParseException expected, not thrown"); + } + + public void testException() throws Exception { + assertParseException("\"some phrase"); + assertParseException("(foo bar"); + assertParseException("foo bar))"); + assertParseException("field:term:with:colon some more terms"); + assertParseException("(sub query)^5.0^2.0 plus more"); + assertParseException("secret AND illegal) AND access:confidential"); + } + + + public void testCustomQueryParserWildcard() { + try { + new QPTestParser("contents", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("a?t"); + fail("Wildcard queries should not be allowed"); + } catch (ParseException expected) { + // expected exception + } + } + + public void testCustomQueryParserFuzzy() throws 
Exception { + try { + new QPTestParser("contents", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).parse("xunit~"); + fail("Fuzzy queries should not be allowed"); + } catch (ParseException expected) { + // expected exception + } + } + + public void testBooleanQuery() throws Exception { + BooleanQuery.setMaxClauseCount(2); + try { + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)); + qp.parse("one two three"); + fail("ParseException expected due to too many boolean clauses"); + } catch (ParseException expected) { + // too many boolean clauses, so ParseException is expected + } + } + + /** + * This test differs from TestPrecedenceQueryParser + */ + public void testPrecedence() throws Exception { + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)); + Query query1 = qp.parse("A AND B OR C AND D"); + Query query2 = qp.parse("+A +B +C +D"); + assertEquals(query1, query2); + } + + public void testLocalDateFormat() throws IOException, ParseException { + Directory ramDir = newDirectory(); + IndexWriter iw = new IndexWriter(ramDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT))); + + addDateDoc("a", 2005, 12, 2, 10, 15, 33, iw); + addDateDoc("b", 2005, 12, 4, 22, 15, 00, iw); + iw.close(); + IndexSearcher is = new IndexSearcher(ramDir, true); + assertHits(1, "[12/1/2005 TO 12/3/2005]", is); + assertHits(2, "[12/1/2005 TO 12/4/2005]", is); + assertHits(1, "[12/3/2005 TO 12/4/2005]", is); + assertHits(1, "{12/1/2005 TO 12/3/2005}", is); + assertHits(1, "{12/1/2005 TO 12/4/2005}", is); + assertHits(0, "{12/3/2005 TO 12/4/2005}", is); + is.close(); + ramDir.close(); + } + + public void testStarParsing() throws Exception { + final int[] type = new int[1]; + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)) { + @Override + protected Query getWildcardQuery(String field, String termStr) throws ParseException { + // override error checking of superclass + type[0]=1; + return new TermQuery(new Term(field,termStr)); + } + @Override + protected Query getPrefixQuery(String field, String termStr) throws ParseException { + // override error checking of superclass + type[0]=2; + return new TermQuery(new Term(field,termStr)); + } + + @Override + protected Query getFieldQuery(String field, String queryText, boolean quoted) throws ParseException { + type[0]=3; + return super.getFieldQuery(field, queryText, quoted); + } + }; + + TermQuery tq; + + tq = (TermQuery)qp.parse("foo:zoo*"); + assertEquals("zoo",tq.getTerm().text()); + assertEquals(2,type[0]); + + tq = (TermQuery)qp.parse("foo:zoo*^2"); + assertEquals("zoo",tq.getTerm().text()); + assertEquals(2,type[0]); + assertEquals(tq.getBoost(),2,0); + + tq = (TermQuery)qp.parse("foo:*"); + assertEquals("*",tq.getTerm().text()); + assertEquals(1,type[0]); // could be a valid prefix query in the future too + + tq = (TermQuery)qp.parse("foo:*^2"); + assertEquals("*",tq.getTerm().text()); + assertEquals(1,type[0]); + assertEquals(tq.getBoost(),2,0); + + tq = (TermQuery)qp.parse("*:foo"); + assertEquals("*",tq.getTerm().field()); + assertEquals("foo",tq.getTerm().text()); + assertEquals(3,type[0]); + + tq = (TermQuery)qp.parse("*:*"); + assertEquals("*",tq.getTerm().field()); + assertEquals("*",tq.getTerm().text()); + assertEquals(1,type[0]); // could be handled as a prefix query in the future + + tq = 
(TermQuery)qp.parse("(*:*)"); + assertEquals("*",tq.getTerm().field()); + assertEquals("*",tq.getTerm().text()); + assertEquals(1,type[0]); + + } + + public void testStopwords() throws Exception { + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "a", new StopAnalyzer(TEST_VERSION_CURRENT, StopFilter.makeStopSet(TEST_VERSION_CURRENT, "the", "foo"))); + Query result = qp.parse("a:the OR a:foo"); + assertNotNull("result is null and it shouldn't be", result); + assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery); + assertTrue(((BooleanQuery) result).clauses().size() + " does not equal: " + 0, ((BooleanQuery) result).clauses().size() == 0); + result = qp.parse("a:woo OR a:the"); + assertNotNull("result is null and it shouldn't be", result); + assertTrue("result is not a TermQuery", result instanceof TermQuery); + result = qp.parse("(fieldX:xxxxx OR fieldy:xxxxxxxx)^2 AND (fieldx:the OR fieldy:foo)"); + assertNotNull("result is null and it shouldn't be", result); + assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery); + if (VERBOSE) System.out.println("Result: " + result); + assertTrue(((BooleanQuery) result).clauses().size() + " does not equal: " + 2, ((BooleanQuery) result).clauses().size() == 2); + } + + public void testPositionIncrement() throws Exception { + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "a", new StopAnalyzer(TEST_VERSION_CURRENT, StopFilter.makeStopSet(TEST_VERSION_CURRENT, "the", "in", "are", "this"))); + qp.setEnablePositionIncrements(true); + String qtxt = "\"the words in poisitions pos02578 are stopped in this phrasequery\""; + // 0 2 5 7 8 + int expectedPositions[] = {1,3,4,6,9}; + PhraseQuery pq = (PhraseQuery) qp.parse(qtxt); + //System.out.println("Query text: "+qtxt); + //System.out.println("Result: "+pq); + Term t[] = pq.getTerms(); + int pos[] = pq.getPositions(); + for (int i = 0; i < t.length; i++) { + //System.out.println(i+". 
"+t[i]+" pos: "+pos[i]); + assertEquals("term "+i+" = "+t[i]+" has wrong term-position!",expectedPositions[i],pos[i]); + } + } + + public void testMatchAllDocs() throws Exception { + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)); + assertEquals(new MatchAllDocsQuery(), qp.parse("*:*")); + assertEquals(new MatchAllDocsQuery(), qp.parse("(*:*)")); + BooleanQuery bq = (BooleanQuery)qp.parse("+*:* -*:*"); + assertTrue(bq.getClauses()[0].getQuery() instanceof MatchAllDocsQuery); + assertTrue(bq.getClauses()[1].getQuery() instanceof MatchAllDocsQuery); + } + + private void assertHits(int expected, String query, IndexSearcher is) throws ParseException, IOException { + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "date", new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)); + qp.setLocale(Locale.ENGLISH); + Query q = qp.parse(query); + ScoreDoc[] hits = is.search(q, null, 1000).scoreDocs; + assertEquals(expected, hits.length); + } + + private void addDateDoc(String content, int year, int month, + int day, int hour, int minute, int second, IndexWriter iw) throws IOException { + Document d = new Document(); + d.add(newField("f", content, Field.Store.YES, Field.Index.ANALYZED)); + Calendar cal = Calendar.getInstance(Locale.ENGLISH); + cal.set(year, month-1, day, hour, minute, second); + d.add(newField("date", DateField.dateToString(cal.getTime()), Field.Store.YES, Field.Index.NOT_ANALYZED)); + iw.addDocument(d); + } + + @Override + public void tearDown() throws Exception { + BooleanQuery.setMaxClauseCount(originalMaxClauses); + super.tearDown(); + } + + // LUCENE-2002: make sure defaults for StandardAnalyzer's + // enableStopPositionIncr & QueryParser's enablePosIncr + // "match" + public void testPositionIncrements() throws Exception { + Directory dir = newDirectory(); + Analyzer a = new StandardAnalyzer(TEST_VERSION_CURRENT); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, a)); + Document doc = new Document(); + doc.add(newField("f", "the wizard of ozzy", Field.Store.NO, Field.Index.ANALYZED)); + w.addDocument(doc); + IndexReader r = IndexReader.open(w, true); + w.close(); + IndexSearcher s = newSearcher(r); + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "f", a); + Query q = qp.parse("\"wizard of ozzy\""); + assertEquals(1, s.search(q, 1).totalHits); + s.close(); + r.close(); + dir.close(); + } + + // LUCENE-2002: when we run javacc to regen QueryParser, + // we also run a replaceregexp step to fix 2 of the public + // ctors (change them to protected): + // + // protected QueryParser(CharStream stream) + // + // protected QueryParser(QueryParserTokenManager tm) + // + // This test is here as a safety, in case that ant step + // doesn't work for some reason. 
+ public void testProtectedCtors() throws Exception { + try { + QueryParser.class.getConstructor(new Class[] {CharStream.class}); + fail("please switch public QueryParser(CharStream) to be protected"); + } catch (NoSuchMethodException nsme) { + // expected + } + try { + QueryParser.class.getConstructor(new Class[] {QueryParserTokenManager.class}); + fail("please switch public QueryParser(QueryParserTokenManager) to be protected"); + } catch (NoSuchMethodException nsme) { + // expected + } + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/BaseTestRangeFilter.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/BaseTestRangeFilter.java new file mode 100644 index 0000000..2be0300 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/BaseTestRangeFilter.java @@ -0,0 +1,188 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Random; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +public class BaseTestRangeFilter extends LuceneTestCase { + + public static final boolean F = false; + public static final boolean T = true; + + /** + * Collation interacts badly with hyphens -- collation produces different + * ordering than Unicode code-point ordering -- so two indexes are created: + * one which can't have negative random integers, for testing collated ranges, + * and the other which can have negative random integers, for all other tests. 
+ */ + static class TestIndex { + int maxR; + int minR; + boolean allowNegativeRandomInts; + Directory index; + + TestIndex(Random random, int minR, int maxR, boolean allowNegativeRandomInts) { + this.minR = minR; + this.maxR = maxR; + this.allowNegativeRandomInts = allowNegativeRandomInts; + try { + index = newDirectory(random); + } catch (IOException e) { throw new RuntimeException(e); } + } + } + + static IndexReader signedIndexReader; + static IndexReader unsignedIndexReader; + + static TestIndex signedIndexDir; + static TestIndex unsignedIndexDir; + + static int minId = 0; + static int maxId = atLeast(500); + + static final int intLength = Integer.toString(Integer.MAX_VALUE).length(); + + /** + * a simple padding function that should work with any int + */ + public static String pad(int n) { + StringBuilder b = new StringBuilder(40); + String p = "0"; + if (n < 0) { + p = "-"; + n = Integer.MAX_VALUE + n + 1; + } + b.append(p); + String s = Integer.toString(n); + for (int i = s.length(); i <= intLength; i++) { + b.append("0"); + } + b.append(s); + + return b.toString(); + } + + @BeforeClass + public static void beforeClassBaseTestRangeFilter() throws Exception { + signedIndexDir = new TestIndex(random, Integer.MAX_VALUE, Integer.MIN_VALUE, true); + unsignedIndexDir = new TestIndex(random, Integer.MAX_VALUE, 0, false); + signedIndexReader = build(random, signedIndexDir); + unsignedIndexReader = build(random, unsignedIndexDir); + } + + @AfterClass + public static void afterClassBaseTestRangeFilter() throws Exception { + signedIndexReader.close(); + unsignedIndexReader.close(); + signedIndexDir.index.close(); + unsignedIndexDir.index.close(); + signedIndexReader = null; + unsignedIndexReader = null; + signedIndexDir = null; + unsignedIndexDir = null; + } + + private static IndexReader build(Random random, TestIndex index) throws IOException { + /* build an index */ + + Document doc = new Document(); + Field idField = newField(random, "id", "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); + Field randField = newField(random, "rand", "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); + Field bodyField = newField(random, "body", "", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS); + doc.add(idField); + doc.add(randField); + doc.add(bodyField); + + RandomIndexWriter writer = new RandomIndexWriter(random, index.index, + newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer(random)) + .setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(_TestUtil.nextInt(random, 50, 1000)).setMergePolicy(newLogMergePolicy())); + _TestUtil.reduceOpenFiles(writer.w); + while(true) { + + int minCount = 0; + int maxCount = 0; + + for (int d = minId; d <= maxId; d++) { + idField.setValue(pad(d)); + int r = index.allowNegativeRandomInts ? random.nextInt() : random + .nextInt(Integer.MAX_VALUE); + if (index.maxR < r) { + index.maxR = r; + maxCount = 1; + } else if (index.maxR == r) { + maxCount++; + } + + if (r < index.minR) { + index.minR = r; + minCount = 1; + } else if (r == index.minR) { + minCount++; + } + randField.setValue(pad(r)); + bodyField.setValue("body"); + writer.addDocument(doc); + } + + if (minCount == 1 && maxCount == 1) { + // our subclasses rely on only 1 doc having the min or + // max, so, we loop until we satisfy that. 
it should be + // exceedingly rare (Yonik calculates 1 in ~429,000 + // times) that this loop requires more than one try: + IndexReader ir = writer.getReader(); + writer.close(); + return ir; + } + + // try again + writer.deleteAll(); + } + } + + @Test + public void testPad() { + + int[] tests = new int[] {-9999999, -99560, -100, -3, -1, 0, 3, 9, 10, 1000, + 999999999}; + for (int i = 0; i < tests.length - 1; i++) { + int a = tests[i]; + int b = tests[i + 1]; + String aa = pad(a); + String bb = pad(b); + String label = a + ":" + aa + " vs " + b + ":" + bb; + assertEquals("length of " + label, aa.length(), bb.length()); + assertTrue("compare less than " + label, aa.compareTo(bb) < 0); + } + + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/CachingWrapperFilterHelper.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/CachingWrapperFilterHelper.java new file mode 100644 index 0000000..80df572 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/CachingWrapperFilterHelper.java @@ -0,0 +1,73 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import junit.framework.Assert; +import org.apache.lucene.index.IndexReader; + +/** + * A unit test helper class to test when the filter is getting cached and when it is not.
+ */ +public class CachingWrapperFilterHelper extends CachingWrapperFilter { + + private boolean shouldHaveCache = false; + + /** + * @param filter Filter to cache results of + */ + public CachingWrapperFilterHelper(Filter filter) { + super(filter); + } + + public void setShouldHaveCache(boolean shouldHaveCache) { + this.shouldHaveCache = shouldHaveCache; + } + + @Override + public synchronized DocIdSet getDocIdSet(IndexReader reader) throws IOException { + + final int saveMissCount = missCount; + DocIdSet docIdSet = super.getDocIdSet(reader); + + if (shouldHaveCache) { + Assert.assertEquals("Cache should have data ", saveMissCount, missCount); + } else { + Assert.assertTrue("Cache should be null " + docIdSet, missCount > saveMissCount); + } + + return docIdSet; + } + + @Override + public String toString() { + return "CachingWrapperFilterHelper("+filter+")"; + } + + @Override + public boolean equals(Object o) { + if (!(o instanceof CachingWrapperFilterHelper)) return false; + // compare the wrapped filters, not this filter against the other helper object + return this.filter.equals(((CachingWrapperFilterHelper) o).filter); + } + + @Override + public int hashCode() { + return this.filter.hashCode() ^ 0x5525aacb; + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/JustCompileSearch.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/JustCompileSearch.java new file mode 100644 index 0000000..4ec4556 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/JustCompileSearch.java @@ -0,0 +1,501 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.FieldSelector; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.FieldInvertState; +import org.apache.lucene.index.Term; +import org.apache.lucene.util.PriorityQueue; + +/** + * Holds all implementations of classes in the o.a.l.search package as a + * back-compatibility test. It does not run any tests per se; however, if + * someone adds a method to an interface or abstract method to an abstract + * class, one of the implementations here will fail to compile and so we know + * back-compat policy was violated.
+ */ +final class JustCompileSearch { + + private static final String UNSUPPORTED_MSG = "unsupported: used for back-compat testing only !"; + + static final class JustCompileSearcher extends Searcher { + + @Override + public Weight createNormalizedWeight(Query query) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public void close() throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public Document doc(int i) throws CorruptIndexException, IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public int[] docFreqs(Term[] terms) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public Explanation explain(Query query, int doc) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public Similarity getSimilarity() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public void search(Query query, Collector results) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public void search(Query query, Filter filter, Collector results) + throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public TopDocs search(Query query, Filter filter, int n) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public TopFieldDocs search(Query query, Filter filter, int n, Sort sort) + throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public TopDocs search(Query query, int n) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public void setSimilarity(Similarity similarity) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public int docFreq(Term term) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public Explanation explain(Weight weight, int doc) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public int maxDoc() throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public Query rewrite(Query query) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public void search(Weight weight, Filter filter, Collector results) + throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public TopDocs search(Weight weight, Filter filter, int n) + throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public TopFieldDocs search(Weight weight, Filter filter, int n, Sort sort) + throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public Document doc(int n, FieldSelector fieldSelector) + throws CorruptIndexException, IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + + static final class JustCompileCollector extends Collector { + + @Override + public void collect(int doc) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public void setNextReader(IndexReader reader, int docBase) + throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + 
@Override + public void setScorer(Scorer scorer) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public boolean acceptsDocsOutOfOrder() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + + static final class JustCompileDocIdSet extends DocIdSet { + + @Override + public DocIdSetIterator iterator() throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + + static final class JustCompileDocIdSetIterator extends DocIdSetIterator { + + @Override + public int docID() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public int nextDoc() throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public int advance(int target) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + } + + static final class JustCompileExtendedFieldCacheLongParser implements FieldCache.LongParser { + + public long parseLong(String string) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + + static final class JustCompileExtendedFieldCacheDoubleParser implements FieldCache.DoubleParser { + + public double parseDouble(String string) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + + static final class JustCompileFieldComparator extends FieldComparator { + + @Override + public int compare(int slot1, int slot2) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public int compareBottom(int doc) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public void copy(int slot, int doc) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public void setBottom(int slot) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public void setNextReader(IndexReader reader, int docBase) + throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public Object value(int slot) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + + static final class JustCompileFieldComparatorSource extends FieldComparatorSource { + + @Override + public FieldComparator newComparator(String fieldname, int numHits, + int sortPos, boolean reversed) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + + static final class JustCompileFilter extends Filter { + // Filter is just an abstract class with no abstract methods. However it is + // still added here in case someone will add abstract methods in the future. 
+ + @Override + public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + return null; + } + } + + static final class JustCompileFilteredDocIdSet extends FilteredDocIdSet { + + public JustCompileFilteredDocIdSet(DocIdSet innerSet) { + super(innerSet); + } + + @Override + protected boolean match(int docid) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + + static final class JustCompileFilteredDocIdSetIterator extends FilteredDocIdSetIterator { + + public JustCompileFilteredDocIdSetIterator(DocIdSetIterator innerIter) { + super(innerIter); + } + + @Override + protected boolean match(int doc) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + + static final class JustCompileFilteredTermEnum extends FilteredTermEnum { + + @Override + public float difference() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + protected boolean endEnum() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + protected boolean termCompare(Term term) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + + static final class JustCompilePhraseScorer extends PhraseScorer { + + JustCompilePhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings, + Similarity similarity, byte[] norms) { + super(weight, postings, similarity, norms); + } + + @Override + protected float phraseFreq() throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + + static final class JustCompileQuery extends Query { + + @Override + public String toString(String field) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + + static final class JustCompileScorer extends Scorer { + + protected JustCompileScorer(Weight weight) { + super(weight); + } + + @Override + protected boolean score(Collector collector, int max, int firstDocID) + throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public float score() throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public int docID() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public int nextDoc() throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public int advance(int target) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + } + + static final class JustCompileSimilarity extends Similarity { + + @Override + public float coord(int overlap, int maxOverlap) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public float idf(int docFreq, int numDocs) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public float computeNorm(String fieldName, FieldInvertState state) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public float queryNorm(float sumOfSquaredWeights) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public float sloppyFreq(int distance) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public float tf(float freq) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + + static final class JustCompileSpanFilter extends SpanFilter { + + @Override + public SpanFilterResult bitSpans(IndexReader reader) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + 
public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + return null; + } + } + + static final class JustCompileTopDocsCollector extends TopDocsCollector { + + protected JustCompileTopDocsCollector(PriorityQueue pq) { + super(pq); + } + + @Override + public void collect(int doc) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public void setNextReader(IndexReader reader, int docBase) + throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public void setScorer(Scorer scorer) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public boolean acceptsDocsOutOfOrder() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public TopDocs topDocs() { + throw new UnsupportedOperationException( UNSUPPORTED_MSG ); + } + + @Override + public TopDocs topDocs( int start ) { + throw new UnsupportedOperationException( UNSUPPORTED_MSG ); + } + + @Override + public TopDocs topDocs( int start, int end ) { + throw new UnsupportedOperationException( UNSUPPORTED_MSG ); + } + + } + + static final class JustCompileWeight extends Weight { + + @Override + public Explanation explain(IndexReader reader, int doc) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public Query getQuery() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public float getValue() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public void normalize(float norm) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public float sumOfSquaredWeights() throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) + throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/MockFilter.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/MockFilter.java new file mode 100644 index 0000000..36b4247 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/MockFilter.java @@ -0,0 +1,40 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.util.DocIdBitSet; +import java.util.BitSet; + +public class MockFilter extends Filter { + private boolean wasCalled; + + @Override + public DocIdSet getDocIdSet(IndexReader reader) { + wasCalled = true; + return new DocIdBitSet(new BitSet()); + } + + public void clear() { + wasCalled = false; + } + + public boolean wasCalled() { + return wasCalled; + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/MultiCollectorTest.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/MultiCollectorTest.java new file mode 100644 index 0000000..ae988c0 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/MultiCollectorTest.java @@ -0,0 +1,110 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.util.LuceneTestCase; +import org.junit.Test; + +public class MultiCollectorTest extends LuceneTestCase { + + private static class DummyCollector extends Collector { + + boolean acceptsDocsOutOfOrderCalled = false; + boolean collectCalled = false; + boolean setNextReaderCalled = false; + boolean setScorerCalled = false; + + @Override + public boolean acceptsDocsOutOfOrder() { + acceptsDocsOutOfOrderCalled = true; + return true; + } + + @Override + public void collect(int doc) throws IOException { + collectCalled = true; + } + + @Override + public void setNextReader(IndexReader reader, int docBase) throws IOException { + setNextReaderCalled = true; + } + + @Override + public void setScorer(Scorer scorer) throws IOException { + setScorerCalled = true; + } + + } + + @Test + public void testNullCollectors() throws Exception { + // Tests that the collector rejects all null collectors. + try { + MultiCollector.wrap(null, null); + fail("only null collectors should not be supported"); + } catch (IllegalArgumentException e) { + // expected + } + + // Tests that the collector handles some null collectors well. If it + // doesn't, an NPE would be thrown. + Collector c = MultiCollector.wrap(new DummyCollector(), null, new DummyCollector()); + assertTrue(c instanceof MultiCollector); + assertTrue(c.acceptsDocsOutOfOrder()); + c.collect(1); + c.setNextReader(null, 0); + c.setScorer(null); + } + + @Test + public void testSingleCollector() throws Exception { + // Tests that if a single Collector is input, it is returned (and not MultiCollector). 
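+ // (wrap() presumably strips the null entries first; with a single real collector left it can hand that instance back without allocating a MultiCollector around it.)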
+ DummyCollector dc = new DummyCollector(); + assertSame(dc, MultiCollector.wrap(dc)); + assertSame(dc, MultiCollector.wrap(dc, null)); + } + + @Test + public void testCollector() throws Exception { + // Tests that the collector delegates calls to input collectors properly. + + // Tests that the collector handles some null collectors well. If it + // doesn't, an NPE would be thrown. + DummyCollector[] dcs = new DummyCollector[] { new DummyCollector(), new DummyCollector() }; + Collector c = MultiCollector.wrap(dcs); + assertTrue(c.acceptsDocsOutOfOrder()); + c.collect(1); + c.setNextReader(null, 0); + c.setScorer(null); + + for (DummyCollector dc : dcs) { + assertTrue(dc.acceptsDocsOutOfOrderCalled); + assertTrue(dc.collectCalled); + assertTrue(dc.setNextReaderCalled); + assertTrue(dc.setScorerCalled); + } + + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/SingleDocTestFilter.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/SingleDocTestFilter.java new file mode 100644 index 0000000..bd1df4e --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/SingleDocTestFilter.java @@ -0,0 +1,39 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.util.DocIdBitSet; + +import java.util.BitSet; +import java.io.IOException; + +public class SingleDocTestFilter extends Filter { + private int doc; + + public SingleDocTestFilter(int doc) { + this.doc = doc; + } + + @Override + public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + BitSet bits = new BitSet(reader.maxDoc()); + bits.set(doc); + return new DocIdBitSet(bits); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestBoolean2.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestBoolean2.java new file mode 100644 index 0000000..a16818f --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestBoolean2.java @@ -0,0 +1,324 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +import java.util.Random; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.queryParser.ParseException; +import org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.MockDirectoryWrapper; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +/** Test BooleanQuery2 against BooleanQuery by overriding the standard query parser. + * This also tests the scoring order of BooleanQuery. + */ +public class TestBoolean2 extends LuceneTestCase { + private static IndexSearcher searcher; + private static IndexSearcher bigSearcher; + private static IndexReader reader; + private static int NUM_EXTRA_DOCS = 6000; + + public static final String field = "field"; + private static Directory directory; + private static Directory dir2; + private static int mulFactor; + + @BeforeClass + public static void beforeClass() throws Exception { + directory = newDirectory(); + RandomIndexWriter writer= new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); + for (int i = 0; i < docFields.length; i++) { + Document doc = new Document(); + doc.add(newField(field, docFields[i], Field.Store.NO, Field.Index.ANALYZED)); + writer.addDocument(doc); + } + writer.close(); + searcher = new IndexSearcher(directory, true); + + // Make big index + dir2 = new MockDirectoryWrapper(random, new RAMDirectory(directory)); + + // First multiply small test index: + mulFactor = 1; + int docCount = 0; + do { + final Directory copy = new MockDirectoryWrapper(random, new RAMDirectory(dir2)); + RandomIndexWriter w = new RandomIndexWriter(random, dir2); + w.addIndexes(new Directory[] {copy}); + docCount = w.maxDoc(); + w.close(); + mulFactor *= 2; + } while(docCount < 3000); + + RandomIndexWriter w = new RandomIndexWriter(random, dir2, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)) + .setMaxBufferedDocs(_TestUtil.nextInt(random, 50, 1000))); + Document doc = new Document(); + doc.add(newField("field2", "xxx", Field.Store.NO, Field.Index.ANALYZED)); + for(int i=0;i0) { + qType = rnd.nextInt(10); + } + Query q; + if (qType < 3) { + q = new TermQuery(new Term(field, vals[rnd.nextInt(vals.length)])); + } else if (qType < 7) { + q = new WildcardQuery(new Term(field, "w*")); + } else { + q = randBoolQuery(rnd, allowMust, level-1, field, vals, cb); + } + + int r = rnd.nextInt(10); + BooleanClause.Occur occur; + if (r<2) { + occur=BooleanClause.Occur.MUST_NOT; + } + else if (r<5) { + if (allowMust) { + occur=BooleanClause.Occur.MUST; + } else { + occur=BooleanClause.Occur.SHOULD; + } + } else { 
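+ // r in [5,9]: plain optional clause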
+ occur=BooleanClause.Occur.SHOULD; + } + + current.add(q, occur); + } + if (cb!=null) cb.postCreate(current); + return current; + } + + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java new file mode 100644 index 0000000..0bf05ad --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java @@ -0,0 +1,390 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.Directory; +import org.junit.AfterClass; +import org.junit.BeforeClass; + +import java.text.DecimalFormat; +import java.util.Random; + +/** Test that BooleanQuery.setMinimumNumberShouldMatch works. 
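+ * A SHOULD clause is normally optional; setMinimumNumberShouldMatch(n) requires at least n of the optional clauses to match before a document qualifies at all.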
+ */ +public class TestBooleanMinShouldMatch extends LuceneTestCase { + + private static Directory index; + private static IndexReader r; + private static IndexSearcher s; + + @BeforeClass + public static void beforeClass() throws Exception { + String[] data = new String [] { + "A 1 2 3 4 5 6", + "Z 4 5 6", + null, + "B 2 4 5 6", + "Y 3 5 6", + null, + "C 3 6", + "X 4 5 6" + }; + + index = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random, index); + + for (int i = 0; i < data.length; i++) { + Document doc = new Document(); + doc.add(newField("id", String.valueOf(i), Field.Store.YES, Field.Index.NOT_ANALYZED));//Field.Keyword("id",String.valueOf(i))); + doc.add(newField("all", "all", Field.Store.YES, Field.Index.NOT_ANALYZED));//Field.Keyword("all","all")); + if (null != data[i]) { + doc.add(newField("data", data[i], Field.Store.YES, Field.Index.ANALYZED));//Field.Text("data",data[i])); + } + w.addDocument(doc); + } + + r = w.getReader(); + s = newSearcher(r); + w.close(); +//System.out.println("Set up " + getName()); + } + + @AfterClass + public static void afterClass() throws Exception { + s.close(); + s = null; + r.close(); + r = null; + index.close(); + index = null; + } + + + public void verifyNrHits(Query q, int expected) throws Exception { + ScoreDoc[] h = s.search(q, null, 1000).scoreDocs; + if (expected != h.length) { + printHits(getName(), h, s); + } + assertEquals("result count", expected, h.length); + QueryUtils.check(random, q,s); + } + + public void testAllOptional() throws Exception { + + BooleanQuery q = new BooleanQuery(); + for (int i = 1; i <=4; i++) { + q.add(new TermQuery(new Term("data",""+i)), BooleanClause.Occur.SHOULD);//false, false); + } + q.setMinimumNumberShouldMatch(2); // match at least two of 4 + verifyNrHits(q, 2); + } + + public void testOneReqAndSomeOptional() throws Exception { + + /* one required, some optional */ + BooleanQuery q = new BooleanQuery(); + q.add(new TermQuery(new Term("all", "all" )), BooleanClause.Occur.MUST);//true, false); + q.add(new TermQuery(new Term("data", "5" )), BooleanClause.Occur.SHOULD);//false, false); + q.add(new TermQuery(new Term("data", "4" )), BooleanClause.Occur.SHOULD);//false, false); + q.add(new TermQuery(new Term("data", "3" )), BooleanClause.Occur.SHOULD);//false, false); + + q.setMinimumNumberShouldMatch(2); // 2 of 3 optional + + verifyNrHits(q, 5); + } + + public void testSomeReqAndSomeOptional() throws Exception { + + /* two required, some optional */ + BooleanQuery q = new BooleanQuery(); + q.add(new TermQuery(new Term("all", "all" )), BooleanClause.Occur.MUST);//true, false); + q.add(new TermQuery(new Term("data", "6" )), BooleanClause.Occur.MUST);//true, false); + q.add(new TermQuery(new Term("data", "5" )), BooleanClause.Occur.SHOULD);//false, false); + q.add(new TermQuery(new Term("data", "4" )), BooleanClause.Occur.SHOULD);//false, false); + q.add(new TermQuery(new Term("data", "3" )), BooleanClause.Occur.SHOULD);//false, false); + + q.setMinimumNumberShouldMatch(2); // 2 of 3 optional + + verifyNrHits(q, 5); + } + + public void testOneProhibAndSomeOptional() throws Exception { + + /* one prohibited, some optional */ + BooleanQuery q = new BooleanQuery(); + q.add(new TermQuery(new Term("data", "1" )), BooleanClause.Occur.SHOULD);//false, false); + q.add(new TermQuery(new Term("data", "2" )), BooleanClause.Occur.SHOULD);//false, false); + q.add(new TermQuery(new Term("data", "3" )), BooleanClause.Occur.MUST_NOT);//false, true ); + q.add(new TermQuery(new Term("data", "4" )), 
BooleanClause.Occur.SHOULD);//false, false); + + q.setMinimumNumberShouldMatch(2); // 2 of 3 optional + + verifyNrHits(q, 1); + } + + public void testSomeProhibAndSomeOptional() throws Exception { + + /* two prohibited, some optional */ + BooleanQuery q = new BooleanQuery(); + q.add(new TermQuery(new Term("data", "1" )), BooleanClause.Occur.SHOULD);//false, false); + q.add(new TermQuery(new Term("data", "2" )), BooleanClause.Occur.SHOULD);//false, false); + q.add(new TermQuery(new Term("data", "3" )), BooleanClause.Occur.MUST_NOT);//false, true ); + q.add(new TermQuery(new Term("data", "4" )), BooleanClause.Occur.SHOULD);//false, false); + q.add(new TermQuery(new Term("data", "C" )), BooleanClause.Occur.MUST_NOT);//false, true ); + + q.setMinimumNumberShouldMatch(2); // 2 of 3 optional + + verifyNrHits(q, 1); + } + + public void testOneReqOneProhibAndSomeOptional() throws Exception { + + /* one required, one prohibited, some optional */ + BooleanQuery q = new BooleanQuery(); + q.add(new TermQuery(new Term("data", "6" )), BooleanClause.Occur.MUST);// true, false); + q.add(new TermQuery(new Term("data", "5" )), BooleanClause.Occur.SHOULD);//false, false); + q.add(new TermQuery(new Term("data", "4" )), BooleanClause.Occur.SHOULD);//false, false); + q.add(new TermQuery(new Term("data", "3" )), BooleanClause.Occur.MUST_NOT);//false, true ); + q.add(new TermQuery(new Term("data", "2" )), BooleanClause.Occur.SHOULD);//false, false); + q.add(new TermQuery(new Term("data", "1" )), BooleanClause.Occur.SHOULD);//false, false); + + q.setMinimumNumberShouldMatch(3); // 3 of 4 optional + + verifyNrHits(q, 1); + } + + public void testSomeReqOneProhibAndSomeOptional() throws Exception { + + /* two required, one prohibited, some optional */ + BooleanQuery q = new BooleanQuery(); + q.add(new TermQuery(new Term("all", "all")), BooleanClause.Occur.MUST);//true, false); + q.add(new TermQuery(new Term("data", "6" )), BooleanClause.Occur.MUST);//true, false); + q.add(new TermQuery(new Term("data", "5" )), BooleanClause.Occur.SHOULD);//false, false); + q.add(new TermQuery(new Term("data", "4" )), BooleanClause.Occur.SHOULD);//false, false); + q.add(new TermQuery(new Term("data", "3" )), BooleanClause.Occur.MUST_NOT);//false, true ); + q.add(new TermQuery(new Term("data", "2" )), BooleanClause.Occur.SHOULD);//false, false); + q.add(new TermQuery(new Term("data", "1" )), BooleanClause.Occur.SHOULD);//false, false); + + q.setMinimumNumberShouldMatch(3); // 3 of 4 optional + + verifyNrHits(q, 1); + } + + public void testOneReqSomeProhibAndSomeOptional() throws Exception { + + /* one required, two prohibited, some optional */ + BooleanQuery q = new BooleanQuery(); + q.add(new TermQuery(new Term("data", "6" )), BooleanClause.Occur.MUST);//true, false); + q.add(new TermQuery(new Term("data", "5" )), BooleanClause.Occur.SHOULD);//false, false); + q.add(new TermQuery(new Term("data", "4" )), BooleanClause.Occur.SHOULD);//false, false); + q.add(new TermQuery(new Term("data", "3" )), BooleanClause.Occur.MUST_NOT);//false, true ); + q.add(new TermQuery(new Term("data", "2" )), BooleanClause.Occur.SHOULD);//false, false); + q.add(new TermQuery(new Term("data", "1" )), BooleanClause.Occur.SHOULD);//false, false); + q.add(new TermQuery(new Term("data", "C" )), BooleanClause.Occur.MUST_NOT);//false, true ); + + q.setMinimumNumberShouldMatch(3); // 3 of 4 optional + + verifyNrHits(q, 1); + } + + public void testSomeReqSomeProhibAndSomeOptional() throws Exception { + + /* two required, two prohibited, some optional */ + 
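+ // (The one expected hit appears to be the "B 2 4 5 6" document: it contains the required 6, avoids the prohibited 3 and C, and matches three of the four optional terms: 5, 4 and 2.)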
BooleanQuery q = new BooleanQuery(); + q.add(new TermQuery(new Term("all", "all")), BooleanClause.Occur.MUST);//true, false); + q.add(new TermQuery(new Term("data", "6" )), BooleanClause.Occur.MUST);//true, false); + q.add(new TermQuery(new Term("data", "5" )), BooleanClause.Occur.SHOULD);//false, false); + q.add(new TermQuery(new Term("data", "4" )), BooleanClause.Occur.SHOULD);//false, false); + q.add(new TermQuery(new Term("data", "3" )), BooleanClause.Occur.MUST_NOT);//false, true ); + q.add(new TermQuery(new Term("data", "2" )), BooleanClause.Occur.SHOULD);//false, false); + q.add(new TermQuery(new Term("data", "1" )), BooleanClause.Occur.SHOULD);//false, false); + q.add(new TermQuery(new Term("data", "C" )), BooleanClause.Occur.MUST_NOT);//false, true ); + + q.setMinimumNumberShouldMatch(3); // 3 of 4 optional + + verifyNrHits(q, 1); + } + + public void testMinHigherThenNumOptional() throws Exception { + + /* two required, two prohibited, some optional */ + BooleanQuery q = new BooleanQuery(); + q.add(new TermQuery(new Term("all", "all")), BooleanClause.Occur.MUST);//true, false); + q.add(new TermQuery(new Term("data", "6" )), BooleanClause.Occur.MUST);//true, false); + q.add(new TermQuery(new Term("data", "5" )), BooleanClause.Occur.SHOULD);//false, false); + q.add(new TermQuery(new Term("data", "4" )), BooleanClause.Occur.SHOULD);//false, false); + q.add(new TermQuery(new Term("data", "3" )), BooleanClause.Occur.MUST_NOT);//false, true ); + q.add(new TermQuery(new Term("data", "2" )), BooleanClause.Occur.SHOULD);//false, false); + q.add(new TermQuery(new Term("data", "1" )), BooleanClause.Occur.SHOULD);//false, false); + q.add(new TermQuery(new Term("data", "C" )), BooleanClause.Occur.MUST_NOT);//false, true ); + + q.setMinimumNumberShouldMatch(90); // 90 of 4 optional ?!?!?! 
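+ // (a minimum above the number of SHOULD clauses can never be satisfied, so zero hits are expected)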
+ + verifyNrHits(q, 0); + } + + public void testMinEqualToNumOptional() throws Exception { + + /* two required, two optional */ + BooleanQuery q = new BooleanQuery(); + q.add(new TermQuery(new Term("all", "all" )), BooleanClause.Occur.SHOULD);//false, false); + q.add(new TermQuery(new Term("data", "6" )), BooleanClause.Occur.MUST);//true, false); + q.add(new TermQuery(new Term("data", "3" )), BooleanClause.Occur.MUST);//true, false); + q.add(new TermQuery(new Term("data", "2" )), BooleanClause.Occur.SHOULD);//false, false); + + q.setMinimumNumberShouldMatch(2); // 2 of 2 optional + + verifyNrHits(q, 1); + } + + public void testOneOptionalEqualToMin() throws Exception { + + /* two required, one optional */ + BooleanQuery q = new BooleanQuery(); + q.add(new TermQuery(new Term("all", "all" )), BooleanClause.Occur.MUST);//true, false); + q.add(new TermQuery(new Term("data", "3" )), BooleanClause.Occur.SHOULD);//false, false); + q.add(new TermQuery(new Term("data", "2" )), BooleanClause.Occur.MUST);//true, false); + + q.setMinimumNumberShouldMatch(1); // 1 of 1 optional + + verifyNrHits(q, 1); + } + + public void testNoOptionalButMin() throws Exception { + + /* two required, no optional */ + BooleanQuery q = new BooleanQuery(); + q.add(new TermQuery(new Term("all", "all" )), BooleanClause.Occur.MUST);//true, false); + q.add(new TermQuery(new Term("data", "2" )), BooleanClause.Occur.MUST);//true, false); + + q.setMinimumNumberShouldMatch(1); // 1 of 0 optional + + verifyNrHits(q, 0); + } + + public void testNoOptionalButMin2() throws Exception { + + /* one required, no optional */ + BooleanQuery q = new BooleanQuery(); + q.add(new TermQuery(new Term("all", "all" )), BooleanClause.Occur.MUST);//true, false); + + q.setMinimumNumberShouldMatch(1); // 1 of 0 optional + + verifyNrHits(q, 0); + } + + public void testRandomQueries() throws Exception { + String field="data"; + String[] vals = {"1","2","3","4","5","6","A","Z","B","Y","Z","X","foo"}; + int maxLev=4; + + // callback object to set a random setMinimumNumberShouldMatch + TestBoolean2.Callback minNrCB = new TestBoolean2.Callback() { + public void postCreate(BooleanQuery q) { + BooleanClause[] c =q.getClauses(); + int opt=0; + for (int i=0; i<c.length; i++) { + if (c[i].getOccur() == BooleanClause.Occur.SHOULD) opt++; + } + q.setMinimumNumberShouldMatch(random.nextInt(opt+2)); + } + }; + + // increase number of iterations for more complete testing + int num = atLeast(20); + for (int i = 0; i < num; i++) { + int lev = random.nextInt(maxLev); + final long seed = random.nextLong(); + BooleanQuery q1 = TestBoolean2.randBoolQuery(new Random(seed), true, lev, field, vals, null); + // BooleanQuery q2 = TestBoolean2.randBoolQuery(new Random(seed), true, lev, field, vals, minNrCB); + BooleanQuery q2 = TestBoolean2.randBoolQuery(new Random(seed), true, lev, field, vals, null); + // only set minimumNumberShouldMatch on the top level query since setting + // at a lower level can change the score. + minNrCB.postCreate(q2); + + // Can't use Hits because normalized scores will mess things up. The + // non-sorting version of search() that returns TopDocs will not normalize scores. + TopDocs top1 = s.search(q1,null,100); + TopDocs top2 = s.search(q2,null,100); + if (i < 100) { + QueryUtils.check(random, q1,s); + QueryUtils.check(random, q2,s); + } + // The constrained query should be a subset of the unconstrained query. + if (top2.totalHits > top1.totalHits) { + fail("Constrained results not a subset:\n" + + CheckHits.topdocsString(top1,0,0) + + CheckHits.topdocsString(top2,0,0) + + "for query:" + q2.toString()); + } + + for (int hit=0; hit<top2.scoreDocs.length; hit++) { + int id = top2.scoreDocs[hit].doc; + float score = top2.scoreDocs[hit].score; + boolean found=false; + // find this doc in the other results + for (int other=0; other<top1.scoreDocs.length; other++) { + if (top1.scoreDocs[other].doc == id) { + found=true; + float otherScore = top1.scoreDocs[other].score; + // check if scores match + if (Math.abs(otherScore-score)>1.0e-6f) { + fail("Doc " + id + " scores don't match\n" + + CheckHits.topdocsString(top1,0,0) + + CheckHits.topdocsString(top2,0,0) + + "for query:" + q2.toString()); + } + } + } + + // check if subset + if (!found) fail("Doc " + id + " not found\n" + + CheckHits.topdocsString(top1,0,0) + + CheckHits.topdocsString(top2,0,0) + + "for query:" + q2.toString()); + } + } + // System.out.println("Total hits:"+tot); + } + + + + protected void printHits(String test, ScoreDoc[] h, Searcher searcher) throws Exception { + + System.err.println("------- " + test + " -------"); + + DecimalFormat f = new DecimalFormat("0.000000"); + + for (int i = 0; i < h.length; i++) { + Document d = searcher.doc(h[i].doc); + float score = h[i].score; + System.err.println("#" + i + ": " + f.format(score) + " - " + + d.get("id") + " - " + d.get("data")); + } + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestBooleanOr.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestBooleanOr.java new file mode 100644 index 0000000..169cae1 --- /dev/null +++ 
b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestBooleanOr.java @@ -0,0 +1,168 @@ +package org.apache.lucene.search; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import java.io.IOException; + +import org.apache.lucene.util.LuceneTestCase; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.Directory; + +public class TestBooleanOr extends LuceneTestCase { + + private static String FIELD_T = "T"; + private static String FIELD_C = "C"; + + private TermQuery t1 = new TermQuery(new Term(FIELD_T, "files")); + private TermQuery t2 = new TermQuery(new Term(FIELD_T, "deleting")); + private TermQuery c1 = new TermQuery(new Term(FIELD_C, "production")); + private TermQuery c2 = new TermQuery(new Term(FIELD_C, "optimize")); + + private IndexSearcher searcher = null; + private Directory dir; + private IndexReader reader; + + + private int search(Query q) throws IOException { + QueryUtils.check(random, q,searcher); + return searcher.search(q, null, 1000).totalHits; + } + + public void testElements() throws IOException { + assertEquals(1, search(t1)); + assertEquals(1, search(t2)); + assertEquals(1, search(c1)); + assertEquals(1, search(c2)); + } + + /** + * T:files T:deleting C:production C:optimize + * it works. + * + * @throws IOException + */ + public void testFlat() throws IOException { + BooleanQuery q = new BooleanQuery(); + q.add(new BooleanClause(t1, BooleanClause.Occur.SHOULD)); + q.add(new BooleanClause(t2, BooleanClause.Occur.SHOULD)); + q.add(new BooleanClause(c1, BooleanClause.Occur.SHOULD)); + q.add(new BooleanClause(c2, BooleanClause.Occur.SHOULD)); + assertEquals(1, search(q)); + } + + /** + * (T:files T:deleting) (+C:production +C:optimize) + * it works. + * + * @throws IOException + */ + public void testParenthesisMust() throws IOException { + BooleanQuery q3 = new BooleanQuery(); + q3.add(new BooleanClause(t1, BooleanClause.Occur.SHOULD)); + q3.add(new BooleanClause(t2, BooleanClause.Occur.SHOULD)); + BooleanQuery q4 = new BooleanQuery(); + q4.add(new BooleanClause(c1, BooleanClause.Occur.MUST)); + q4.add(new BooleanClause(c2, BooleanClause.Occur.MUST)); + BooleanQuery q2 = new BooleanQuery(); + q2.add(q3, BooleanClause.Occur.SHOULD); + q2.add(q4, BooleanClause.Occur.SHOULD); + assertEquals(1, search(q2)); + } + + /** + * (T:files T:deleting) +(C:production C:optimize) + * not working. results NO HIT. 
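+ * ("results NO HIT" appears to record the behaviour originally reported; the assertion below expects exactly one hit.)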
+ * + * @throws IOException + */ + public void testParenthesisMust2() throws IOException { + BooleanQuery q3 = new BooleanQuery(); + q3.add(new BooleanClause(t1, BooleanClause.Occur.SHOULD)); + q3.add(new BooleanClause(t2, BooleanClause.Occur.SHOULD)); + BooleanQuery q4 = new BooleanQuery(); + q4.add(new BooleanClause(c1, BooleanClause.Occur.SHOULD)); + q4.add(new BooleanClause(c2, BooleanClause.Occur.SHOULD)); + BooleanQuery q2 = new BooleanQuery(); + q2.add(q3, BooleanClause.Occur.SHOULD); + q2.add(q4, BooleanClause.Occur.MUST); + assertEquals(1, search(q2)); + } + + /** + * (T:files T:deleting) (C:production C:optimize) + * not working. results NO HIT. + * + * @throws IOException + */ + public void testParenthesisShould() throws IOException { + BooleanQuery q3 = new BooleanQuery(); + q3.add(new BooleanClause(t1, BooleanClause.Occur.SHOULD)); + q3.add(new BooleanClause(t2, BooleanClause.Occur.SHOULD)); + BooleanQuery q4 = new BooleanQuery(); + q4.add(new BooleanClause(c1, BooleanClause.Occur.SHOULD)); + q4.add(new BooleanClause(c2, BooleanClause.Occur.SHOULD)); + BooleanQuery q2 = new BooleanQuery(); + q2.add(q3, BooleanClause.Occur.SHOULD); + q2.add(q4, BooleanClause.Occur.SHOULD); + assertEquals(1, search(q2)); + } + + @Override + public void setUp() throws Exception { + super.setUp(); + + // + dir = newDirectory(); + + + // + RandomIndexWriter writer = new RandomIndexWriter(random, dir); + + // + Document d = new Document(); + d.add(newField( + FIELD_T, + "Optimize not deleting all files", + Field.Store.YES, + Field.Index.ANALYZED)); + d.add(newField( + FIELD_C, + "Deleted When I run an optimize in our production environment.", + Field.Store.YES, + Field.Index.ANALYZED)); + + // + writer.addDocument(d); + + reader = writer.getReader(); + // + searcher = newSearcher(reader); + writer.close(); + } + + @Override + public void tearDown() throws Exception { + searcher.close(); + reader.close(); + dir.close(); + super.tearDown(); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestBooleanQuery.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestBooleanQuery.java new file mode 100644 index 0000000..eea83e8 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestBooleanQuery.java @@ -0,0 +1,158 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.MultiReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; + +public class TestBooleanQuery extends LuceneTestCase { + + public void testEquality() throws Exception { + BooleanQuery bq1 = new BooleanQuery(); + bq1.add(new TermQuery(new Term("field", "value1")), BooleanClause.Occur.SHOULD); + bq1.add(new TermQuery(new Term("field", "value2")), BooleanClause.Occur.SHOULD); + BooleanQuery nested1 = new BooleanQuery(); + nested1.add(new TermQuery(new Term("field", "nestedvalue1")), BooleanClause.Occur.SHOULD); + nested1.add(new TermQuery(new Term("field", "nestedvalue2")), BooleanClause.Occur.SHOULD); + bq1.add(nested1, BooleanClause.Occur.SHOULD); + + BooleanQuery bq2 = new BooleanQuery(); + bq2.add(new TermQuery(new Term("field", "value1")), BooleanClause.Occur.SHOULD); + bq2.add(new TermQuery(new Term("field", "value2")), BooleanClause.Occur.SHOULD); + BooleanQuery nested2 = new BooleanQuery(); + nested2.add(new TermQuery(new Term("field", "nestedvalue1")), BooleanClause.Occur.SHOULD); + nested2.add(new TermQuery(new Term("field", "nestedvalue2")), BooleanClause.Occur.SHOULD); + bq2.add(nested2, BooleanClause.Occur.SHOULD); + + assertEquals(bq1, bq2); + } + + public void testException() { + try { + BooleanQuery.setMaxClauseCount(0); + fail(); + } catch (IllegalArgumentException e) { + // okay + } + } + + // LUCENE-1630 + public void testNullOrSubScorer() throws Throwable { + Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random, dir); + Document doc = new Document(); + doc.add(newField("field", "a b c d", Field.Store.NO, Field.Index.ANALYZED)); + w.addDocument(doc); + + IndexReader r = w.getReader(); + IndexSearcher s = newSearcher(r); + BooleanQuery q = new BooleanQuery(); + q.add(new TermQuery(new Term("field", "a")), BooleanClause.Occur.SHOULD); + + // LUCENE-2617: make sure that a term not in the index still contributes to the score via coord factor + float score = s.search(q, 10).getMaxScore(); + Query subQuery = new TermQuery(new Term("field", "not_in_index")); + subQuery.setBoost(0); + q.add(subQuery, BooleanClause.Occur.SHOULD); + float score2 = s.search(q, 10).getMaxScore(); + assertEquals(score*.5, score2, 1e-6); + + // now test BooleanScorer2 + subQuery = new TermQuery(new Term("field", "b")); + subQuery.setBoost(0); + q.add(subQuery, BooleanClause.Occur.MUST); + score2 = s.search(q, 10).getMaxScore(); + assertEquals(score*(2.0/3), score2, 1e-6); + + // PhraseQuery w/ no terms added returns a null scorer + PhraseQuery pq = new PhraseQuery(); + q.add(pq, BooleanClause.Occur.SHOULD); + assertEquals(1, s.search(q, 10).totalHits); + + // A required clause which returns null scorer should return null scorer to + // IndexSearcher. 
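+ // (an empty PhraseQuery produces no scorer; as a MUST clause that means no document can satisfy the query, hence the expected 0 hits)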
+ q = new BooleanQuery(); + pq = new PhraseQuery(); + q.add(new TermQuery(new Term("field", "a")), BooleanClause.Occur.SHOULD); + q.add(pq, BooleanClause.Occur.MUST); + assertEquals(0, s.search(q, 10).totalHits); + + DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(1.0f); + dmq.add(new TermQuery(new Term("field", "a"))); + dmq.add(pq); + assertEquals(1, s.search(dmq, 10).totalHits); + + s.close(); + r.close(); + w.close(); + dir.close(); + } + + public void testDeMorgan() throws Exception { + Directory dir1 = newDirectory(); + RandomIndexWriter iw1 = new RandomIndexWriter(random, dir1); + Document doc1 = new Document(); + doc1.add(newField("field", "foo bar", Field.Index.ANALYZED)); + iw1.addDocument(doc1); + IndexReader reader1 = iw1.getReader(); + iw1.close(); + + Directory dir2 = newDirectory(); + RandomIndexWriter iw2 = new RandomIndexWriter(random, dir2); + Document doc2 = new Document(); + doc2.add(newField("field", "foo baz", Field.Index.ANALYZED)); + iw2.addDocument(doc2); + IndexReader reader2 = iw2.getReader(); + iw2.close(); + + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(random)); + qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); + + MultiReader multireader = new MultiReader(reader1, reader2); + IndexSearcher searcher = newSearcher(multireader); + assertEquals(0, searcher.search(qp.parse("+foo -ba*"), 10).totalHits); + searcher.close(); + + final ExecutorService es = Executors.newCachedThreadPool(); + searcher = new IndexSearcher(multireader, es); + if (VERBOSE) + System.out.println("rewritten form: " + searcher.rewrite(qp.parse("+foo -ba*"))); + assertEquals(0, searcher.search(qp.parse("+foo -ba*"), 10).totalHits); + es.shutdown(); + es.awaitTermination(1, TimeUnit.SECONDS); + + multireader.close(); + reader1.close(); + reader2.close(); + dir1.close(); + dir2.close(); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestBooleanScorer.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestBooleanScorer.java new file mode 100644 index 0000000..f4ce78d --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestBooleanScorer.java @@ -0,0 +1,93 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.util.Arrays; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.Directory; + +import org.apache.lucene.util.LuceneTestCase; + +public class TestBooleanScorer extends LuceneTestCase +{ + private static final String FIELD = "category"; + + public void testMethod() throws Exception { + Directory directory = newDirectory(); + + String[] values = new String[] { "1", "2", "3", "4" }; + + RandomIndexWriter writer = new RandomIndexWriter(random, directory); + for (int i = 0; i < values.length; i++) { + Document doc = new Document(); + doc.add(newField(FIELD, values[i], Field.Store.YES, Field.Index.NOT_ANALYZED)); + writer.addDocument(doc); + } + IndexReader ir = writer.getReader(); + writer.close(); + + BooleanQuery booleanQuery1 = new BooleanQuery(); + booleanQuery1.add(new TermQuery(new Term(FIELD, "1")), BooleanClause.Occur.SHOULD); + booleanQuery1.add(new TermQuery(new Term(FIELD, "2")), BooleanClause.Occur.SHOULD); + + BooleanQuery query = new BooleanQuery(); + query.add(booleanQuery1, BooleanClause.Occur.MUST); + query.add(new TermQuery(new Term(FIELD, "9")), BooleanClause.Occur.MUST_NOT); + + IndexSearcher indexSearcher = newSearcher(ir); + ScoreDoc[] hits = indexSearcher.search(query, null, 1000).scoreDocs; + assertEquals("Number of matched documents", 2, hits.length); + indexSearcher.close(); + ir.close(); + directory.close(); + } + + public void testEmptyBucketWithMoreDocs() throws Exception { + // This test checks the logic of nextDoc() when all sub scorers have docs + // beyond the first bucket (for example). Currently, the code relies on the + // 'more' variable to work properly, and this test ensures that if the logic + // changes, we have a test to back it up. + + Similarity sim = Similarity.getDefault(); + Scorer[] scorers = new Scorer[] {new Scorer(sim) { + private int doc = -1; + @Override public float score() throws IOException { return 0; } + @Override public int docID() { return doc; } + + @Override public int nextDoc() throws IOException { + return doc = doc == -1 ? 3000 : NO_MORE_DOCS; + } + + @Override public int advance(int target) throws IOException { + return doc = target <= 3000 ? 3000 : NO_MORE_DOCS; + } + + }}; + BooleanScorer bs = new BooleanScorer(null, false, sim, 1, Arrays.asList(scorers), null, scorers.length); + + assertEquals("should have received 3000", 3000, bs.nextDoc()); + assertEquals("should have received NO_MORE_DOCS", DocIdSetIterator.NO_MORE_DOCS, bs.nextDoc()); + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestCachingCollector.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestCachingCollector.java new file mode 100755 index 0000000..66ce1d2 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestCachingCollector.java @@ -0,0 +1,184 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.util.LuceneTestCase; + +import java.io.IOException; + +public class TestCachingCollector extends LuceneTestCase { + + private static final double ONE_BYTE = 1.0 / (1024 * 1024); // 1 byte out of MB + + private static class MockScorer extends Scorer { + + private MockScorer() { + super((Weight) null); + } + + @Override + public float score() throws IOException { return 0; } + + @Override + public int docID() { return 0; } + + @Override + public int nextDoc() throws IOException { return 0; } + + @Override + public int advance(int target) throws IOException { return 0; } + + } + + private static class NoOpCollector extends Collector { + + private final boolean acceptDocsOutOfOrder; + + public NoOpCollector(boolean acceptDocsOutOfOrder) { + this.acceptDocsOutOfOrder = acceptDocsOutOfOrder; + } + + @Override + public void setScorer(Scorer scorer) throws IOException {} + + @Override + public void collect(int doc) throws IOException {} + + @Override + public void setNextReader(IndexReader reader, int docBase) throws IOException {} + + @Override + public boolean acceptsDocsOutOfOrder() { + return acceptDocsOutOfOrder; + } + + } + + public void testBasic() throws Exception { + for (boolean cacheScores : new boolean[] { false, true }) { + CachingCollector cc = CachingCollector.create(new NoOpCollector(false), cacheScores, 1.0); + cc.setScorer(new MockScorer()); + + // collect 1000 docs + for (int i = 0; i < 1000; i++) { + cc.collect(i); + } + + // now replay them + cc.replay(new Collector() { + int prevDocID = -1; + + @Override + public void setScorer(Scorer scorer) throws IOException {} + + @Override + public void setNextReader(IndexReader reader, int docBase) throws IOException {} + + @Override + public void collect(int doc) throws IOException { + assertEquals(prevDocID + 1, doc); + prevDocID = doc; + } + + @Override + public boolean acceptsDocsOutOfOrder() { + return false; + } + }); + } + } + + public void testIllegalStateOnReplay() throws Exception { + CachingCollector cc = CachingCollector.create(new NoOpCollector(false), true, 50 * ONE_BYTE); + cc.setScorer(new MockScorer()); + + // collect 130 docs, this should be enough for triggering cache abort. 
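+ // (the budget above is 50 * ONE_BYTE, i.e. about 50 bytes, and caching scores costs roughly 8 bytes per document: 4 for the doc id plus 4 for the score)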
+ for (int i = 0; i < 130; i++) { + cc.collect(i); + } + + assertFalse("CachingCollector should not be cached due to low memory limit", cc.isCached()); + + try { + cc.replay(new NoOpCollector(false)); + fail("replay should fail if CachingCollector is not cached"); + } catch (IllegalStateException e) { + // expected + } + } + + public void testIllegalCollectorOnReplay() throws Exception { + // tests that the Collector passed to replay() has an out-of-order mode that + // is valid with the Collector passed to the ctor + + // 'src' Collector does not support out-of-order + CachingCollector cc = CachingCollector.create(new NoOpCollector(false), true, 50 * ONE_BYTE); + cc.setScorer(new MockScorer()); + for (int i = 0; i < 10; i++) cc.collect(i); + cc.replay(new NoOpCollector(true)); // this call should not fail + cc.replay(new NoOpCollector(false)); // this call should not fail + + // 'src' Collector supports out-of-order + cc = CachingCollector.create(new NoOpCollector(true), true, 50 * ONE_BYTE); + cc.setScorer(new MockScorer()); + for (int i = 0; i < 10; i++) cc.collect(i); + cc.replay(new NoOpCollector(true)); // this call should not fail + try { + cc.replay(new NoOpCollector(false)); // this call should fail + fail("should have failed if an in-order Collector was given to replay(), " + + "while CachingCollector was initialized with out-of-order collection"); + } catch (IllegalArgumentException e) { + // ok + } + } + + public void testCachedArraysAllocation() throws Exception { + // tests the cached arrays allocation -- if the 'nextLength' was too high, + // caching would terminate even if a smaller length would suffice. + + // set RAM limit enough for 150 docs + random(10000) + int numDocs = random.nextInt(10000) + 150; + for (boolean cacheScores : new boolean[] { false, true }) { + int bytesPerDoc = cacheScores ? 8 : 4; + CachingCollector cc = CachingCollector.create(new NoOpCollector(false), + cacheScores, bytesPerDoc * ONE_BYTE * numDocs); + cc.setScorer(new MockScorer()); + for (int i = 0; i < numDocs; i++) cc.collect(i); + assertTrue(cc.isCached()); + + // The 151's document should terminate caching + cc.collect(numDocs); + assertFalse(cc.isCached()); + } + } + + public void testNoWrappedCollector() throws Exception { + for (boolean cacheScores : new boolean[] { false, true }) { + // create w/ null wrapped collector, and test that the methods work + CachingCollector cc = CachingCollector.create(true, cacheScores, 50 * ONE_BYTE); + cc.setNextReader(null, 0); + cc.setScorer(new MockScorer()); + cc.collect(0); + + assertTrue(cc.isCached()); + cc.replay(new NoOpCollector(true)); + } + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestCachingSpanFilter.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestCachingSpanFilter.java new file mode 100644 index 0000000..edd308a --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestCachingSpanFilter.java @@ -0,0 +1,158 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.SerialMergeScheduler; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.spans.SpanTermQuery; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; + +public class TestCachingSpanFilter extends LuceneTestCase { + + public void testEnforceDeletions() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter( + random, + dir, + newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer(random)). + setMergeScheduler(new SerialMergeScheduler()). + // asserts below requires no unexpected merges: + setMergePolicy(newLogMergePolicy(10)) + ); + + // NOTE: cannot use writer.getReader because RIW (on + // flipping a coin) may give us a newly opened reader, + // but we use .reopen on this reader below and expect to + // (must) get an NRT reader: + IndexReader reader = IndexReader.open(writer.w, true); + // same reason we don't wrap? + IndexSearcher searcher = newSearcher(reader, false); + + // add a doc, refresh the reader, and check that its there + Document doc = new Document(); + doc.add(newField("id", "1", Field.Store.YES, Field.Index.NOT_ANALYZED)); + writer.addDocument(doc); + + reader = refreshReader(reader); + searcher.close(); + searcher = newSearcher(reader, false); + + TopDocs docs = searcher.search(new MatchAllDocsQuery(), 1); + assertEquals("Should find a hit...", 1, docs.totalHits); + + final SpanFilter startFilter = new SpanQueryFilter(new SpanTermQuery(new Term("id", "1"))); + + // ignore deletions + CachingSpanFilter filter = new CachingSpanFilter(startFilter, CachingWrapperFilter.DeletesMode.IGNORE); + + docs = searcher.search(new MatchAllDocsQuery(), filter, 1); + assertEquals("[query + filter] Should find a hit...", 1, docs.totalHits); + ConstantScoreQuery constantScore = new ConstantScoreQuery(filter); + docs = searcher.search(constantScore, 1); + assertEquals("[just filter] Should find a hit...", 1, docs.totalHits); + + // now delete the doc, refresh the reader, and see that + // it's not there + _TestUtil.keepFullyDeletedSegments(writer.w); + writer.deleteDocuments(new Term("id", "1")); + + reader = refreshReader(reader); + searcher.close(); + searcher = newSearcher(reader, false); + + docs = searcher.search(new MatchAllDocsQuery(), filter, 1); + assertEquals("[query + filter] Should *not* find a hit...", 0, docs.totalHits); + + docs = searcher.search(constantScore, 1); + assertEquals("[just filter] Should find a hit...", 1, docs.totalHits); + + + // force cache to regenerate: + filter = new CachingSpanFilter(startFilter, CachingWrapperFilter.DeletesMode.RECACHE); + + writer.addDocument(doc); + reader = refreshReader(reader); + searcher.close(); + searcher = newSearcher(reader, false); + + docs = searcher.search(new 
MatchAllDocsQuery(), filter, 1); + assertEquals("[query + filter] Should find a hit...", 1, docs.totalHits); + + constantScore = new ConstantScoreQuery(filter); + docs = searcher.search(constantScore, 1); + assertEquals("[just filter] Should find a hit...", 1, docs.totalHits); + + // NOTE: important to hold ref here so GC doesn't clear + // the cache entry! Else the assert below may sometimes + // fail: + IndexReader oldReader = reader; + + // make sure we get a cache hit when we reopen readers + // that had no new deletions + reader = refreshReader(reader); + assertTrue(reader != oldReader); + searcher.close(); + searcher = newSearcher(reader, false); + int missCount = filter.missCount; + docs = searcher.search(constantScore, 1); + assertEquals("[just filter] Should find a hit...", 1, docs.totalHits); + assertEquals(missCount, filter.missCount); + + // now delete the doc, refresh the reader, and see that it's not there + writer.deleteDocuments(new Term("id", "1")); + + reader = refreshReader(reader); + searcher.close(); + searcher = newSearcher(reader, false); + + docs = searcher.search(new MatchAllDocsQuery(), filter, 1); + assertEquals("[query + filter] Should *not* find a hit...", 0, docs.totalHits); + + docs = searcher.search(constantScore, 1); + assertEquals("[just filter] Should *not* find a hit...", 0, docs.totalHits); + + // NOTE: silliness to make sure JRE does not optimize + // away our holding onto oldReader to prevent + // CachingWrapperFilter's WeakHashMap from dropping the + // entry: + assertTrue(oldReader != null); + + searcher.close(); + writer.close(); + reader.close(); + dir.close(); + } + + private static IndexReader refreshReader(IndexReader reader) throws IOException { + IndexReader oldReader = reader; + reader = reader.reopen(); + if (reader != oldReader) { + oldReader.close(); + } + return reader; + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestCachingWrapperFilter.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestCachingWrapperFilter.java new file mode 100644 index 0000000..7462959 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestCachingWrapperFilter.java @@ -0,0 +1,308 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.SerialMergeScheduler; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.OpenBitSet; +import org.apache.lucene.util.OpenBitSetDISI; +import org.apache.lucene.util._TestUtil; + +public class TestCachingWrapperFilter extends LuceneTestCase { + + public void testCachingWorks() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, dir); + writer.close(); + + IndexReader reader = IndexReader.open(dir, true); + + MockFilter filter = new MockFilter(); + CachingWrapperFilter cacher = new CachingWrapperFilter(filter); + + // first time, nested filter is called + cacher.getDocIdSet(reader); + assertTrue("first time", filter.wasCalled()); + + // make sure no exception if cache is holding the wrong docIdSet + cacher.getDocIdSet(reader); + + // second time, nested filter should not be called + filter.clear(); + cacher.getDocIdSet(reader); + assertFalse("second time", filter.wasCalled()); + + reader.close(); + dir.close(); + } + + public void testNullDocIdSet() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, dir); + writer.close(); + + IndexReader reader = IndexReader.open(dir, true); + + final Filter filter = new Filter() { + @Override + public DocIdSet getDocIdSet(IndexReader reader) { + return null; + } + }; + CachingWrapperFilter cacher = new CachingWrapperFilter(filter); + + // the caching filter should return the empty set constant + assertSame(DocIdSet.EMPTY_DOCIDSET, cacher.getDocIdSet(reader)); + + reader.close(); + dir.close(); + } + + public void testNullDocIdSetIterator() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, dir); + writer.close(); + + IndexReader reader = IndexReader.open(dir, true); + + final Filter filter = new Filter() { + @Override + public DocIdSet getDocIdSet(IndexReader reader) { + return new DocIdSet() { + @Override + public DocIdSetIterator iterator() { + return null; + } + }; + } + }; + CachingWrapperFilter cacher = new CachingWrapperFilter(filter); + + // the caching filter should return the empty set constant + assertSame(DocIdSet.EMPTY_DOCIDSET, cacher.getDocIdSet(reader)); + + reader.close(); + dir.close(); + } + + private static void assertDocIdSetCacheable(IndexReader reader, Filter filter, boolean shouldCacheable) throws IOException { + final CachingWrapperFilter cacher = new CachingWrapperFilter(filter); + final DocIdSet originalSet = filter.getDocIdSet(reader.getSequentialSubReaders()[0]); + final DocIdSet cachedSet = cacher.getDocIdSet(reader.getSequentialSubReaders()[0]); + assertTrue(cachedSet.isCacheable()); + assertEquals(shouldCacheable, originalSet.isCacheable()); + //System.out.println("Original: "+originalSet.getClass().getName()+" -- cached: "+cachedSet.getClass().getName()); + if (originalSet.isCacheable()) { + assertEquals("Cached DocIdSet must be of same class like uncached, if cacheable", originalSet.getClass(), cachedSet.getClass()); + } else { + assertTrue("Cached DocIdSet must be an OpenBitSet if the original one was not cacheable (got " + cachedSet + ")", 
cachedSet instanceof OpenBitSetDISI || cachedSet == DocIdSet.EMPTY_DOCIDSET); + } + } + + public void testIsCacheAble() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, dir); + writer.addDocument(new Document()); + writer.close(); + + IndexReader reader = IndexReader.open(dir, true); + + // not cacheable: + assertDocIdSetCacheable(reader, new QueryWrapperFilter(new TermQuery(new Term("test","value"))), false); + // returns default empty docidset, always cacheable: + assertDocIdSetCacheable(reader, NumericRangeFilter.newIntRange("test", Integer.valueOf(10000), Integer.valueOf(-10000), true, true), true); + // is cacheable: + assertDocIdSetCacheable(reader, FieldCacheRangeFilter.newIntRange("test", Integer.valueOf(10), Integer.valueOf(20), true, true), true); + // an OpenBitSet filter is always cacheable + assertDocIdSetCacheable(reader, new Filter() { + @Override + public DocIdSet getDocIdSet(IndexReader reader) { + return new OpenBitSet(); + } + }, true); + + reader.close(); + dir.close(); + } + + public void testEnforceDeletions() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter( + random, + dir, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). + setMergeScheduler(new SerialMergeScheduler()). + // asserts below require no unexpected merges: + setMergePolicy(newLogMergePolicy(10)) + ); + + // NOTE: cannot use writer.getReader because RIW (on + // flipping a coin) may give us a newly opened reader, + // but we use .reopen on this reader below and expect to + // (must) get an NRT reader: + IndexReader reader = IndexReader.open(writer.w, true); + // same reason we don't wrap? + IndexSearcher searcher = newSearcher(reader, false); + + // add a doc, refresh the reader, and check that it's there + Document doc = new Document(); + doc.add(newField("id", "1", Field.Store.YES, Field.Index.NOT_ANALYZED)); + writer.addDocument(doc); + + reader = refreshReader(reader); + searcher.close(); + searcher = newSearcher(reader, false); + + TopDocs docs = searcher.search(new MatchAllDocsQuery(), 1); + assertEquals("Should find a hit...", 1, docs.totalHits); + + final Filter startFilter = new QueryWrapperFilter(new TermQuery(new Term("id", "1"))); + + // ignore deletions + CachingWrapperFilter filter = new CachingWrapperFilter(startFilter, CachingWrapperFilter.DeletesMode.IGNORE); + + docs = searcher.search(new MatchAllDocsQuery(), filter, 1); + assertEquals("[query + filter] Should find a hit...", 1, docs.totalHits); + ConstantScoreQuery constantScore = new ConstantScoreQuery(filter); + docs = searcher.search(constantScore, 1); + assertEquals("[just filter] Should find a hit...", 1, docs.totalHits); + + // now delete the doc, refresh the reader, and see that it's not there + _TestUtil.keepFullyDeletedSegments(writer.w); + writer.deleteDocuments(new Term("id", "1")); + + reader = refreshReader(reader); + searcher.close(); + searcher = newSearcher(reader, false); + + docs = searcher.search(new MatchAllDocsQuery(), filter, 1); + assertEquals("[query + filter] Should *not* find a hit...", 0, docs.totalHits); + + docs = searcher.search(constantScore, 1); + assertEquals("[just filter] Should find a hit...", 1, docs.totalHits); + + + // force cache to regenerate: + filter = new CachingWrapperFilter(startFilter, CachingWrapperFilter.DeletesMode.RECACHE); + + writer.addDocument(doc); + + reader = refreshReader(reader); + searcher.close(); + searcher = newSearcher(reader, false); + 
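+ // [editor's note: an illustrative sketch, not part of the original test.
+ // DeletesMode.RECACHE recomputes the cached DocIdSet the first time the
+ // filter is used against the reopened reader, which registers as a miss
+ // on the package-visible missCount counter used later in this test:]
+ int missesBefore = filter.missCount;
+ searcher.search(new MatchAllDocsQuery(), filter, 1);
+ assertTrue("RECACHE should recompute for a reopened reader", filter.missCount > missesBefore);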
+ docs = searcher.search(new MatchAllDocsQuery(), filter, 1); + + assertEquals("[query + filter] Should find a hit...", 1, docs.totalHits); + + constantScore = new ConstantScoreQuery(filter); + docs = searcher.search(constantScore, 1); + assertEquals("[just filter] Should find a hit...", 1, docs.totalHits); + + // NOTE: important to hold ref here so GC doesn't clear + // the cache entry! Else the assert below may sometimes + // fail: + IndexReader oldReader = reader; + + // make sure we get a cache hit when we reopen reader + // that had no change to deletions + reader = refreshReader(reader); + assertTrue(reader != oldReader); + searcher.close(); + searcher = newSearcher(reader, false); + int missCount = filter.missCount; + docs = searcher.search(constantScore, 1); + assertEquals("[just filter] Should find a hit...", 1, docs.totalHits); + assertEquals(missCount, filter.missCount); + + // now delete the doc, refresh the reader, and see that it's not there + writer.deleteDocuments(new Term("id", "1")); + + reader = refreshReader(reader); + searcher.close(); + searcher = newSearcher(reader, false); + + missCount = filter.missCount; + docs = searcher.search(new MatchAllDocsQuery(), filter, 1); + assertEquals(missCount+1, filter.missCount); + assertEquals("[query + filter] Should *not* find a hit...", 0, docs.totalHits); + docs = searcher.search(constantScore, 1); + assertEquals("[just filter] Should *not* find a hit...", 0, docs.totalHits); + + + // apply deletions dynamically + filter = new CachingWrapperFilter(startFilter, CachingWrapperFilter.DeletesMode.DYNAMIC); + + writer.addDocument(doc); + reader = refreshReader(reader); + searcher.close(); + searcher = newSearcher(reader, false); + + docs = searcher.search(new MatchAllDocsQuery(), filter, 1); + assertEquals("[query + filter] Should find a hit...", 1, docs.totalHits); + constantScore = new ConstantScoreQuery(filter); + docs = searcher.search(constantScore, 1); + assertEquals("[just filter] Should find a hit...", 1, docs.totalHits); + + // now delete the doc, refresh the reader, and see that it's not there + writer.deleteDocuments(new Term("id", "1")); + + reader = refreshReader(reader); + searcher.close(); + searcher = newSearcher(reader, false); + + docs = searcher.search(new MatchAllDocsQuery(), filter, 1); + assertEquals("[query + filter] Should *not* find a hit...", 0, docs.totalHits); + + missCount = filter.missCount; + docs = searcher.search(constantScore, 1); + assertEquals("[just filter] Should *not* find a hit...", 0, docs.totalHits); + + // doesn't count as a miss + assertEquals(missCount, filter.missCount); + + // NOTE: silliness to make sure JRE does not optimize + // away our holding onto oldReader to prevent + // CachingWrapperFilter's WeakHashMap from dropping the + // entry: + assertTrue(oldReader != null); + + searcher.close(); + reader.close(); + writer.close(); + dir.close(); + } + + private static IndexReader refreshReader(IndexReader reader) throws IOException { + IndexReader oldReader = reader; + reader = reader.reopen(); + if (reader != oldReader) { + oldReader.close(); + } + return reader; + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestComplexExplanations.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestComplexExplanations.java new file mode 100644 index 0000000..4c4789a --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestComplexExplanations.java @@ -0,0 +1,273 @@ +package 
org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.spans.*; + +/** + * TestExplanations subclass that builds up super crazy complex queries + * on the assumption that if the explanations work out right for them, + * they should work for anything. + */ +public class TestComplexExplanations extends TestExplanations { + + /** + * Override the Similarity used in our searcher with one that plays + * nice with boosts of 0.0 + */ + @Override + public void setUp() throws Exception { + super.setUp(); + searcher.setSimilarity(createQnorm1Similarity()); + } + + @Override + public void tearDown() throws Exception { + searcher.close(); + super.tearDown(); + } + + // must be static for weight serialization tests + private static DefaultSimilarity createQnorm1Similarity() { + return new DefaultSimilarity() { + @Override + public float queryNorm(float sumOfSquaredWeights) { + return 1.0f; // / (float) Math.sqrt(1.0f + sumOfSquaredWeights); + } + }; + } + + + public void test1() throws Exception { + + BooleanQuery q = new BooleanQuery(); + + q.add(qp.parse("\"w1 w2\"~1"), Occur.MUST); + q.add(snear(st("w2"), + sor("w5","zz"), + 4, true), + Occur.SHOULD); + q.add(snear(sf("w3",2), st("w2"), st("w3"), 5, true), + Occur.SHOULD); + + Query t = new FilteredQuery(qp.parse("xx"), + new ItemizedFilter(new int[] {1,3})); + t.setBoost(1000); + q.add(t, Occur.SHOULD); + + t = new ConstantScoreQuery(new ItemizedFilter(new int[] {0,2})); + t.setBoost(30); + q.add(t, Occur.SHOULD); + + DisjunctionMaxQuery dm = new DisjunctionMaxQuery(0.2f); + dm.add(snear(st("w2"), + sor("w5","zz"), + 4, true)); + dm.add(qp.parse("QQ")); + dm.add(qp.parse("xx yy -zz")); + dm.add(qp.parse("-xx -w1")); + + DisjunctionMaxQuery dm2 = new DisjunctionMaxQuery(0.5f); + dm2.add(qp.parse("w1")); + dm2.add(qp.parse("w2")); + dm2.add(qp.parse("w3")); + dm.add(dm2); + + q.add(dm, Occur.SHOULD); + + BooleanQuery b = new BooleanQuery(); + b.setMinimumNumberShouldMatch(2); + b.add(snear("w1","w2",1,true), Occur.SHOULD); + b.add(snear("w2","w3",1,true), Occur.SHOULD); + b.add(snear("w1","w3",3,true), Occur.SHOULD); + + q.add(b, Occur.SHOULD); + + qtest(q, new int[] { 0,1,2 }); + } + + public void test2() throws Exception { + + BooleanQuery q = new BooleanQuery(); + + q.add(qp.parse("\"w1 w2\"~1"), Occur.MUST); + q.add(snear(st("w2"), + sor("w5","zz"), + 4, true), + Occur.SHOULD); + q.add(snear(sf("w3",2), st("w2"), st("w3"), 5, true), + Occur.SHOULD); + + Query t = new FilteredQuery(qp.parse("xx"), + new ItemizedFilter(new int[] {1,3})); + t.setBoost(1000); + q.add(t, Occur.SHOULD); + + t = new ConstantScoreQuery(new ItemizedFilter(new int[] {0,2})); + t.setBoost(-20.0f); + q.add(t, 
Occur.SHOULD); + + DisjunctionMaxQuery dm = new DisjunctionMaxQuery(0.2f); + dm.add(snear(st("w2"), + sor("w5","zz"), + 4, true)); + dm.add(qp.parse("QQ")); + dm.add(qp.parse("xx yy -zz")); + dm.add(qp.parse("-xx -w1")); + + DisjunctionMaxQuery dm2 = new DisjunctionMaxQuery(0.5f); + dm2.add(qp.parse("w1")); + dm2.add(qp.parse("w2")); + dm2.add(qp.parse("w3")); + dm.add(dm2); + + q.add(dm, Occur.SHOULD); + + BooleanQuery b = new BooleanQuery(); + b.setMinimumNumberShouldMatch(2); + b.add(snear("w1","w2",1,true), Occur.SHOULD); + b.add(snear("w2","w3",1,true), Occur.SHOULD); + b.add(snear("w1","w3",3,true), Occur.SHOULD); + b.setBoost(0.0f); + + q.add(b, Occur.SHOULD); + + qtest(q, new int[] { 0,1,2 }); + } + + // :TODO: we really need more crazy complex cases. + + + // ////////////////////////////////////////////////////////////////// + + // The rest of these aren't that complex, but they are somewhat + // complex, and they expose weakness in dealing with queries that match + // with scores of 0 wrapped in other queries + + public void testT3() throws Exception { + bqtest("w1^0.0", new int[] { 0,1,2,3 }); + } + + public void testMA3() throws Exception { + Query q=new MatchAllDocsQuery(); + q.setBoost(0); + bqtest(q, new int[] { 0,1,2,3 }); + } + + public void testFQ5() throws Exception { + bqtest(new FilteredQuery(qp.parse("xx^0"), + new ItemizedFilter(new int[] {1,3})), + new int[] {3}); + } + + public void testCSQ4() throws Exception { + Query q = new ConstantScoreQuery(new ItemizedFilter(new int[] {3})); + q.setBoost(0); + bqtest(q, new int[] {3}); + } + + public void testDMQ10() throws Exception { + DisjunctionMaxQuery q = new DisjunctionMaxQuery(0.5f); + q.add(qp.parse("yy w5^100")); + q.add(qp.parse("xx^0")); + q.setBoost(0.0f); + bqtest(q, new int[] { 0,2,3 }); + } + + public void testMPQ7() throws Exception { + MultiPhraseQuery q = new MultiPhraseQuery(); + q.add(ta(new String[] {"w1"})); + q.add(ta(new String[] {"w2"})); + q.setSlop(1); + q.setBoost(0.0f); + bqtest(q, new int[] { 0,1,2 }); + } + + public void testBQ12() throws Exception { + // NOTE: using qtest not bqtest + qtest("w1 w2^0.0", new int[] { 0,1,2,3 }); + } + public void testBQ13() throws Exception { + // NOTE: using qtest not bqtest + qtest("w1 -w5^0.0", new int[] { 1,2,3 }); + } + public void testBQ18() throws Exception { + // NOTE: using qtest not bqtest + qtest("+w1^0.0 w2", new int[] { 0,1,2,3 }); + } + public void testBQ21() throws Exception { + bqtest("(+w1 w2)^0.0", new int[] { 0,1,2,3 }); + } + public void testBQ22() throws Exception { + bqtest("(+w1^0.0 w2)^0.0", new int[] { 0,1,2,3 }); + } + + public void testST3() throws Exception { + SpanQuery q = st("w1"); + q.setBoost(0); + bqtest(q, new int[] {0,1,2,3}); + } + public void testST6() throws Exception { + SpanQuery q = st("xx"); + q.setBoost(0); + qtest(q, new int[] {2,3}); + } + + public void testSF3() throws Exception { + SpanQuery q = sf(("w1"),1); + q.setBoost(0); + bqtest(q, new int[] {0,1,2,3}); + } + public void testSF7() throws Exception { + SpanQuery q = sf(("xx"),3); + q.setBoost(0); + bqtest(q, new int[] {2,3}); + } + + public void testSNot3() throws Exception { + SpanQuery q = snot(sf("w1",10),st("QQ")); + q.setBoost(0); + bqtest(q, new int[] {0,1,2,3}); + } + public void testSNot6() throws Exception { + SpanQuery q = snot(sf("w1",10),st("xx")); + q.setBoost(0); + bqtest(q, new int[] {0,1,2,3}); + } + + public void testSNot8() throws Exception { + // NOTE: using qtest not bqtest + SpanQuery f = snear("w1","w3",10,true); + f.setBoost(0); + 
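+ // [editor's gloss] snot(include, exclude) - one of the span helpers
+ // inherited from TestExplanations - builds a SpanNotQuery: the query
+ // constructed next keeps spans of the zero-boosted near query unless
+ // they overlap a span of "xx"; qtest is used instead of bqtest so the
+ // zero-boost query is not additionally wrapped in a BooleanQuery.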
SpanQuery q = snot(f, st("xx")); + qtest(q, new int[] {0,1,3}); + } + public void testSNot9() throws Exception { + // NOTE: using qtest not bqtest + SpanQuery t = st("xx"); + t.setBoost(0); + SpanQuery q = snot(snear("w1","w3",10,true), t); + qtest(q, new int[] {0,1,3}); + } + + + + + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestComplexExplanationsOfNonMatches.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestComplexExplanationsOfNonMatches.java new file mode 100644 index 0000000..2ca3324 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestComplexExplanationsOfNonMatches.java @@ -0,0 +1,38 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + + +/** + * subclass of TestSimpleExplanations that verifies non matches. + */ +public class TestComplexExplanationsOfNonMatches + extends TestComplexExplanations { + + /** + * Overrides superclass to ignore matches and focus on non-matches + * + * @see CheckHits#checkNoMatchExplanations + */ + @Override + public void qtest(Query q, int[] expDocNrs) throws Exception { + CheckHits.checkNoMatchExplanations(q, FIELD, searcher, expDocNrs); + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestConstantScoreQuery.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestConstantScoreQuery.java new file mode 100644 index 0000000..3e8341a --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestConstantScoreQuery.java @@ -0,0 +1,132 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; + +import java.io.IOException; + +/** This class only tests some basic functionality in CSQ, the main parts are mostly + * tested by MultiTermQuery tests, explanations seems to be tested in TestExplanations! */ +public class TestConstantScoreQuery extends LuceneTestCase { + + public void testCSQ() throws Exception { + final Query q1 = new ConstantScoreQuery(new TermQuery(new Term("a", "b"))); + final Query q2 = new ConstantScoreQuery(new TermQuery(new Term("a", "c"))); + final Query q3 = new ConstantScoreQuery(new TermRangeFilter("a", "b", "c", true, true)); + QueryUtils.check(q1); + QueryUtils.check(q2); + QueryUtils.checkEqual(q1,q1); + QueryUtils.checkEqual(q2,q2); + QueryUtils.checkEqual(q3,q3); + QueryUtils.checkUnequal(q1,q2); + QueryUtils.checkUnequal(q2,q3); + QueryUtils.checkUnequal(q1,q3); + QueryUtils.checkUnequal(q1, new TermQuery(new Term("a", "b"))); + } + + private void checkHits(Searcher searcher, Query q, final float expectedScore, final String scorerClassName, final String innerScorerClassName) throws IOException { + final int[] count = new int[1]; + searcher.search(q, new Collector() { + private Scorer scorer; + + @Override + public void setScorer(Scorer scorer) { + this.scorer = scorer; + assertEquals("Scorer is implemented by wrong class", scorerClassName, scorer.getClass().getName()); + if (innerScorerClassName != null && scorer instanceof ConstantScoreQuery.ConstantScorer) { + final ConstantScoreQuery.ConstantScorer innerScorer = (ConstantScoreQuery.ConstantScorer) scorer; + assertEquals("inner Scorer is implemented by wrong class", innerScorerClassName, innerScorer.docIdSetIterator.getClass().getName()); + } + } + + @Override + public void collect(int doc) throws IOException { + assertEquals("Score differs from expected", expectedScore, this.scorer.score()); + count[0]++; + } + + @Override + public void setNextReader(IndexReader reader, int docBase) { + } + + @Override + public boolean acceptsDocsOutOfOrder() { + return true; + } + }); + assertEquals("invalid number of results", 1, count[0]); + } + + public void testWrapped2Times() throws Exception { + Directory directory = null; + IndexReader reader = null; + IndexSearcher searcher = null; + try { + directory = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter (random, directory); + + Document doc = new Document(); + doc.add(newField("field", "term", Field.Store.NO, Field.Index.NOT_ANALYZED)); + writer.addDocument(doc); + + reader = writer.getReader(); + writer.close(); + searcher = newSearcher(reader); + + // set a similarity that does not normalize our boost away + searcher.setSimilarity(new DefaultSimilarity() { + @Override + public float queryNorm(float sumOfSquaredWeights) { + return 1.0f; + } + }); + + final Query csq1 = new ConstantScoreQuery(new TermQuery(new Term ("field", "term"))); + csq1.setBoost(2.0f); + final Query csq2 = new ConstantScoreQuery(csq1); + csq2.setBoost(5.0f); + + final BooleanQuery bq = new BooleanQuery(); + bq.add(csq1, BooleanClause.Occur.SHOULD); + bq.add(csq2, BooleanClause.Occur.SHOULD); + + final Query csqbq = new ConstantScoreQuery(bq); + csqbq.setBoost(17.0f); + + checkHits(searcher, csq1, csq1.getBoost(), 
ConstantScoreQuery.ConstantScorer.class.getName(), null); + checkHits(searcher, csq2, csq2.getBoost(), ConstantScoreQuery.ConstantScorer.class.getName(), ConstantScoreQuery.ConstantScorer.class.getName()); + + // for the combined BQ, the scorer should always be BooleanScorer's BucketScorer, because our scorer supports out-of order collection! + final String bucketScorerClass = BooleanScorer.class.getName() + "$BucketScorer"; + checkHits(searcher, bq, csq1.getBoost() + csq2.getBoost(), bucketScorerClass, null); + checkHits(searcher, csqbq, csqbq.getBoost(), ConstantScoreQuery.ConstantScorer.class.getName(), bucketScorerClass); + } finally { + if (searcher != null) searcher.close(); + if (reader != null) reader.close(); + if (directory != null) directory.close(); + } + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestCustomSearcherSort.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestCustomSearcherSort.java new file mode 100644 index 0000000..a9d2f78 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestCustomSearcherSort.java @@ -0,0 +1,263 @@ +package org.apache.lucene.search; + +/** + * Copyright 2005 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.io.Serializable; +import java.util.Calendar; +import java.util.GregorianCalendar; +import java.util.Map; +import java.util.Random; +import java.util.TreeMap; + +import org.apache.lucene.document.DateTools; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; + +/** Unit test for sorting code. */ +public class TestCustomSearcherSort extends LuceneTestCase implements Serializable { + + private Directory index = null; + private IndexReader reader; + private Query query = null; + // reduced from 20000 to 2000 to speed up test... + private final static int INDEX_SIZE = atLeast(2000); + + /** + * Create index and query for test cases. 
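+ * [editor's note] The index is built so that one in five documents lacks
+ * a 'publicationDate_' entry, one in seven matches the 'content:test'
+ * query, and every document carries one of three 'mandant' values that
+ * the CustomSearcher below filters on.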
+ */ + @Override + public void setUp() throws Exception { + super.setUp(); + index = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, index); + RandomGen random = new RandomGen(this.random); + for (int i = 0; i < INDEX_SIZE; ++i) { // don't decrease; if too low, the + // problem doesn't show up + Document doc = new Document(); + if ((i % 5) != 0) { // some documents must not have an entry in the first + // sort field + doc.add(newField("publicationDate_", random.getLuceneDate(), + Field.Store.YES, Field.Index.NOT_ANALYZED)); + } + if ((i % 7) == 0) { // some documents to match the query (see below) + doc.add(newField("content", "test", Field.Store.YES, + Field.Index.ANALYZED)); + } + // every document has a defined 'mandant' field + doc.add(newField("mandant", Integer.toString(i % 3), Field.Store.YES, + Field.Index.NOT_ANALYZED)); + writer.addDocument(doc); + } + reader = writer.getReader(); + writer.close(); + query = new TermQuery(new Term("content", "test")); + } + + @Override + public void tearDown() throws Exception { + reader.close(); + index.close(); + super.tearDown(); + } + + /** + * Run the test using a single CustomSearcher instance. + */ + public void testFieldSortCustomSearcher() throws Exception { + // log("Run testFieldSortCustomSearcher"); + // define the sort criteria + Sort custSort = new Sort( + new SortField("publicationDate_", SortField.STRING), + SortField.FIELD_SCORE); + Searcher searcher = new CustomSearcher(reader, 2); + // search and check hits + matchHits(searcher, custSort); + } + + /** + * Run the test using one CustomSearcher wrapped by a MultiSearcher. + */ + public void testFieldSortSingleSearcher() throws Exception { + // log("Run testFieldSortSingleSearcher"); + // define the sort criteria + Sort custSort = new Sort( + new SortField("publicationDate_", SortField.STRING), + SortField.FIELD_SCORE); + Searcher searcher = new MultiSearcher(new Searcher[] {new CustomSearcher( + reader, 2)}); + // search and check hits + matchHits(searcher, custSort); + } + + /** + * Run the test using two CustomSearcher instances. 
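+ * (one for 'mandant' 0 and one for 'mandant' 2, combined through a
+ * MultiSearcher).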
+ */ + public void testFieldSortMultiCustomSearcher() throws Exception { + // log("Run testFieldSortMultiCustomSearcher"); + // define the sort criteria + Sort custSort = new Sort( + new SortField("publicationDate_", SortField.STRING), + SortField.FIELD_SCORE); + Searcher searcher = new MultiSearcher(new Searchable[] { + new CustomSearcher(reader, 0), new CustomSearcher(reader, 2)}); + // search and check hits + matchHits(searcher, custSort); + } + + // make sure the documents returned by the search match the expected list + private void matchHits(Searcher searcher, Sort sort) throws IOException { + // make a query without sorting first + ScoreDoc[] hitsByRank = searcher.search(query, null, Integer.MAX_VALUE).scoreDocs; + checkHits(hitsByRank, "Sort by rank: "); // check for duplicates + Map resultMap = new TreeMap(); + // store hits in TreeMap - TreeMap does not allow duplicates; existing + // entries are silently overwritten + for (int hitid = 0; hitid < hitsByRank.length; ++hitid) { + resultMap.put(Integer.valueOf(hitsByRank[hitid].doc), // Key: Lucene + // Document ID + Integer.valueOf(hitid)); // Value: index in the hits array + } + + // now make a query using the sort criteria + ScoreDoc[] resultSort = searcher.search(query, null, Integer.MAX_VALUE, + sort).scoreDocs; + checkHits(resultSort, "Sort by custom criteria: "); // check for duplicates + + // aside from the ordering, both sets of hits must be identical + for (int hitid = 0; hitid < resultSort.length; ++hitid) { + Integer idHitDate = Integer.valueOf(resultSort[hitid].doc); // document ID + // from sorted + // search + if (!resultMap.containsKey(idHitDate)) { + log("ID " + idHitDate + " not found. Possibly a duplicate."); + } + assertTrue(resultMap.containsKey(idHitDate)); // same ID must be in the + // Map from the rank-sorted + // search + // every hit must appear once in both result sets --> remove it from the + // Map. + // At the end the Map must be empty! + resultMap.remove(idHitDate); + } + if (resultMap.size() == 0) { + // log("All hits matched"); + } else { + log("Couldn't match " + resultMap.size() + " hits."); + } + assertEquals(resultMap.size(), 0); + } + + /** + * Check the hits for duplicates. 
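+ * [editor's note] A duplicate Lucene document id within one result set
+ * would indicate a broken merge in the custom searcher; duplicates are
+ * only logged here, while matchHits() above performs the assertions.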
+ * + * @param hits the hits to check for duplicate document ids + */ + private void checkHits(ScoreDoc[] hits, String prefix) { + if (hits != null) { + Map idMap = new TreeMap(); + for (int docnum = 0; docnum < hits.length; ++docnum) { + Integer luceneId = null; + + luceneId = Integer.valueOf(hits[docnum].doc); + if (idMap.containsKey(luceneId)) { + StringBuilder message = new StringBuilder(prefix); + message.append("Duplicate key for hit index = "); + message.append(docnum); + message.append(", previous index = "); + message.append((idMap.get(luceneId)).toString()); + message.append(", Lucene ID = "); + message.append(luceneId); + log(message.toString()); + } else { + idMap.put(luceneId, Integer.valueOf(docnum)); + } + } + } + } + + // Simply write to console - chosen to be independent of log4j etc. + private void log(String message) { + if (VERBOSE) System.out.println(message); + } + + public class CustomSearcher extends IndexSearcher { + private int switcher; + + /** + * @param r + */ + public CustomSearcher(IndexReader r, int switcher) { + super(r); + this.switcher = switcher; + } + + /* + * (non-Javadoc) + * + * @see + * org.apache.lucene.search.Searchable#search(org.apache.lucene.search.Query + * , org.apache.lucene.search.Filter, int, org.apache.lucene.search.Sort) + */ + @Override + public TopFieldDocs search(Query query, Filter filter, int nDocs, Sort sort) + throws IOException { + BooleanQuery bq = new BooleanQuery(); + bq.add(query, BooleanClause.Occur.MUST); + bq.add(new TermQuery(new Term("mandant", Integer.toString(switcher))), + BooleanClause.Occur.MUST); + return super.search(bq, filter, nDocs, sort); + } + + /* + * (non-Javadoc) + * + * @see + * org.apache.lucene.search.Searchable#search(org.apache.lucene.search.Query + * , org.apache.lucene.search.Filter, int) + */ + @Override + public TopDocs search(Query query, Filter filter, int nDocs) + throws IOException { + BooleanQuery bq = new BooleanQuery(); + bq.add(query, BooleanClause.Occur.MUST); + bq.add(new TermQuery(new Term("mandant", Integer.toString(switcher))), + BooleanClause.Occur.MUST); + return super.search(bq, filter, nDocs); + } + } + + private class RandomGen { + RandomGen(Random random) { + this.random = random; + } + + private Random random; + private Calendar base = new GregorianCalendar(1980, 1, 1); // note: Calendar months are zero-based, so this is Feb 1, 1980 + + // Just to generate some different Lucene Date strings + private String getLuceneDate() { + return DateTools.timeToString(base.getTimeInMillis() + random.nextInt() + - Integer.MIN_VALUE, DateTools.Resolution.DAY); + } + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestDateFilter.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestDateFilter.java new file mode 100644 index 0000000..021ec0d --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestDateFilter.java @@ -0,0 +1,174 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements.  See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; + +import org.apache.lucene.document.DateTools; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; + +import java.io.IOException; + +/** + * DateFilter JUnit tests. + * + * + * @version $Revision: 1066722 $ + */ +public class TestDateFilter extends LuceneTestCase { + + /** + * + */ + public void testBefore() throws IOException { + // create an index + Directory indexStore = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, indexStore); + + long now = System.currentTimeMillis(); + + Document doc = new Document(); + // add time that is in the past + doc.add(newField("datefield", DateTools.timeToString(now - 1000, + DateTools.Resolution.MILLISECOND), Field.Store.YES, + Field.Index.NOT_ANALYZED)); + doc.add(newField("body", "Today is a very sunny day in New York City", + Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(doc); + + IndexReader reader = writer.getReader(); + writer.close(); + IndexSearcher searcher = newSearcher(reader); + + // filter that should preserve matches + // DateFilter df1 = DateFilter.Before("datefield", now); + TermRangeFilter df1 = new TermRangeFilter("datefield", DateTools + .timeToString(now - 2000, DateTools.Resolution.MILLISECOND), DateTools + .timeToString(now, DateTools.Resolution.MILLISECOND), false, true); + // filter that should discard matches + // DateFilter df2 = DateFilter.Before("datefield", now - 999999); + TermRangeFilter df2 = new TermRangeFilter("datefield", DateTools + .timeToString(0, DateTools.Resolution.MILLISECOND), DateTools + .timeToString(now - 2000, DateTools.Resolution.MILLISECOND), true, + false); + + // search something that doesn't exist with DateFilter + Query query1 = new TermQuery(new Term("body", "NoMatchForThis")); + + // search for something that does exists + Query query2 = new TermQuery(new Term("body", "sunny")); + + ScoreDoc[] result; + + // ensure that queries return expected results without DateFilter first + result = searcher.search(query1, null, 1000).scoreDocs; + assertEquals(0, result.length); + + result = searcher.search(query2, null, 1000).scoreDocs; + assertEquals(1, result.length); + + // run queries with DateFilter + result = searcher.search(query1, df1, 1000).scoreDocs; + assertEquals(0, result.length); + + result = searcher.search(query1, df2, 1000).scoreDocs; + assertEquals(0, result.length); + + result = searcher.search(query2, df1, 1000).scoreDocs; + assertEquals(1, result.length); + + result = searcher.search(query2, df2, 1000).scoreDocs; + assertEquals(0, result.length); + searcher.close(); + reader.close(); + indexStore.close(); + } + + /** + * + */ + public void testAfter() throws IOException { + // create an index + Directory indexStore = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, indexStore); + + long now = System.currentTimeMillis(); + + Document doc = new 
Document(); + // add time that is in the future + doc.add(newField("datefield", DateTools.timeToString(now + 888888, + DateTools.Resolution.MILLISECOND), Field.Store.YES, + Field.Index.NOT_ANALYZED)); + doc.add(newField("body", "Today is a very sunny day in New York City", + Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(doc); + + IndexReader reader = writer.getReader(); + writer.close(); + IndexSearcher searcher = newSearcher(reader); + + // filter that should preserve matches + // DateFilter df1 = DateFilter.After("datefield", now); + TermRangeFilter df1 = new TermRangeFilter("datefield", DateTools + .timeToString(now, DateTools.Resolution.MILLISECOND), DateTools + .timeToString(now + 999999, DateTools.Resolution.MILLISECOND), true, + false); + // filter that should discard matches + // DateFilter df2 = DateFilter.After("datefield", now + 999999); + TermRangeFilter df2 = new TermRangeFilter("datefield", DateTools + .timeToString(now + 999999, DateTools.Resolution.MILLISECOND), + DateTools.timeToString(now + 999999999, + DateTools.Resolution.MILLISECOND), false, true); + + // search something that doesn't exist with DateFilter + Query query1 = new TermQuery(new Term("body", "NoMatchForThis")); + + // search for something that does exists + Query query2 = new TermQuery(new Term("body", "sunny")); + + ScoreDoc[] result; + + // ensure that queries return expected results without DateFilter first + result = searcher.search(query1, null, 1000).scoreDocs; + assertEquals(0, result.length); + + result = searcher.search(query2, null, 1000).scoreDocs; + assertEquals(1, result.length); + + // run queries with DateFilter + result = searcher.search(query1, df1, 1000).scoreDocs; + assertEquals(0, result.length); + + result = searcher.search(query1, df2, 1000).scoreDocs; + assertEquals(0, result.length); + + result = searcher.search(query2, df1, 1000).scoreDocs; + assertEquals(1, result.length); + + result = searcher.search(query2, df2, 1000).scoreDocs; + assertEquals(0, result.length); + searcher.close(); + reader.close(); + indexStore.close(); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestDateSort.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestDateSort.java new file mode 100644 index 0000000..039b388 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestDateSort.java @@ -0,0 +1,125 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.util.Arrays; + +import org.apache.lucene.util.LuceneTestCase; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.document.DateTools; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.store.Directory; + +/** + * Test date sorting, i.e. auto-sorting of fields with type "long". + * See http://issues.apache.org/jira/browse/LUCENE-1045 + */ +public class TestDateSort extends LuceneTestCase { + + private static final String TEXT_FIELD = "text"; + private static final String DATE_TIME_FIELD = "dateTime"; + + private Directory directory; + private IndexReader reader; + + @Override + public void setUp() throws Exception { + super.setUp(); + // Create an index writer. + directory = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, directory, new MockAnalyzer(random)); + + // oldest doc: + // Add the first document. text = "Document 1" dateTime = Oct 10 03:25:22 EDT 2007 + writer.addDocument(createDocument("Document 1", 1192001122000L)); + // Add the second document. text = "Document 2" dateTime = Oct 10 03:25:26 EDT 2007 + writer.addDocument(createDocument("Document 2", 1192001126000L)); + // Add the third document. text = "Document 3" dateTime = Oct 11 07:12:13 EDT 2007 + writer.addDocument(createDocument("Document 3", 1192101133000L)); + // Add the fourth document. text = "Document 4" dateTime = Oct 11 08:02:09 EDT 2007 + writer.addDocument(createDocument("Document 4", 1192104129000L)); + // latest doc: + // Add the fifth document. text = "Document 5" dateTime = Oct 12 13:25:43 EDT 2007 + writer.addDocument(createDocument("Document 5", 1192209943000L)); + + reader = writer.getReader(); + writer.close(); + } + + @Override + public void tearDown() throws Exception { + reader.close(); + directory.close(); + super.tearDown(); + } + + public void testReverseDateSort() throws Exception { + IndexSearcher searcher = newSearcher(reader); + + Sort sort = new Sort(new SortField(DATE_TIME_FIELD, SortField.STRING, true)); + + QueryParser queryParser = new QueryParser(TEST_VERSION_CURRENT, TEXT_FIELD, new MockAnalyzer(random)); + Query query = queryParser.parse("Document"); + + // Execute the search and process the search results. + String[] actualOrder = new String[5]; + ScoreDoc[] hits = searcher.search(query, null, 1000, sort).scoreDocs; + for (int i = 0; i < hits.length; i++) { + Document document = searcher.doc(hits[i].doc); + String text = document.get(TEXT_FIELD); + actualOrder[i] = text; + } + searcher.close(); + + // Set up the expected order (i.e. Document 5, 4, 3, 2, 1). + String[] expectedOrder = new String[5]; + expectedOrder[0] = "Document 5"; + expectedOrder[1] = "Document 4"; + expectedOrder[2] = "Document 3"; + expectedOrder[3] = "Document 2"; + expectedOrder[4] = "Document 1"; + + assertEquals(Arrays.asList(expectedOrder), Arrays.asList(actualOrder)); + } + + private Document createDocument(String text, long time) { + Document document = new Document(); + + // Add the text field. + Field textField = newField(TEXT_FIELD, text, Field.Store.YES, Field.Index.ANALYZED); + document.add(textField); + + // Add the date/time field. 
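+ // DateTools.timeToString with Resolution.SECOND renders the timestamp
+ // as a UTC string of the form yyyyMMddHHmmss, which sorts correctly as
+ // a plain string; this is what lets testReverseDateSort above sort this
+ // field with SortField.STRING.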
+ String dateTimeString = DateTools.timeToString(time, DateTools.Resolution.SECOND); + Field dateTimeField = newField(DATE_TIME_FIELD, dateTimeString, Field.Store.YES, + Field.Index.NOT_ANALYZED); + document.add(dateTimeField); + + return document; + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java new file mode 100644 index 0000000..c7df002 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java @@ -0,0 +1,497 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.FieldInvertState; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.Directory; + +import java.text.DecimalFormat; +import java.io.IOException; + +/** + * Test of the DisjunctionMaxQuery. + * + */ +public class TestDisjunctionMaxQuery extends LuceneTestCase { + + /** threshold for comparing floats */ + public static final float SCORE_COMP_THRESH = 0.0000f; + + /** + * Similarity to eliminate tf, idf and lengthNorm effects to isolate test + * case. + * + *
<p>
+ * same as TestRankingSimilarity in TestRanking.zip from + * http://issues.apache.org/jira/browse/LUCENE-323 + *
</p>
+ */ + private static class TestSimilarity extends DefaultSimilarity { + + public TestSimilarity() {} + + @Override + public float tf(float freq) { + if (freq > 0.0f) return 1.0f; + else return 0.0f; + } + + @Override + public float computeNorm(String fieldName, FieldInvertState state) { + // Disable length norm + return state.getBoost(); + } + + @Override + public float idf(int docFreq, int numDocs) { + return 1.0f; + } + } + + public Similarity sim = new TestSimilarity(); + public Directory index; + public IndexReader r; + public IndexSearcher s; + + @Override + public void setUp() throws Exception { + super.setUp(); + + index = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, index, + newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)) + .setSimilarity(sim).setMergePolicy(newLogMergePolicy())); + + // hed is the most important field, dek is secondary + + // d1 is an "ok" match for: albino elephant + { + Document d1 = new Document(); + d1.add(newField("id", "d1", Field.Store.YES, Field.Index.NOT_ANALYZED));// Field.Keyword("id", + // "d1")); + d1 + .add(newField("hed", "elephant", Field.Store.YES, + Field.Index.ANALYZED));// Field.Text("hed", "elephant")); + d1 + .add(newField("dek", "elephant", Field.Store.YES, + Field.Index.ANALYZED));// Field.Text("dek", "elephant")); + writer.addDocument(d1); + } + + // d2 is a "good" match for: albino elephant + { + Document d2 = new Document(); + d2.add(newField("id", "d2", Field.Store.YES, Field.Index.NOT_ANALYZED));// Field.Keyword("id", + // "d2")); + d2 + .add(newField("hed", "elephant", Field.Store.YES, + Field.Index.ANALYZED));// Field.Text("hed", "elephant")); + d2.add(newField("dek", "albino", Field.Store.YES, Field.Index.ANALYZED));// Field.Text("dek", + // "albino")); + d2 + .add(newField("dek", "elephant", Field.Store.YES, + Field.Index.ANALYZED));// Field.Text("dek", "elephant")); + writer.addDocument(d2); + } + + // d3 is a "better" match for: albino elephant + { + Document d3 = new Document(); + d3.add(newField("id", "d3", Field.Store.YES, Field.Index.NOT_ANALYZED));// Field.Keyword("id", + // "d3")); + d3.add(newField("hed", "albino", Field.Store.YES, Field.Index.ANALYZED));// Field.Text("hed", + // "albino")); + d3 + .add(newField("hed", "elephant", Field.Store.YES, + Field.Index.ANALYZED));// Field.Text("hed", "elephant")); + writer.addDocument(d3); + } + + // d4 is the "best" match for: albino elephant + { + Document d4 = new Document(); + d4.add(newField("id", "d4", Field.Store.YES, Field.Index.NOT_ANALYZED));// Field.Keyword("id", + // "d4")); + d4.add(newField("hed", "albino", Field.Store.YES, Field.Index.ANALYZED));// Field.Text("hed", + // "albino")); + d4 + .add(newField("hed", "elephant", Field.Store.YES, + Field.Index.ANALYZED));// Field.Text("hed", "elephant")); + d4.add(newField("dek", "albino", Field.Store.YES, Field.Index.ANALYZED));// Field.Text("dek", + // "albino")); + writer.addDocument(d4); + } + + writer.optimize(); + r = writer.getReader(); + writer.close(); + s = newSearcher(r); + s.setSimilarity(sim); + } + + @Override + public void tearDown() throws Exception { + s.close(); + r.close(); + index.close(); + super.tearDown(); + } + + public void testSkipToFirsttimeMiss() throws IOException { + final DisjunctionMaxQuery dq = new DisjunctionMaxQuery(0.0f); + dq.add(tq("id", "d1")); + dq.add(tq("dek", "DOES_NOT_EXIST")); + + QueryUtils.check(random, dq, s); + + final Weight dw = s.createNormalizedWeight(dq); + IndexReader sub = s.getIndexReader().getSequentialSubReaders() 
== null ? + s.getIndexReader() : s.getIndexReader().getSequentialSubReaders()[0]; + final Scorer ds = dw.scorer(sub, true, false); + final boolean skipOk = ds.advance(3) != DocIdSetIterator.NO_MORE_DOCS; + if (skipOk) { + fail("firsttime skipTo found a match? ... " + + r.document(ds.docID()).get("id")); + } + } + + public void testSkipToFirsttimeHit() throws IOException { + final DisjunctionMaxQuery dq = new DisjunctionMaxQuery(0.0f); + dq.add(tq("dek", "albino")); + dq.add(tq("dek", "DOES_NOT_EXIST")); + + QueryUtils.check(random, dq, s); + + final Weight dw = s.createNormalizedWeight(dq); + IndexReader sub = s.getIndexReader().getSequentialSubReaders() == null ? + s.getIndexReader() : s.getIndexReader().getSequentialSubReaders()[0]; + final Scorer ds = dw.scorer(sub, true, false); + assertTrue("firsttime skipTo found no match", + ds.advance(3) != DocIdSetIterator.NO_MORE_DOCS); + assertEquals("found wrong docid", "d4", r.document(ds.docID()).get("id")); + } + + public void testSimpleEqualScores1() throws Exception { + + DisjunctionMaxQuery q = new DisjunctionMaxQuery(0.0f); + q.add(tq("hed", "albino")); + q.add(tq("hed", "elephant")); + QueryUtils.check(random, q, s); + + ScoreDoc[] h = s.search(q, null, 1000).scoreDocs; + + try { + assertEquals("all docs should match " + q.toString(), 4, h.length); + + float score = h[0].score; + for (int i = 1; i < h.length; i++) { + assertEquals("score #" + i + " is not the same", score, h[i].score, + SCORE_COMP_THRESH); + } + } catch (Error e) { + printHits("testSimpleEqualScores1", h, s); + throw e; + } + + } + + public void testSimpleEqualScores2() throws Exception { + + DisjunctionMaxQuery q = new DisjunctionMaxQuery(0.0f); + q.add(tq("dek", "albino")); + q.add(tq("dek", "elephant")); + QueryUtils.check(random, q, s); + + ScoreDoc[] h = s.search(q, null, 1000).scoreDocs; + + try { + assertEquals("3 docs should match " + q.toString(), 3, h.length); + float score = h[0].score; + for (int i = 1; i < h.length; i++) { + assertEquals("score #" + i + " is not the same", score, h[i].score, + SCORE_COMP_THRESH); + } + } catch (Error e) { + printHits("testSimpleEqualScores2", h, s); + throw e; + } + + } + + public void testSimpleEqualScores3() throws Exception { + + DisjunctionMaxQuery q = new DisjunctionMaxQuery(0.0f); + q.add(tq("hed", "albino")); + q.add(tq("hed", "elephant")); + q.add(tq("dek", "albino")); + q.add(tq("dek", "elephant")); + QueryUtils.check(random, q, s); + + ScoreDoc[] h = s.search(q, null, 1000).scoreDocs; + + try { + assertEquals("all docs should match " + q.toString(), 4, h.length); + float score = h[0].score; + for (int i = 1; i < h.length; i++) { + assertEquals("score #" + i + " is not the same", score, h[i].score, + SCORE_COMP_THRESH); + } + } catch (Error e) { + printHits("testSimpleEqualScores3", h, s); + throw e; + } + + } + + public void testSimpleTiebreaker() throws Exception { + + DisjunctionMaxQuery q = new DisjunctionMaxQuery(0.01f); + q.add(tq("dek", "albino")); + q.add(tq("dek", "elephant")); + QueryUtils.check(random, q, s); + + ScoreDoc[] h = s.search(q, null, 1000).scoreDocs; + + try { + assertEquals("3 docs should match " + q.toString(), 3, h.length); + assertEquals("wrong first", "d2", s.doc(h[0].doc).get("id")); + float score0 = h[0].score; + float score1 = h[1].score; + float score2 = h[2].score; + assertTrue("d2 does not have better score then others: " + score0 + + " >? 
" + score1, score0 > score1); + assertEquals("d4 and d1 don't have equal scores", score1, score2, + SCORE_COMP_THRESH); + } catch (Error e) { + printHits("testSimpleTiebreaker", h, s); + throw e; + } + } + + public void testBooleanRequiredEqualScores() throws Exception { + + BooleanQuery q = new BooleanQuery(); + { + DisjunctionMaxQuery q1 = new DisjunctionMaxQuery(0.0f); + q1.add(tq("hed", "albino")); + q1.add(tq("dek", "albino")); + q.add(q1, BooleanClause.Occur.MUST);// true,false); + QueryUtils.check(random, q1, s); + + } + { + DisjunctionMaxQuery q2 = new DisjunctionMaxQuery(0.0f); + q2.add(tq("hed", "elephant")); + q2.add(tq("dek", "elephant")); + q.add(q2, BooleanClause.Occur.MUST);// true,false); + QueryUtils.check(random, q2, s); + } + + QueryUtils.check(random, q, s); + + ScoreDoc[] h = s.search(q, null, 1000).scoreDocs; + + try { + assertEquals("3 docs should match " + q.toString(), 3, h.length); + float score = h[0].score; + for (int i = 1; i < h.length; i++) { + assertEquals("score #" + i + " is not the same", score, h[i].score, + SCORE_COMP_THRESH); + } + } catch (Error e) { + printHits("testBooleanRequiredEqualScores1", h, s); + throw e; + } + } + + public void testBooleanOptionalNoTiebreaker() throws Exception { + + BooleanQuery q = new BooleanQuery(); + { + DisjunctionMaxQuery q1 = new DisjunctionMaxQuery(0.0f); + q1.add(tq("hed", "albino")); + q1.add(tq("dek", "albino")); + q.add(q1, BooleanClause.Occur.SHOULD);// false,false); + } + { + DisjunctionMaxQuery q2 = new DisjunctionMaxQuery(0.0f); + q2.add(tq("hed", "elephant")); + q2.add(tq("dek", "elephant")); + q.add(q2, BooleanClause.Occur.SHOULD);// false,false); + } + QueryUtils.check(random, q, s); + + ScoreDoc[] h = s.search(q, null, 1000).scoreDocs; + + try { + assertEquals("4 docs should match " + q.toString(), 4, h.length); + float score = h[0].score; + for (int i = 1; i < h.length - 1; i++) { /* note: -1 */ + assertEquals("score #" + i + " is not the same", score, h[i].score, + SCORE_COMP_THRESH); + } + assertEquals("wrong last", "d1", s.doc(h[h.length - 1].doc).get("id")); + float score1 = h[h.length - 1].score; + assertTrue("d1 does not have worse score then others: " + score + " >? 
" + + score1, score > score1); + } catch (Error e) { + printHits("testBooleanOptionalNoTiebreaker", h, s); + throw e; + } + } + + public void testBooleanOptionalWithTiebreaker() throws Exception { + + BooleanQuery q = new BooleanQuery(); + { + DisjunctionMaxQuery q1 = new DisjunctionMaxQuery(0.01f); + q1.add(tq("hed", "albino")); + q1.add(tq("dek", "albino")); + q.add(q1, BooleanClause.Occur.SHOULD);// false,false); + } + { + DisjunctionMaxQuery q2 = new DisjunctionMaxQuery(0.01f); + q2.add(tq("hed", "elephant")); + q2.add(tq("dek", "elephant")); + q.add(q2, BooleanClause.Occur.SHOULD);// false,false); + } + QueryUtils.check(random, q, s); + + ScoreDoc[] h = s.search(q, null, 1000).scoreDocs; + + try { + + assertEquals("4 docs should match " + q.toString(), 4, h.length); + + float score0 = h[0].score; + float score1 = h[1].score; + float score2 = h[2].score; + float score3 = h[3].score; + + String doc0 = s.doc(h[0].doc).get("id"); + String doc1 = s.doc(h[1].doc).get("id"); + String doc2 = s.doc(h[2].doc).get("id"); + String doc3 = s.doc(h[3].doc).get("id"); + + assertTrue("doc0 should be d2 or d4: " + doc0, doc0.equals("d2") + || doc0.equals("d4")); + assertTrue("doc1 should be d2 or d4: " + doc0, doc1.equals("d2") + || doc1.equals("d4")); + assertEquals("score0 and score1 should match", score0, score1, + SCORE_COMP_THRESH); + assertEquals("wrong third", "d3", doc2); + assertTrue("d3 does not have worse score then d2 and d4: " + score1 + + " >? " + score2, score1 > score2); + + assertEquals("wrong fourth", "d1", doc3); + assertTrue("d1 does not have worse score then d3: " + score2 + " >? " + + score3, score2 > score3); + + } catch (Error e) { + printHits("testBooleanOptionalWithTiebreaker", h, s); + throw e; + } + + } + + public void testBooleanOptionalWithTiebreakerAndBoost() throws Exception { + + BooleanQuery q = new BooleanQuery(); + { + DisjunctionMaxQuery q1 = new DisjunctionMaxQuery(0.01f); + q1.add(tq("hed", "albino", 1.5f)); + q1.add(tq("dek", "albino")); + q.add(q1, BooleanClause.Occur.SHOULD);// false,false); + } + { + DisjunctionMaxQuery q2 = new DisjunctionMaxQuery(0.01f); + q2.add(tq("hed", "elephant", 1.5f)); + q2.add(tq("dek", "elephant")); + q.add(q2, BooleanClause.Occur.SHOULD);// false,false); + } + QueryUtils.check(random, q, s); + + ScoreDoc[] h = s.search(q, null, 1000).scoreDocs; + + try { + + assertEquals("4 docs should match " + q.toString(), 4, h.length); + + float score0 = h[0].score; + float score1 = h[1].score; + float score2 = h[2].score; + float score3 = h[3].score; + + String doc0 = s.doc(h[0].doc).get("id"); + String doc1 = s.doc(h[1].doc).get("id"); + String doc2 = s.doc(h[2].doc).get("id"); + String doc3 = s.doc(h[3].doc).get("id"); + + assertEquals("doc0 should be d4: ", "d4", doc0); + assertEquals("doc1 should be d3: ", "d3", doc1); + assertEquals("doc2 should be d2: ", "d2", doc2); + assertEquals("doc3 should be d1: ", "d1", doc3); + + assertTrue("d4 does not have a better score then d3: " + score0 + " >? " + + score1, score0 > score1); + assertTrue("d3 does not have a better score then d2: " + score1 + " >? " + + score2, score1 > score2); + assertTrue("d3 does not have a better score then d1: " + score2 + " >? 
" + + score3, score2 > score3); + + } catch (Error e) { + printHits("testBooleanOptionalWithTiebreakerAndBoost", h, s); + throw e; + } + } + + /** macro */ + protected Query tq(String f, String t) { + return new TermQuery(new Term(f, t)); + } + + /** macro */ + protected Query tq(String f, String t, float b) { + Query q = tq(f, t); + q.setBoost(b); + return q; + } + + protected void printHits(String test, ScoreDoc[] h, Searcher searcher) + throws Exception { + + System.err.println("------- " + test + " -------"); + + DecimalFormat f = new DecimalFormat("0.000000000"); + + for (int i = 0; i < h.length; i++) { + Document d = searcher.doc(h[i].doc); + float score = h[i].score; + System.err + .println("#" + i + ": " + f.format(score) + " - " + d.get("id")); + } + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestDocBoost.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestDocBoost.java new file mode 100644 index 0000000..d034147 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestDocBoost.java @@ -0,0 +1,100 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.*; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; + +/** Document boost unit test. 
+ * + * + * @version $Revision: 1099728 $ + */ +public class TestDocBoost extends LuceneTestCase { + + public void testDocBoost() throws Exception { + Directory store = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, store, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); + + Fieldable f1 = newField("field", "word", Field.Store.YES, Field.Index.ANALYZED); + Fieldable f2 = newField("field", "word", Field.Store.YES, Field.Index.ANALYZED); + f2.setBoost(2.0f); + + Document d1 = new Document(); + Document d2 = new Document(); + Document d3 = new Document(); + Document d4 = new Document(); + d3.setBoost(3.0f); + d4.setBoost(2.0f); + + d1.add(f1); // boost = 1 + d2.add(f2); // boost = 2 + d3.add(f1); // boost = 3 + d4.add(f2); // boost = 4 + + writer.addDocument(d1); + writer.addDocument(d2); + writer.addDocument(d3); + writer.addDocument(d4); + + IndexReader reader = writer.getReader(); + writer.close(); + + final float[] scores = new float[4]; + + newSearcher(reader).search + (new TermQuery(new Term("field", "word")), + new Collector() { + private int base = 0; + private Scorer scorer; + @Override + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + } + @Override + public final void collect(int doc) throws IOException { + scores[doc + base] = scorer.score(); + } + @Override + public void setNextReader(IndexReader reader, int docBase) { + base = docBase; + } + @Override + public boolean acceptsDocsOutOfOrder() { + return true; + } + }); + + float lastScore = 0.0f; + + for (int i = 0; i < 4; i++) { + assertTrue(scores[i] > lastScore); + lastScore = scores[i]; + } + + reader.close(); + store.close(); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestDocIdSet.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestDocIdSet.java new file mode 100644 index 0000000..0cdf640 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestDocIdSet.java @@ -0,0 +1,128 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; + +import junit.framework.Assert; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field.Index; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; + +public class TestDocIdSet extends LuceneTestCase { + public void testFilteredDocIdSet() throws Exception { + final int maxdoc=10; + final DocIdSet innerSet = new DocIdSet() { + + @Override + public DocIdSetIterator iterator() { + return new DocIdSetIterator() { + + int docid = -1; + + @Override + public int docID() { + return docid; + } + + @Override + public int nextDoc() throws IOException { + docid++; + return docid < maxdoc ? docid : (docid = NO_MORE_DOCS); + } + + @Override + public int advance(int target) throws IOException { + while (nextDoc() < target) {} + return docid; + } + }; + } + }; + + + DocIdSet filteredSet = new FilteredDocIdSet(innerSet){ + @Override + protected boolean match(int docid) { + return docid%2 == 0; //validate only even docids + } + }; + + DocIdSetIterator iter = filteredSet.iterator(); + ArrayList list = new ArrayList(); + int doc = iter.advance(3); + if (doc != DocIdSetIterator.NO_MORE_DOCS) { + list.add(Integer.valueOf(doc)); + while((doc = iter.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + list.add(Integer.valueOf(doc)); + } + } + + int[] docs = new int[list.size()]; + int c=0; + Iterator intIter = list.iterator(); + while(intIter.hasNext()) { + docs[c++] = intIter.next().intValue(); + } + int[] answer = new int[]{4,6,8}; + boolean same = Arrays.equals(answer, docs); + if (!same) { + System.out.println("answer: " + Arrays.toString(answer)); + System.out.println("gotten: " + Arrays.toString(docs)); + fail(); + } + } + + public void testNullDocIdSet() throws Exception { + // Tests that if a Filter produces a null DocIdSet, which is given to + // IndexSearcher, everything works fine. This came up in LUCENE-1754. + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, dir); + Document doc = new Document(); + doc.add(newField("c", "val", Store.NO, Index.NOT_ANALYZED_NO_NORMS)); + writer.addDocument(doc); + IndexReader reader = writer.getReader(); + writer.close(); + + // First verify the document is searchable. + IndexSearcher searcher = newSearcher(reader); + Assert.assertEquals(1, searcher.search(new MatchAllDocsQuery(), 10).totalHits); + + // Now search w/ a Filter which returns a null DocIdSet + Filter f = new Filter() { + @Override + public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + return null; + } + }; + + Assert.assertEquals(0, searcher.search(new MatchAllDocsQuery(), f, 10).totalHits); + searcher.close(); + reader.close(); + dir.close(); + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestElevationComparator.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestElevationComparator.java new file mode 100644 index 0000000..506eec6 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestElevationComparator.java @@ -0,0 +1,183 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
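The FilteredDocIdSet idiom exercised in TestDocIdSet above needs only a
match() override; everything else (iteration order, NO_MORE_DOCS handling)
comes from the wrapped set. A minimal standalone sketch, assuming the same
Lucene 3.x classes and a pre-existing DocIdSet named "inner" (a hypothetical
variable, not part of the test):

    // Keep only even document ids from an arbitrary wrapped DocIdSet.
    static void dumpEvenDocs(DocIdSet inner) throws IOException {
      DocIdSet evenOnly = new FilteredDocIdSet(inner) {
        @Override
        protected boolean match(int docid) {
          return docid % 2 == 0; // the filtered iterator skips all others
        }
      };
      DocIdSetIterator it = evenOnly.iterator();
      int doc;
      while ((doc = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        System.out.println(doc); // ids arrive in increasing order
      }
    }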
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.*; +import org.apache.lucene.search.FieldValueHitQueue.Entry; +import org.apache.lucene.store.*; +import org.apache.lucene.util.LuceneTestCase; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +public class TestElevationComparator extends LuceneTestCase { + + private final Map priority = new HashMap(); + + //@Test + public void testSorting() throws Throwable { + Directory directory = newDirectory(); + IndexWriter writer = new IndexWriter( + directory, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). + setMaxBufferedDocs(2). + setMergePolicy(newLogMergePolicy(1000)) + ); + writer.addDocument(adoc(new String[] {"id", "a", "title", "ipod", "str_s", "a"})); + writer.addDocument(adoc(new String[] {"id", "b", "title", "ipod ipod", "str_s", "b"})); + writer.addDocument(adoc(new String[] {"id", "c", "title", "ipod ipod ipod", "str_s","c"})); + writer.addDocument(adoc(new String[] {"id", "x", "title", "boosted", "str_s", "x"})); + writer.addDocument(adoc(new String[] {"id", "y", "title", "boosted boosted", "str_s","y"})); + writer.addDocument(adoc(new String[] {"id", "z", "title", "boosted boosted boosted","str_s", "z"})); + + IndexReader r = IndexReader.open(writer, true); + writer.close(); + + IndexSearcher searcher = newSearcher(r); + + runTest(searcher, true); + runTest(searcher, false); + + searcher.close(); + r.close(); + directory.close(); + } + + private void runTest(IndexSearcher searcher, boolean reversed) throws Throwable { + + BooleanQuery newq = new BooleanQuery(false); + TermQuery query = new TermQuery(new Term("title", "ipod")); + + newq.add(query, BooleanClause.Occur.SHOULD); + newq.add(getElevatedQuery(new String[] {"id", "a", "id", "x"}), BooleanClause.Occur.SHOULD); + + Sort sort = new Sort( + new SortField("id", new ElevationComparatorSource(priority), false), + new SortField(null, SortField.SCORE, reversed) + ); + + TopDocsCollector topCollector = TopFieldCollector.create(sort, 50, false, true, true, true); + searcher.search(newq, null, topCollector); + + TopDocs topDocs = topCollector.topDocs(0, 10); + int nDocsReturned = topDocs.scoreDocs.length; + + assertEquals(4, nDocsReturned); + + // 0 & 3 were elevated + assertEquals(0, topDocs.scoreDocs[0].doc); + assertEquals(3, topDocs.scoreDocs[1].doc); + + if (reversed) { + assertEquals(2, topDocs.scoreDocs[2].doc); + assertEquals(1, topDocs.scoreDocs[3].doc); + } else { + assertEquals(1, topDocs.scoreDocs[2].doc); + assertEquals(2, topDocs.scoreDocs[3].doc); + } + + /* + for (int i = 0; i < nDocsReturned; i++) { + ScoreDoc scoreDoc = topDocs.scoreDocs[i]; + ids[i] = scoreDoc.doc; + scores[i] = scoreDoc.score; + 
documents[i] = searcher.doc(ids[i]); + System.out.println("ids[i] = " + ids[i]); + System.out.println("documents[i] = " + documents[i]); + System.out.println("scores[i] = " + scores[i]); + } + */ + } + + private Query getElevatedQuery(String[] vals) { + BooleanQuery q = new BooleanQuery(false); + q.setBoost(0); + int max = (vals.length / 2) + 5; + for (int i = 0; i < vals.length - 1; i += 2) { + q.add(new TermQuery(new Term(vals[i], vals[i + 1])), BooleanClause.Occur.SHOULD); + priority.put(vals[i + 1], Integer.valueOf(max--)); + // System.out.println(" pri doc=" + vals[i+1] + " pri=" + (1+max)); + } + return q; + } + + private Document adoc(String[] vals) { + Document doc = new Document(); + for (int i = 0; i < vals.length - 2; i += 2) { + doc.add(newField(vals[i], vals[i + 1], Field.Store.YES, Field.Index.ANALYZED)); + } + return doc; + } +} + +class ElevationComparatorSource extends FieldComparatorSource { + private final Map priority; + + public ElevationComparatorSource(final Map boosts) { + this.priority = boosts; + } + + @Override + public FieldComparator newComparator(final String fieldname, final int numHits, int sortPos, boolean reversed) throws IOException { + return new FieldComparator() { + + FieldCache.StringIndex idIndex; + private final int[] values = new int[numHits]; + int bottomVal; + + @Override + public int compare(int slot1, int slot2) { + return values[slot2] - values[slot1]; // values will be small enough that there is no overflow concern + } + + @Override + public void setBottom(int slot) { + bottomVal = values[slot]; + } + + private int docVal(int doc) throws IOException { + String id = idIndex.lookup[idIndex.order[doc]]; + Integer prio = priority.get(id); + return prio == null ? 0 : prio.intValue(); + } + + @Override + public int compareBottom(int doc) throws IOException { + return docVal(doc) - bottomVal; + } + + @Override + public void copy(int slot, int doc) throws IOException { + values[slot] = docVal(doc); + } + + @Override + public void setNextReader(IndexReader reader, int docBase) throws IOException { + idIndex = FieldCache.DEFAULT.getStringIndex(reader, fieldname); + } + + @Override + public Integer value(int slot) { + return Integer.valueOf(values[slot]); + } + }; + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestExplanations.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestExplanations.java new file mode 100644 index 0000000..94bbb69 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestExplanations.java @@ -0,0 +1,256 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
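Stepping back from TestElevationComparator for a moment: the comparator
source above makes any document whose id is in the priority map sort ahead
of everything else, with ties broken by the secondary SCORE SortField. A
minimal sketch of the wiring, reusing the same classes and assuming a
searcher and query defined elsewhere (both hypothetical here):

    // Elevate ids "a" and "x" ahead of normal score ordering.
    Map priority = new HashMap();
    priority.put("a", Integer.valueOf(2)); // higher value sorts earlier
    priority.put("x", Integer.valueOf(1));
    Sort sort = new Sort(
        new SortField("id", new ElevationComparatorSource(priority), false),
        new SortField(null, SortField.SCORE, false));
    TopDocs top = searcher.search(query, null, 50, sort);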
+ */
+
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.spans.SpanFirstQuery;
+import org.apache.lucene.search.spans.SpanNearQuery;
+import org.apache.lucene.search.spans.SpanNotQuery;
+import org.apache.lucene.search.spans.SpanOrQuery;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+
+/**
+ * Tests primitive queries (i.e. queries that rewrite to themselves) to
+ * ensure they match the expected set of docs, and that the score of each
+ * match is equal to the value of the score's explanation.
+ *
+ * <p>
+ * The assumption is that if all of the "primitive" queries work well,
+ * then anything that rewrites to a primitive will work well also.
+ * </p>
+ * + * @see "Subclasses for actual tests" + */ +public class TestExplanations extends LuceneTestCase { + protected IndexSearcher searcher; + protected IndexReader reader; + protected Directory directory; + + public static final String KEY = "KEY"; + // boost on this field is the same as the iterator for the doc + public static final String FIELD = "field"; + // same contents, but no field boost + public static final String ALTFIELD = "alt"; + public static final QueryParser qp = + new QueryParser(TEST_VERSION_CURRENT, FIELD, new MockAnalyzer(random)); + + @Override + public void tearDown() throws Exception { + searcher.close(); + reader.close(); + directory.close(); + super.tearDown(); + } + + @Override + public void setUp() throws Exception { + super.setUp(); + directory = newDirectory(); + RandomIndexWriter writer= new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); + for (int i = 0; i < docFields.length; i++) { + Document doc = new Document(); + doc.add(newField(KEY, ""+i, Field.Store.NO, Field.Index.NOT_ANALYZED)); + Field f = newField(FIELD, docFields[i], Field.Store.NO, Field.Index.ANALYZED); + f.setBoost(i); + doc.add(f); + doc.add(newField(ALTFIELD, docFields[i], Field.Store.NO, Field.Index.ANALYZED)); + writer.addDocument(doc); + } + reader = writer.getReader(); + writer.close(); + searcher = newSearcher(reader); + } + + protected String[] docFields = { + "w1 w2 w3 w4 w5", + "w1 w3 w2 w3 zz", + "w1 xx w2 yy w3", + "w1 w3 xx w2 yy w3 zz" + }; + + public Query makeQuery(String queryText) throws ParseException { + return qp.parse(queryText); + } + + /** check the expDocNrs first, then check the query (and the explanations) */ + public void qtest(String queryText, int[] expDocNrs) throws Exception { + qtest(makeQuery(queryText), expDocNrs); + } + + /** check the expDocNrs first, then check the query (and the explanations) */ + public void qtest(Query q, int[] expDocNrs) throws Exception { + CheckHits.checkHitCollector(random, q, FIELD, searcher, expDocNrs); + } + + /** + * Tests a query using qtest after wrapping it with both optB and reqB + * @see #qtest + * @see #reqB + * @see #optB + */ + public void bqtest(Query q, int[] expDocNrs) throws Exception { + qtest(reqB(q), expDocNrs); + qtest(optB(q), expDocNrs); + } + /** + * Tests a query using qtest after wrapping it with both optB and reqB + * @see #qtest + * @see #reqB + * @see #optB + */ + public void bqtest(String queryText, int[] expDocNrs) throws Exception { + bqtest(makeQuery(queryText), expDocNrs); + } + + /** + * Convenience subclass of FieldCacheTermsFilter + */ + public static class ItemizedFilter extends FieldCacheTermsFilter { + private static String[] int2str(int [] terms) { + String [] out = new String[terms.length]; + for (int i = 0; i < terms.length; i++) { + out[i] = ""+terms[i]; + } + return out; + } + public ItemizedFilter(String keyField, int [] keys) { + super(keyField, int2str(keys)); + } + public ItemizedFilter(int [] keys) { + super(KEY, int2str(keys)); + } + } + + /** helper for generating MultiPhraseQueries */ + public static Term[] ta(String[] s) { + Term[] t = new Term[s.length]; + for (int i = 0; i < s.length; i++) { + t[i] = new Term(FIELD, s[i]); + } + return t; + } + + /** MACRO for SpanTermQuery */ + public SpanTermQuery st(String s) { + return new SpanTermQuery(new Term(FIELD,s)); + } + + /** MACRO for SpanNotQuery */ + public SpanNotQuery snot(SpanQuery i, SpanQuery e) { + return new SpanNotQuery(i,e); 
+ } + + /** MACRO for SpanOrQuery containing two SpanTerm queries */ + public SpanOrQuery sor(String s, String e) { + return sor(st(s), st(e)); + } + /** MACRO for SpanOrQuery containing two SpanQueries */ + public SpanOrQuery sor(SpanQuery s, SpanQuery e) { + return new SpanOrQuery(new SpanQuery[] { s, e }); + } + + /** MACRO for SpanOrQuery containing three SpanTerm queries */ + public SpanOrQuery sor(String s, String m, String e) { + return sor(st(s), st(m), st(e)); + } + /** MACRO for SpanOrQuery containing two SpanQueries */ + public SpanOrQuery sor(SpanQuery s, SpanQuery m, SpanQuery e) { + return new SpanOrQuery(new SpanQuery[] { s, m, e }); + } + + /** MACRO for SpanNearQuery containing two SpanTerm queries */ + public SpanNearQuery snear(String s, String e, int slop, boolean inOrder) { + return snear(st(s), st(e), slop, inOrder); + } + /** MACRO for SpanNearQuery containing two SpanQueries */ + public SpanNearQuery snear(SpanQuery s, SpanQuery e, + int slop, boolean inOrder) { + return new SpanNearQuery(new SpanQuery[] { s, e }, slop, inOrder); + } + + + /** MACRO for SpanNearQuery containing three SpanTerm queries */ + public SpanNearQuery snear(String s, String m, String e, + int slop, boolean inOrder) { + return snear(st(s), st(m), st(e), slop, inOrder); + } + /** MACRO for SpanNearQuery containing three SpanQueries */ + public SpanNearQuery snear(SpanQuery s, SpanQuery m, SpanQuery e, + int slop, boolean inOrder) { + return new SpanNearQuery(new SpanQuery[] { s, m, e }, slop, inOrder); + } + + /** MACRO for SpanFirst(SpanTermQuery) */ + public SpanFirstQuery sf(String s, int b) { + return new SpanFirstQuery(st(s), b); + } + + /** + * MACRO: Wraps a Query in a BooleanQuery so that it is optional, along + * with a second prohibited clause which will never match anything + */ + public Query optB(String q) throws Exception { + return optB(makeQuery(q)); + } + /** + * MACRO: Wraps a Query in a BooleanQuery so that it is optional, along + * with a second prohibited clause which will never match anything + */ + public Query optB(Query q) throws Exception { + BooleanQuery bq = new BooleanQuery(true); + bq.add(q, BooleanClause.Occur.SHOULD); + bq.add(new TermQuery(new Term("NEVER","MATCH")), BooleanClause.Occur.MUST_NOT); + return bq; + } + + /** + * MACRO: Wraps a Query in a BooleanQuery so that it is required, along + * with a second optional clause which will match everything + */ + public Query reqB(String q) throws Exception { + return reqB(makeQuery(q)); + } + /** + * MACRO: Wraps a Query in a BooleanQuery so that it is required, along + * with a second optional clause which will match everything + */ + public Query reqB(Query q) throws Exception { + BooleanQuery bq = new BooleanQuery(true); + bq.add(q, BooleanClause.Occur.MUST); + bq.add(new TermQuery(new Term(FIELD,"w1")), BooleanClause.Occur.SHOULD); + return bq; + } + + /** + * Placeholder: JUnit freaks if you don't have one test ... 
making + * class abstract doesn't help + */ + public void testNoop() { + /* NOOP */ + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestFieldCache.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestFieldCache.java new file mode 100644 index 0000000..beac90f --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestFieldCache.java @@ -0,0 +1,136 @@ +package org.apache.lucene.search; + +/** + * Copyright 2004 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; +import java.io.IOException; +import java.io.ByteArrayOutputStream; +import java.io.PrintStream; + +public class TestFieldCache extends LuceneTestCase { + protected IndexReader reader; + private static final int NUM_DOCS = atLeast(1000); + private Directory directory; + + @Override + public void setUp() throws Exception { + super.setUp(); + directory = newDirectory(); + RandomIndexWriter writer= new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); + long theLong = Long.MAX_VALUE; + double theDouble = Double.MAX_VALUE; + byte theByte = Byte.MAX_VALUE; + short theShort = Short.MAX_VALUE; + int theInt = Integer.MAX_VALUE; + float theFloat = Float.MAX_VALUE; + for (int i = 0; i < NUM_DOCS; i++){ + Document doc = new Document(); + doc.add(newField("theLong", String.valueOf(theLong--), Field.Store.NO, Field.Index.NOT_ANALYZED)); + doc.add(newField("theDouble", String.valueOf(theDouble--), Field.Store.NO, Field.Index.NOT_ANALYZED)); + doc.add(newField("theByte", String.valueOf(theByte--), Field.Store.NO, Field.Index.NOT_ANALYZED)); + doc.add(newField("theShort", String.valueOf(theShort--), Field.Store.NO, Field.Index.NOT_ANALYZED)); + doc.add(newField("theInt", String.valueOf(theInt--), Field.Store.NO, Field.Index.NOT_ANALYZED)); + doc.add(newField("theFloat", String.valueOf(theFloat--), Field.Store.NO, Field.Index.NOT_ANALYZED)); + writer.addDocument(doc); + } + writer.close(); + reader = IndexReader.open(directory, true); + } + + @Override + public void tearDown() throws Exception { + reader.close(); + directory.close(); + super.tearDown(); + } + + public void testInfoStream() throws Exception { + try { + FieldCache cache = FieldCache.DEFAULT; + ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); + cache.setInfoStream(new PrintStream(bos)); + cache.getDoubles(reader, "theDouble"); + cache.getFloats(reader, "theDouble"); + assertTrue(bos.toString().indexOf("WARNING") != -1); + } finally { + FieldCache.DEFAULT.purgeAllCaches(); + } + } + + public void test() throws IOException { + 
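    // (Hedged note: FieldCache.DEFAULT keys its cached arrays on the
    // reader, the field name and the parser, so asking twice for the same
    // combination is expected to return the identical array instance; that
    // is exactly what the assertSame calls below verify, and the descending
    // values written in setUp are what allow the per-index arithmetic.)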
FieldCache cache = FieldCache.DEFAULT; + double [] doubles = cache.getDoubles(reader, "theDouble"); + assertSame("Second request to cache return same array", doubles, cache.getDoubles(reader, "theDouble")); + assertSame("Second request with explicit parser return same array", doubles, cache.getDoubles(reader, "theDouble", FieldCache.DEFAULT_DOUBLE_PARSER)); + assertTrue("doubles Size: " + doubles.length + " is not: " + NUM_DOCS, doubles.length == NUM_DOCS); + for (int i = 0; i < doubles.length; i++) { + assertTrue(doubles[i] + " does not equal: " + (Double.MAX_VALUE - i), doubles[i] == (Double.MAX_VALUE - i)); + + } + + long [] longs = cache.getLongs(reader, "theLong"); + assertSame("Second request to cache return same array", longs, cache.getLongs(reader, "theLong")); + assertSame("Second request with explicit parser return same array", longs, cache.getLongs(reader, "theLong", FieldCache.DEFAULT_LONG_PARSER)); + assertTrue("longs Size: " + longs.length + " is not: " + NUM_DOCS, longs.length == NUM_DOCS); + for (int i = 0; i < longs.length; i++) { + assertTrue(longs[i] + " does not equal: " + (Long.MAX_VALUE - i), longs[i] == (Long.MAX_VALUE - i)); + + } + + byte [] bytes = cache.getBytes(reader, "theByte"); + assertSame("Second request to cache return same array", bytes, cache.getBytes(reader, "theByte")); + assertSame("Second request with explicit parser return same array", bytes, cache.getBytes(reader, "theByte", FieldCache.DEFAULT_BYTE_PARSER)); + assertTrue("bytes Size: " + bytes.length + " is not: " + NUM_DOCS, bytes.length == NUM_DOCS); + for (int i = 0; i < bytes.length; i++) { + assertTrue(bytes[i] + " does not equal: " + (Byte.MAX_VALUE - i), bytes[i] == (byte) (Byte.MAX_VALUE - i)); + + } + + short [] shorts = cache.getShorts(reader, "theShort"); + assertSame("Second request to cache return same array", shorts, cache.getShorts(reader, "theShort")); + assertSame("Second request with explicit parser return same array", shorts, cache.getShorts(reader, "theShort", FieldCache.DEFAULT_SHORT_PARSER)); + assertTrue("shorts Size: " + shorts.length + " is not: " + NUM_DOCS, shorts.length == NUM_DOCS); + for (int i = 0; i < shorts.length; i++) { + assertTrue(shorts[i] + " does not equal: " + (Short.MAX_VALUE - i), shorts[i] == (short) (Short.MAX_VALUE - i)); + + } + + int [] ints = cache.getInts(reader, "theInt"); + assertSame("Second request to cache return same array", ints, cache.getInts(reader, "theInt")); + assertSame("Second request with explicit parser return same array", ints, cache.getInts(reader, "theInt", FieldCache.DEFAULT_INT_PARSER)); + assertTrue("ints Size: " + ints.length + " is not: " + NUM_DOCS, ints.length == NUM_DOCS); + for (int i = 0; i < ints.length; i++) { + assertTrue(ints[i] + " does not equal: " + (Integer.MAX_VALUE - i), ints[i] == (Integer.MAX_VALUE - i)); + + } + + float [] floats = cache.getFloats(reader, "theFloat"); + assertSame("Second request to cache return same array", floats, cache.getFloats(reader, "theFloat")); + assertSame("Second request with explicit parser return same array", floats, cache.getFloats(reader, "theFloat", FieldCache.DEFAULT_FLOAT_PARSER)); + assertTrue("floats Size: " + floats.length + " is not: " + NUM_DOCS, floats.length == NUM_DOCS); + for (int i = 0; i < floats.length; i++) { + assertTrue(floats[i] + " does not equal: " + (Float.MAX_VALUE - i), floats[i] == (Float.MAX_VALUE - i)); + + } + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java 
b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java new file mode 100644 index 0000000..f192d83 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java @@ -0,0 +1,587 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.store.Directory; +import org.junit.Test; + +/** + * A basic 'positive' Unit test class for the FieldCacheRangeFilter class. + * + *
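 * (A hedged aside on the API under test: each FieldCacheRangeFilter.newXxxRange
 * factory, e.g. newIntRange("id", 10, 20, true, false) for 10 <= id < 20,
 * builds a Filter that answers range checks purely from the FieldCache
 * arrays; the T and F constants threaded through the tests below are just
 * the includeLower/includeUpper booleans inherited from BaseTestRangeFilter.)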
+ * <p>
+ * NOTE: at the moment, this class only tests for 'positive' results, + * it does not verify the results to ensure there are no 'false positives', + * nor does it adequately test 'negative' results. It also does not test + * that garbage in results in an Exception. + */ +public class TestFieldCacheRangeFilter extends BaseTestRangeFilter { + + @Test + public void testRangeFilterId() throws IOException { + + IndexReader reader = signedIndexReader; + IndexSearcher search = newSearcher(reader); + + int medId = ((maxId - minId) / 2); + + String minIP = pad(minId); + String maxIP = pad(maxId); + String medIP = pad(medId); + + int numDocs = reader.numDocs(); + + assertEquals("num of docs", numDocs, 1+ maxId - minId); + + ScoreDoc[] result; + Query q = new TermQuery(new Term("body","body")); + + // test id, bounded on both ends + FieldCacheRangeFilter fcrf; + result = search.search(q,fcrf = FieldCacheRangeFilter.newStringRange("id",minIP,maxIP,T,T), numDocs).scoreDocs; + assertTrue(fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable()); + assertEquals("find all", numDocs, result.length); + + result = search.search(q,FieldCacheRangeFilter.newStringRange("id",minIP,maxIP,T,F), numDocs).scoreDocs; + assertEquals("all but last", numDocs-1, result.length); + + result = search.search(q,FieldCacheRangeFilter.newStringRange("id",minIP,maxIP,F,T), numDocs).scoreDocs; + assertEquals("all but first", numDocs-1, result.length); + + result = search.search(q,FieldCacheRangeFilter.newStringRange("id",minIP,maxIP,F,F), numDocs).scoreDocs; + assertEquals("all but ends", numDocs-2, result.length); + + result = search.search(q,FieldCacheRangeFilter.newStringRange("id",medIP,maxIP,T,T), numDocs).scoreDocs; + assertEquals("med and up", 1+ maxId-medId, result.length); + + result = search.search(q,FieldCacheRangeFilter.newStringRange("id",minIP,medIP,T,T), numDocs).scoreDocs; + assertEquals("up to med", 1+ medId-minId, result.length); + + // unbounded id + + result = search.search(q,FieldCacheRangeFilter.newStringRange("id",null,null,T,T), numDocs).scoreDocs; + assertEquals("find all", numDocs, result.length); + + result = search.search(q,FieldCacheRangeFilter.newStringRange("id",minIP,null,T,F), numDocs).scoreDocs; + assertEquals("min and up", numDocs, result.length); + + result = search.search(q,FieldCacheRangeFilter.newStringRange("id",null,maxIP,F,T), numDocs).scoreDocs; + assertEquals("max and down", numDocs, result.length); + + result = search.search(q,FieldCacheRangeFilter.newStringRange("id",minIP,null,F,F), numDocs).scoreDocs; + assertEquals("not min, but up", numDocs-1, result.length); + + result = search.search(q,FieldCacheRangeFilter.newStringRange("id",null,maxIP,F,F), numDocs).scoreDocs; + assertEquals("not max, but down", numDocs-1, result.length); + + result = search.search(q,FieldCacheRangeFilter.newStringRange("id",medIP,maxIP,T,F), numDocs).scoreDocs; + assertEquals("med and up, not max", maxId-medId, result.length); + + result = search.search(q,FieldCacheRangeFilter.newStringRange("id",minIP,medIP,F,T), numDocs).scoreDocs; + assertEquals("not min, up to med", medId-minId, result.length); + + // very small sets + + result = search.search(q,FieldCacheRangeFilter.newStringRange("id",minIP,minIP,F,F), numDocs).scoreDocs; + assertEquals("min,min,F,F", 0, result.length); + result = search.search(q,FieldCacheRangeFilter.newStringRange("id",medIP,medIP,F,F), numDocs).scoreDocs; + assertEquals("med,med,F,F", 0, result.length); + result = 
search.search(q,FieldCacheRangeFilter.newStringRange("id",maxIP,maxIP,F,F), numDocs).scoreDocs; + assertEquals("max,max,F,F", 0, result.length); + + result = search.search(q,FieldCacheRangeFilter.newStringRange("id",minIP,minIP,T,T), numDocs).scoreDocs; + assertEquals("min,min,T,T", 1, result.length); + result = search.search(q,FieldCacheRangeFilter.newStringRange("id",null,minIP,F,T), numDocs).scoreDocs; + assertEquals("nul,min,F,T", 1, result.length); + + result = search.search(q,FieldCacheRangeFilter.newStringRange("id",maxIP,maxIP,T,T), numDocs).scoreDocs; + assertEquals("max,max,T,T", 1, result.length); + result = search.search(q,FieldCacheRangeFilter.newStringRange("id",maxIP,null,T,F), numDocs).scoreDocs; + assertEquals("max,nul,T,T", 1, result.length); + + result = search.search(q,FieldCacheRangeFilter.newStringRange("id",medIP,medIP,T,T), numDocs).scoreDocs; + assertEquals("med,med,T,T", 1, result.length); + search.close(); + } + + @Test + public void testFieldCacheRangeFilterRand() throws IOException { + + IndexReader reader = signedIndexReader; + IndexSearcher search = newSearcher(reader); + + String minRP = pad(signedIndexDir.minR); + String maxRP = pad(signedIndexDir.maxR); + + int numDocs = reader.numDocs(); + + assertEquals("num of docs", numDocs, 1+ maxId - minId); + + ScoreDoc[] result; + Query q = new TermQuery(new Term("body","body")); + + // test extremes, bounded on both ends + + result = search.search(q,FieldCacheRangeFilter.newStringRange("rand",minRP,maxRP,T,T), numDocs).scoreDocs; + assertEquals("find all", numDocs, result.length); + + result = search.search(q,FieldCacheRangeFilter.newStringRange("rand",minRP,maxRP,T,F), numDocs).scoreDocs; + assertEquals("all but biggest", numDocs-1, result.length); + + result = search.search(q,FieldCacheRangeFilter.newStringRange("rand",minRP,maxRP,F,T), numDocs).scoreDocs; + assertEquals("all but smallest", numDocs-1, result.length); + + result = search.search(q,FieldCacheRangeFilter.newStringRange("rand",minRP,maxRP,F,F), numDocs).scoreDocs; + assertEquals("all but extremes", numDocs-2, result.length); + + // unbounded + + result = search.search(q,FieldCacheRangeFilter.newStringRange("rand",minRP,null,T,F), numDocs).scoreDocs; + assertEquals("smallest and up", numDocs, result.length); + + result = search.search(q,FieldCacheRangeFilter.newStringRange("rand",null,maxRP,F,T), numDocs).scoreDocs; + assertEquals("biggest and down", numDocs, result.length); + + result = search.search(q,FieldCacheRangeFilter.newStringRange("rand",minRP,null,F,F), numDocs).scoreDocs; + assertEquals("not smallest, but up", numDocs-1, result.length); + + result = search.search(q,FieldCacheRangeFilter.newStringRange("rand",null,maxRP,F,F), numDocs).scoreDocs; + assertEquals("not biggest, but down", numDocs-1, result.length); + + // very small sets + + result = search.search(q,FieldCacheRangeFilter.newStringRange("rand",minRP,minRP,F,F), numDocs).scoreDocs; + assertEquals("min,min,F,F", 0, result.length); + result = search.search(q,FieldCacheRangeFilter.newStringRange("rand",maxRP,maxRP,F,F), numDocs).scoreDocs; + assertEquals("max,max,F,F", 0, result.length); + + result = search.search(q,FieldCacheRangeFilter.newStringRange("rand",minRP,minRP,T,T), numDocs).scoreDocs; + assertEquals("min,min,T,T", 1, result.length); + result = search.search(q,FieldCacheRangeFilter.newStringRange("rand",null,minRP,F,T), numDocs).scoreDocs; + assertEquals("nul,min,F,T", 1, result.length); + + result = 
search.search(q,FieldCacheRangeFilter.newStringRange("rand",maxRP,maxRP,T,T), numDocs).scoreDocs; + assertEquals("max,max,T,T", 1, result.length); + result = search.search(q,FieldCacheRangeFilter.newStringRange("rand",maxRP,null,T,F), numDocs).scoreDocs; + assertEquals("max,nul,T,T", 1, result.length); + search.close(); + } + + // byte-ranges cannot be tested, because all ranges are too big for bytes, need an extra range for that + + @Test + public void testFieldCacheRangeFilterShorts() throws IOException { + + IndexReader reader = signedIndexReader; + IndexSearcher search = newSearcher(reader); + + int numDocs = reader.numDocs(); + int medId = ((maxId - minId) / 2); + Short minIdO = Short.valueOf((short) minId); + Short maxIdO = Short.valueOf((short) maxId); + Short medIdO = Short.valueOf((short) medId); + + assertEquals("num of docs", numDocs, 1+ maxId - minId); + + ScoreDoc[] result; + Query q = new TermQuery(new Term("body","body")); + + // test id, bounded on both ends + FieldCacheRangeFilter fcrf; + result = search.search(q,fcrf=FieldCacheRangeFilter.newShortRange("id",minIdO,maxIdO,T,T), numDocs).scoreDocs; + assertTrue(fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable()); + assertEquals("find all", numDocs, result.length); + + result = search.search(q,FieldCacheRangeFilter.newShortRange("id",minIdO,maxIdO,T,F), numDocs).scoreDocs; + assertEquals("all but last", numDocs-1, result.length); + + result = search.search(q,FieldCacheRangeFilter.newShortRange("id",minIdO,maxIdO,F,T), numDocs).scoreDocs; + assertEquals("all but first", numDocs-1, result.length); + + result = search.search(q,FieldCacheRangeFilter.newShortRange("id",minIdO,maxIdO,F,F), numDocs).scoreDocs; + assertEquals("all but ends", numDocs-2, result.length); + + result = search.search(q,FieldCacheRangeFilter.newShortRange("id",medIdO,maxIdO,T,T), numDocs).scoreDocs; + assertEquals("med and up", 1+ maxId-medId, result.length); + + result = search.search(q,FieldCacheRangeFilter.newShortRange("id",minIdO,medIdO,T,T), numDocs).scoreDocs; + assertEquals("up to med", 1+ medId-minId, result.length); + + // unbounded id + + result = search.search(q,FieldCacheRangeFilter.newShortRange("id",null,null,T,T), numDocs).scoreDocs; + assertEquals("find all", numDocs, result.length); + + result = search.search(q,FieldCacheRangeFilter.newShortRange("id",minIdO,null,T,F), numDocs).scoreDocs; + assertEquals("min and up", numDocs, result.length); + + result = search.search(q,FieldCacheRangeFilter.newShortRange("id",null,maxIdO,F,T), numDocs).scoreDocs; + assertEquals("max and down", numDocs, result.length); + + result = search.search(q,FieldCacheRangeFilter.newShortRange("id",minIdO,null,F,F), numDocs).scoreDocs; + assertEquals("not min, but up", numDocs-1, result.length); + + result = search.search(q,FieldCacheRangeFilter.newShortRange("id",null,maxIdO,F,F), numDocs).scoreDocs; + assertEquals("not max, but down", numDocs-1, result.length); + + result = search.search(q,FieldCacheRangeFilter.newShortRange("id",medIdO,maxIdO,T,F), numDocs).scoreDocs; + assertEquals("med and up, not max", maxId-medId, result.length); + + result = search.search(q,FieldCacheRangeFilter.newShortRange("id",minIdO,medIdO,F,T), numDocs).scoreDocs; + assertEquals("not min, up to med", medId-minId, result.length); + + // very small sets + + result = search.search(q,FieldCacheRangeFilter.newShortRange("id",minIdO,minIdO,F,F), numDocs).scoreDocs; + assertEquals("min,min,F,F", 0, result.length); + result = 
search.search(q,FieldCacheRangeFilter.newShortRange("id",medIdO,medIdO,F,F), numDocs).scoreDocs; + assertEquals("med,med,F,F", 0, result.length); + result = search.search(q,FieldCacheRangeFilter.newShortRange("id",maxIdO,maxIdO,F,F), numDocs).scoreDocs; + assertEquals("max,max,F,F", 0, result.length); + + result = search.search(q,FieldCacheRangeFilter.newShortRange("id",minIdO,minIdO,T,T), numDocs).scoreDocs; + assertEquals("min,min,T,T", 1, result.length); + result = search.search(q,FieldCacheRangeFilter.newShortRange("id",null,minIdO,F,T), numDocs).scoreDocs; + assertEquals("nul,min,F,T", 1, result.length); + + result = search.search(q,FieldCacheRangeFilter.newShortRange("id",maxIdO,maxIdO,T,T), numDocs).scoreDocs; + assertEquals("max,max,T,T", 1, result.length); + result = search.search(q,FieldCacheRangeFilter.newShortRange("id",maxIdO,null,T,F), numDocs).scoreDocs; + assertEquals("max,nul,T,T", 1, result.length); + + result = search.search(q,FieldCacheRangeFilter.newShortRange("id",medIdO,medIdO,T,T), numDocs).scoreDocs; + assertEquals("med,med,T,T", 1, result.length); + + // special cases + result = search.search(q,FieldCacheRangeFilter.newShortRange("id",Short.valueOf(Short.MAX_VALUE),null,F,F), numDocs).scoreDocs; + assertEquals("overflow special case", 0, result.length); + result = search.search(q,FieldCacheRangeFilter.newShortRange("id",null,Short.valueOf(Short.MIN_VALUE),F,F), numDocs).scoreDocs; + assertEquals("overflow special case", 0, result.length); + result = search.search(q,FieldCacheRangeFilter.newShortRange("id",maxIdO,minIdO,T,T), numDocs).scoreDocs; + assertEquals("inverse range", 0, result.length); + search.close(); + } + + @Test + public void testFieldCacheRangeFilterInts() throws IOException { + + IndexReader reader = signedIndexReader; + IndexSearcher search = newSearcher(reader); + + int numDocs = reader.numDocs(); + int medId = ((maxId - minId) / 2); + Integer minIdO = Integer.valueOf(minId); + Integer maxIdO = Integer.valueOf(maxId); + Integer medIdO = Integer.valueOf(medId); + + assertEquals("num of docs", numDocs, 1+ maxId - minId); + + ScoreDoc[] result; + Query q = new TermQuery(new Term("body","body")); + + // test id, bounded on both ends + + FieldCacheRangeFilter fcrf; + result = search.search(q,fcrf=FieldCacheRangeFilter.newIntRange("id",minIdO,maxIdO,T,T), numDocs).scoreDocs; + assertTrue(fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable()); + assertEquals("find all", numDocs, result.length); + + result = search.search(q,FieldCacheRangeFilter.newIntRange("id",minIdO,maxIdO,T,F), numDocs).scoreDocs; + assertEquals("all but last", numDocs-1, result.length); + + result = search.search(q,FieldCacheRangeFilter.newIntRange("id",minIdO,maxIdO,F,T), numDocs).scoreDocs; + assertEquals("all but first", numDocs-1, result.length); + + result = search.search(q,FieldCacheRangeFilter.newIntRange("id",minIdO,maxIdO,F,F), numDocs).scoreDocs; + assertEquals("all but ends", numDocs-2, result.length); + + result = search.search(q,FieldCacheRangeFilter.newIntRange("id",medIdO,maxIdO,T,T), numDocs).scoreDocs; + assertEquals("med and up", 1+ maxId-medId, result.length); + + result = search.search(q,FieldCacheRangeFilter.newIntRange("id",minIdO,medIdO,T,T), numDocs).scoreDocs; + assertEquals("up to med", 1+ medId-minId, result.length); + + // unbounded id + + result = search.search(q,FieldCacheRangeFilter.newIntRange("id",null,null,T,T), numDocs).scoreDocs; + assertEquals("find all", numDocs, result.length); + + result = 
search.search(q,FieldCacheRangeFilter.newIntRange("id",minIdO,null,T,F), numDocs).scoreDocs; + assertEquals("min and up", numDocs, result.length); + + result = search.search(q,FieldCacheRangeFilter.newIntRange("id",null,maxIdO,F,T), numDocs).scoreDocs; + assertEquals("max and down", numDocs, result.length); + + result = search.search(q,FieldCacheRangeFilter.newIntRange("id",minIdO,null,F,F), numDocs).scoreDocs; + assertEquals("not min, but up", numDocs-1, result.length); + + result = search.search(q,FieldCacheRangeFilter.newIntRange("id",null,maxIdO,F,F), numDocs).scoreDocs; + assertEquals("not max, but down", numDocs-1, result.length); + + result = search.search(q,FieldCacheRangeFilter.newIntRange("id",medIdO,maxIdO,T,F), numDocs).scoreDocs; + assertEquals("med and up, not max", maxId-medId, result.length); + + result = search.search(q,FieldCacheRangeFilter.newIntRange("id",minIdO,medIdO,F,T), numDocs).scoreDocs; + assertEquals("not min, up to med", medId-minId, result.length); + + // very small sets + + result = search.search(q,FieldCacheRangeFilter.newIntRange("id",minIdO,minIdO,F,F), numDocs).scoreDocs; + assertEquals("min,min,F,F", 0, result.length); + result = search.search(q,FieldCacheRangeFilter.newIntRange("id",medIdO,medIdO,F,F), numDocs).scoreDocs; + assertEquals("med,med,F,F", 0, result.length); + result = search.search(q,FieldCacheRangeFilter.newIntRange("id",maxIdO,maxIdO,F,F), numDocs).scoreDocs; + assertEquals("max,max,F,F", 0, result.length); + + result = search.search(q,FieldCacheRangeFilter.newIntRange("id",minIdO,minIdO,T,T), numDocs).scoreDocs; + assertEquals("min,min,T,T", 1, result.length); + result = search.search(q,FieldCacheRangeFilter.newIntRange("id",null,minIdO,F,T), numDocs).scoreDocs; + assertEquals("nul,min,F,T", 1, result.length); + + result = search.search(q,FieldCacheRangeFilter.newIntRange("id",maxIdO,maxIdO,T,T), numDocs).scoreDocs; + assertEquals("max,max,T,T", 1, result.length); + result = search.search(q,FieldCacheRangeFilter.newIntRange("id",maxIdO,null,T,F), numDocs).scoreDocs; + assertEquals("max,nul,T,T", 1, result.length); + + result = search.search(q,FieldCacheRangeFilter.newIntRange("id",medIdO,medIdO,T,T), numDocs).scoreDocs; + assertEquals("med,med,T,T", 1, result.length); + + // special cases + result = search.search(q,FieldCacheRangeFilter.newIntRange("id",Integer.valueOf(Integer.MAX_VALUE),null,F,F), numDocs).scoreDocs; + assertEquals("overflow special case", 0, result.length); + result = search.search(q,FieldCacheRangeFilter.newIntRange("id",null,Integer.valueOf(Integer.MIN_VALUE),F,F), numDocs).scoreDocs; + assertEquals("overflow special case", 0, result.length); + result = search.search(q,FieldCacheRangeFilter.newIntRange("id",maxIdO,minIdO,T,T), numDocs).scoreDocs; + assertEquals("inverse range", 0, result.length); + search.close(); + } + + @Test + public void testFieldCacheRangeFilterLongs() throws IOException { + + IndexReader reader = signedIndexReader; + IndexSearcher search = newSearcher(reader); + + int numDocs = reader.numDocs(); + int medId = ((maxId - minId) / 2); + Long minIdO = Long.valueOf(minId); + Long maxIdO = Long.valueOf(maxId); + Long medIdO = Long.valueOf(medId); + + assertEquals("num of docs", numDocs, 1+ maxId - minId); + + ScoreDoc[] result; + Query q = new TermQuery(new Term("body","body")); + + // test id, bounded on both ends + + FieldCacheRangeFilter fcrf; + result = search.search(q,fcrf=FieldCacheRangeFilter.newLongRange("id",minIdO,maxIdO,T,T), numDocs).scoreDocs; + 
assertTrue(fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable()); + assertEquals("find all", numDocs, result.length); + + result = search.search(q,FieldCacheRangeFilter.newLongRange("id",minIdO,maxIdO,T,F), numDocs).scoreDocs; + assertEquals("all but last", numDocs-1, result.length); + + result = search.search(q,FieldCacheRangeFilter.newLongRange("id",minIdO,maxIdO,F,T), numDocs).scoreDocs; + assertEquals("all but first", numDocs-1, result.length); + + result = search.search(q,FieldCacheRangeFilter.newLongRange("id",minIdO,maxIdO,F,F), numDocs).scoreDocs; + assertEquals("all but ends", numDocs-2, result.length); + + result = search.search(q,FieldCacheRangeFilter.newLongRange("id",medIdO,maxIdO,T,T), numDocs).scoreDocs; + assertEquals("med and up", 1+ maxId-medId, result.length); + + result = search.search(q,FieldCacheRangeFilter.newLongRange("id",minIdO,medIdO,T,T), numDocs).scoreDocs; + assertEquals("up to med", 1+ medId-minId, result.length); + + // unbounded id + + result = search.search(q,FieldCacheRangeFilter.newLongRange("id",null,null,T,T), numDocs).scoreDocs; + assertEquals("find all", numDocs, result.length); + + result = search.search(q,FieldCacheRangeFilter.newLongRange("id",minIdO,null,T,F), numDocs).scoreDocs; + assertEquals("min and up", numDocs, result.length); + + result = search.search(q,FieldCacheRangeFilter.newLongRange("id",null,maxIdO,F,T), numDocs).scoreDocs; + assertEquals("max and down", numDocs, result.length); + + result = search.search(q,FieldCacheRangeFilter.newLongRange("id",minIdO,null,F,F), numDocs).scoreDocs; + assertEquals("not min, but up", numDocs-1, result.length); + + result = search.search(q,FieldCacheRangeFilter.newLongRange("id",null,maxIdO,F,F), numDocs).scoreDocs; + assertEquals("not max, but down", numDocs-1, result.length); + + result = search.search(q,FieldCacheRangeFilter.newLongRange("id",medIdO,maxIdO,T,F), numDocs).scoreDocs; + assertEquals("med and up, not max", maxId-medId, result.length); + + result = search.search(q,FieldCacheRangeFilter.newLongRange("id",minIdO,medIdO,F,T), numDocs).scoreDocs; + assertEquals("not min, up to med", medId-minId, result.length); + + // very small sets + + result = search.search(q,FieldCacheRangeFilter.newLongRange("id",minIdO,minIdO,F,F), numDocs).scoreDocs; + assertEquals("min,min,F,F", 0, result.length); + result = search.search(q,FieldCacheRangeFilter.newLongRange("id",medIdO,medIdO,F,F), numDocs).scoreDocs; + assertEquals("med,med,F,F", 0, result.length); + result = search.search(q,FieldCacheRangeFilter.newLongRange("id",maxIdO,maxIdO,F,F), numDocs).scoreDocs; + assertEquals("max,max,F,F", 0, result.length); + + result = search.search(q,FieldCacheRangeFilter.newLongRange("id",minIdO,minIdO,T,T), numDocs).scoreDocs; + assertEquals("min,min,T,T", 1, result.length); + result = search.search(q,FieldCacheRangeFilter.newLongRange("id",null,minIdO,F,T), numDocs).scoreDocs; + assertEquals("nul,min,F,T", 1, result.length); + + result = search.search(q,FieldCacheRangeFilter.newLongRange("id",maxIdO,maxIdO,T,T), numDocs).scoreDocs; + assertEquals("max,max,T,T", 1, result.length); + result = search.search(q,FieldCacheRangeFilter.newLongRange("id",maxIdO,null,T,F), numDocs).scoreDocs; + assertEquals("max,nul,T,T", 1, result.length); + + result = search.search(q,FieldCacheRangeFilter.newLongRange("id",medIdO,medIdO,T,T), numDocs).scoreDocs; + assertEquals("med,med,T,T", 1, result.length); + + // special cases + result = 
search.search(q,FieldCacheRangeFilter.newLongRange("id",Long.valueOf(Long.MAX_VALUE),null,F,F), numDocs).scoreDocs; + assertEquals("overflow special case", 0, result.length); + result = search.search(q,FieldCacheRangeFilter.newLongRange("id",null,Long.valueOf(Long.MIN_VALUE),F,F), numDocs).scoreDocs; + assertEquals("overflow special case", 0, result.length); + result = search.search(q,FieldCacheRangeFilter.newLongRange("id",maxIdO,minIdO,T,T), numDocs).scoreDocs; + assertEquals("inverse range", 0, result.length); + search.close(); + } + + // float and double tests are a bit minimalistic, but its complicated, because missing precision + + @Test + public void testFieldCacheRangeFilterFloats() throws IOException { + + IndexReader reader = signedIndexReader; + IndexSearcher search = newSearcher(reader); + + int numDocs = reader.numDocs(); + Float minIdO = Float.valueOf(minId + .5f); + Float medIdO = Float.valueOf(minIdO.floatValue() + ((maxId-minId))/2.0f); + + ScoreDoc[] result; + Query q = new TermQuery(new Term("body","body")); + + result = search.search(q,FieldCacheRangeFilter.newFloatRange("id",minIdO,medIdO,T,T), numDocs).scoreDocs; + assertEquals("find all", numDocs/2, result.length); + int count = 0; + result = search.search(q,FieldCacheRangeFilter.newFloatRange("id",null,medIdO,F,T), numDocs).scoreDocs; + count += result.length; + result = search.search(q,FieldCacheRangeFilter.newFloatRange("id",medIdO,null,F,F), numDocs).scoreDocs; + count += result.length; + assertEquals("sum of two concenatted ranges", numDocs, count); + result = search.search(q,FieldCacheRangeFilter.newFloatRange("id",null,null,T,T), numDocs).scoreDocs; + assertEquals("find all", numDocs, result.length); + result = search.search(q,FieldCacheRangeFilter.newFloatRange("id",Float.valueOf(Float.POSITIVE_INFINITY),null,F,F), numDocs).scoreDocs; + assertEquals("infinity special case", 0, result.length); + result = search.search(q,FieldCacheRangeFilter.newFloatRange("id",null,Float.valueOf(Float.NEGATIVE_INFINITY),F,F), numDocs).scoreDocs; + assertEquals("infinity special case", 0, result.length); + search.close(); + } + + @Test + public void testFieldCacheRangeFilterDoubles() throws IOException { + + IndexReader reader = signedIndexReader; + IndexSearcher search = newSearcher(reader); + + int numDocs = reader.numDocs(); + Double minIdO = Double.valueOf(minId + .5); + Double medIdO = Double.valueOf(minIdO.floatValue() + ((maxId-minId))/2.0); + + ScoreDoc[] result; + Query q = new TermQuery(new Term("body","body")); + + result = search.search(q,FieldCacheRangeFilter.newDoubleRange("id",minIdO,medIdO,T,T), numDocs).scoreDocs; + assertEquals("find all", numDocs/2, result.length); + int count = 0; + result = search.search(q,FieldCacheRangeFilter.newDoubleRange("id",null,medIdO,F,T), numDocs).scoreDocs; + count += result.length; + result = search.search(q,FieldCacheRangeFilter.newDoubleRange("id",medIdO,null,F,F), numDocs).scoreDocs; + count += result.length; + assertEquals("sum of two concenatted ranges", numDocs, count); + result = search.search(q,FieldCacheRangeFilter.newDoubleRange("id",null,null,T,T), numDocs).scoreDocs; + assertEquals("find all", numDocs, result.length); + result = search.search(q,FieldCacheRangeFilter.newDoubleRange("id",Double.valueOf(Double.POSITIVE_INFINITY),null,F,F), numDocs).scoreDocs; + assertEquals("infinity special case", 0, result.length); + result = search.search(q,FieldCacheRangeFilter.newDoubleRange("id",null, Double.valueOf(Double.NEGATIVE_INFINITY),F,F), numDocs).scoreDocs; + 
assertEquals("infinity special case", 0, result.length); + search.close(); + } + + // test using a sparse index (with deleted docs). The DocIdSet should be not cacheable, as it uses TermDocs if the range contains 0 + @Test + public void testSparseIndex() throws IOException { + Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); + + for (int d = -20; d <= 20; d++) { + Document doc = new Document(); + doc.add(newField("id",Integer.toString(d), Field.Store.NO, Field.Index.NOT_ANALYZED)); + doc.add(newField("body","body", Field.Store.NO, Field.Index.NOT_ANALYZED)); + writer.addDocument(doc); + } + + writer.optimize(); + writer.deleteDocuments(new Term("id","0")); + writer.close(); + + IndexReader reader = IndexReader.open(dir, true); + IndexSearcher search = newSearcher(reader); + assertTrue(reader.hasDeletions()); + + ScoreDoc[] result; + FieldCacheRangeFilter fcrf; + Query q = new TermQuery(new Term("body","body")); + + result = search.search(q,fcrf=FieldCacheRangeFilter.newByteRange("id",Byte.valueOf((byte) -20),Byte.valueOf((byte) 20),T,T), 100).scoreDocs; + assertFalse("DocIdSet must be not cacheable", fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable()); + assertEquals("find all", 40, result.length); + + result = search.search(q,fcrf=FieldCacheRangeFilter.newByteRange("id",Byte.valueOf((byte) 0),Byte.valueOf((byte) 20),T,T), 100).scoreDocs; + assertFalse("DocIdSet must be not cacheable", fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable()); + assertEquals("find all", 20, result.length); + + result = search.search(q,fcrf=FieldCacheRangeFilter.newByteRange("id",Byte.valueOf((byte) -20),Byte.valueOf((byte) 0),T,T), 100).scoreDocs; + assertFalse("DocIdSet must be not cacheable", fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable()); + assertEquals("find all", 20, result.length); + + result = search.search(q,fcrf=FieldCacheRangeFilter.newByteRange("id",Byte.valueOf((byte) 10),Byte.valueOf((byte) 20),T,T), 100).scoreDocs; + assertTrue("DocIdSet must be cacheable", fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable()); + assertEquals("find all", 11, result.length); + + result = search.search(q,fcrf=FieldCacheRangeFilter.newByteRange("id",Byte.valueOf((byte) -20),Byte.valueOf((byte) -10),T,T), 100).scoreDocs; + assertTrue("DocIdSet must be cacheable", fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable()); + assertEquals("find all", 11, result.length); + search.close(); + reader.close(); + dir.close(); + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestFieldCacheTermsFilter.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestFieldCacheTermsFilter.java new file mode 100644 index 0000000..f526f3e --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestFieldCacheTermsFilter.java @@ -0,0 +1,75 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util.LuceneTestCase; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.store.Directory; + +import java.util.ArrayList; +import java.util.List; + +/** + * A basic unit test for FieldCacheTermsFilter + * + * @see org.apache.lucene.search.FieldCacheTermsFilter + */ +public class TestFieldCacheTermsFilter extends LuceneTestCase { + public void testMissingTerms() throws Exception { + String fieldName = "field1"; + Directory rd = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random, rd); + for (int i = 0; i < 100; i++) { + Document doc = new Document(); + int term = i * 10; //terms are units of 10; + doc.add(newField(fieldName, "" + term, Field.Store.YES, Field.Index.NOT_ANALYZED)); + w.addDocument(doc); + } + IndexReader reader = w.getReader(); + w.close(); + + IndexSearcher searcher = newSearcher(reader); + int numDocs = reader.numDocs(); + ScoreDoc[] results; + MatchAllDocsQuery q = new MatchAllDocsQuery(); + + List terms = new ArrayList(); + terms.add("5"); + results = searcher.search(q, new FieldCacheTermsFilter(fieldName, terms.toArray(new String[0])), numDocs).scoreDocs; + assertEquals("Must match nothing", 0, results.length); + + terms = new ArrayList(); + terms.add("10"); + results = searcher.search(q, new FieldCacheTermsFilter(fieldName, terms.toArray(new String[0])), numDocs).scoreDocs; + assertEquals("Must match 1", 1, results.length); + + terms = new ArrayList(); + terms.add("10"); + terms.add("20"); + results = searcher.search(q, new FieldCacheTermsFilter(fieldName, terms.toArray(new String[0])), numDocs).scoreDocs; + assertEquals("Must match 2", 2, results.length); + + searcher.close(); + reader.close(); + rd.close(); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestFilteredQuery.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestFilteredQuery.java new file mode 100644 index 0000000..cd6e210 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestFilteredQuery.java @@ -0,0 +1,227 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.DocIdBitSet; +import org.apache.lucene.util.LuceneTestCase; + +import java.util.BitSet; + +/** + * FilteredQuery JUnit tests. + * + *

Created: Apr 21, 2004 1:21:46 PM + * + * + * @since 1.4 + */ +public class TestFilteredQuery extends LuceneTestCase { + + private IndexSearcher searcher; + private IndexReader reader; + private Directory directory; + private Query query; + private Filter filter; + + @Override + public void setUp() throws Exception { + super.setUp(); + directory = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter (random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); + + Document doc = new Document(); + doc.add (newField("field", "one two three four five", Field.Store.YES, Field.Index.ANALYZED)); + doc.add (newField("sorter", "b", Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument (doc); + + doc = new Document(); + doc.add (newField("field", "one two three four", Field.Store.YES, Field.Index.ANALYZED)); + doc.add (newField("sorter", "d", Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument (doc); + + doc = new Document(); + doc.add (newField("field", "one two three y", Field.Store.YES, Field.Index.ANALYZED)); + doc.add (newField("sorter", "a", Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument (doc); + + doc = new Document(); + doc.add (newField("field", "one two x", Field.Store.YES, Field.Index.ANALYZED)); + doc.add (newField("sorter", "c", Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument (doc); + + // tests here require single segment (eg try seed + // 8239472272678419952L), because SingleDocTestFilter(x) + // blindly accepts that docID in any sub-segment + writer.optimize(); + + reader = writer.getReader(); + writer.close (); + + searcher = newSearcher(reader); + query = new TermQuery (new Term ("field", "three")); + filter = newStaticFilterB(); + } + + // must be static for serialization tests + private static Filter newStaticFilterB() { + return new Filter() { + @Override + public DocIdSet getDocIdSet (IndexReader reader) { + BitSet bitset = new BitSet(5); + bitset.set (1); + bitset.set (3); + return new DocIdBitSet(bitset); + } + }; + } + + @Override + public void tearDown() throws Exception { + searcher.close(); + reader.close(); + directory.close(); + super.tearDown(); + } + + public void testFilteredQuery() + throws Exception { + Query filteredquery = new FilteredQuery (query, filter); + ScoreDoc[] hits = searcher.search (filteredquery, null, 1000).scoreDocs; + assertEquals (1, hits.length); + assertEquals (1, hits[0].doc); + QueryUtils.check(random, filteredquery,searcher); + + hits = searcher.search (filteredquery, null, 1000, new Sort(new SortField("sorter", SortField.STRING))).scoreDocs; + assertEquals (1, hits.length); + assertEquals (1, hits[0].doc); + + filteredquery = new FilteredQuery (new TermQuery (new Term ("field", "one")), filter); + hits = searcher.search (filteredquery, null, 1000).scoreDocs; + assertEquals (2, hits.length); + QueryUtils.check(random, filteredquery,searcher); + + filteredquery = new FilteredQuery (new TermQuery (new Term ("field", "x")), filter); + hits = searcher.search (filteredquery, null, 1000).scoreDocs; + assertEquals (1, hits.length); + assertEquals (3, hits[0].doc); + QueryUtils.check(random, filteredquery,searcher); + + filteredquery = new FilteredQuery (new TermQuery (new Term ("field", "y")), filter); + hits = searcher.search (filteredquery, null, 1000).scoreDocs; + assertEquals (0, hits.length); + QueryUtils.check(random, filteredquery,searcher); + + // test boost + Filter f = newStaticFilterA(); + + float boost = 2.5f; + 
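// the two boolean queries below must score identically: bq1 boosts the inner TermQuery directly, while bq2 boosts the FilteredQuery that wraps the same term +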
BooleanQuery bq1 = new BooleanQuery(); + TermQuery tq = new TermQuery (new Term ("field", "one")); + tq.setBoost(boost); + bq1.add(tq, Occur.MUST); + bq1.add(new TermQuery (new Term ("field", "five")), Occur.MUST); + + BooleanQuery bq2 = new BooleanQuery(); + tq = new TermQuery (new Term ("field", "one")); + filteredquery = new FilteredQuery(tq, f); + filteredquery.setBoost(boost); + bq2.add(filteredquery, Occur.MUST); + bq2.add(new TermQuery (new Term ("field", "five")), Occur.MUST); + assertScoreEquals(bq1, bq2); + + assertEquals(boost, filteredquery.getBoost(), 0); + assertEquals(1.0f, tq.getBoost(), 0); // the boost value of the underlying query shouldn't have changed + } + + // must be static for serialization tests + private static Filter newStaticFilterA() { + return new Filter() { + @Override + public DocIdSet getDocIdSet (IndexReader reader) { + BitSet bitset = new BitSet(5); + bitset.set(0, 5); + return new DocIdBitSet(bitset); + } + }; + } + + /** + * Tests whether the scores of the two queries are the same. + */ + public void assertScoreEquals(Query q1, Query q2) throws Exception { + ScoreDoc[] hits1 = searcher.search (q1, null, 1000).scoreDocs; + ScoreDoc[] hits2 = searcher.search (q2, null, 1000).scoreDocs; + + assertEquals(hits1.length, hits2.length); + + for (int i = 0; i < hits1.length; i++) { + assertEquals(hits1[i].score, hits2[i].score, 0.0000001f); + } + } + + /** + * This tests FilteredQuery's rewrite correctness + */ + public void testRangeQuery() throws Exception { + TermRangeQuery rq = new TermRangeQuery( + "sorter", "b", "d", true, true); + + Query filteredquery = new FilteredQuery(rq, filter); + ScoreDoc[] hits = searcher.search(filteredquery, null, 1000).scoreDocs; + assertEquals(2, hits.length); + QueryUtils.check(random, filteredquery,searcher); + } + + public void testBoolean() throws Exception { + BooleanQuery bq = new BooleanQuery(); + Query query = new FilteredQuery(new MatchAllDocsQuery(), + new SingleDocTestFilter(0)); + bq.add(query, BooleanClause.Occur.MUST); + query = new FilteredQuery(new MatchAllDocsQuery(), + new SingleDocTestFilter(1)); + bq.add(query, BooleanClause.Occur.MUST); + ScoreDoc[] hits = searcher.search(bq, null, 1000).scoreDocs; + assertEquals(0, hits.length); + QueryUtils.check(random, query,searcher); + } + + // Make sure BooleanQuery, which does out-of-order + // scoring, inside FilteredQuery, works + public void testBoolean2() throws Exception { + BooleanQuery bq = new BooleanQuery(); + Query query = new FilteredQuery(bq, + new SingleDocTestFilter(0)); + bq.add(new TermQuery(new Term("field", "one")), BooleanClause.Occur.SHOULD); + bq.add(new TermQuery(new Term("field", "two")), BooleanClause.Occur.SHOULD); + ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs; + assertEquals(1, hits.length); + QueryUtils.check(random, query,searcher); + } +} + + + + diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestFilteredSearch.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestFilteredSearch.java new file mode 100644 index 0000000..20849f0 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestFilteredSearch.java @@ -0,0 +1,130 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.search; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.LockObtainFailedException; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.OpenBitSet; + + +/** + * + */ +public class TestFilteredSearch extends LuceneTestCase { + + private static final String FIELD = "category"; + + public void testFilteredSearch() throws CorruptIndexException, LockObtainFailedException, IOException { + boolean enforceSingleSegment = true; + Directory directory = newDirectory(); + int[] filterBits = {1, 36}; + SimpleDocIdSetFilter filter = new SimpleDocIdSetFilter(filterBits); + IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); + searchFiltered(writer, directory, filter, enforceSingleSegment); + // run the test on more than one segment + enforceSingleSegment = false; + // reset - it is stateful + filter.reset(); + writer.close(); + writer = new IndexWriter(directory, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(10).setMergePolicy(newLogMergePolicy())); + // we index 60 docs - this will create 6 segments + searchFiltered(writer, directory, filter, enforceSingleSegment); + writer.close(); + directory.close(); + } + + public void searchFiltered(IndexWriter writer, Directory directory, SimpleDocIdSetFilter filter, boolean optimize) { + try { + for (int i = 0; i < 60; i++) {//Simple docs + Document doc = new Document(); + doc.add(newField(FIELD, Integer.toString(i), Field.Store.YES, Field.Index.NOT_ANALYZED)); + writer.addDocument(doc); + } + if(optimize) + writer.optimize(); + writer.close(); + + BooleanQuery booleanQuery = new BooleanQuery(); + booleanQuery.add(new TermQuery(new Term(FIELD, "36")), BooleanClause.Occur.SHOULD); + + + IndexSearcher indexSearcher = new IndexSearcher(directory, true); + filter.setDocBases(indexSearcher.getIndexReader()); + ScoreDoc[] hits = indexSearcher.search(booleanQuery, filter, 1000).scoreDocs; + assertEquals("Number of matched documents", 1, hits.length); + indexSearcher.close(); + } + catch (IOException e) { + fail(e.getMessage()); + } + + } + + public static final class SimpleDocIdSetFilter extends Filter { + private final int[] docs; + private int index; + private Map docBasePerSub; + + public SimpleDocIdSetFilter(int[] docs) { + this.docs = docs; + } + + public void 
setDocBases(IndexReader r) { + int maxDoc = 0; + docBasePerSub = new HashMap(); + for(IndexReader sub : r.getSequentialSubReaders()) { + docBasePerSub.put(sub, maxDoc); + maxDoc += sub.maxDoc(); + } + } + + @Override + public DocIdSet getDocIdSet(IndexReader reader) { + final OpenBitSet set = new OpenBitSet(reader.maxDoc()); + final int docBase = docBasePerSub.get(reader); + final int limit = docBase+reader.maxDoc(); + for (;index < docs.length; index++) { + final int docId = docs[index]; + if (docId > limit) + break; + if (docId >= docBase) { + set.set(docId-docBase); + } + } + return set.isEmpty()?null:set; + } + + public void reset(){ + index = 0; + } + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestFuzzyQuery.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestFuzzyQuery.java new file mode 100644 index 0000000..f43b9ed --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestFuzzyQuery.java @@ -0,0 +1,390 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.List; +import java.util.Arrays; +import java.io.IOException; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.MultiReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.Directory; +import org.apache.lucene.queryParser.QueryParser; + +/** + * Tests {@link FuzzyQuery}. 
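+ * A minimal usage sketch (hypothetical search term; assumes an open IndexSearcher named searcher):
+ * <pre>
+ *   FuzzyQuery q = new FuzzyQuery(new Term("field", "lucene"), FuzzyQuery.defaultMinSimilarity, 0);
+ *   TopDocs td = searcher.search(q, 10); // best fuzzy matches of "lucene" first
+ * </pre>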
+ * + */ +public class TestFuzzyQuery extends LuceneTestCase { + + public void testFuzziness() throws Exception { + Directory directory = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, directory); + addDoc("aaaaa", writer); + addDoc("aaaab", writer); + addDoc("aaabb", writer); + addDoc("aabbb", writer); + addDoc("abbbb", writer); + addDoc("bbbbb", writer); + addDoc("ddddd", writer); + + IndexReader reader = writer.getReader(); + IndexSearcher searcher = newSearcher(reader); + writer.close(); + + FuzzyQuery query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.defaultMinSimilarity, 0); + ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(3, hits.length); + + // same with prefix + query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.defaultMinSimilarity, 1); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(3, hits.length); + query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.defaultMinSimilarity, 2); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(3, hits.length); + query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.defaultMinSimilarity, 3); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(3, hits.length); + query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.defaultMinSimilarity, 4); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(2, hits.length); + query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.defaultMinSimilarity, 5); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(1, hits.length); + query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.defaultMinSimilarity, 6); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(1, hits.length); + + // test scoring + query = new FuzzyQuery(new Term("field", "bbbbb"), FuzzyQuery.defaultMinSimilarity, 0); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals("3 documents should match", 3, hits.length); + List order = Arrays.asList("bbbbb","abbbb","aabbb"); + for (int i = 0; i < hits.length; i++) { + final String term = searcher.doc(hits[i].doc).get("field"); + //System.out.println(hits[i].score); + assertEquals(order.get(i), term); + } + + // test pq size by supplying maxExpansions=2 + // This query would normally return 3 documents, because 3 terms match (see above): + query = new FuzzyQuery(new Term("field", "bbbbb"), FuzzyQuery.defaultMinSimilarity, 0, 2); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals("only 2 documents should match", 2, hits.length); + order = Arrays.asList("bbbbb","abbbb"); + for (int i = 0; i < hits.length; i++) { + final String term = searcher.doc(hits[i].doc).get("field"); + //System.out.println(hits[i].score); + assertEquals(order.get(i), term); + } + + // not similar enough: + query = new FuzzyQuery(new Term("field", "xxxxx"), FuzzyQuery.defaultMinSimilarity, 0); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(0, hits.length); + query = new FuzzyQuery(new Term("field", "aaccc"), FuzzyQuery.defaultMinSimilarity, 0); // edit distance to "aaaaa" = 3 + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(0, hits.length); + + // query identical to a word in the index: + query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.defaultMinSimilarity, 0); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(3, hits.length); + assertEquals(searcher.doc(hits[0].doc).get("field"), ("aaaaa")); + 
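// (FuzzyTermEnum similarity is roughly 1 - editDistance/termLength, so the 0.5 default on this 5-char term tolerates 2 edits) +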
// default allows for up to two edits: + assertEquals(searcher.doc(hits[1].doc).get("field"), ("aaaab")); + assertEquals(searcher.doc(hits[2].doc).get("field"), ("aaabb")); + + // query similar to a word in the index: + query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.defaultMinSimilarity, 0); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(3, hits.length); + assertEquals(searcher.doc(hits[0].doc).get("field"), ("aaaaa")); + assertEquals(searcher.doc(hits[1].doc).get("field"), ("aaaab")); + assertEquals(searcher.doc(hits[2].doc).get("field"), ("aaabb")); + + // now with prefix + query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.defaultMinSimilarity, 1); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(3, hits.length); + assertEquals(searcher.doc(hits[0].doc).get("field"), ("aaaaa")); + assertEquals(searcher.doc(hits[1].doc).get("field"), ("aaaab")); + assertEquals(searcher.doc(hits[2].doc).get("field"), ("aaabb")); + query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.defaultMinSimilarity, 2); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(3, hits.length); + assertEquals(searcher.doc(hits[0].doc).get("field"), ("aaaaa")); + assertEquals(searcher.doc(hits[1].doc).get("field"), ("aaaab")); + assertEquals(searcher.doc(hits[2].doc).get("field"), ("aaabb")); + query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.defaultMinSimilarity, 3); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(3, hits.length); + assertEquals(searcher.doc(hits[0].doc).get("field"), ("aaaaa")); + assertEquals(searcher.doc(hits[1].doc).get("field"), ("aaaab")); + assertEquals(searcher.doc(hits[2].doc).get("field"), ("aaabb")); + query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.defaultMinSimilarity, 4); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(2, hits.length); + assertEquals(searcher.doc(hits[0].doc).get("field"), ("aaaaa")); + assertEquals(searcher.doc(hits[1].doc).get("field"), ("aaaab")); + query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.defaultMinSimilarity, 5); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(0, hits.length); + + + query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.defaultMinSimilarity, 0); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(1, hits.length); + assertEquals(searcher.doc(hits[0].doc).get("field"), ("ddddd")); + + // now with prefix + query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.defaultMinSimilarity, 1); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(1, hits.length); + assertEquals(searcher.doc(hits[0].doc).get("field"), ("ddddd")); + query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.defaultMinSimilarity, 2); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(1, hits.length); + assertEquals(searcher.doc(hits[0].doc).get("field"), ("ddddd")); + query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.defaultMinSimilarity, 3); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(1, hits.length); + assertEquals(searcher.doc(hits[0].doc).get("field"), ("ddddd")); + query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.defaultMinSimilarity, 4); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(1, hits.length); + assertEquals(searcher.doc(hits[0].doc).get("field"), ("ddddd")); + query = new FuzzyQuery(new Term("field", "ddddX"), 
FuzzyQuery.defaultMinSimilarity, 5); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(0, hits.length); + + + // different field = no match: + query = new FuzzyQuery(new Term("anotherfield", "ddddX"), FuzzyQuery.defaultMinSimilarity, 0); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(0, hits.length); + + searcher.close(); + reader.close(); + directory.close(); + } + + public void testFuzzinessLong() throws Exception { + Directory directory = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, directory); + addDoc("aaaaaaa", writer); + addDoc("segment", writer); + + IndexReader reader = writer.getReader(); + IndexSearcher searcher = newSearcher(reader); + writer.close(); + + FuzzyQuery query; + // not similar enough: + query = new FuzzyQuery(new Term("field", "xxxxx"), FuzzyQuery.defaultMinSimilarity, 0); + ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(0, hits.length); + // edit distance to "aaaaaaa" = 3, this matches because the string is longer than + // in testDefaultFuzziness so a bigger difference is allowed: + query = new FuzzyQuery(new Term("field", "aaaaccc"), FuzzyQuery.defaultMinSimilarity, 0); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(1, hits.length); + assertEquals(searcher.doc(hits[0].doc).get("field"), ("aaaaaaa")); + + // now with prefix + query = new FuzzyQuery(new Term("field", "aaaaccc"), FuzzyQuery.defaultMinSimilarity, 1); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(1, hits.length); + assertEquals(searcher.doc(hits[0].doc).get("field"), ("aaaaaaa")); + query = new FuzzyQuery(new Term("field", "aaaaccc"), FuzzyQuery.defaultMinSimilarity, 4); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(1, hits.length); + assertEquals(searcher.doc(hits[0].doc).get("field"), ("aaaaaaa")); + query = new FuzzyQuery(new Term("field", "aaaaccc"), FuzzyQuery.defaultMinSimilarity, 5); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(0, hits.length); + + // no match, more than half of the characters is wrong: + query = new FuzzyQuery(new Term("field", "aaacccc"), FuzzyQuery.defaultMinSimilarity, 0); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(0, hits.length); + + // now with prefix + query = new FuzzyQuery(new Term("field", "aaacccc"), FuzzyQuery.defaultMinSimilarity, 2); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(0, hits.length); + + // "student" and "stellent" are indeed similar to "segment" by default: + query = new FuzzyQuery(new Term("field", "student"), FuzzyQuery.defaultMinSimilarity, 0); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(1, hits.length); + query = new FuzzyQuery(new Term("field", "stellent"), FuzzyQuery.defaultMinSimilarity, 0); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(1, hits.length); + + // now with prefix + query = new FuzzyQuery(new Term("field", "student"), FuzzyQuery.defaultMinSimilarity, 1); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(1, hits.length); + query = new FuzzyQuery(new Term("field", "stellent"), FuzzyQuery.defaultMinSimilarity, 1); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(1, hits.length); + query = new FuzzyQuery(new Term("field", "student"), FuzzyQuery.defaultMinSimilarity, 2); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(0, hits.length); + query = new FuzzyQuery(new Term("field", 
"stellent"), FuzzyQuery.defaultMinSimilarity, 2); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(0, hits.length); + + // "student" doesn't match anymore thanks to increased minimum similarity: + query = new FuzzyQuery(new Term("field", "student"), 0.6f, 0); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(0, hits.length); + + try { + query = new FuzzyQuery(new Term("field", "student"), 1.1f); + fail("Expected IllegalArgumentException"); + } catch (IllegalArgumentException e) { + // expecting exception + } + try { + query = new FuzzyQuery(new Term("field", "student"), -0.1f); + fail("Expected IllegalArgumentException"); + } catch (IllegalArgumentException e) { + // expecting exception + } + + searcher.close(); + reader.close(); + directory.close(); + } + + public void testTokenLengthOpt() throws IOException { + Directory directory = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, directory); + addDoc("12345678911", writer); + addDoc("segment", writer); + + IndexReader reader = writer.getReader(); + IndexSearcher searcher = newSearcher(reader); + writer.close(); + + Query query; + // term not over 10 chars, so optimization shortcuts + query = new FuzzyQuery(new Term("field", "1234569"), 0.9f); + ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(0, hits.length); + + // 10 chars, so no optimization + query = new FuzzyQuery(new Term("field", "1234567891"), 0.9f); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(0, hits.length); + + // over 10 chars, so no optimization + query = new FuzzyQuery(new Term("field", "12345678911"), 0.9f); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(1, hits.length); + + // over 10 chars, no match + query = new FuzzyQuery(new Term("field", "sdfsdfsdfsdf"), 0.9f); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(0, hits.length); + + searcher.close(); + reader.close(); + directory.close(); + } + + /** Test the TopTermsBoostOnlyBooleanQueryRewrite rewrite method. */ + public void testBoostOnlyRewrite() throws Exception { + Directory directory = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, directory); + addDoc("Lucene", writer); + addDoc("Lucene", writer); + addDoc("Lucenne", writer); + + IndexReader reader = writer.getReader(); + IndexSearcher searcher = newSearcher(reader); + writer.close(); + + FuzzyQuery query = new FuzzyQuery(new Term("field", "Lucene")); + query.setRewriteMethod(new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(50)); + ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(3, hits.length); + // normally, 'Lucenne' would be the first result as IDF will skew the score. + assertEquals("Lucene", reader.document(hits[0].doc).get("field")); + assertEquals("Lucene", reader.document(hits[1].doc).get("field")); + assertEquals("Lucenne", reader.document(hits[2].doc).get("field")); + searcher.close(); + reader.close(); + directory.close(); + } + + public void testGiga() throws Exception { + + MockAnalyzer analyzer = new MockAnalyzer(random); + Directory index = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random, index); + + addDoc("Lucene in Action", w); + addDoc("Lucene for Dummies", w); + + //addDoc("Giga", w); + addDoc("Giga byte", w); + + addDoc("ManagingGigabytesManagingGigabyte", w); + addDoc("ManagingGigabytesManagingGigabytes", w); + + addDoc("The Art of Computer Science", w); + addDoc("J. K. 
Rowling", w); + addDoc("JK Rowling", w); + addDoc("Joanne K Roling", w); + addDoc("Bruce Willis", w); + addDoc("Willis bruce", w); + addDoc("Brute willis", w); + addDoc("B. willis", w); + IndexReader r = w.getReader(); + w.close(); + + Query q = new QueryParser(TEST_VERSION_CURRENT, "field", analyzer).parse( "giga~0.9" ); + + // 3. search + IndexSearcher searcher = newSearcher(r); + ScoreDoc[] hits = searcher.search(q, 10).scoreDocs; + assertEquals(1, hits.length); + assertEquals("Giga byte", searcher.doc(hits[0].doc).get("field")); + searcher.close(); + r.close(); + index.close(); + } + + private void addDoc(String text, RandomIndexWriter writer) throws IOException { + Document doc = new Document(); + doc.add(newField("field", text, Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(doc); + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestLocaleMethods.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestLocaleMethods.java new file mode 100644 index 0000000..591ee13 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestLocaleMethods.java @@ -0,0 +1,144 @@ +package org.apache.lucene.search; + +import java.io.IOException; +import java.text.Collator; +import java.util.Locale; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; +import org.junit.AfterClass; +import org.junit.BeforeClass; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * Tests Locale-based sort and range search + */ +public class TestLocaleMethods extends LuceneTestCase { + private static Locale locale; + private static Collator collator; + private static IndexSearcher searcher; + private static IndexReader reader; + private static Directory dir; + private static int numDocs; + + @BeforeClass + public static void beforeClass() throws Exception { + locale = LuceneTestCase.randomLocale(random); + collator = Collator.getInstance(locale); + numDocs = 1000 * RANDOM_MULTIPLIER; + dir = newDirectory(); + RandomIndexWriter iw = new RandomIndexWriter(random, dir); + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + String value = _TestUtil.randomUnicodeString(random); + Field field = newField("field", value, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); + doc.add(field); + iw.addDocument(doc); + } + reader = iw.getReader(); + iw.close(); + + searcher = newSearcher(reader); + } + + @AfterClass + public static void afterClass() throws Exception { + searcher.close(); + reader.close(); + dir.close(); + locale = null; + collator = null; + searcher = null; + reader = null; + dir = null; + } + + public void testSort() throws Exception { + SortField sf = new SortField("field", locale); + TopFieldDocs docs = searcher.search(new MatchAllDocsQuery(), null, numDocs, new Sort(sf)); + String prev = ""; + for (ScoreDoc doc : docs.scoreDocs) { + String value = reader.document(doc.doc).get("field"); + assertTrue(collator.compare(value, prev) >= 0); + prev = value; + } + } + + public void testSort2() throws Exception { + SortField sf = new SortField("field", new FieldComparatorSource() { + @Override + public FieldComparator newComparator(String fieldname, int numHits, int sortPos, boolean reversed) throws IOException { + return new FieldComparator.StringComparatorLocale(numHits, fieldname, locale); + } + }); + TopFieldDocs docs = searcher.search(new MatchAllDocsQuery(), null, numDocs, new Sort(sf)); + String prev = ""; + for (ScoreDoc doc : docs.scoreDocs) { + String value = reader.document(doc.doc).get("field"); + assertTrue(collator.compare(value, prev) >= 0); + prev = value; + } + } + + private void doTestRanges(String startPoint, String endPoint, Query query) throws Exception { + // positive test + TopDocs docs = searcher.search(query, numDocs); + for (ScoreDoc doc : docs.scoreDocs) { + String value = reader.document(doc.doc).get("field"); + assertTrue(collator.compare(value, startPoint) >= 0); + assertTrue(collator.compare(value, endPoint) <= 0); + } + + // negative test + BooleanQuery bq = new BooleanQuery(); + bq.add(new MatchAllDocsQuery(), Occur.SHOULD); + bq.add(query, Occur.MUST_NOT); + docs = searcher.search(bq, numDocs); + for (ScoreDoc doc : docs.scoreDocs) { + String value = reader.document(doc.doc).get("field"); + assertTrue(collator.compare(value, startPoint) < 0 || collator.compare(value, endPoint) > 0); + } + } + + public void testRangeQuery() throws Exception { + int numQueries = 100*RANDOM_MULTIPLIER; + for (int i = 0; i < numQueries; i++) { + String startPoint = _TestUtil.randomUnicodeString(random); + String endPoint = _TestUtil.randomUnicodeString(random); + Query query = new TermRangeQuery("field", startPoint, endPoint, true, true, collator); + doTestRanges(startPoint, endPoint, query); + } + } + + public void testRangeFilter() throws Exception { + int numQueries = 100*RANDOM_MULTIPLIER; + for (int i = 0; i < numQueries; i++) { + String startPoint = _TestUtil.randomUnicodeString(random); + String endPoint = 
_TestUtil.randomUnicodeString(random); + Query query = new ConstantScoreQuery(new TermRangeFilter("field", startPoint, endPoint, true, true, collator)); + doTestRanges(startPoint, endPoint, query); + } + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestMatchAllDocsQuery.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestMatchAllDocsQuery.java new file mode 100644 index 0000000..f938bb0 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestMatchAllDocsQuery.java @@ -0,0 +1,133 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.search; + +import java.io.IOException; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.store.Directory; + +import org.apache.lucene.util.LuceneTestCase; + +/** + * Tests MatchAllDocsQuery. 
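+ * A minimal usage sketch (both constructors are exercised below; "key" is this test's field):
+ * <pre>
+ *   Query all = new MatchAllDocsQuery();          // constant score for every live doc
+ *   Query byNorms = new MatchAllDocsQuery("key"); // scores taken from the "key" field's norms
+ * </pre>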
+ * + */ +public class TestMatchAllDocsQuery extends LuceneTestCase { + private Analyzer analyzer = new MockAnalyzer(random); + + public void testQuery() throws Exception { + Directory dir = newDirectory(); + IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, analyzer).setMaxBufferedDocs(2).setMergePolicy(newLogMergePolicy())); + addDoc("one", iw, 1f); + addDoc("two", iw, 20f); + addDoc("three four", iw, 300f); + iw.close(); + + IndexReader ir = IndexReader.open(dir, false); + IndexSearcher is = newSearcher(ir); + ScoreDoc[] hits; + + // assert with norms scoring turned off + + hits = is.search(new MatchAllDocsQuery(), null, 1000).scoreDocs; + assertEquals(3, hits.length); + assertEquals("one", is.doc(hits[0].doc).get("key")); + assertEquals("two", is.doc(hits[1].doc).get("key")); + assertEquals("three four", is.doc(hits[2].doc).get("key")); + + // assert with norms scoring turned on + + MatchAllDocsQuery normsQuery = new MatchAllDocsQuery("key"); + hits = is.search(normsQuery, null, 1000).scoreDocs; + assertEquals(3, hits.length); + + assertEquals("three four", is.doc(hits[0].doc).get("key")); + assertEquals("two", is.doc(hits[1].doc).get("key")); + assertEquals("one", is.doc(hits[2].doc).get("key")); + + // change norm & retest + is.getIndexReader().setNorm(0, "key", is.getSimilarity().encodeNormValue(400f)); + normsQuery = new MatchAllDocsQuery("key"); + hits = is.search(normsQuery, null, 1000).scoreDocs; + assertEquals(3, hits.length); + + assertEquals("one", is.doc(hits[0].doc).get("key")); + assertEquals("three four", is.doc(hits[1].doc).get("key")); + assertEquals("two", is.doc(hits[2].doc).get("key")); + + // some artificial queries to trigger the use of skipTo(): + + BooleanQuery bq = new BooleanQuery(); + bq.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); + bq.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); + hits = is.search(bq, null, 1000).scoreDocs; + assertEquals(3, hits.length); + + bq = new BooleanQuery(); + bq.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); + bq.add(new TermQuery(new Term("key", "three")), BooleanClause.Occur.MUST); + hits = is.search(bq, null, 1000).scoreDocs; + assertEquals(1, hits.length); + + // delete a document: + is.getIndexReader().deleteDocument(0); + hits = is.search(new MatchAllDocsQuery(), null, 1000).scoreDocs; + assertEquals(2, hits.length); + + // test parsable toString() + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "key", analyzer); + hits = is.search(qp.parse(new MatchAllDocsQuery().toString()), null, 1000).scoreDocs; + assertEquals(2, hits.length); + + // test parsable toString() with non default boost + Query maq = new MatchAllDocsQuery(); + maq.setBoost(2.3f); + Query pq = qp.parse(maq.toString()); + hits = is.search(pq, null, 1000).scoreDocs; + assertEquals(2, hits.length); + + is.close(); + ir.close(); + dir.close(); + } + + public void testEquals() { + Query q1 = new MatchAllDocsQuery(); + Query q2 = new MatchAllDocsQuery(); + assertTrue(q1.equals(q2)); + q1.setBoost(1.5f); + assertFalse(q1.equals(q2)); + } + + private void addDoc(String text, IndexWriter iw, float boost) throws IOException { + Document doc = new Document(); + Field f = newField("key", text, Field.Store.YES, Field.Index.ANALYZED); + f.setBoost(boost); + doc.add(f); + iw.addDocument(doc); + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java 
new file mode 100644 index 0000000..d14af0d --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java @@ -0,0 +1,581 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermEnum; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.queryParser.ParseException; +import org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.search.Explanation.IDFExplanation; +import org.apache.lucene.store.Directory; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.SimpleAnalyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.LuceneTestCase; + +import java.io.IOException; +import java.util.Collection; +import java.util.LinkedList; +import java.io.Reader; + +/** + * This class tests the MultiPhraseQuery class. + * + * + */ +public class TestMultiPhraseQuery extends LuceneTestCase { + + public void testPhrasePrefix() throws IOException { + Directory indexStore = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, indexStore); + add("blueberry pie", writer); + add("blueberry strudel", writer); + add("blueberry pizza", writer); + add("blueberry chewing gum", writer); + add("bluebird pizza", writer); + add("bluebird foobar pizza", writer); + add("piccadilly circus", writer); + + IndexReader reader = writer.getReader(); + IndexSearcher searcher = newSearcher(reader); + + // search for "blueberry pi*": + MultiPhraseQuery query1 = new MultiPhraseQuery(); + // search for "strawberry pi*": + MultiPhraseQuery query2 = new MultiPhraseQuery(); + query1.add(new Term("body", "blueberry")); + query2.add(new Term("body", "strawberry")); + + LinkedList termsWithPrefix = new LinkedList(); + IndexReader ir = reader; + + // this TermEnum gives "piccadilly", "pie" and "pizza". 
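+ // (IndexReader.terms(t) enumerates from the first term >= t, so the startsWith check below keeps exactly the prefix matches)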
+ String prefix = "pi"; + TermEnum te = ir.terms(new Term("body", prefix)); + do { + if (te.term().text().startsWith(prefix)) + { + termsWithPrefix.add(te.term()); + } + } while (te.next()); + + query1.add(termsWithPrefix.toArray(new Term[0])); + assertEquals("body:\"blueberry (piccadilly pie pizza)\"", query1.toString()); + query2.add(termsWithPrefix.toArray(new Term[0])); + assertEquals("body:\"strawberry (piccadilly pie pizza)\"", query2.toString()); + + ScoreDoc[] result; + result = searcher.search(query1, null, 1000).scoreDocs; + assertEquals(2, result.length); + result = searcher.search(query2, null, 1000).scoreDocs; + assertEquals(0, result.length); + + // search for "blue* pizza": + MultiPhraseQuery query3 = new MultiPhraseQuery(); + termsWithPrefix.clear(); + prefix = "blue"; + te = ir.terms(new Term("body", prefix)); + do { + if (te.term().text().startsWith(prefix)) + { + termsWithPrefix.add(te.term()); + } + } while (te.next()); + query3.add(termsWithPrefix.toArray(new Term[0])); + query3.add(new Term("body", "pizza")); + + result = searcher.search(query3, null, 1000).scoreDocs; + assertEquals(2, result.length); // blueberry pizza, bluebird pizza + assertEquals("body:\"(blueberry bluebird) pizza\"", query3.toString()); + + // test slop: + query3.setSlop(1); + result = searcher.search(query3, null, 1000).scoreDocs; + + // just make sure no exc: + searcher.explain(query3, 0); + + assertEquals(3, result.length); // blueberry pizza, bluebird pizza, bluebird foobar pizza + + MultiPhraseQuery query4 = new MultiPhraseQuery(); + try { + query4.add(new Term("field1", "foo")); + query4.add(new Term("field2", "foobar")); + fail(); + } catch(IllegalArgumentException e) { + // okay, all terms must belong to the same field + } + + writer.close(); + searcher.close(); + reader.close(); + indexStore.close(); + } + + // LUCENE-2580 + public void testTall() throws IOException { + Directory indexStore = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, indexStore); + add("blueberry chocolate pie", writer); + add("blueberry chocolate tart", writer); + IndexReader r = writer.getReader(); + writer.close(); + + IndexSearcher searcher = newSearcher(r); + MultiPhraseQuery q = new MultiPhraseQuery(); + q.add(new Term("body", "blueberry")); + q.add(new Term("body", "chocolate")); + q.add(new Term[] {new Term("body", "pie"), new Term("body", "tart")}); + assertEquals(2, searcher.search(q, 1).totalHits); + searcher.close(); + r.close(); + indexStore.close(); + } + + private void add(String s, RandomIndexWriter writer) throws IOException { + Document doc = new Document(); + doc.add(newField("body", s, Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(doc); + } + + public void testBooleanQueryContainingSingleTermPrefixQuery() + throws IOException { + // this tests against bug 33161 (now fixed) + // In order to cause the bug, the outer query must have more than one term + // and all terms required. + // The contained PhraseMultiQuery must contain exactly one term array. 
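+ // (here that single array supplies the alternatives "blueberry" and "blue" for one position of the phrase) +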
+ Directory indexStore = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, indexStore); + add("blueberry pie", writer); + add("blueberry chewing gum", writer); + add("blue raspberry pie", writer); + + IndexReader reader = writer.getReader(); + IndexSearcher searcher = newSearcher(reader); + // This query will be equivalent to +body:pie +body:"blue*" + BooleanQuery q = new BooleanQuery(); + q.add(new TermQuery(new Term("body", "pie")), BooleanClause.Occur.MUST); + + MultiPhraseQuery trouble = new MultiPhraseQuery(); + trouble.add(new Term[] {new Term("body", "blueberry"), + new Term("body", "blue")}); + q.add(trouble, BooleanClause.Occur.MUST); + + // exception will be thrown here without fix + ScoreDoc[] hits = searcher.search(q, null, 1000).scoreDocs; + + assertEquals("Wrong number of hits", 2, hits.length); + + // just make sure no exc: + searcher.explain(q, 0); + + writer.close(); + searcher.close(); + reader.close(); + indexStore.close(); + } + + public void testPhrasePrefixWithBooleanQuery() throws IOException { + Directory indexStore = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, indexStore); + add("This is a test", "object", writer); + add("a note", "note", writer); + + IndexReader reader = writer.getReader(); + IndexSearcher searcher = newSearcher(reader); + + // This query will be equivalent to +type:note +body:"a t*" + BooleanQuery q = new BooleanQuery(); + q.add(new TermQuery(new Term("type", "note")), BooleanClause.Occur.MUST); + + MultiPhraseQuery trouble = new MultiPhraseQuery(); + trouble.add(new Term("body", "a")); + trouble + .add(new Term[] {new Term("body", "test"), new Term("body", "this")}); + q.add(trouble, BooleanClause.Occur.MUST); + + // exception will be thrown here without fix for #35626: + ScoreDoc[] hits = searcher.search(q, null, 1000).scoreDocs; + assertEquals("Wrong number of hits", 0, hits.length); + writer.close(); + searcher.close(); + reader.close(); + indexStore.close(); + } + + public void testNoDocs() throws Exception { + Directory indexStore = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, indexStore); + add("a note", "note", writer); + + IndexReader reader = writer.getReader(); + IndexSearcher searcher = newSearcher(reader); + + MultiPhraseQuery q = new MultiPhraseQuery(); + q.add(new Term("body", "a")); + q.add(new Term[] {new Term("body", "nope"), new Term("body", "nope")}); + assertEquals("Wrong number of hits", 0, + searcher.search(q, null, 1).totalHits); + + // just make sure no exc: + searcher.explain(q, 0); + + writer.close(); + searcher.close(); + reader.close(); + indexStore.close(); + } + + public void testHashCodeAndEquals() { + MultiPhraseQuery query1 = new MultiPhraseQuery(); + MultiPhraseQuery query2 = new MultiPhraseQuery(); + + assertEquals(query1.hashCode(), query2.hashCode()); + assertEquals(query1, query2); + + Term term1 = new Term("someField", "someText"); + + query1.add(term1); + query2.add(term1); + + assertEquals(query1.hashCode(), query2.hashCode()); + assertEquals(query1, query2); + + Term term2 = new Term("someField", "someMoreText"); + + query1.add(term2); + + assertFalse(query1.hashCode() == query2.hashCode()); + assertFalse(query1.equals(query2)); + + query2.add(term2); + + assertEquals(query1.hashCode(), query2.hashCode()); + assertEquals(query1, query2); + } + + private void add(String s, String type, RandomIndexWriter writer) + throws IOException { + Document doc = new Document(); + doc.add(newField("body", s, Field.Store.YES, 
Field.Index.ANALYZED)); + doc.add(newField("type", type, Field.Store.YES, Field.Index.NOT_ANALYZED)); + writer.addDocument(doc); + } + + // LUCENE-2526 + public void testEmptyToString() { + new MultiPhraseQuery().toString(); + } + + public void testCustomIDF() throws Exception { + Directory indexStore = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, indexStore); + add("This is a test", "object", writer); + add("a note", "note", writer); + + IndexReader reader = writer.getReader(); + IndexSearcher searcher = newSearcher(reader); + searcher.setSimilarity(new DefaultSimilarity() { + + @Override + public IDFExplanation idfExplain(Collection terms, + Searcher searcher) throws IOException { + return new IDFExplanation() { + + @Override + public float getIdf() { + return 10f; + } + + @Override + public String explain() { + return "just a test"; + } + + }; + } + }); + + MultiPhraseQuery query = new MultiPhraseQuery(); + query.add(new Term[] { new Term("body", "this"), new Term("body", "that") }); + query.add(new Term("body", "is")); + Weight weight = query.createWeight(searcher); + assertEquals(10f * 10f, weight.sumOfSquaredWeights(), 0.001f); + + writer.close(); + searcher.close(); + reader.close(); + indexStore.close(); + } + + private static class TokenAndPos { + public final String token; + public final int pos; + public TokenAndPos(String token, int pos) { + this.token = token; + this.pos = pos; + } + } + + private static class CannedAnalyzer extends Analyzer { + private final TokenAndPos[] tokens; + + public CannedAnalyzer(TokenAndPos[] tokens) { + this.tokens = tokens; + } + + @Override + public TokenStream tokenStream(String fieldName, Reader reader) { + return new CannedTokenizer(tokens); + } + } + + private static class CannedTokenizer extends Tokenizer { + private final TokenAndPos[] tokens; + private int upto = 0; + private int lastPos = 0; + private final TermAttribute termAtt = addAttribute(TermAttribute.class); + private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); + + public CannedTokenizer(TokenAndPos[] tokens) { + this.tokens = tokens; + } + + @Override + public final boolean incrementToken() throws IOException { + clearAttributes(); + if (upto < tokens.length) { + final TokenAndPos token = tokens[upto++]; + termAtt.setTermBuffer(token.token); + posIncrAtt.setPositionIncrement(token.pos - lastPos); + lastPos = token.pos; + return true; + } else { + return false; + } + } + } + + public void testZeroPosIncr() throws IOException { + Directory dir = new RAMDirectory(); + final TokenAndPos[] tokens = new TokenAndPos[3]; + tokens[0] = new TokenAndPos("a", 0); + tokens[1] = new TokenAndPos("b", 0); + tokens[2] = new TokenAndPos("c", 0); + + IndexWriter writer = new IndexWriter(dir, new CannedAnalyzer(tokens), true, IndexWriter.MaxFieldLength.LIMITED); + Document doc = new Document(); + doc.add(new Field("field", "", Field.Store.NO, Field.Index.ANALYZED)); + writer.addDocument(doc); + writer.addDocument(doc); + IndexReader r = writer.getReader(); + writer.close(); + IndexSearcher s = new IndexSearcher(r); + MultiPhraseQuery mpq = new MultiPhraseQuery(); + //mpq.setSlop(1); + + // NOTE: not great that if we do the else clause here we + // get different scores! MultiPhraseQuery counts that + // phrase as occurring twice per doc (it should be 1, I + // think?). 
This is because MultipleTermPositions is able to + // return the same position more than once (0, in this + // case): + if (true) { + mpq.add(new Term[] {new Term("field", "b"), new Term("field", "c")}, 0); + mpq.add(new Term[] {new Term("field", "a")}, 0); + } else { + mpq.add(new Term[] {new Term("field", "a")}, 0); + mpq.add(new Term[] {new Term("field", "b"), new Term("field", "c")}, 0); + } + TopDocs hits = s.search(mpq, 2); + assertEquals(2, hits.totalHits); + assertEquals(hits.scoreDocs[0].score, hits.scoreDocs[1].score, 1e-5); + /* + for(int hit=0;hit ftl = new HashSet(); + ftl.add("other"); + SetBasedFieldSelector fs = new SetBasedFieldSelector(ftl, Collections. emptySet()); + document = searcher.doc(hits[0].doc, fs); + assertTrue("document is null and it shouldn't be", document != null); + assertTrue("document.getFields() Size: " + document.getFields().size() + " is not: " + 1, document.getFields().size() == 1); + String value = document.get("contents"); + assertTrue("value is not null and it should be", value == null); + value = document.get("other"); + assertTrue("value is null and it shouldn't be", value != null); + ftl.clear(); + ftl.add("contents"); + fs = new SetBasedFieldSelector(ftl, Collections. emptySet()); + document = searcher.doc(hits[1].doc, fs); + value = document.get("contents"); + assertTrue("value is null and it shouldn't be", value != null); + value = document.get("other"); + assertTrue("value is not null and it should be", value == null); + indexSearcher1.close(); + indexSearcher2.close(); + ramDirectory1.close(); + ramDirectory2.close(); + searcher.close(); + } + + /* uncomment this when the highest score is always normalized to 1.0, even when it was < 1.0 + public void testNormalization1() throws IOException { + testNormalization(1, "Using 1 document per index:"); + } + */ + + public void testNormalization10() throws IOException { + testNormalization(10, "Using 10 documents per index:"); + } + + private void testNormalization(int nDocs, String message) throws IOException { + Query query=new TermQuery(new Term("contents", "doc0")); + + Directory ramDirectory1; + IndexSearcher indexSearcher1; + ScoreDoc[] hits; + + ramDirectory1=newDirectory(); + + // First put the documents in the same index + initIndex(random, ramDirectory1, nDocs, true, null); // documents with a single token "doc0", "doc1", etc... + initIndex(random, ramDirectory1, nDocs, false, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc... + + indexSearcher1=new IndexSearcher(ramDirectory1, true); + indexSearcher1.setDefaultFieldSortScoring(true, true); + + hits=indexSearcher1.search(query, null, 1000).scoreDocs; + + assertEquals(message, 2, hits.length); + + // Store the scores for use later + float[] scores={ hits[0].score, hits[1].score }; + + assertTrue(message, scores[0] > scores[1]); + + indexSearcher1.close(); + ramDirectory1.close(); + hits=null; + + + + Directory ramDirectory2; + IndexSearcher indexSearcher2; + + ramDirectory1=newDirectory(); + ramDirectory2=newDirectory(); + + // Now put the documents in a different index + initIndex(random, ramDirectory1, nDocs, true, null); // documents with a single token "doc0", "doc1", etc... + initIndex(random, ramDirectory2, nDocs, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc... 
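+ // the per-index scores recorded above should come back unchanged when the two directories are searched through a MultiSearcher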
+ + indexSearcher1=new IndexSearcher(ramDirectory1, true); + indexSearcher1.setDefaultFieldSortScoring(true, true); + indexSearcher2=new IndexSearcher(ramDirectory2, true); + indexSearcher2.setDefaultFieldSortScoring(true, true); + + Searcher searcher=getMultiSearcherInstance(new Searcher[] { indexSearcher1, indexSearcher2 }); + + hits=searcher.search(query, null, 1000).scoreDocs; + + assertEquals(message, 2, hits.length); + + // The scores should be the same (within reason) + assertEquals(message, scores[0], hits[0].score, 1e-6); // This will be a document from ramDirectory1 + assertEquals(message, scores[1], hits[1].score, 1e-6); // This will be a document from ramDirectory2 + + + + // Adding a Sort.RELEVANCE object should not change anything + hits=searcher.search(query, null, 1000, Sort.RELEVANCE).scoreDocs; + + assertEquals(message, 2, hits.length); + + assertEquals(message, scores[0], hits[0].score, 1e-6); // This will be a document from ramDirectory1 + assertEquals(message, scores[1], hits[1].score, 1e-6); // This will be a document from ramDirectory2 + + searcher.close(); + + ramDirectory1.close(); + ramDirectory2.close(); + } + + /** + * test that custom similarity is in effect when using MultiSearcher (LUCENE-789). + * @throws IOException + */ + public void testCustomSimilarity () throws IOException { + Directory dir = newDirectory(); + initIndex(random, dir, 10, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc... + IndexSearcher srchr = new IndexSearcher(dir, true); + MultiSearcher msrchr = getMultiSearcherInstance(new Searcher[]{srchr}); + + Similarity customSimilarity = new DefaultSimilarity() { + // override all + @Override + public float idf(int docFreq, int numDocs) { return 100.0f; } + @Override + public float coord(int overlap, int maxOverlap) { return 1.0f; } + @Override + public float computeNorm(String fieldName, FieldInvertState state) { return state.getBoost(); } + @Override + public float queryNorm(float sumOfSquaredWeights) { return 1.0f; } + @Override + public float sloppyFreq(int distance) { return 1.0f; } + @Override + public float tf(float freq) { return 1.0f; } + }; + + srchr.setSimilarity(customSimilarity); + msrchr.setSimilarity(customSimilarity); + + Query query=new TermQuery(new Term("contents", "doc0")); + + // Get a score from IndexSearcher + TopDocs topDocs = srchr.search(query, null, 1); + float score1 = topDocs.getMaxScore(); + + // Get the score from MultiSearcher + topDocs = msrchr.search(query, null, 1); + float scoreN = topDocs.getMaxScore(); + + // The scores from the IndexSearcher and MultiSearcher should be the same + // if the same similarity is used. + assertEquals("MultiSearcher score must be equal to single searcher score!", score1, scoreN, 1e-6); + msrchr.close(); + srchr.close(); + dir.close(); + } + + public void testDocFreq() throws IOException{ + Directory dir1 = newDirectory(); + Directory dir2 = newDirectory(); + + initIndex(random, dir1, 10, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc... + initIndex(random, dir2, 5, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...
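+ // Every document carries the extra token "x": 10 docs in dir1 plus 5 docs in dir2, so the aggregated docFreq asserted below must be 15.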
+ IndexSearcher searcher1 = new IndexSearcher(dir1, true); + IndexSearcher searcher2 = new IndexSearcher(dir2, true); + + MultiSearcher multiSearcher = getMultiSearcherInstance(new Searcher[]{searcher1, searcher2}); + assertEquals(15, multiSearcher.docFreq(new Term("contents","x"))); + multiSearcher.close(); + searcher1.close(); + searcher2.close(); + dir1.close(); + dir2.close(); + } + + public void testCreateDocFrequencyMap() throws IOException{ + Directory dir1 = newDirectory(); + Directory dir2 = newDirectory(); + Term template = new Term("contents"); + String[] contents = {"a", "b", "c"}; + HashSet<Term> termsSet = new HashSet<Term>(); + for (int i = 0; i < contents.length; i++) { + initIndex(random, dir1, i+10, i==0, contents[i]); + initIndex(random, dir2, i+5, i==0, contents[i]); + termsSet.add(template.createTerm(contents[i])); + } + IndexSearcher searcher1 = new IndexSearcher(dir1, true); + IndexSearcher searcher2 = new IndexSearcher(dir2, true); + MultiSearcher multiSearcher = getMultiSearcherInstance(new Searcher[]{searcher1, searcher2}); + Map<Term,Integer> docFrequencyMap = multiSearcher.createDocFrequencyMap(termsSet); + assertEquals(3, docFrequencyMap.size()); + for (int i = 0; i < contents.length; i++) { + // contents[i] occurs in (i+10) docs in dir1 and (i+5) docs in dir2, i.e. 2*i + 15 in total + assertEquals(Integer.valueOf((i*2) +15), docFrequencyMap.get(template.createTerm(contents[i]))); + } + multiSearcher.close(); + searcher1.close(); + searcher2.close(); + dir1.close(); + dir2.close(); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestMultiSearcherRanking.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestMultiSearcherRanking.java new file mode 100644 index 0000000..51a33f4 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestMultiSearcherRanking.java @@ -0,0 +1,173 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.queryParser.ParseException; +import org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.store.Directory; +import java.io.IOException; + +/** + * Tests {@link MultiSearcher} ranking, i.e.
makes sure this bug is fixed: + * http://issues.apache.org/bugzilla/show_bug.cgi?id=31841 + * + */ +public class TestMultiSearcherRanking extends LuceneTestCase { + + private final String FIELD_NAME = "body"; + private Searcher multiSearcher; + private Searcher singleSearcher; + + public void testOneTermQuery() throws IOException, ParseException { + checkQuery("three"); + } + + public void testTwoTermQuery() throws IOException, ParseException { + checkQuery("three foo"); + } + + public void testPrefixQuery() throws IOException, ParseException { + checkQuery("multi*"); + } + + public void testFuzzyQuery() throws IOException, ParseException { + checkQuery("multiThree~"); + } + + public void testRangeQuery() throws IOException, ParseException { + checkQuery("{multiA TO multiP}"); + } + + public void testMultiPhraseQuery() throws IOException, ParseException { + checkQuery("\"blueberry pi*\""); + } + + public void testNoMatchQuery() throws IOException, ParseException { + checkQuery("+three +nomatch"); + } + + /* + public void testTermRepeatedQuery() throws IOException, ParseException { + // TODO: this corner case yields different results. + checkQuery("multi* multi* foo"); + } + */ + + /** + * checks if a query yields the same result when executed on + * a single IndexSearcher containing all documents and on a + * MultiSearcher aggregating sub-searchers + * @param queryStr the query to check. + * @throws IOException + * @throws ParseException + */ + private void checkQuery(String queryStr) throws IOException, ParseException { + // check result hit ranking + if(VERBOSE) System.out.println("Query: " + queryStr); + QueryParser queryParser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, new StandardAnalyzer(TEST_VERSION_CURRENT)); + Query query = queryParser.parse(queryStr); + ScoreDoc[] multiSearcherHits = multiSearcher.search(query, null, 1000).scoreDocs; + ScoreDoc[] singleSearcherHits = singleSearcher.search(query, null, 1000).scoreDocs; + assertEquals(multiSearcherHits.length, singleSearcherHits.length); + for (int i = 0; i < multiSearcherHits.length; i++) { + Document docMulti = multiSearcher.doc(multiSearcherHits[i].doc); + Document docSingle = singleSearcher.doc(singleSearcherHits[i].doc); + if(VERBOSE) System.out.println("Multi: " + docMulti.get(FIELD_NAME) + " score=" + + multiSearcherHits[i].score); + if(VERBOSE) System.out.println("Single: " + docSingle.get(FIELD_NAME) + " score=" + + singleSearcherHits[i].score); + assertEquals(multiSearcherHits[i].score, singleSearcherHits[i].score, + 0.001f); + assertEquals(docMulti.get(FIELD_NAME), docSingle.get(FIELD_NAME)); + } + if(VERBOSE) System.out.println(); + } + + /** + * initializes multiSearcher and singleSearcher with the same document set + */ + @Override + public void setUp() throws Exception { + super.setUp(); + // create MultiSearcher from two separate searchers + d1 = newDirectory(); + IndexWriter iw1 = new IndexWriter(d1, newIndexWriterConfig(TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT)).setMergePolicy(newLogMergePolicy())); + addCollection1(iw1); + iw1.close(); + d2 = newDirectory(); + IndexWriter iw2 = new IndexWriter(d2, newIndexWriterConfig(TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT)).setMergePolicy(newLogMergePolicy())); + addCollection2(iw2); + iw2.close(); + + Searchable[] s = new Searchable[2]; + s[0] = new IndexSearcher(d1, true); + s[1] = new IndexSearcher(d2, true); + multiSearcher = new MultiSearcher(s); + + // create IndexSearcher which contains all documents + d =
newDirectory(); + IndexWriter iw = new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT)).setMergePolicy(newLogMergePolicy())); + addCollection1(iw); + addCollection2(iw); + iw.close(); + singleSearcher = new IndexSearcher(d, true); + } + + Directory d1, d2, d; + + @Override + public void tearDown() throws Exception { + multiSearcher.close(); + singleSearcher.close(); + d1.close(); + d2.close(); + d.close(); + super.tearDown(); + } + + private void addCollection1(IndexWriter iw) throws IOException { + add("one blah three", iw); + add("one foo three multiOne", iw); + add("one foobar three multiThree", iw); + add("blueberry pie", iw); + add("blueberry strudel", iw); + add("blueberry pizza", iw); + } + + private void addCollection2(IndexWriter iw) throws IOException { + add("two blah three", iw); + add("two foo xxx multiTwo", iw); + add("two foobar xxx multiThreee", iw); + add("blueberry chewing gum", iw); + add("bluebird pizza", iw); + add("bluebird foobar pizza", iw); + add("piccadilly circus", iw); + } + + private void add(String value, IndexWriter iw) throws IOException { + Document d = new Document(); + d.add(newField(FIELD_NAME, value, Field.Store.YES, Field.Index.ANALYZED)); + iw.addDocument(d); + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java new file mode 100644 index 0000000..bb27968 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java @@ -0,0 +1,713 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.analysis.SimpleAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.Directory; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.io.IOException; +import java.text.Collator; +import java.util.Locale; + +import junit.framework.Assert; + +public class TestMultiTermConstantScore extends BaseTestRangeFilter { + + /** threshold for comparing floats */ + public static final float SCORE_COMP_THRESH = 1e-6f; + + static Directory small; + static IndexReader reader; + + static public void assertEquals(String m, float e, float a) { + Assert.assertEquals(m, e, a, SCORE_COMP_THRESH); + } + + static public void assertEquals(String m, int e, int a) { + Assert.assertEquals(m, e, a); + } + + @BeforeClass + public static void beforeClass() throws Exception { + String[] data = new String[] { "A 1 2 3 4 5 6", "Z 4 5 6", null, + "B 2 4 5 6", "Y 3 5 6", null, "C 3 6", + "X 4 5 6" }; + + small = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, small, + newIndexWriterConfig(TEST_VERSION_CURRENT, + new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setMergePolicy(newLogMergePolicy())); + + for (int i = 0; i < data.length; i++) { + Document doc = new Document(); + doc.add(newField("id", String.valueOf(i), Field.Store.YES, + Field.Index.NOT_ANALYZED));// Field.Keyword("id",String.valueOf(i))); + doc + .add(newField("all", "all", Field.Store.YES, + Field.Index.NOT_ANALYZED));// Field.Keyword("all","all")); + if (null != data[i]) { + doc.add(newField("data", data[i], Field.Store.YES, + Field.Index.ANALYZED));// Field.Text("data",data[i])); + } + writer.addDocument(doc); + } + + reader = writer.getReader(); + writer.close(); + } + + @AfterClass + public static void afterClass() throws Exception { + reader.close(); + small.close(); + reader = null; + small = null; + } + + /** macro for readability */ + public static Query csrq(String f, String l, String h, boolean il, boolean ih) { + TermRangeQuery query = new TermRangeQuery(f, l, h, il, ih); + query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); + return query; + } + + public static Query csrq(String f, String l, String h, boolean il, boolean ih, MultiTermQuery.RewriteMethod method) { + TermRangeQuery query = new TermRangeQuery(f, l, h, il, ih); + query.setRewriteMethod(method); + return query; + } + + /** macro for readability */ + public static Query csrq(String f, String l, String h, boolean il, + boolean ih, Collator c) { + TermRangeQuery query = new TermRangeQuery(f, l, h, il, ih, c); + query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); + return query; + } + + /** macro for readability */ + public static Query cspq(Term prefix) { + PrefixQuery query = new PrefixQuery(prefix); + query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); + return query; + } + + /** macro for readability */ + public static Query cswcq(Term wild) { + WildcardQuery query = new WildcardQuery(wild); + query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); + return query; + } + + @Test + public void testBasics() throws IOException { + QueryUtils.check(csrq("data", "1", "6", T, T)); + 
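+ // QueryUtils.check sanity-checks a single query (equals/hashCode self-consistency); checkUnequal asserts that two queries compare unequal in both directions.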
QueryUtils.check(csrq("data", "A", "Z", T, T)); + QueryUtils.checkUnequal(csrq("data", "1", "6", T, T), csrq("data", "A", + "Z", T, T)); + + QueryUtils.check(cspq(new Term("data", "p*u?"))); + QueryUtils.checkUnequal(cspq(new Term("data", "pre*")), cspq(new Term( + "data", "pres*"))); + + QueryUtils.check(cswcq(new Term("data", "p"))); + QueryUtils.checkUnequal(cswcq(new Term("data", "pre*n?t")), cswcq(new Term( + "data", "pr*t?j"))); + } + + @Test + public void testBasicsRngCollating() throws IOException { + Collator c = Collator.getInstance(Locale.ENGLISH); + QueryUtils.check(csrq("data", "1", "6", T, T, c)); + QueryUtils.check(csrq("data", "A", "Z", T, T, c)); + QueryUtils.checkUnequal(csrq("data", "1", "6", T, T, c), csrq("data", "A", + "Z", T, T, c)); + } + + @Test + public void testEqualScores() throws IOException { + // NOTE: uses index built in *this* setUp + + IndexSearcher search = newSearcher(reader); + + ScoreDoc[] result; + + // some hits match more terms than others, score should be the same + + result = search.search(csrq("data", "1", "6", T, T), null, 1000).scoreDocs; + int numHits = result.length; + assertEquals("wrong number of results", 6, numHits); + float score = result[0].score; + for (int i = 1; i < numHits; i++) { + assertEquals("score for " + i + " was not the same", score, + result[i].score); + } + + result = search.search(csrq("data", "1", "6", T, T, MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE), null, 1000).scoreDocs; + numHits = result.length; + assertEquals("wrong number of results", 6, numHits); + for (int i = 0; i < numHits; i++) { + assertEquals("score for " + i + " was not the same", score, + result[i].score); + } + + search.close(); + } + + @Test + public void testBoost() throws IOException { + // NOTE: uses index built in *this* setUp + + IndexSearcher search = newSearcher(reader); + + // test for correct application of query normalization + // must use a non-score-normalizing method for this. + Query q = csrq("data", "1", "6", T, T); + q.setBoost(100); + search.search(q, null, new Collector() { + private int base = 0; + private Scorer scorer; + @Override + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + } + @Override + public void collect(int doc) throws IOException { + assertEquals("score for doc " + (doc + base) + " was not correct", 1.0f, scorer.score()); + } + @Override + public void setNextReader(IndexReader reader, int docBase) { + base = docBase; + } + @Override + public boolean acceptsDocsOutOfOrder() { + return true; + } + }); + + // + // Ensure that boosting works to score one clause of a query higher + // than another.
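+ // (q1 is boosted down to 0.1, so the document matched by the unboosted q2 must rank first; the last block below boosts q1 by 10 and expects the order to flip.)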
+ // + Query q1 = csrq("data", "A", "A", T, T); // matches document #0 + q1.setBoost(.1f); + Query q2 = csrq("data", "Z", "Z", T, T); // matches document #1 + BooleanQuery bq = new BooleanQuery(true); + bq.add(q1, BooleanClause.Occur.SHOULD); + bq.add(q2, BooleanClause.Occur.SHOULD); + + ScoreDoc[] hits = search.search(bq, null, 1000).scoreDocs; + Assert.assertEquals(1, hits[0].doc); + Assert.assertEquals(0, hits[1].doc); + assertTrue(hits[0].score > hits[1].score); + + q1 = csrq("data", "A", "A", T, T, MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE); // matches document #0 + q1.setBoost(.1f); + q2 = csrq("data", "Z", "Z", T, T, MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE); // matches document #1 + bq = new BooleanQuery(true); + bq.add(q1, BooleanClause.Occur.SHOULD); + bq.add(q2, BooleanClause.Occur.SHOULD); + + hits = search.search(bq, null, 1000).scoreDocs; + Assert.assertEquals(1, hits[0].doc); + Assert.assertEquals(0, hits[1].doc); + assertTrue(hits[0].score > hits[1].score); + + q1 = csrq("data", "A", "A", T, T); // matches document #0 + q1.setBoost(10f); + q2 = csrq("data", "Z", "Z", T, T); // matches document #1 + bq = new BooleanQuery(true); + bq.add(q1, BooleanClause.Occur.SHOULD); + bq.add(q2, BooleanClause.Occur.SHOULD); + + hits = search.search(bq, null, 1000).scoreDocs; + Assert.assertEquals(0, hits[0].doc); + Assert.assertEquals(1, hits[1].doc); + assertTrue(hits[0].score > hits[1].score); + search.close(); + } + + @Test + public void testBooleanOrderUnAffected() throws IOException { + // NOTE: uses index built in *this* setUp + + IndexSearcher search = newSearcher(reader); + + // first do a regular TermRangeQuery which uses term expansion so + // docs with more terms in range get higher scores + + Query rq = new TermRangeQuery("data", "1", "4", T, T); + + ScoreDoc[] expected = search.search(rq, null, 1000).scoreDocs; + int numHits = expected.length; + + // now do a boolean query which also contains a + // ConstantScoreRangeQuery and make sure the order is the same + + BooleanQuery q = new BooleanQuery(); + q.add(rq, BooleanClause.Occur.MUST);// T, F); + q.add(csrq("data", "1", "6", T, T), BooleanClause.Occur.MUST);// T, F); + + ScoreDoc[] actual = search.search(q, null, 1000).scoreDocs; + + assertEquals("wrong number of hits", numHits, actual.length); + for (int i = 0; i < numHits; i++) { + assertEquals("mismatch in docid for hit#" + i, expected[i].doc, + actual[i].doc); + } + + search.close(); + } + + @Test + public void testRangeQueryId() throws IOException { + // NOTE: uses index built in *super* setUp + + IndexReader reader = signedIndexReader; + IndexSearcher search = newSearcher(reader); + + int medId = ((maxId - minId) / 2); + + String minIP = pad(minId); + String maxIP = pad(maxId); + String medIP = pad(medId); + + int numDocs = reader.numDocs(); + + assertEquals("num of docs", numDocs, 1 + maxId - minId); + + ScoreDoc[] result; + + // test id, bounded on both ends + + result = search.search(csrq("id", minIP, maxIP, T, T), null, numDocs).scoreDocs; + assertEquals("find all", numDocs, result.length); + + result = search.search(csrq("id", minIP, maxIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs; + assertEquals("find all", numDocs, result.length); + + result = search.search(csrq("id", minIP, maxIP, T, F), null, numDocs).scoreDocs; + assertEquals("all but last", numDocs - 1, result.length); + + result = search.search(csrq("id", minIP, maxIP, T, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null,
numDocs).scoreDocs; + assertEquals("all but last", numDocs - 1, result.length); + + result = search.search(csrq("id", minIP, maxIP, F, T), null, numDocs).scoreDocs; + assertEquals("all but first", numDocs - 1, result.length); + + result = search.search(csrq("id", minIP, maxIP, F, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs; + assertEquals("all but first", numDocs - 1, result.length); + + result = search.search(csrq("id", minIP, maxIP, F, F), null, numDocs).scoreDocs; + assertEquals("all but ends", numDocs - 2, result.length); + + result = search.search(csrq("id", minIP, maxIP, F, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs; + assertEquals("all but ends", numDocs - 2, result.length); + + result = search.search(csrq("id", medIP, maxIP, T, T), null, numDocs).scoreDocs; + assertEquals("med and up", 1 + maxId - medId, result.length); + + result = search.search(csrq("id", medIP, maxIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs; + assertEquals("med and up", 1 + maxId - medId, result.length); + + result = search.search(csrq("id", minIP, medIP, T, T), null, numDocs).scoreDocs; + assertEquals("up to med", 1 + medId - minId, result.length); + + result = search.search(csrq("id", minIP, medIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs; + assertEquals("up to med", 1 + medId - minId, result.length); + + // unbounded id + + result = search.search(csrq("id", minIP, null, T, F), null, numDocs).scoreDocs; + assertEquals("min and up", numDocs, result.length); + + result = search.search(csrq("id", null, maxIP, F, T), null, numDocs).scoreDocs; + assertEquals("max and down", numDocs, result.length); + + result = search.search(csrq("id", minIP, null, F, F), null, numDocs).scoreDocs; + assertEquals("not min, but up", numDocs - 1, result.length); + + result = search.search(csrq("id", null, maxIP, F, F), null, numDocs).scoreDocs; + assertEquals("not max, but down", numDocs - 1, result.length); + + result = search.search(csrq("id", medIP, maxIP, T, F), null, numDocs).scoreDocs; + assertEquals("med and up, not max", maxId - medId, result.length); + + result = search.search(csrq("id", minIP, medIP, F, T), null, numDocs).scoreDocs; + assertEquals("not min, up to med", medId - minId, result.length); + + // very small sets + + result = search.search(csrq("id", minIP, minIP, F, F), null, numDocs).scoreDocs; + assertEquals("min,min,F,F", 0, result.length); + + result = search.search(csrq("id", minIP, minIP, F, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs; + assertEquals("min,min,F,F", 0, result.length); + + result = search.search(csrq("id", medIP, medIP, F, F), null, numDocs).scoreDocs; + assertEquals("med,med,F,F", 0, result.length); + + result = search.search(csrq("id", medIP, medIP, F, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs; + assertEquals("med,med,F,F", 0, result.length); + + result = search.search(csrq("id", maxIP, maxIP, F, F), null, numDocs).scoreDocs; + assertEquals("max,max,F,F", 0, result.length); + + result = search.search(csrq("id", maxIP, maxIP, F, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs; + assertEquals("max,max,F,F", 0, result.length); + + result = search.search(csrq("id", minIP, minIP, T, T), null, numDocs).scoreDocs; + assertEquals("min,min,T,T", 1, result.length); + + result = search.search(csrq("id", minIP, minIP, T, T, 
MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs; + assertEquals("min,min,T,T", 1, result.length); + + result = search.search(csrq("id", null, minIP, F, T), null, numDocs).scoreDocs; + assertEquals("nul,min,F,T", 1, result.length); + + result = search.search(csrq("id", null, minIP, F, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs; + assertEquals("nul,min,F,T", 1, result.length); + + result = search.search(csrq("id", maxIP, maxIP, T, T), null, numDocs).scoreDocs; + assertEquals("max,max,T,T", 1, result.length); + + result = search.search(csrq("id", maxIP, maxIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs; + assertEquals("max,max,T,T", 1, result.length); + + result = search.search(csrq("id", maxIP, null, T, F), null, numDocs).scoreDocs; + assertEquals("max,nul,T,T", 1, result.length); + + result = search.search(csrq("id", maxIP, null, T, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs; + assertEquals("max,nul,T,T", 1, result.length); + + result = search.search(csrq("id", medIP, medIP, T, T), null, numDocs).scoreDocs; + assertEquals("med,med,T,T", 1, result.length); + + result = search.search(csrq("id", medIP, medIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs; + assertEquals("med,med,T,T", 1, result.length); + + search.close(); + } + + @Test + public void testRangeQueryIdCollating() throws IOException { + // NOTE: uses index build in *super* setUp + + IndexReader reader = signedIndexReader; + IndexSearcher search = newSearcher(reader); + + int medId = ((maxId - minId) / 2); + + String minIP = pad(minId); + String maxIP = pad(maxId); + String medIP = pad(medId); + + int numDocs = reader.numDocs(); + + assertEquals("num of docs", numDocs, 1 + maxId - minId); + + ScoreDoc[] result; + + Collator c = Collator.getInstance(Locale.ENGLISH); + + // test id, bounded on both ends + + result = search.search(csrq("id", minIP, maxIP, T, T, c), null, numDocs).scoreDocs; + assertEquals("find all", numDocs, result.length); + + result = search.search(csrq("id", minIP, maxIP, T, F, c), null, numDocs).scoreDocs; + assertEquals("all but last", numDocs - 1, result.length); + + result = search.search(csrq("id", minIP, maxIP, F, T, c), null, numDocs).scoreDocs; + assertEquals("all but first", numDocs - 1, result.length); + + result = search.search(csrq("id", minIP, maxIP, F, F, c), null, numDocs).scoreDocs; + assertEquals("all but ends", numDocs - 2, result.length); + + result = search.search(csrq("id", medIP, maxIP, T, T, c), null, numDocs).scoreDocs; + assertEquals("med and up", 1 + maxId - medId, result.length); + + result = search.search(csrq("id", minIP, medIP, T, T, c), null, numDocs).scoreDocs; + assertEquals("up to med", 1 + medId - minId, result.length); + + // unbounded id + + result = search.search(csrq("id", minIP, null, T, F, c), null, numDocs).scoreDocs; + assertEquals("min and up", numDocs, result.length); + + result = search.search(csrq("id", null, maxIP, F, T, c), null, numDocs).scoreDocs; + assertEquals("max and down", numDocs, result.length); + + result = search.search(csrq("id", minIP, null, F, F, c), null, numDocs).scoreDocs; + assertEquals("not min, but up", numDocs - 1, result.length); + + result = search.search(csrq("id", null, maxIP, F, F, c), null, numDocs).scoreDocs; + assertEquals("not max, but down", numDocs - 1, result.length); + + result = search.search(csrq("id", medIP, maxIP, T, F, c), null, 
numDocs).scoreDocs; + assertEquals("med and up, not max", maxId - medId, result.length); + + result = search.search(csrq("id", minIP, medIP, F, T, c), null, numDocs).scoreDocs; + assertEquals("not min, up to med", medId - minId, result.length); + + // very small sets + + result = search.search(csrq("id", minIP, minIP, F, F, c), null, numDocs).scoreDocs; + assertEquals("min,min,F,F,c", 0, result.length); + result = search.search(csrq("id", medIP, medIP, F, F, c), null, numDocs).scoreDocs; + assertEquals("med,med,F,F,c", 0, result.length); + result = search.search(csrq("id", maxIP, maxIP, F, F, c), null, numDocs).scoreDocs; + assertEquals("max,max,F,F,c", 0, result.length); + + result = search.search(csrq("id", minIP, minIP, T, T, c), null, numDocs).scoreDocs; + assertEquals("min,min,T,T,c", 1, result.length); + result = search.search(csrq("id", null, minIP, F, T, c), null, numDocs).scoreDocs; + assertEquals("nul,min,F,T,c", 1, result.length); + + result = search.search(csrq("id", maxIP, maxIP, T, T, c), null, numDocs).scoreDocs; + assertEquals("max,max,T,T,c", 1, result.length); + result = search.search(csrq("id", maxIP, null, T, F, c), null, numDocs).scoreDocs; + assertEquals("max,nul,T,T,c", 1, result.length); + + result = search.search(csrq("id", medIP, medIP, T, T, c), null, numDocs).scoreDocs; + assertEquals("med,med,T,T,c", 1, result.length); + + search.close(); + } + + @Test + public void testRangeQueryRand() throws IOException { + // NOTE: uses index build in *super* setUp + + IndexReader reader = signedIndexReader; + IndexSearcher search = newSearcher(reader); + + String minRP = pad(signedIndexDir.minR); + String maxRP = pad(signedIndexDir.maxR); + + int numDocs = reader.numDocs(); + + assertEquals("num of docs", numDocs, 1 + maxId - minId); + + ScoreDoc[] result; + + // test extremes, bounded on both ends + + result = search.search(csrq("rand", minRP, maxRP, T, T), null, numDocs).scoreDocs; + assertEquals("find all", numDocs, result.length); + + result = search.search(csrq("rand", minRP, maxRP, T, F), null, numDocs).scoreDocs; + assertEquals("all but biggest", numDocs - 1, result.length); + + result = search.search(csrq("rand", minRP, maxRP, F, T), null, numDocs).scoreDocs; + assertEquals("all but smallest", numDocs - 1, result.length); + + result = search.search(csrq("rand", minRP, maxRP, F, F), null, numDocs).scoreDocs; + assertEquals("all but extremes", numDocs - 2, result.length); + + // unbounded + + result = search.search(csrq("rand", minRP, null, T, F), null, numDocs).scoreDocs; + assertEquals("smallest and up", numDocs, result.length); + + result = search.search(csrq("rand", null, maxRP, F, T), null, numDocs).scoreDocs; + assertEquals("biggest and down", numDocs, result.length); + + result = search.search(csrq("rand", minRP, null, F, F), null, numDocs).scoreDocs; + assertEquals("not smallest, but up", numDocs - 1, result.length); + + result = search.search(csrq("rand", null, maxRP, F, F), null, numDocs).scoreDocs; + assertEquals("not biggest, but down", numDocs - 1, result.length); + + // very small sets + + result = search.search(csrq("rand", minRP, minRP, F, F), null, numDocs).scoreDocs; + assertEquals("min,min,F,F", 0, result.length); + result = search.search(csrq("rand", maxRP, maxRP, F, F), null, numDocs).scoreDocs; + assertEquals("max,max,F,F", 0, result.length); + + result = search.search(csrq("rand", minRP, minRP, T, T), null, numDocs).scoreDocs; + assertEquals("min,min,T,T", 1, result.length); + result = search.search(csrq("rand", null, minRP, F, T), null, 
numDocs).scoreDocs; + assertEquals("nul,min,F,T", 1, result.length); + + result = search.search(csrq("rand", maxRP, maxRP, T, T), null, numDocs).scoreDocs; + assertEquals("max,max,T,T", 1, result.length); + result = search.search(csrq("rand", maxRP, null, T, F), null, numDocs).scoreDocs; + assertEquals("max,nul,T,T", 1, result.length); + + search.close(); + } + + @Test + public void testRangeQueryRandCollating() throws IOException { + // NOTE: uses index build in *super* setUp + + // using the unsigned index because collation seems to ignore hyphens + IndexReader reader = unsignedIndexReader; + IndexSearcher search = newSearcher(reader); + + String minRP = pad(unsignedIndexDir.minR); + String maxRP = pad(unsignedIndexDir.maxR); + + int numDocs = reader.numDocs(); + + assertEquals("num of docs", numDocs, 1 + maxId - minId); + + ScoreDoc[] result; + + Collator c = Collator.getInstance(Locale.ENGLISH); + + // test extremes, bounded on both ends + + result = search.search(csrq("rand", minRP, maxRP, T, T, c), null, numDocs).scoreDocs; + assertEquals("find all", numDocs, result.length); + + result = search.search(csrq("rand", minRP, maxRP, T, F, c), null, numDocs).scoreDocs; + assertEquals("all but biggest", numDocs - 1, result.length); + + result = search.search(csrq("rand", minRP, maxRP, F, T, c), null, numDocs).scoreDocs; + assertEquals("all but smallest", numDocs - 1, result.length); + + result = search.search(csrq("rand", minRP, maxRP, F, F, c), null, numDocs).scoreDocs; + assertEquals("all but extremes", numDocs - 2, result.length); + + // unbounded + + result = search.search(csrq("rand", minRP, null, T, F, c), null, numDocs).scoreDocs; + assertEquals("smallest and up", numDocs, result.length); + + result = search.search(csrq("rand", null, maxRP, F, T, c), null, numDocs).scoreDocs; + assertEquals("biggest and down", numDocs, result.length); + + result = search.search(csrq("rand", minRP, null, F, F, c), null, numDocs).scoreDocs; + assertEquals("not smallest, but up", numDocs - 1, result.length); + + result = search.search(csrq("rand", null, maxRP, F, F, c), null, numDocs).scoreDocs; + assertEquals("not biggest, but down", numDocs - 1, result.length); + + // very small sets + + result = search.search(csrq("rand", minRP, minRP, F, F, c), null, numDocs).scoreDocs; + assertEquals("min,min,F,F,c", 0, result.length); + result = search.search(csrq("rand", maxRP, maxRP, F, F, c), null, numDocs).scoreDocs; + assertEquals("max,max,F,F,c", 0, result.length); + + result = search.search(csrq("rand", minRP, minRP, T, T, c), null, numDocs).scoreDocs; + assertEquals("min,min,T,T,c", 1, result.length); + result = search.search(csrq("rand", null, minRP, F, T, c), null, numDocs).scoreDocs; + assertEquals("nul,min,F,T,c", 1, result.length); + + result = search.search(csrq("rand", maxRP, maxRP, T, T, c), null, numDocs).scoreDocs; + assertEquals("max,max,T,T,c", 1, result.length); + result = search.search(csrq("rand", maxRP, null, T, F, c), null, numDocs).scoreDocs; + assertEquals("max,nul,T,T,c", 1, result.length); + + search.close(); + } + + @Test + public void testFarsi() throws Exception { + + /* build an index */ + Directory farsiIndex = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, farsiIndex, new SimpleAnalyzer(TEST_VERSION_CURRENT)); + Document doc = new Document(); + doc.add(newField("content", "\u0633\u0627\u0628", Field.Store.YES, + Field.Index.NOT_ANALYZED)); + doc + .add(newField("body", "body", Field.Store.YES, + Field.Index.NOT_ANALYZED)); + writer.addDocument(doc); + 
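+ // Only one document is indexed, with "\u0633\u0627\u0628" as its single content term; each collated range query below must therefore match exactly one document or none.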
+ IndexReader reader = writer.getReader(); + writer.close(); + + IndexSearcher search = newSearcher(reader); + + // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in + // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi + // characters properly. + Collator c = Collator.getInstance(new Locale("ar")); + + // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi + // orders the U+0698 character before the U+0633 character, so the single + // index Term below should NOT be returned by a ConstantScoreRangeQuery + // with a Farsi Collator (or an Arabic one for the case when Farsi is + // not supported). + ScoreDoc[] result = search.search(csrq("content", "\u062F", "\u0698", T, T, + c), null, 1000).scoreDocs; + assertEquals("The index Term should not be included.", 0, result.length); + + result = search.search(csrq("content", "\u0633", "\u0638", T, T, c), null, + 1000).scoreDocs; + assertEquals("The index Term should be included.", 1, result.length); + search.close(); + reader.close(); + farsiIndex.close(); + } + + @Test + public void testDanish() throws Exception { + + /* build an index */ + Directory danishIndex = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, danishIndex, new SimpleAnalyzer(TEST_VERSION_CURRENT)); + + // Danish collation orders the words below in the given order + // (example taken from TestSort.testInternationalSort() ). + String[] words = { "H\u00D8T", "H\u00C5T", "MAND" }; + for (int docnum = 0 ; docnum < words.length ; ++docnum) { + Document doc = new Document(); + doc.add(newField("content", words[docnum], + Field.Store.YES, Field.Index.NOT_ANALYZED)); + doc.add(newField("body", "body", + Field.Store.YES, Field.Index.NOT_ANALYZED)); + writer.addDocument(doc); + } + IndexReader reader = writer.getReader(); + writer.close(); + + IndexSearcher search = newSearcher(reader); + + Collator c = Collator.getInstance(new Locale("da", "dk")); + + // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ], + // but Danish collation does. + ScoreDoc[] result = search.search + (csrq("content", "H\u00D8T", "MAND", F, F, c), null, 1000).scoreDocs; + assertEquals("The index Term should be included.", 1, result.length); + + result = search.search + (csrq("content", "H\u00C5T", "MAND", F, F, c), null, 1000).scoreDocs; + assertEquals("The index Term should not be included.", 0, result.length); + search.close(); + reader.close(); + danishIndex.close(); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestMultiThreadTermVectors.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestMultiThreadTermVectors.java new file mode 100644 index 0000000..adb9474 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestMultiThreadTermVectors.java @@ -0,0 +1,191 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.*; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.TermFreqVector; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.English; + +import java.io.IOException; + +public class TestMultiThreadTermVectors extends LuceneTestCase { + private Directory directory; + public int numDocs = 100; + public int numThreads = 3; + + @Override + public void setUp() throws Exception { + super.setUp(); + directory = newDirectory(); + IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); + //writer.setUseCompoundFile(false); + //writer.infoStream = System.out; + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + Fieldable fld = newField("field", English.intToEnglish(i), Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.YES); + doc.add(fld); + writer.addDocument(doc); + } + writer.close(); + + } + + @Override + public void tearDown() throws Exception { + directory.close(); + super.tearDown(); + } + + public void test() throws Exception { + + IndexReader reader = null; + + try { + reader = IndexReader.open(directory, true); + for(int i = 1; i <= numThreads; i++) + testTermPositionVectors(reader, i); + + + } + catch (IOException ioe) { + fail(ioe.getMessage()); + } + finally { + if (reader != null) { + try { + /** close the opened reader */ + reader.close(); + } catch (IOException ioe) { + ioe.printStackTrace(); + } + } + } + } + + public void testTermPositionVectors(final IndexReader reader, int threadCount) throws Exception { + MultiThreadTermVectorsReader[] mtr = new MultiThreadTermVectorsReader[threadCount]; + for (int i = 0; i < threadCount; i++) { + mtr[i] = new MultiThreadTermVectorsReader(); + mtr[i].init(reader); + } + + + /** run until all threads finished */ + int threadsAlive = mtr.length; + while (threadsAlive > 0) { + //System.out.println("Threads alive"); + Thread.sleep(10); + threadsAlive = mtr.length; + for (int i = 0; i < mtr.length; i++) { + if (mtr[i].isAlive() == true) { + break; + } + + threadsAlive--; + } + } + + long totalTime = 0L; + for (int i = 0; i < mtr.length; i++) { + totalTime += mtr[i].timeElapsed; + mtr[i] = null; + } + + //System.out.println("threadcount: " + mtr.length + " average term vector time: " + totalTime/mtr.length); + + } + +} + +class MultiThreadTermVectorsReader implements Runnable { + + private IndexReader reader = null; + private Thread t = null; + + private final int runsToDo = 100; + long timeElapsed = 0; + + + public void init(IndexReader reader) { + this.reader = reader; + timeElapsed = 0; + t=new Thread(this); + t.start(); + } + + public boolean isAlive() { + if (t == null) return false; + + return t.isAlive(); + } + + public void run() { + try { + // run the test 100 times + for (int i = 0; i < runsToDo; i++) + testTermVectors(); + } + catch (Exception e) { + e.printStackTrace(); + } + 
return; + } + + private void testTermVectors() throws Exception { + // check: + int numDocs = reader.numDocs(); + long start = 0L; + for (int docId = 0; docId < numDocs; docId++) { + start = System.currentTimeMillis(); + TermFreqVector [] vectors = reader.getTermFreqVectors(docId); + timeElapsed += System.currentTimeMillis()-start; + + // verify vectors result + verifyVectors(vectors, docId); + + start = System.currentTimeMillis(); + TermFreqVector vector = reader.getTermFreqVector(docId, "field"); + timeElapsed += System.currentTimeMillis()-start; + + vectors = new TermFreqVector[1]; + vectors[0] = vector; + + verifyVectors(vectors, docId); + + } + } + + private void verifyVectors(TermFreqVector[] vectors, int num) { + StringBuilder temp = new StringBuilder(); + String[] terms = null; + for (int i = 0; i < vectors.length; i++) { + terms = vectors[i].getTerms(); + for (int z = 0; z < terms.length; z++) { + temp.append(terms[z]); + } + } + + if (!English.intToEnglish(num).trim().equals(temp.toString().trim())) + System.out.println("wrong term result"); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestMultiValuedNumericRangeQuery.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestMultiValuedNumericRangeQuery.java new file mode 100644 index 0000000..9a22ce5 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestMultiValuedNumericRangeQuery.java @@ -0,0 +1,81 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.Locale; +import java.text.DecimalFormat; +import java.text.DecimalFormatSymbols; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.NumericField; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; + +public class TestMultiValuedNumericRangeQuery extends LuceneTestCase { + + /** Tests NumericRangeQuery on a multi-valued field (multiple numeric values per document). + * This test ensures that a classical TermRangeQuery returns exactly the same document numbers as + * NumericRangeQuery (see SOLR-1322 for discussion) and the multiple precision terms per numeric value + * do not interfere with multiple numeric values.
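+ * Each document holds between one and ten random int values; every value is indexed twice, as a zero-padded string ("asc") and as a trie-encoded NumericField ("trie"), so equal hit counts for the same random range over both fields confirm the two encodings agree.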
+ */ + public void testMultiValuedNRQ() throws Exception { + Directory directory = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, directory, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)) + .setMaxBufferedDocs(_TestUtil.nextInt(random, 50, 1000))); + + DecimalFormat format = new DecimalFormat("00000000000", new DecimalFormatSymbols(Locale.US)); + + int num = atLeast(500); + for (int l = 0; l < num; l++) { + Document doc = new Document(); + for (int m=0, c=random.nextInt(10); m<=c; m++) { + int value = random.nextInt(Integer.MAX_VALUE); + doc.add(newField("asc", format.format(value), Field.Store.NO, Field.Index.NOT_ANALYZED)); + doc.add(new NumericField("trie", Field.Store.NO, true).setIntValue(value)); + } + writer.addDocument(doc); + } + IndexReader reader = writer.getReader(); + writer.close(); + + IndexSearcher searcher=newSearcher(reader); + num = atLeast(50); + for (int i = 0; i < num; i++) { + int lower=random.nextInt(Integer.MAX_VALUE); + int upper=random.nextInt(Integer.MAX_VALUE); + if (lower>upper) { + int a=lower; lower=upper; upper=a; + } + TermRangeQuery cq=new TermRangeQuery("asc", format.format(lower), format.format(upper), true, true); + NumericRangeQuery<Integer> tq=NumericRangeQuery.newIntRange("trie", lower, upper, true, true); + TopDocs trTopDocs = searcher.search(cq, 1); + TopDocs nrTopDocs = searcher.search(tq, 1); + assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", trTopDocs.totalHits, nrTopDocs.totalHits ); + } + searcher.close(); + reader.close(); + directory.close(); + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestNot.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestNot.java new file mode 100644 index 0000000..7a62f94 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestNot.java @@ -0,0 +1,59 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util.LuceneTestCase; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.store.Directory; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.SimpleAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; + +/** NOT query unit test.
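+ * Indexes a single document containing both "a" and "b" and verifies that the query "a NOT b" matches nothing.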
+ * + * + * @version $Revision: 1091277 $ + */ +public class TestNot extends LuceneTestCase { + + public void testNot() throws Exception { + Directory store = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, store); + + Document d1 = new Document(); + d1.add(newField("field", "a b", Field.Store.YES, Field.Index.ANALYZED)); + + writer.addDocument(d1); + IndexReader reader = writer.getReader(); + + IndexSearcher searcher = newSearcher(reader); + QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(random)); + Query query = parser.parse("a NOT b"); + //System.out.println(query); + ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(0, hits.length); + writer.close(); + searcher.close(); + reader.close(); + store.close(); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java new file mode 100644 index 0000000..449d92b --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java @@ -0,0 +1,594 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.NumericField; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.NumericUtils; +import org.apache.lucene.util._TestUtil; + +import org.junit.Test; +import org.junit.AfterClass; +import org.junit.BeforeClass; + +public class TestNumericRangeQuery32 extends LuceneTestCase { + // distance of entries + private static final int distance = 6666; + // shift the starting of the values to the left, to also have negative values: + private static final int startOffset = - 1 << 15; + // number of docs to generate for testing + private static final int noDocs = atLeast(5000); + + private static Directory directory = null; + private static IndexReader reader = null; + private static IndexSearcher searcher = null; + + @BeforeClass + public static void beforeClass() throws Exception { + directory = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, directory, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)) + .setMaxBufferedDocs(_TestUtil.nextInt(random, 50, 1000)) + .setMergePolicy(newLogMergePolicy())); + + NumericField + field8 = new NumericField("field8", 8, Field.Store.YES, true), + field4 = new NumericField("field4", 4, Field.Store.YES, true), + field2 = new NumericField("field2", 2, Field.Store.YES, true), + fieldNoTrie = new NumericField("field"+Integer.MAX_VALUE, Integer.MAX_VALUE, Field.Store.YES, true), + ascfield8 = new NumericField("ascfield8", 8, Field.Store.NO, true), + ascfield4 = new NumericField("ascfield4", 4, Field.Store.NO, true), + ascfield2 = new NumericField("ascfield2", 2, Field.Store.NO, true); + + Document doc = new Document(); + // add fields that have a distance, to test general functionality + doc.add(field8); doc.add(field4); doc.add(field2); doc.add(fieldNoTrie); + // add ascending fields with a distance of 1, beginning at -noDocs/2 to test the correct splitting of range and inclusive/exclusive + doc.add(ascfield8); doc.add(ascfield4); doc.add(ascfield2); + + // Add a series of noDocs docs with increasing int values + for (int l=0; l q = NumericRangeQuery.newIntRange(field, precisionStep, lower, upper, true, true); + NumericRangeFilter<Integer> f = NumericRangeFilter.newIntRange(field, precisionStep, lower, upper, true, true); + int lastTerms = 0; + for (byte i=0; i<3; i++) { + TopDocs topDocs; + int terms; + String type; + q.clearTotalNumberOfTerms(); + f.clearTotalNumberOfTerms(); + switch (i) { + case 0: + type = " (constant score filter rewrite)"; + q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); + topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER); + terms = q.getTotalNumberOfTerms(); + break; + case 1: + type = " (constant score boolean rewrite)"; + q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE); + topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER); + terms = q.getTotalNumberOfTerms(); + break; + case 2: + type = " (filter)"; + topDocs = searcher.search(new MatchAllDocsQuery(), f, noDocs, Sort.INDEXORDER); + terms = f.getTotalNumberOfTerms(); + break; + default: + return; + } + if (VERBOSE) System.out.println("Found "+terms+" distinct terms in range for
field '"+field+"'"+type+"."); + ScoreDoc[] sd = topDocs.scoreDocs; + assertNotNull(sd); + assertEquals("Score doc count"+type, count, sd.length ); + Document doc=searcher.doc(sd[0].doc); + assertEquals("First doc"+type, 2*distance+startOffset, Integer.parseInt(doc.get(field)) ); + doc=searcher.doc(sd[sd.length-1].doc); + assertEquals("Last doc"+type, (1+count)*distance+startOffset, Integer.parseInt(doc.get(field)) ); + if (i>0 && + (searcher.getIndexReader().getSequentialSubReaders() == null || + searcher.getIndexReader().getSequentialSubReaders().length == 1)) { + assertEquals("Distinct term number is equal for all query types", lastTerms, terms); + } + lastTerms = terms; + } + } + + @Test + public void testRange_8bit() throws Exception { + testRange(8); + } + + @Test + public void testRange_4bit() throws Exception { + testRange(4); + } + + @Test + public void testRange_2bit() throws Exception { + testRange(2); + } + + @Test + public void testInverseRange() throws Exception { + NumericRangeFilter<Integer> f = NumericRangeFilter.newIntRange("field8", 8, 1000, -1000, true, true); + assertSame("An inverse range should return the EMPTY_DOCIDSET instance", DocIdSet.EMPTY_DOCIDSET, f.getDocIdSet(searcher.getIndexReader())); + f = NumericRangeFilter.newIntRange("field8", 8, Integer.MAX_VALUE, null, false, false); + assertSame("An exclusive range starting with Integer.MAX_VALUE should return the EMPTY_DOCIDSET instance", + DocIdSet.EMPTY_DOCIDSET, f.getDocIdSet(searcher.getIndexReader())); + f = NumericRangeFilter.newIntRange("field8", 8, null, Integer.MIN_VALUE, false, false); + assertSame("An exclusive range ending with Integer.MIN_VALUE should return the EMPTY_DOCIDSET instance", + DocIdSet.EMPTY_DOCIDSET, f.getDocIdSet(searcher.getIndexReader())); + } + + @Test + public void testOneMatchQuery() throws Exception { + NumericRangeQuery<Integer> q = NumericRangeQuery.newIntRange("ascfield8", 8, 1000, 1000, true, true); + assertSame(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE, q.getRewriteMethod()); + TopDocs topDocs = searcher.search(q, noDocs); + ScoreDoc[] sd = topDocs.scoreDocs; + assertNotNull(sd); + assertEquals("Score doc count", 1, sd.length ); + } + + private void testLeftOpenRange(int precisionStep) throws Exception { + String field="field"+precisionStep; + int count=3000; + int upper=(count-1)*distance + (distance/3) + startOffset; + NumericRangeQuery<Integer> q=NumericRangeQuery.newIntRange(field, precisionStep, null, upper, true, true); + TopDocs topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER); + if (VERBOSE) System.out.println("Found "+q.getTotalNumberOfTerms()+" distinct terms in left open range for field '"+field+"'."); + ScoreDoc[] sd = topDocs.scoreDocs; + assertNotNull(sd); + assertEquals("Score doc count", count, sd.length ); + Document doc=searcher.doc(sd[0].doc); + assertEquals("First doc", startOffset, Integer.parseInt(doc.get(field)) ); + doc=searcher.doc(sd[sd.length-1].doc); + assertEquals("Last doc", (count-1)*distance+startOffset, Integer.parseInt(doc.get(field)) ); + + q=NumericRangeQuery.newIntRange(field, precisionStep, null, upper, false, true); + topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER); + sd = topDocs.scoreDocs; + assertNotNull(sd); + assertEquals("Score doc count", count, sd.length ); + doc=searcher.doc(sd[0].doc); + assertEquals("First doc", startOffset, Integer.parseInt(doc.get(field)) ); + doc=searcher.doc(sd[sd.length-1].doc); + assertEquals("Last doc", (count-1)*distance+startOffset, Integer.parseInt(doc.get(field)) ); + } + + @Test + public void
testLeftOpenRange_8bit() throws Exception { + testLeftOpenRange(8); + } + + @Test + public void testLeftOpenRange_4bit() throws Exception { + testLeftOpenRange(4); + } + + @Test + public void testLeftOpenRange_2bit() throws Exception { + testLeftOpenRange(2); + } + + private void testRightOpenRange(int precisionStep) throws Exception { + String field="field"+precisionStep; + int count=3000; + int lower=(count-1)*distance + (distance/3) +startOffset; + NumericRangeQuery q=NumericRangeQuery.newIntRange(field, precisionStep, lower, null, true, true); + TopDocs topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER); + if (VERBOSE) System.out.println("Found "+q.getTotalNumberOfTerms()+" distinct terms in right open range for field '"+field+"'."); + ScoreDoc[] sd = topDocs.scoreDocs; + assertNotNull(sd); + assertEquals("Score doc count", noDocs-count, sd.length ); + Document doc=searcher.doc(sd[0].doc); + assertEquals("First doc", count*distance+startOffset, Integer.parseInt(doc.get(field)) ); + doc=searcher.doc(sd[sd.length-1].doc); + assertEquals("Last doc", (noDocs-1)*distance+startOffset, Integer.parseInt(doc.get(field)) ); + + q=NumericRangeQuery.newIntRange(field, precisionStep, lower, null, true, false); + topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER); + sd = topDocs.scoreDocs; + assertNotNull(sd); + assertEquals("Score doc count", noDocs-count, sd.length ); + doc=searcher.doc(sd[0].doc); + assertEquals("First doc", count*distance+startOffset, Integer.parseInt(doc.get(field)) ); + doc=searcher.doc(sd[sd.length-1].doc); + assertEquals("Last doc", (noDocs-1)*distance+startOffset, Integer.parseInt(doc.get(field)) ); + } + + @Test + public void testRightOpenRange_8bit() throws Exception { + testRightOpenRange(8); + } + + @Test + public void testRightOpenRange_4bit() throws Exception { + testRightOpenRange(4); + } + + @Test + public void testRightOpenRange_2bit() throws Exception { + testRightOpenRange(2); + } + + @Test + public void testInfiniteValues() throws Exception { + Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); + Document doc = new Document(); + doc.add(new NumericField("float").setFloatValue(Float.NEGATIVE_INFINITY)); + doc.add(new NumericField("int").setIntValue(Integer.MIN_VALUE)); + writer.addDocument(doc); + + doc = new Document(); + doc.add(new NumericField("float").setFloatValue(Float.POSITIVE_INFINITY)); + doc.add(new NumericField("int").setIntValue(Integer.MAX_VALUE)); + writer.addDocument(doc); + + doc = new Document(); + doc.add(new NumericField("float").setFloatValue(0.0f)); + doc.add(new NumericField("int").setIntValue(0)); + writer.addDocument(doc); + writer.close(); + + IndexSearcher s = new IndexSearcher(dir); + + Query q=NumericRangeQuery.newIntRange("int", null, null, true, true); + TopDocs topDocs = s.search(q, 10); + assertEquals("Score doc count", 3, topDocs.scoreDocs.length ); + + q=NumericRangeQuery.newIntRange("int", null, null, false, false); + topDocs = s.search(q, 10); + assertEquals("Score doc count", 3, topDocs.scoreDocs.length ); + + q=NumericRangeQuery.newIntRange("int", Integer.MIN_VALUE, Integer.MAX_VALUE, true, true); + topDocs = s.search(q, 10); + assertEquals("Score doc count", 3, topDocs.scoreDocs.length ); + + q=NumericRangeQuery.newIntRange("int", Integer.MIN_VALUE, Integer.MAX_VALUE, false, false); + topDocs = s.search(q, 10); + assertEquals("Score doc count", 1, topDocs.scoreDocs.length ); + + 
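+    // Editorial aside, not part of the imported source: the four int queries
+    // above pin down open-bound semantics. A null endpoint is treated as
+    // unbounded, so even the Integer.MIN_VALUE and Integer.MAX_VALUE documents
+    // match and the inclusive/exclusive flags have nothing to bite on (3 hits
+    // either way), while explicit finite extremes honor the flags: the
+    // exclusive (MIN_VALUE, MAX_VALUE) range drops both extreme documents and
+    // matches only the value-0 document. The float queries below assume the
+    // same contract for newFloatRange.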
+    q=NumericRangeQuery.newFloatRange("float", null, null, true, true);
+    topDocs = s.search(q, 10);
+    assertEquals("Score doc count", 3, topDocs.scoreDocs.length );
+
+    q=NumericRangeQuery.newFloatRange("float", null, null, false, false);
+    topDocs = s.search(q, 10);
+    assertEquals("Score doc count", 3, topDocs.scoreDocs.length );
+
+    s.close();
+    dir.close();
+  }
+
+  private void testRandomTrieAndClassicRangeQuery(int precisionStep) throws Exception {
+    String field="field"+precisionStep;
+    int termCountT=0,termCountC=0;
+    int num = atLeast(10);
+    for (int i = 0; i < num; i++) {
+      int lower=(int)(random.nextDouble()*noDocs*distance)+startOffset;
+      int upper=(int)(random.nextDouble()*noDocs*distance)+startOffset;
+      if (lower>upper) {
+        int a=lower; lower=upper; upper=a;
+      }
+      // test inclusive range
+      NumericRangeQuery<Integer> tq=NumericRangeQuery.newIntRange(field, precisionStep, lower, upper, true, true);
+      TermRangeQuery cq=new TermRangeQuery(field, NumericUtils.intToPrefixCoded(lower), NumericUtils.intToPrefixCoded(upper), true, true);
+      TopDocs tTopDocs = searcher.search(tq, 1);
+      TopDocs cTopDocs = searcher.search(cq, 1);
+      assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
+      termCountT += tq.getTotalNumberOfTerms();
+      termCountC += cq.getTotalNumberOfTerms();
+      // test exclusive range
+      tq=NumericRangeQuery.newIntRange(field, precisionStep, lower, upper, false, false);
+      cq=new TermRangeQuery(field, NumericUtils.intToPrefixCoded(lower), NumericUtils.intToPrefixCoded(upper), false, false);
+      tTopDocs = searcher.search(tq, 1);
+      cTopDocs = searcher.search(cq, 1);
+      assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
+      termCountT += tq.getTotalNumberOfTerms();
+      termCountC += cq.getTotalNumberOfTerms();
+      // test left exclusive range
+      tq=NumericRangeQuery.newIntRange(field, precisionStep, lower, upper, false, true);
+      cq=new TermRangeQuery(field, NumericUtils.intToPrefixCoded(lower), NumericUtils.intToPrefixCoded(upper), false, true);
+      tTopDocs = searcher.search(tq, 1);
+      cTopDocs = searcher.search(cq, 1);
+      assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
+      termCountT += tq.getTotalNumberOfTerms();
+      termCountC += cq.getTotalNumberOfTerms();
+      // test right exclusive range
+      tq=NumericRangeQuery.newIntRange(field, precisionStep, lower, upper, true, false);
+      cq=new TermRangeQuery(field, NumericUtils.intToPrefixCoded(lower), NumericUtils.intToPrefixCoded(upper), true, false);
+      tTopDocs = searcher.search(tq, 1);
+      cTopDocs = searcher.search(cq, 1);
+      assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
+      termCountT += tq.getTotalNumberOfTerms();
+      termCountC += cq.getTotalNumberOfTerms();
+    }
+    if (precisionStep == Integer.MAX_VALUE &&
+        (searcher.getIndexReader().getSequentialSubReaders() == null ||
+         searcher.getIndexReader().getSequentialSubReaders().length == 1)) {
+      assertEquals("Total number of terms should be equal for unlimited precStep", termCountT, termCountC);
+    } else if (VERBOSE) {
+      System.out.println("Average number of terms during random search on '" + field + "':");
+      System.out.println(" Trie query: " + (((double)termCountT)/(num * 4)));
+      System.out.println(" Classical query: " + (((double)termCountC)/(num * 4)));
+    }
+  }
+
+  @Test
+  public void 
testRandomTrieAndClassicRangeQuery_8bit() throws Exception { + testRandomTrieAndClassicRangeQuery(8); + } + + @Test + public void testRandomTrieAndClassicRangeQuery_4bit() throws Exception { + testRandomTrieAndClassicRangeQuery(4); + } + + @Test + public void testRandomTrieAndClassicRangeQuery_2bit() throws Exception { + testRandomTrieAndClassicRangeQuery(2); + } + + @Test + public void testRandomTrieAndClassicRangeQuery_NoTrie() throws Exception { + testRandomTrieAndClassicRangeQuery(Integer.MAX_VALUE); + } + + private void testRangeSplit(int precisionStep) throws Exception { + String field="ascfield"+precisionStep; + // 10 random tests + int num = atLeast(10); + for (int i =0; i< num; i++) { + int lower=(int)(random.nextDouble()*noDocs - noDocs/2); + int upper=(int)(random.nextDouble()*noDocs - noDocs/2); + if (lower>upper) { + int a=lower; lower=upper; upper=a; + } + // test inclusive range + Query tq=NumericRangeQuery.newIntRange(field, precisionStep, lower, upper, true, true); + TopDocs tTopDocs = searcher.search(tq, 1); + assertEquals("Returned count of range query must be equal to inclusive range length", upper-lower+1, tTopDocs.totalHits ); + // test exclusive range + tq=NumericRangeQuery.newIntRange(field, precisionStep, lower, upper, false, false); + tTopDocs = searcher.search(tq, 1); + assertEquals("Returned count of range query must be equal to exclusive range length", Math.max(upper-lower-1, 0), tTopDocs.totalHits ); + // test left exclusive range + tq=NumericRangeQuery.newIntRange(field, precisionStep, lower, upper, false, true); + tTopDocs = searcher.search(tq, 1); + assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits ); + // test right exclusive range + tq=NumericRangeQuery.newIntRange(field, precisionStep, lower, upper, true, false); + tTopDocs = searcher.search(tq, 1); + assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits ); + } + } + + @Test + public void testRangeSplit_8bit() throws Exception { + testRangeSplit(8); + } + + @Test + public void testRangeSplit_4bit() throws Exception { + testRangeSplit(4); + } + + @Test + public void testRangeSplit_2bit() throws Exception { + testRangeSplit(2); + } + + /** we fake a float test using int2float conversion of NumericUtils */ + private void testFloatRange(int precisionStep) throws Exception { + final String field="ascfield"+precisionStep; + final int lower=-1000, upper=+2000; + + Query tq=NumericRangeQuery.newFloatRange(field, precisionStep, + NumericUtils.sortableIntToFloat(lower), NumericUtils.sortableIntToFloat(upper), true, true); + TopDocs tTopDocs = searcher.search(tq, 1); + assertEquals("Returned count of range query must be equal to inclusive range length", upper-lower+1, tTopDocs.totalHits ); + + Filter tf=NumericRangeFilter.newFloatRange(field, precisionStep, + NumericUtils.sortableIntToFloat(lower), NumericUtils.sortableIntToFloat(upper), true, true); + tTopDocs = searcher.search(new MatchAllDocsQuery(), tf, 1); + assertEquals("Returned count of range filter must be equal to inclusive range length", upper-lower+1, tTopDocs.totalHits ); + } + + @Test + public void testFloatRange_8bit() throws Exception { + testFloatRange(8); + } + + @Test + public void testFloatRange_4bit() throws Exception { + testFloatRange(4); + } + + @Test + public void testFloatRange_2bit() throws Exception { + testFloatRange(2); + } + + private void testSorting(int precisionStep) throws Exception { + 
String field="field"+precisionStep;
+    // 10 random tests, the index order is ascending,
+    // so using a reverse sort field should return descending documents
+    int num = atLeast(10);
+    for (int i = 0; i < num; i++) {
+      int lower=(int)(random.nextDouble()*noDocs*distance)+startOffset;
+      int upper=(int)(random.nextDouble()*noDocs*distance)+startOffset;
+      if (lower>upper) {
+        int a=lower; lower=upper; upper=a;
+      }
+      Query tq=NumericRangeQuery.newIntRange(field, precisionStep, lower, upper, true, true);
+      TopDocs topDocs = searcher.search(tq, null, noDocs, new Sort(new SortField(field, SortField.INT, true)));
+      if (topDocs.totalHits==0) continue;
+      ScoreDoc[] sd = topDocs.scoreDocs;
+      assertNotNull(sd);
+      int last=Integer.parseInt(searcher.doc(sd[0].doc).get(field));
+      for (int j=1; j<sd.length; j++) {
+        int act=Integer.parseInt(searcher.doc(sd[j].doc).get(field));
+        assertTrue("Docs should be sorted backwards", last>act );
+        last=act;
+      }
+    }
+  }
+
+  @Test
+  public void testSorting_8bit() throws Exception {
+    testSorting(8);
+  }
+
+  @Test
+  public void testSorting_4bit() throws Exception {
+    testSorting(4);
+  }
+
+  @Test
+  public void testSorting_2bit() throws Exception {
+    testSorting(2);
+  }
+
+  @Test
+  public void testEqualsAndHash() throws Exception {
+    QueryUtils.checkHashEquals(NumericRangeQuery.newIntRange("test1", 4, 10, 20, true, true));
+    QueryUtils.checkHashEquals(NumericRangeQuery.newIntRange("test2", 4, 10, 20, false, true));
+    QueryUtils.checkHashEquals(NumericRangeQuery.newIntRange("test3", 4, 10, 20, true, false));
+    QueryUtils.checkHashEquals(NumericRangeQuery.newIntRange("test4", 4, 10, 20, false, false));
+    QueryUtils.checkHashEquals(NumericRangeQuery.newIntRange("test5", 4, 10, null, true, true));
+    QueryUtils.checkHashEquals(NumericRangeQuery.newIntRange("test6", 4, null, 20, true, true));
+    QueryUtils.checkHashEquals(NumericRangeQuery.newIntRange("test7", 4, null, null, true, true));
+    QueryUtils.checkEqual(
+      NumericRangeQuery.newIntRange("test8", 4, 10, 20, true, true),
+      NumericRangeQuery.newIntRange("test8", 4, 10, 20, true, true)
+    );
+    QueryUtils.checkUnequal(
+      NumericRangeQuery.newIntRange("test9", 4, 10, 20, true, true),
+      NumericRangeQuery.newIntRange("test9", 8, 10, 20, true, true)
+    );
+    QueryUtils.checkUnequal(
+      NumericRangeQuery.newIntRange("test10a", 4, 10, 20, true, true),
+      NumericRangeQuery.newIntRange("test10b", 4, 10, 20, true, true)
+    );
+    QueryUtils.checkUnequal(
+      NumericRangeQuery.newIntRange("test11", 4, 10, 20, true, true),
+      NumericRangeQuery.newIntRange("test11", 4, 20, 10, true, true)
+    );
+    QueryUtils.checkUnequal(
+      NumericRangeQuery.newIntRange("test12", 4, 10, 20, true, true),
+      NumericRangeQuery.newIntRange("test12", 4, 10, 20, false, true)
+    );
+    QueryUtils.checkUnequal(
+      NumericRangeQuery.newIntRange("test13", 4, 10, 20, true, true),
+      NumericRangeQuery.newFloatRange("test13", 4, 10f, 20f, true, true)
+    );
+    // the following produces a hash collision, because Long and Integer have the same hashcode, so only test equality:
+    Query q1 = NumericRangeQuery.newIntRange("test14", 4, 10, 20, true, true);
+    Query q2 = NumericRangeQuery.newLongRange("test14", 4, 10L, 20L, true, true);
+    assertFalse(q1.equals(q2));
+    assertFalse(q2.equals(q1));
+  }
+
+  private void testEnum(int lower, int upper) throws Exception {
+    NumericRangeQuery<Integer> q = NumericRangeQuery.newIntRange("field4", 4, lower, upper, true, true);
+    FilteredTermEnum termEnum = q.getEnum(searcher.getIndexReader());
+    try {
+      int count = 0;
+      do {
+        final Term t = termEnum.term();
+        if (t != null) {
+          final int val = NumericUtils.prefixCodedToInt(t.text());
+          assertTrue("value not in bounds", val >= lower && val <= 
upper); + count++; + } else break; + } while (termEnum.next()); + assertFalse(termEnum.next()); + if (VERBOSE) System.out.println("TermEnum on 'field4' for range [" + lower + "," + upper + "] contained " + count + " terms."); + } finally { + termEnum.close(); + } + } + + @Test + public void testEnum() throws Exception { + int count=3000; + int lower=(distance*3/2)+startOffset, upper=lower + count*distance + (distance/3); + // test enum with values + testEnum(lower, upper); + // test empty enum + testEnum(upper, lower); + // test empty enum outside of bounds + lower = distance*noDocs+startOffset; + upper = 2 * lower; + testEnum(lower, upper); + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java new file mode 100644 index 0000000..fd2f3db --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java @@ -0,0 +1,595 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.NumericField;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.NumericUtils;
+import org.apache.lucene.util._TestUtil;
+
+import org.junit.Test;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+
+public class TestNumericRangeQuery64 extends LuceneTestCase {
+  // distance of entries
+  private static final long distance = 66666L;
+  // shift the starting of the values to the left, to also have negative values:
+  private static final long startOffset = - 1L << 31;
+  // number of docs to generate for testing
+  private static final int noDocs = atLeast(5000);
+
+  private static Directory directory = null;
+  private static IndexReader reader = null;
+  private static IndexSearcher searcher = null;
+
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    directory = newDirectory();
+    RandomIndexWriter writer = new RandomIndexWriter(random, directory,
+        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
+        .setMaxBufferedDocs(_TestUtil.nextInt(random, 50, 1000))
+        .setMergePolicy(newLogMergePolicy()));
+
+    NumericField
+      field8 = new NumericField("field8", 8, Field.Store.YES, true),
+      field6 = new NumericField("field6", 6, Field.Store.YES, true),
+      field4 = new NumericField("field4", 4, Field.Store.YES, true),
+      field2 = new NumericField("field2", 2, Field.Store.YES, true),
+      fieldNoTrie = new NumericField("field"+Integer.MAX_VALUE, Integer.MAX_VALUE, Field.Store.YES, true),
+      ascfield8 = new NumericField("ascfield8", 8, Field.Store.NO, true),
+      ascfield6 = new NumericField("ascfield6", 6, Field.Store.NO, true),
+      ascfield4 = new NumericField("ascfield4", 4, Field.Store.NO, true),
+      ascfield2 = new NumericField("ascfield2", 2, Field.Store.NO, true);
+
+    Document doc = new Document();
+    // add fields, that have a distance to test general functionality
+    doc.add(field8); doc.add(field6); doc.add(field4); doc.add(field2); doc.add(fieldNoTrie);
+    // add ascending fields with a distance of 1, beginning at -noDocs/2 to test the correct splitting of range and inclusive/exclusive
+    doc.add(ascfield8); doc.add(ascfield6); doc.add(ascfield4); doc.add(ascfield2);
+
+    // Add a series of noDocs docs with increasing long values, by updating the fields
+    for (int l=0; l<noDocs; l++) {
+      long val=distance*l+startOffset;
+      field8.setLongValue(val);
+      field6.setLongValue(val);
+      field4.setLongValue(val);
+      field2.setLongValue(val);
+      fieldNoTrie.setLongValue(val);
+
+      val=l-(noDocs/2);
+      ascfield8.setLongValue(val);
+      ascfield6.setLongValue(val);
+      ascfield4.setLongValue(val);
+      ascfield2.setLongValue(val);
+      writer.addDocument(doc);
+    }
+
+    reader = writer.getReader();
+    searcher = newSearcher(reader);
+    writer.close();
+  }
+
+  @AfterClass
+  public static void afterClass() throws Exception {
+    searcher.close();
+    searcher = null;
+    reader.close();
+    reader = null;
+    directory.close();
+    directory = null;
+  }
+
+  private void testRange(int precisionStep) throws Exception {
+    String field="field"+precisionStep;
+    int count=3000;
+    long lower=(distance*3/2)+startOffset, upper=lower + count*distance + (distance/3);
+    NumericRangeQuery<Long> q = NumericRangeQuery.newLongRange(field, precisionStep, lower, upper, true, true);
+    NumericRangeFilter<Long> f = NumericRangeFilter.newLongRange(field, precisionStep, lower, upper, true, true);
+    int lastTerms = 0;
+    for (byte i=0; i<3; i++) {
+      TopDocs topDocs;
+      int terms;
+      String type;
+      q.clearTotalNumberOfTerms();
+      f.clearTotalNumberOfTerms();
+      switch (i) {
+        case 0:
+          type = " (constant score filter rewrite)";
+          q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
+          topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER);
+          terms = q.getTotalNumberOfTerms();
+          break;
+        case 1:
+          type = " (constant score boolean rewrite)";
+          q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE);
+          topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER);
+          terms = q.getTotalNumberOfTerms();
+          break;
+        case 2:
+          type = " (filter)";
+          topDocs = searcher.search(new MatchAllDocsQuery(), f, noDocs, Sort.INDEXORDER);
+          terms = f.getTotalNumberOfTerms();
+          break;
+        default:
+          return;
+      }
+      if (VERBOSE) System.out.println("Found "+terms+" distinct terms in range for field '"+field+"'"+type+".");
+      ScoreDoc[] sd = topDocs.scoreDocs;
+      assertNotNull(sd);
+      assertEquals("Score doc count"+type, count, sd.length );
+      Document doc=searcher.doc(sd[0].doc);
+      assertEquals("First doc"+type, 2*distance+startOffset, Long.parseLong(doc.get(field)) );
+      doc=searcher.doc(sd[sd.length-1].doc);
+      assertEquals("Last doc"+type, (1+count)*distance+startOffset, Long.parseLong(doc.get(field)) );
+      if (i>0 &&
+          (searcher.getIndexReader().getSequentialSubReaders() == null ||
+           searcher.getIndexReader().getSequentialSubReaders().length == 1)) {
+        assertEquals("Distinct term number is equal for all query types", lastTerms, terms);
+      }
+      lastTerms = terms;
+    }
+  }
+
+  @Test
+  public void testRange_8bit() throws Exception {
+    testRange(8);
+  }
+
+  @Test
+  public void testRange_6bit() throws Exception {
+    testRange(6);
+  }
+
+  @Test
+  public void testRange_4bit() throws Exception {
+    testRange(4);
+  }
+
+  @Test
+  public void testRange_2bit() throws Exception {
+    testRange(2);
+  }
+
+  @Test
+  public void testInverseRange() throws Exception {
+    NumericRangeFilter<Long> f = NumericRangeFilter.newLongRange("field8", 8, 1000L, -1000L, true, true);
+    assertSame("An inverse range should return the EMPTY_DOCIDSET instance", DocIdSet.EMPTY_DOCIDSET, f.getDocIdSet(searcher.getIndexReader()));
+    f = NumericRangeFilter.newLongRange("field8", 8, Long.MAX_VALUE, null, false, false);
+    assertSame("An exclusive range starting with Long.MAX_VALUE should return the EMPTY_DOCIDSET instance",
+               DocIdSet.EMPTY_DOCIDSET, f.getDocIdSet(searcher.getIndexReader()));
+    f = NumericRangeFilter.newLongRange("field8", 8, null, Long.MIN_VALUE, false, false);
+    assertSame("An exclusive range ending with Long.MIN_VALUE should return the EMPTY_DOCIDSET instance",
+               DocIdSet.EMPTY_DOCIDSET, f.getDocIdSet(searcher.getIndexReader()));
+  }
+
+  @Test
+  public void testOneMatchQuery() throws Exception {
+    NumericRangeQuery<Long> q = NumericRangeQuery.newLongRange("ascfield8", 8, 1000L, 1000L, true, true);
+    assertSame(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE, q.getRewriteMethod());
+    TopDocs topDocs = searcher.search(q, noDocs);
+    ScoreDoc[] sd = topDocs.scoreDocs;
+    assertNotNull(sd);
+    assertEquals("Score doc count", 1, sd.length );
+  }
+
+  private void testLeftOpenRange(int precisionStep) throws Exception {
+    String field="field"+precisionStep;
+    int count=3000;
+    long upper=(count-1)*distance + (distance/3) + startOffset;
+    NumericRangeQuery<Long> q=NumericRangeQuery.newLongRange(field, precisionStep, null, upper, true, true);
+    TopDocs topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER);
+    if (VERBOSE) System.out.println("Found "+q.getTotalNumberOfTerms()+" distinct terms in left open range for field '"+field+"'.");
+    ScoreDoc[] sd = topDocs.scoreDocs;
+    assertNotNull(sd);
+    assertEquals("Score doc count", count, sd.length );
+    Document doc=searcher.doc(sd[0].doc);
+    assertEquals("First doc", startOffset, Long.parseLong(doc.get(field)) );
+    doc=searcher.doc(sd[sd.length-1].doc);
+    assertEquals("Last doc", (count-1)*distance+startOffset, Long.parseLong(doc.get(field)) );
+
+    q=NumericRangeQuery.newLongRange(field, precisionStep, null, upper, false, true);
+    topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER);
+    sd = topDocs.scoreDocs;
+    assertNotNull(sd);
+    assertEquals("Score doc count", count, sd.length );
+    doc=searcher.doc(sd[0].doc);
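+    // Editorial aside, not part of the imported source: with a null (open)
+    // lower bound the minInclusive=false flag has nothing to exclude, so this
+    // query must behave exactly like the inclusive one above: count hits,
+    // first stored value startOffset (l=0), last (count-1)*distance+startOffset,
+    // given that values were indexed as l*distance+startOffset in index order.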
assertEquals("First doc", startOffset, Long.parseLong(doc.get(field)) ); + doc=searcher.doc(sd[sd.length-1].doc); + assertEquals("Last doc", (count-1)*distance+startOffset, Long.parseLong(doc.get(field)) ); + } + + @Test + public void testLeftOpenRange_8bit() throws Exception { + testLeftOpenRange(8); + } + + @Test + public void testLeftOpenRange_6bit() throws Exception { + testLeftOpenRange(6); + } + + @Test + public void testLeftOpenRange_4bit() throws Exception { + testLeftOpenRange(4); + } + + @Test + public void testLeftOpenRange_2bit() throws Exception { + testLeftOpenRange(2); + } + + private void testRightOpenRange(int precisionStep) throws Exception { + String field="field"+precisionStep; + int count=3000; + long lower=(count-1)*distance + (distance/3) +startOffset; + NumericRangeQuery q=NumericRangeQuery.newLongRange(field, precisionStep, lower, null, true, true); + TopDocs topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER); + if (VERBOSE) System.out.println("Found "+q.getTotalNumberOfTerms()+" distinct terms in right open range for field '"+field+"'."); + ScoreDoc[] sd = topDocs.scoreDocs; + assertNotNull(sd); + assertEquals("Score doc count", noDocs-count, sd.length ); + Document doc=searcher.doc(sd[0].doc); + assertEquals("First doc", count*distance+startOffset, Long.parseLong(doc.get(field)) ); + doc=searcher.doc(sd[sd.length-1].doc); + assertEquals("Last doc", (noDocs-1)*distance+startOffset, Long.parseLong(doc.get(field)) ); + + q=NumericRangeQuery.newLongRange(field, precisionStep, lower, null, true, false); + topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER); + sd = topDocs.scoreDocs; + assertNotNull(sd); + assertEquals("Score doc count", noDocs-count, sd.length ); + doc=searcher.doc(sd[0].doc); + assertEquals("First doc", count*distance+startOffset, Long.parseLong(doc.get(field)) ); + doc=searcher.doc(sd[sd.length-1].doc); + assertEquals("Last doc", (noDocs-1)*distance+startOffset, Long.parseLong(doc.get(field)) ); + } + + @Test + public void testRightOpenRange_8bit() throws Exception { + testRightOpenRange(8); + } + + @Test + public void testRightOpenRange_6bit() throws Exception { + testRightOpenRange(6); + } + + @Test + public void testRightOpenRange_4bit() throws Exception { + testRightOpenRange(4); + } + + @Test + public void testRightOpenRange_2bit() throws Exception { + testRightOpenRange(2); + } + + @Test + public void testInfiniteValues() throws Exception { + Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random))); + Document doc = new Document(); + doc.add(new NumericField("double").setDoubleValue(Double.NEGATIVE_INFINITY)); + doc.add(new NumericField("long").setLongValue(Long.MIN_VALUE)); + writer.addDocument(doc); + + doc = new Document(); + doc.add(new NumericField("double").setDoubleValue(Double.POSITIVE_INFINITY)); + doc.add(new NumericField("long").setLongValue(Long.MAX_VALUE)); + writer.addDocument(doc); + + doc = new Document(); + doc.add(new NumericField("double").setDoubleValue(0.0)); + doc.add(new NumericField("long").setLongValue(0L)); + writer.addDocument(doc); + writer.close(); + + IndexSearcher s = new IndexSearcher(dir); + + Query q=NumericRangeQuery.newLongRange("long", null, null, true, true); + TopDocs topDocs = s.search(q, 10); + assertEquals("Score doc count", 3, topDocs.scoreDocs.length ); + + q=NumericRangeQuery.newLongRange("long", null, null, false, false); + topDocs = s.search(q, 10); + assertEquals("Score doc count", 3, 
topDocs.scoreDocs.length ); + + q=NumericRangeQuery.newLongRange("long", Long.MIN_VALUE, Long.MAX_VALUE, true, true); + topDocs = s.search(q, 10); + assertEquals("Score doc count", 3, topDocs.scoreDocs.length ); + + q=NumericRangeQuery.newLongRange("long", Long.MIN_VALUE, Long.MAX_VALUE, false, false); + topDocs = s.search(q, 10); + assertEquals("Score doc count", 1, topDocs.scoreDocs.length ); + + q=NumericRangeQuery.newDoubleRange("double", null, null, true, true); + topDocs = s.search(q, 10); + assertEquals("Score doc count", 3, topDocs.scoreDocs.length ); + + q=NumericRangeQuery.newDoubleRange("double", null, null, false, false); + topDocs = s.search(q, 10); + assertEquals("Score doc count", 3, topDocs.scoreDocs.length ); + + s.close(); + dir.close(); + } + + private void testRandomTrieAndClassicRangeQuery(int precisionStep) throws Exception { + String field="field"+precisionStep; + int termCountT=0,termCountC=0; + int num = atLeast(10); + for (int i = 0; i < num; i++) { + long lower=(long)(random.nextDouble()*noDocs*distance)+startOffset; + long upper=(long)(random.nextDouble()*noDocs*distance)+startOffset; + if (lower>upper) { + long a=lower; lower=upper; upper=a; + } + // test inclusive range + NumericRangeQuery tq=NumericRangeQuery.newLongRange(field, precisionStep, lower, upper, true, true); + TermRangeQuery cq=new TermRangeQuery(field, NumericUtils.longToPrefixCoded(lower), NumericUtils.longToPrefixCoded(upper), true, true); + TopDocs tTopDocs = searcher.search(tq, 1); + TopDocs cTopDocs = searcher.search(cq, 1); + assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); + termCountT += tq.getTotalNumberOfTerms(); + termCountC += cq.getTotalNumberOfTerms(); + // test exclusive range + tq=NumericRangeQuery.newLongRange(field, precisionStep, lower, upper, false, false); + cq=new TermRangeQuery(field, NumericUtils.longToPrefixCoded(lower), NumericUtils.longToPrefixCoded(upper), false, false); + tTopDocs = searcher.search(tq, 1); + cTopDocs = searcher.search(cq, 1); + assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); + termCountT += tq.getTotalNumberOfTerms(); + termCountC += cq.getTotalNumberOfTerms(); + // test left exclusive range + tq=NumericRangeQuery.newLongRange(field, precisionStep, lower, upper, false, true); + cq=new TermRangeQuery(field, NumericUtils.longToPrefixCoded(lower), NumericUtils.longToPrefixCoded(upper), false, true); + tTopDocs = searcher.search(tq, 1); + cTopDocs = searcher.search(cq, 1); + assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); + termCountT += tq.getTotalNumberOfTerms(); + termCountC += cq.getTotalNumberOfTerms(); + // test right exclusive range + tq=NumericRangeQuery.newLongRange(field, precisionStep, lower, upper, true, false); + cq=new TermRangeQuery(field, NumericUtils.longToPrefixCoded(lower), NumericUtils.longToPrefixCoded(upper), true, false); + tTopDocs = searcher.search(tq, 1); + cTopDocs = searcher.search(cq, 1); + assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); + termCountT += tq.getTotalNumberOfTerms(); + termCountC += cq.getTotalNumberOfTerms(); + } + if (precisionStep == Integer.MAX_VALUE && + (searcher.getIndexReader().getSequentialSubReaders() == null || + searcher.getIndexReader().getSequentialSubReaders().length == 1)) { + 
assertEquals("Total number of terms should be equal for unlimited precStep", termCountT, termCountC); + } else if (VERBOSE) { + System.out.println("Average number of terms during random search on '" + field + "':"); + System.out.println(" Trie query: " + (((double)termCountT)/(num * 4))); + System.out.println(" Classical query: " + (((double)termCountC)/(num * 4))); + } + } + + @Test + public void testRandomTrieAndClassicRangeQuery_8bit() throws Exception { + testRandomTrieAndClassicRangeQuery(8); + } + + @Test + public void testRandomTrieAndClassicRangeQuery_6bit() throws Exception { + testRandomTrieAndClassicRangeQuery(6); + } + + @Test + public void testRandomTrieAndClassicRangeQuery_4bit() throws Exception { + testRandomTrieAndClassicRangeQuery(4); + } + + @Test + public void testRandomTrieAndClassicRangeQuery_2bit() throws Exception { + testRandomTrieAndClassicRangeQuery(2); + } + + @Test + public void testRandomTrieAndClassicRangeQuery_NoTrie() throws Exception { + testRandomTrieAndClassicRangeQuery(Integer.MAX_VALUE); + } + + private void testRangeSplit(int precisionStep) throws Exception { + String field="ascfield"+precisionStep; + // 10 random tests + int num = atLeast(10); + for (int i = 0; i < num; i++) { + long lower=(long)(random.nextDouble()*noDocs - noDocs/2); + long upper=(long)(random.nextDouble()*noDocs - noDocs/2); + if (lower>upper) { + long a=lower; lower=upper; upper=a; + } + // test inclusive range + Query tq=NumericRangeQuery.newLongRange(field, precisionStep, lower, upper, true, true); + TopDocs tTopDocs = searcher.search(tq, 1); + assertEquals("Returned count of range query must be equal to inclusive range length", upper-lower+1, tTopDocs.totalHits ); + // test exclusive range + tq=NumericRangeQuery.newLongRange(field, precisionStep, lower, upper, false, false); + tTopDocs = searcher.search(tq, 1); + assertEquals("Returned count of range query must be equal to exclusive range length", Math.max(upper-lower-1, 0), tTopDocs.totalHits ); + // test left exclusive range + tq=NumericRangeQuery.newLongRange(field, precisionStep, lower, upper, false, true); + tTopDocs = searcher.search(tq, 1); + assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits ); + // test right exclusive range + tq=NumericRangeQuery.newLongRange(field, precisionStep, lower, upper, true, false); + tTopDocs = searcher.search(tq, 1); + assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits ); + } + } + + @Test + public void testRangeSplit_8bit() throws Exception { + testRangeSplit(8); + } + + @Test + public void testRangeSplit_6bit() throws Exception { + testRangeSplit(6); + } + + @Test + public void testRangeSplit_4bit() throws Exception { + testRangeSplit(4); + } + + @Test + public void testRangeSplit_2bit() throws Exception { + testRangeSplit(2); + } + + /** we fake a double test using long2double conversion of NumericUtils */ + private void testDoubleRange(int precisionStep) throws Exception { + final String field="ascfield"+precisionStep; + final long lower=-1000L, upper=+2000L; + + Query tq=NumericRangeQuery.newDoubleRange(field, precisionStep, + NumericUtils.sortableLongToDouble(lower), NumericUtils.sortableLongToDouble(upper), true, true); + TopDocs tTopDocs = searcher.search(tq, 1); + assertEquals("Returned count of range query must be equal to inclusive range length", upper-lower+1, tTopDocs.totalHits ); + + Filter tf=NumericRangeFilter.newDoubleRange(field, 
precisionStep,
+        NumericUtils.sortableLongToDouble(lower), NumericUtils.sortableLongToDouble(upper), true, true);
+    tTopDocs = searcher.search(new MatchAllDocsQuery(), tf, 1);
+    assertEquals("Returned count of range filter must be equal to inclusive range length", upper-lower+1, tTopDocs.totalHits );
+  }
+
+  @Test
+  public void testDoubleRange_8bit() throws Exception {
+    testDoubleRange(8);
+  }
+
+  @Test
+  public void testDoubleRange_6bit() throws Exception {
+    testDoubleRange(6);
+  }
+
+  @Test
+  public void testDoubleRange_4bit() throws Exception {
+    testDoubleRange(4);
+  }
+
+  @Test
+  public void testDoubleRange_2bit() throws Exception {
+    testDoubleRange(2);
+  }
+
+  private void testSorting(int precisionStep) throws Exception {
+    String field="field"+precisionStep;
+    // 10 random tests, the index order is ascending,
+    // so using a reverse sort field should return descending documents
+    int num = atLeast(10);
+    for (int i = 0; i < num; i++) {
+      long lower=(long)(random.nextDouble()*noDocs*distance)+startOffset;
+      long upper=(long)(random.nextDouble()*noDocs*distance)+startOffset;
+      if (lower>upper) {
+        long a=lower; lower=upper; upper=a;
+      }
+      Query tq=NumericRangeQuery.newLongRange(field, precisionStep, lower, upper, true, true);
+      TopDocs topDocs = searcher.search(tq, null, noDocs, new Sort(new SortField(field, SortField.LONG, true)));
+      if (topDocs.totalHits==0) continue;
+      ScoreDoc[] sd = topDocs.scoreDocs;
+      assertNotNull(sd);
+      long last=Long.parseLong(searcher.doc(sd[0].doc).get(field));
+      for (int j=1; j<sd.length; j++) {
+        long act=Long.parseLong(searcher.doc(sd[j].doc).get(field));
+        assertTrue("Docs should be sorted backwards", last>act );
+        last=act;
+      }
+    }
+  }
+
+  @Test
+  public void testSorting_8bit() throws Exception {
+    testSorting(8);
+  }
+
+  @Test
+  public void testSorting_6bit() throws Exception {
+    testSorting(6);
+  }
+
+  @Test
+  public void testSorting_4bit() throws Exception {
+    testSorting(4);
+  }
+
+  @Test
+  public void testSorting_2bit() throws Exception {
+    testSorting(2);
+  }
+
+  @Test
+  public void testEqualsAndHash() throws Exception {
+    QueryUtils.checkHashEquals(NumericRangeQuery.newLongRange("test1", 4, 10L, 20L, true, true));
+    QueryUtils.checkHashEquals(NumericRangeQuery.newLongRange("test2", 4, 10L, 20L, false, true));
+    QueryUtils.checkHashEquals(NumericRangeQuery.newLongRange("test3", 4, 10L, 20L, true, false));
+    QueryUtils.checkHashEquals(NumericRangeQuery.newLongRange("test4", 4, 10L, 20L, false, false));
+    QueryUtils.checkHashEquals(NumericRangeQuery.newLongRange("test5", 4, 10L, null, true, true));
+    QueryUtils.checkHashEquals(NumericRangeQuery.newLongRange("test6", 4, null, 20L, true, true));
+    QueryUtils.checkHashEquals(NumericRangeQuery.newLongRange("test7", 4, null, null, true, true));
+    QueryUtils.checkEqual(
+      NumericRangeQuery.newLongRange("test8", 4, 10L, 20L, true, true),
+      NumericRangeQuery.newLongRange("test8", 4, 10L, 20L, true, true)
+    );
+    QueryUtils.checkUnequal(
+      NumericRangeQuery.newLongRange("test9", 4, 10L, 20L, true, true),
+      NumericRangeQuery.newLongRange("test9", 8, 10L, 20L, true, true)
+    );
+    QueryUtils.checkUnequal(
+      NumericRangeQuery.newLongRange("test10a", 4, 10L, 20L, true, true),
+      NumericRangeQuery.newLongRange("test10b", 4, 10L, 20L, true, true)
+    );
+    QueryUtils.checkUnequal(
+      NumericRangeQuery.newLongRange("test11", 4, 10L, 20L, true, true),
+      NumericRangeQuery.newLongRange("test11", 4, 20L, 10L, true, true)
+    );
+    QueryUtils.checkUnequal(
+      NumericRangeQuery.newLongRange("test12", 4, 10L, 20L, true, true),
+      NumericRangeQuery.newLongRange("test12", 4, 10L, 20L, false, true)
+    );
+    QueryUtils.checkUnequal(
+      NumericRangeQuery.newLongRange("test13", 4, 10L, 20L, true, true),
+      NumericRangeQuery.newFloatRange("test13", 4, 10f, 20f, true, true)
+    );
+    // difference to int range is tested in TestNumericRangeQuery32
+  }
+
+}
diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestParallelMultiSearcher.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestParallelMultiSearcher.java
new file mode 100644
index 0000000..44e35ca
--- /dev/null
+++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestParallelMultiSearcher.java
@@ -0,0 +1,51 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.lucene.util._TestUtil;
+
+/**
+ * Unit tests for the ParallelMultiSearcher
+ */
+public class TestParallelMultiSearcher extends TestMultiSearcher {
+  List<ExecutorService> pools = new ArrayList<ExecutorService>();
+
+  @Override
+  public void tearDown() throws Exception {
+    for (ExecutorService exec : pools)
+      exec.awaitTermination(1000, TimeUnit.MILLISECONDS);
+    pools.clear();
+    super.tearDown();
+  }
+
+  @Override
+  protected MultiSearcher getMultiSearcherInstance(Searcher[] searchers)
+    throws IOException {
+    ExecutorService exec = Executors.newFixedThreadPool(_TestUtil.nextInt(random, 2, 8));
+    pools.add(exec);
+    return new ParallelMultiSearcher(exec, searchers);
+  }
+
+}
diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestPhrasePrefixQuery.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestPhrasePrefixQuery.java
new file mode 100644
index 0000000..7b0da44
--- /dev/null
+++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestPhrasePrefixQuery.java
@@ -0,0 +1,101 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.LuceneTestCase;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.store.Directory;
+
+import java.io.IOException;
+import java.util.LinkedList;
+
+/**
+ * This class tests PhrasePrefixQuery class.
+ */
+public class TestPhrasePrefixQuery extends LuceneTestCase {
+
+  /**
+   *
+   */
+  public void testPhrasePrefix() throws IOException {
+    Directory indexStore = newDirectory();
+    RandomIndexWriter writer = new RandomIndexWriter(random, indexStore);
+    Document doc1 = new Document();
+    Document doc2 = new Document();
+    Document doc3 = new Document();
+    Document doc4 = new Document();
+    Document doc5 = new Document();
+    doc1.add(newField("body", "blueberry pie", Field.Store.YES,
+                      Field.Index.ANALYZED));
+    doc2.add(newField("body", "blueberry strudel", Field.Store.YES,
+                      Field.Index.ANALYZED));
+    doc3.add(newField("body", "blueberry pizza", Field.Store.YES,
+                      Field.Index.ANALYZED));
+    doc4.add(newField("body", "blueberry chewing gum", Field.Store.YES,
+                      Field.Index.ANALYZED));
+    doc5.add(newField("body", "piccadilly circus", Field.Store.YES,
+                      Field.Index.ANALYZED));
+    writer.addDocument(doc1);
+    writer.addDocument(doc2);
+    writer.addDocument(doc3);
+    writer.addDocument(doc4);
+    writer.addDocument(doc5);
+    IndexReader reader = writer.getReader();
+    writer.close();
+
+    IndexSearcher searcher = newSearcher(reader);
+
+    // PhrasePrefixQuery query1 = new PhrasePrefixQuery();
+    MultiPhraseQuery query1 = new MultiPhraseQuery();
+    // PhrasePrefixQuery query2 = new PhrasePrefixQuery();
+    MultiPhraseQuery query2 = new MultiPhraseQuery();
+    query1.add(new Term("body", "blueberry"));
+    query2.add(new Term("body", "strawberry"));
+
+    LinkedList<Term> termsWithPrefix = new LinkedList<Term>();
+
+    // this TermEnum gives "piccadilly", "pie" and "pizza".
+    String prefix = "pi";
+    TermEnum te = reader.terms(new Term("body", prefix + "*"));
+    do {
+      if (te.term().text().startsWith(prefix))
+      {
+        termsWithPrefix.add(te.term());
+      }
+    } while (te.next());
+
+    query1.add(termsWithPrefix.toArray(new Term[0]));
+    query2.add(termsWithPrefix.toArray(new Term[0]));
+
+    ScoreDoc[] result;
+    result = searcher.search(query1, null, 1000).scoreDocs;
+    assertEquals(2, result.length);
+
+    result = searcher.search(query2, null, 1000).scoreDocs;
+    assertEquals(0, result.length);
+    searcher.close();
+    reader.close();
+    indexStore.close();
+  }
+}
diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestPhraseQuery.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestPhraseQuery.java
new file mode 100644
index 0000000..0d07f20
--- /dev/null
+++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestPhraseQuery.java
@@ -0,0 +1,697 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.analysis.*; +import org.apache.lucene.analysis.tokenattributes.*; +import org.apache.lucene.document.*; +import org.apache.lucene.index.*; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.store.*; +import org.apache.lucene.util.Version; +import org.apache.lucene.util._TestUtil; +import org.junit.AfterClass; +import org.junit.BeforeClass; + +import java.io.IOException; +import java.io.Reader; +import java.io.StringReader; +import java.util.List; +import java.util.ArrayList; +import java.util.Random; + +/** + * Tests {@link PhraseQuery}. + * + * @see TestPositionIncrement + */ +public class TestPhraseQuery extends LuceneTestCase { + + /** threshold for comparing floats */ + public static final float SCORE_COMP_THRESH = 1e-6f; + + private static IndexSearcher searcher; + private static IndexReader reader; + private PhraseQuery query; + private static Directory directory; + + @BeforeClass + public static void beforeClass() throws Exception { + directory = newDirectory(); + Analyzer analyzer = new Analyzer() { + @Override + public TokenStream tokenStream(String fieldName, Reader reader) { + return new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader); + } + + @Override + public int getPositionIncrementGap(String fieldName) { + return 100; + } + }; + RandomIndexWriter writer = new RandomIndexWriter(random, directory, analyzer); + + Document doc = new Document(); + doc.add(newField("field", "one two three four five", Field.Store.YES, Field.Index.ANALYZED)); + doc.add(newField("repeated", "this is a repeated field - first part", Field.Store.YES, Field.Index.ANALYZED)); + Fieldable repeatedField = newField("repeated", "second part of a repeated field", Field.Store.YES, Field.Index.ANALYZED); + doc.add(repeatedField); + doc.add(newField("palindrome", "one two three two one", Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(doc); + + doc = new Document(); + doc.add(newField("nonexist", "phrase exist notexist exist found", Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(doc); + + doc = new Document(); + doc.add(newField("nonexist", "phrase exist notexist exist found", Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(doc); + + reader = writer.getReader(); + writer.close(); + + searcher = newSearcher(reader); + } + + @Override + public void setUp() throws Exception { + super.setUp(); + query = new PhraseQuery(); + } + + @AfterClass + public static void afterClass() throws Exception { + searcher.close(); + searcher = null; + reader.close(); + reader = null; + directory.close(); + directory = null; + } + + public void testNotCloseEnough() throws Exception { + query.setSlop(2); + query.add(new Term("field", "one")); + query.add(new Term("field", "five")); + ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(0, hits.length); + QueryUtils.check(random, query,searcher); + } + + public void testBarelyCloseEnough() throws Exception { + query.setSlop(3); + 
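// Editorial aside, not upstream code: "field" holds "one two three four five",
+    // so "one" is at position 0 and "five" at position 4; lining up the
+    // two-term phrase takes 3 positional moves, which is why slop 2 yields
+    // 0 hits in testNotCloseEnough above and slop 3 here is just barely enough.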
query.add(new Term("field", "one")); + query.add(new Term("field", "five")); + ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(1, hits.length); + QueryUtils.check(random, query,searcher); + } + + /** + * Ensures slop of 0 works for exact matches, but not reversed + */ + public void testExact() throws Exception { + // slop is zero by default + query.add(new Term("field", "four")); + query.add(new Term("field", "five")); + ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals("exact match", 1, hits.length); + QueryUtils.check(random, query,searcher); + + + query = new PhraseQuery(); + query.add(new Term("field", "two")); + query.add(new Term("field", "one")); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals("reverse not exact", 0, hits.length); + QueryUtils.check(random, query,searcher); + } + + public void testSlop1() throws Exception { + // Ensures slop of 1 works with terms in order. + query.setSlop(1); + query.add(new Term("field", "one")); + query.add(new Term("field", "two")); + ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals("in order", 1, hits.length); + QueryUtils.check(random, query,searcher); + + + // Ensures slop of 1 does not work for phrases out of order; + // must be at least 2. + query = new PhraseQuery(); + query.setSlop(1); + query.add(new Term("field", "two")); + query.add(new Term("field", "one")); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals("reversed, slop not 2 or more", 0, hits.length); + QueryUtils.check(random, query,searcher); + } + + /** + * As long as slop is at least 2, terms can be reversed + */ + public void testOrderDoesntMatter() throws Exception { + query.setSlop(2); // must be at least two for reverse order match + query.add(new Term("field", "two")); + query.add(new Term("field", "one")); + ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals("just sloppy enough", 1, hits.length); + QueryUtils.check(random, query,searcher); + + + query = new PhraseQuery(); + query.setSlop(2); + query.add(new Term("field", "three")); + query.add(new Term("field", "one")); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals("not sloppy enough", 0, hits.length); + QueryUtils.check(random, query,searcher); + + } + + /** + * slop is the total number of positional moves allowed + * to line up a phrase + */ + public void testMulipleTerms() throws Exception { + query.setSlop(2); + query.add(new Term("field", "one")); + query.add(new Term("field", "three")); + query.add(new Term("field", "five")); + ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals("two total moves", 1, hits.length); + QueryUtils.check(random, query,searcher); + + + query = new PhraseQuery(); + query.setSlop(5); // it takes six moves to match this phrase + query.add(new Term("field", "five")); + query.add(new Term("field", "three")); + query.add(new Term("field", "one")); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals("slop of 5 not close enough", 0, hits.length); + QueryUtils.check(random, query,searcher); + + + query.setSlop(6); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals("slop of 6 just right", 1, hits.length); + QueryUtils.check(random, query,searcher); + + } + + public void testPhraseQueryWithStopAnalyzer() throws Exception { + Directory directory = newDirectory(); + StopAnalyzer stopAnalyzer = new StopAnalyzer(Version.LUCENE_24); + RandomIndexWriter writer = 
new RandomIndexWriter(random, directory, + newIndexWriterConfig( Version.LUCENE_24, stopAnalyzer)); + Document doc = new Document(); + doc.add(newField("field", "the stop words are here", Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(doc); + IndexReader reader = writer.getReader(); + writer.close(); + + IndexSearcher searcher = newSearcher(reader); + + // valid exact phrase query + PhraseQuery query = new PhraseQuery(); + query.add(new Term("field","stop")); + query.add(new Term("field","words")); + ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(1, hits.length); + QueryUtils.check(random, query,searcher); + + + // StopAnalyzer as of 2.4 does not leave "holes", so this matches. + query = new PhraseQuery(); + query.add(new Term("field", "words")); + query.add(new Term("field", "here")); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(1, hits.length); + QueryUtils.check(random, query,searcher); + + + searcher.close(); + reader.close(); + directory.close(); + } + + public void testPhraseQueryInConjunctionScorer() throws Exception { + Directory directory = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, directory); + + Document doc = new Document(); + doc.add(newField("source", "marketing info", Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(doc); + + doc = new Document(); + doc.add(newField("contents", "foobar", Field.Store.YES, Field.Index.ANALYZED)); + doc.add(newField("source", "marketing info", Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(doc); + + IndexReader reader = writer.getReader(); + writer.close(); + + IndexSearcher searcher = newSearcher(reader); + + PhraseQuery phraseQuery = new PhraseQuery(); + phraseQuery.add(new Term("source", "marketing")); + phraseQuery.add(new Term("source", "info")); + ScoreDoc[] hits = searcher.search(phraseQuery, null, 1000).scoreDocs; + assertEquals(2, hits.length); + QueryUtils.check(random, phraseQuery,searcher); + + + TermQuery termQuery = new TermQuery(new Term("contents","foobar")); + BooleanQuery booleanQuery = new BooleanQuery(); + booleanQuery.add(termQuery, BooleanClause.Occur.MUST); + booleanQuery.add(phraseQuery, BooleanClause.Occur.MUST); + hits = searcher.search(booleanQuery, null, 1000).scoreDocs; + assertEquals(1, hits.length); + QueryUtils.check(random, termQuery,searcher); + + + searcher.close(); + reader.close(); + + writer = new RandomIndexWriter(random, directory, + newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE)); + doc = new Document(); + doc.add(newField("contents", "map entry woo", Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(doc); + + doc = new Document(); + doc.add(newField("contents", "woo map entry", Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(doc); + + doc = new Document(); + doc.add(newField("contents", "map foobarword entry woo", Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(doc); + + reader = writer.getReader(); + writer.close(); + + searcher = newSearcher(reader); + + termQuery = new TermQuery(new Term("contents","woo")); + phraseQuery = new PhraseQuery(); + phraseQuery.add(new Term("contents","map")); + phraseQuery.add(new Term("contents","entry")); + + hits = searcher.search(termQuery, null, 1000).scoreDocs; + assertEquals(3, hits.length); + hits = searcher.search(phraseQuery, null, 1000).scoreDocs; + assertEquals(2, hits.length); + + + booleanQuery = new BooleanQuery(); + 
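// Editorial aside, not upstream code: the same two MUST clauses are added in
+    // both orders below; a BooleanQuery conjunction matches independently of
+    // clause order, so each order must find the 2 docs that contain both the
+    // term "woo" and the phrase "map entry".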
booleanQuery.add(termQuery, BooleanClause.Occur.MUST); + booleanQuery.add(phraseQuery, BooleanClause.Occur.MUST); + hits = searcher.search(booleanQuery, null, 1000).scoreDocs; + assertEquals(2, hits.length); + + booleanQuery = new BooleanQuery(); + booleanQuery.add(phraseQuery, BooleanClause.Occur.MUST); + booleanQuery.add(termQuery, BooleanClause.Occur.MUST); + hits = searcher.search(booleanQuery, null, 1000).scoreDocs; + assertEquals(2, hits.length); + QueryUtils.check(random, booleanQuery,searcher); + + + searcher.close(); + reader.close(); + directory.close(); + } + + public void testSlopScoring() throws IOException { + Directory directory = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); + + Document doc = new Document(); + doc.add(newField("field", "foo firstname lastname foo", Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(doc); + + Document doc2 = new Document(); + doc2.add(newField("field", "foo firstname zzz lastname foo", Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(doc2); + + Document doc3 = new Document(); + doc3.add(newField("field", "foo firstname zzz yyy lastname foo", Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(doc3); + + IndexReader reader = writer.getReader(); + writer.close(); + + IndexSearcher searcher = newSearcher(reader); + PhraseQuery query = new PhraseQuery(); + query.add(new Term("field", "firstname")); + query.add(new Term("field", "lastname")); + query.setSlop(Integer.MAX_VALUE); + ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(3, hits.length); + // Make sure that those matches where the terms appear closer to + // each other get a higher score: + assertEquals(0.71, hits[0].score, 0.01); + assertEquals(0, hits[0].doc); + assertEquals(0.44, hits[1].score, 0.01); + assertEquals(1, hits[1].doc); + assertEquals(0.31, hits[2].score, 0.01); + assertEquals(2, hits[2].doc); + QueryUtils.check(random, query,searcher); + searcher.close(); + reader.close(); + directory.close(); + } + + public void testToString() throws Exception { + StopAnalyzer analyzer = new StopAnalyzer(TEST_VERSION_CURRENT); + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", analyzer); + qp.setEnablePositionIncrements(true); + PhraseQuery q = (PhraseQuery)qp.parse("\"this hi this is a test is\""); + assertEquals("field:\"? hi ? ? ? test\"", q.toString()); + q.add(new Term("field", "hello"), 1); + assertEquals("field:\"? hi|hello ? ? ? 
test\"", q.toString());
+  }
+
+  public void testWrappedPhrase() throws IOException {
+    query.add(new Term("repeated", "first"));
+    query.add(new Term("repeated", "part"));
+    query.add(new Term("repeated", "second"));
+    query.add(new Term("repeated", "part"));
+    query.setSlop(100);
+
+    ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
+    assertEquals("slop of 100 just right", 1, hits.length);
+    QueryUtils.check(random, query, searcher);
+
+    query.setSlop(99);
+
+    hits = searcher.search(query, null, 1000).scoreDocs;
+    assertEquals("slop of 99 not enough", 0, hits.length);
+    QueryUtils.check(random, query, searcher);
+  }
+
+  // work on two docs like this: "phrase exist notexist exist found"
+  public void testNonExistingPhrase() throws IOException {
+    // phrase without repetitions that exists in 2 docs
+    query.add(new Term("nonexist", "phrase"));
+    query.add(new Term("nonexist", "notexist"));
+    query.add(new Term("nonexist", "found"));
+    query.setSlop(2); // would be found this way
+
+    ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
+    assertEquals("phrase without repetitions exists in 2 docs", 2, hits.length);
+    QueryUtils.check(random, query, searcher);
+
+    // phrase with repetitions that exists in 2 docs
+    query = new PhraseQuery();
+    query.add(new Term("nonexist", "phrase"));
+    query.add(new Term("nonexist", "exist"));
+    query.add(new Term("nonexist", "exist"));
+    query.setSlop(1); // would be found
+
+    hits = searcher.search(query, null, 1000).scoreDocs;
+    assertEquals("phrase with repetitions exists in two docs", 2, hits.length);
+    QueryUtils.check(random, query, searcher);
+
+    // phrase I with repetitions that does not exist in any doc
+    query = new PhraseQuery();
+    query.add(new Term("nonexist", "phrase"));
+    query.add(new Term("nonexist", "notexist"));
+    query.add(new Term("nonexist", "phrase"));
+    query.setSlop(1000); // would not be found no matter how high the slop is
+
+    hits = searcher.search(query, null, 1000).scoreDocs;
+    assertEquals("nonexisting phrase with repetitions does not exist in any doc", 0, hits.length);
+    QueryUtils.check(random, query, searcher);
+
+    // phrase II with repetitions that does not exist in any doc
+    query = new PhraseQuery();
+    query.add(new Term("nonexist", "phrase"));
+    query.add(new Term("nonexist", "exist"));
+    query.add(new Term("nonexist", "exist"));
+    query.add(new Term("nonexist", "exist"));
+    query.setSlop(1000); // would not be found no matter how high the slop is
+
+    hits = searcher.search(query, null, 1000).scoreDocs;
+    assertEquals("nonexisting phrase with repetitions does not exist in any doc", 0, hits.length);
+    QueryUtils.check(random, query, searcher);
+
+  }
+
+  /**
+   * Working on two fields like this:
+   *    Field("field", "one two three four five")
+   *    Field("palindrome", "one two three two one")
+   * A phrase of size 2 that occurs twice, once in order and once in reverse
+   * (because the doc is a palindrome), is counted twice.
+   * Also, in this case the order of the terms in the query does not matter.
+   * Also, when an exact match is found, the sloppy scorer and the exact scorer score the same.
+   */
+  public void testPalyndrome2() throws Exception {
+
+    // search on non-palindrome field, find phrase with no slop, using the exact phrase scorer
+    query.setSlop(0); // to use exact phrase scorer
+    query.add(new Term("field", "two"));
+    query.add(new Term("field", "three"));
+    ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
+    assertEquals("phrase found with exact phrase scorer", 1, hits.length);
+    float score0 = hits[0].score;
+    //System.out.println("(exact) field: two three: "+score0);
+    QueryUtils.check(random, query, searcher);
+
+    // search on non-palindrome field, find phrase with slop 2, though no slop is required here.
+    query.setSlop(2); // to use sloppy scorer
+    hits = searcher.search(query, null, 1000).scoreDocs;
+    assertEquals("just sloppy enough", 1, hits.length);
+    float score1 = hits[0].score;
+    //System.out.println("(sloppy) field: two three: "+score1);
+    assertEquals("exact scorer and sloppy scorer score the same when slop does not matter", score0, score1, SCORE_COMP_THRESH);
+    QueryUtils.check(random, query, searcher);
+
+    // search ordered in palindrome, find it twice
+    query = new PhraseQuery();
+    query.setSlop(2); // must be at least two for both ordered and reversed to match
+    query.add(new Term("palindrome", "two"));
+    query.add(new Term("palindrome", "three"));
+    hits = searcher.search(query, null, 1000).scoreDocs;
+    assertEquals("just sloppy enough", 1, hits.length);
+    //float score2 = hits[0].score;
+    //System.out.println("palindrome: two three: "+score2);
+    QueryUtils.check(random, query, searcher);
+
+    //commented out for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq().
+    //assertTrue("ordered scores higher in palindrome", score1+SCORE_COMP_THRESH<score2);
+  }
+
+  public void testRandomPhrases() throws Exception {
+    Directory dir = newDirectory();
+    Analyzer analyzer = new MockAnalyzer(random);
+
+    RandomIndexWriter w = new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).setMergePolicy(newLogMergePolicy()));
+    List<List<String>> docs = new ArrayList<List<String>>();
+    Document d = new Document();
+    Field f = newField("f", "", Field.Store.NO, Field.Index.ANALYZED);
+    d.add(f);
+
+    Random r = random;
+
+    int NUM_DOCS = atLeast(10);
+    for (int i = 0; i < NUM_DOCS; i++) {
+      // must be > 4096 so it spans multiple chunks
+      int termCount = _TestUtil.nextInt(random, 4097, 8200);
+
+      List<String> doc = new ArrayList<String>();
+
+      StringBuilder sb = new StringBuilder();
+      while(doc.size() < termCount) {
+        if (r.nextInt(5) == 1 || docs.size() == 0) {
+          // make new non-empty-string term
+          String term;
+          while(true) {
+            term = _TestUtil.randomUnicodeString(r);
+            if (term.length() > 0) {
+              break;
+            }
+          }
+          TokenStream ts = analyzer.reusableTokenStream("ignore", new StringReader(term));
+          CharTermAttribute termAttr = ts.addAttribute(CharTermAttribute.class);
+          ts.reset();
+          while(ts.incrementToken()) {
+            String text = termAttr.toString();
+            doc.add(text);
+            sb.append(text).append(' ');
+          }
+          ts.end();
+          ts.close();
+        } else {
+          // pick existing sub-phrase
+          List<String> lastDoc = docs.get(r.nextInt(docs.size()));
+          int len = _TestUtil.nextInt(r, 1, 10);
+          int start = r.nextInt(lastDoc.size()-len);
+          for(int k=start;k<start+len;k++) {
+            String t = lastDoc.get(k);
+            doc.add(t);
+            sb.append(t).append(' ');
+          }
+        }
+      }
+      docs.add(doc);
+      f.setValue(sb.toString());
+      w.addDocument(d);
+    }
+
+    IndexReader reader = w.getReader();
+    IndexSearcher s = newSearcher(reader);
+    w.close();
+
+    // now search
+    int num = atLeast(10);
+    for(int i=0;i<num;i++) {
+      int docID = r.nextInt(docs.size());
+      List<String> doc = docs.get(docID);
+
+      final int numTerm = _TestUtil.nextInt(r, 2, 20);
+      final int start = r.nextInt(doc.size()-numTerm);
+      PhraseQuery pq = new PhraseQuery();
+      StringBuilder sb = new StringBuilder();
+      for(int t=start;t<start+numTerm;t++) {
+        pq.add(new Term("f", doc.get(t)));
+        sb.append(doc.get(t)).append(' ');
+      }
+      Collection<byte[]> payloads = pspans.getPayload();
+      sawZero |= pspans.start() == 0;
+      count += payloads.size();
+    }
+    assertEquals(5, count);
+    assertTrue(sawZero);
+
+    //System.out.println("\ngetSpans test");
+    Spans spans = snq.getSpans(is.getIndexReader());
+    count = 0;
+    sawZero = false;
+    while (spans.next()) {
+      count++;
+      sawZero |= spans.start() == 0;
+      //System.out.println(spans.doc() + " - " + spans.start() + " - " + spans.end());
+    }
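+    // The loop above walked every match of the SpanNearQuery via Spans.next();
+    // start() reports the position of each match's first token, so sawZero
+    // records whether any match began at position 0.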
assertEquals(4, count); + assertTrue(sawZero); + + //System.out.println("\nPayloadSpanUtil test"); + + sawZero = false; + PayloadSpanUtil psu = new PayloadSpanUtil(is.getIndexReader()); + Collection pls = psu.getPayloadsForQuery(snq); + count = pls.size(); + for (byte[] bytes : pls) { + String s = new String(bytes); + //System.out.println(s); + sawZero |= s.equals("pos: 0"); + } + assertEquals(5, count); + assertTrue(sawZero); + writer.close(); + is.getIndexReader().close(); + dir.close(); + } +} + +final class TestPayloadAnalyzer extends Analyzer { + + @Override + public TokenStream tokenStream(String fieldName, Reader reader) { + TokenStream result = new LowerCaseTokenizer(LuceneTestCase.TEST_VERSION_CURRENT, reader); + return new PayloadFilter(result, fieldName); + } +} + +final class PayloadFilter extends TokenFilter { + String fieldName; + + int pos; + + int i; + + final PositionIncrementAttribute posIncrAttr; + final PayloadAttribute payloadAttr; + final CharTermAttribute termAttr; + + public PayloadFilter(TokenStream input, String fieldName) { + super(input); + this.fieldName = fieldName; + pos = 0; + i = 0; + posIncrAttr = input.addAttribute(PositionIncrementAttribute.class); + payloadAttr = input.addAttribute(PayloadAttribute.class); + termAttr = input.addAttribute(CharTermAttribute.class); + } + + @Override + public boolean incrementToken() throws IOException { + if (input.incrementToken()) { + payloadAttr.setPayload(new Payload(("pos: " + pos).getBytes())); + int posIncr; + if (i % 2 == 1) { + posIncr = 1; + } else { + posIncr = 0; + } + posIncrAttr.setPositionIncrement(posIncr); + pos += posIncr; + if (TestPositionIncrement.VERBOSE) { + System.out.println("term=" + termAttr + " pos=" + pos); + } + i++; + return true; + } else { + return false; + } + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java new file mode 100644 index 0000000..4ef962c --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java @@ -0,0 +1,97 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; + +public class TestPositiveScoresOnlyCollector extends LuceneTestCase { + + private static final class SimpleScorer extends Scorer { + private int idx = -1; + + public SimpleScorer(Weight weight) { + super(weight); + } + + @Override public float score() throws IOException { + return idx == scores.length ? Float.NaN : scores[idx]; + } + + @Override public int docID() { return idx; } + + @Override public int nextDoc() throws IOException { + return ++idx != scores.length ? idx : NO_MORE_DOCS; + } + + @Override public int advance(int target) throws IOException { + idx = target; + return idx < scores.length ? idx : NO_MORE_DOCS; + } + } + + // The scores must have positive as well as negative values + private static final float[] scores = new float[] { 0.7767749f, -1.7839992f, + 8.9925785f, 7.9608946f, -0.07948637f, 2.6356435f, 7.4950366f, 7.1490803f, + -8.108544f, 4.961808f, 2.2423935f, -7.285586f, 4.6699767f }; + + public void testNegativeScores() throws Exception { + + // The Top*Collectors previously filtered out documents with <= scores. This + // behavior has changed. This test checks that if PositiveOnlyScoresFilter + // wraps one of these collectors, documents with <= 0 scores are indeed + // filtered. + + int numPositiveScores = 0; + for (int i = 0; i < scores.length; i++) { + if (scores[i] > 0) { + ++numPositiveScores; + } + } + + Directory directory = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, directory); + writer.commit(); + IndexReader ir = writer.getReader(); + writer.close(); + IndexSearcher searcher = newSearcher(ir); + Weight fake = new TermQuery(new Term("fake", "weight")).createWeight(searcher); + Scorer s = new SimpleScorer(fake); + TopDocsCollector tdc = TopScoreDocCollector.create(scores.length, true); + Collector c = new PositiveScoresOnlyCollector(tdc); + c.setScorer(s); + while (s.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { + c.collect(0); + } + TopDocs td = tdc.topDocs(); + ScoreDoc[] sd = td.scoreDocs; + assertEquals(numPositiveScores, td.totalHits); + for (int i = 0; i < sd.length; i++) { + assertTrue("only positive scores should return: " + sd[i].score, sd[i].score > 0); + } + searcher.close(); + ir.close(); + directory.close(); + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestPrefixFilter.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestPrefixFilter.java new file mode 100644 index 0000000..65a197f --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestPrefixFilter.java @@ -0,0 +1,109 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+
+/**
+ * Tests {@link PrefixFilter} class.
+ *
+ */
+public class TestPrefixFilter extends LuceneTestCase {
+  public void testPrefixFilter() throws Exception {
+    Directory directory = newDirectory();
+
+    String[] categories = new String[] {"/Computers/Linux",
+                                        "/Computers/Mac/One",
+                                        "/Computers/Mac/Two",
+                                        "/Computers/Windows"};
+    RandomIndexWriter writer = new RandomIndexWriter(random, directory);
+    for (int i = 0; i < categories.length; i++) {
+      Document doc = new Document();
+      doc.add(newField("category", categories[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
+      writer.addDocument(doc);
+    }
+    IndexReader reader = writer.getReader();
+
+    // PrefixFilter combined with ConstantScoreQuery
+    PrefixFilter filter = new PrefixFilter(new Term("category", "/Computers"));
+    Query query = new ConstantScoreQuery(filter);
+    IndexSearcher searcher = newSearcher(reader);
+    ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
+    assertEquals(4, hits.length);
+
+    // test middle of values
+    filter = new PrefixFilter(new Term("category", "/Computers/Mac"));
+    query = new ConstantScoreQuery(filter);
+    hits = searcher.search(query, null, 1000).scoreDocs;
+    assertEquals(2, hits.length);
+
+    // test start of values
+    filter = new PrefixFilter(new Term("category", "/Computers/Linux"));
+    query = new ConstantScoreQuery(filter);
+    hits = searcher.search(query, null, 1000).scoreDocs;
+    assertEquals(1, hits.length);
+
+    // test end of values
+    filter = new PrefixFilter(new Term("category", "/Computers/Windows"));
+    query = new ConstantScoreQuery(filter);
+    hits = searcher.search(query, null, 1000).scoreDocs;
+    assertEquals(1, hits.length);
+
+    // test non-existent
+    filter = new PrefixFilter(new Term("category", "/Computers/ObsoleteOS"));
+    query = new ConstantScoreQuery(filter);
+    hits = searcher.search(query, null, 1000).scoreDocs;
+    assertEquals(0, hits.length);
+
+    // test non-existent, before values
+    filter = new PrefixFilter(new Term("category", "/Computers/AAA"));
+    query = new ConstantScoreQuery(filter);
+    hits = searcher.search(query, null, 1000).scoreDocs;
+    assertEquals(0, hits.length);
+
+    // test non-existent, after values
+    filter = new PrefixFilter(new Term("category", "/Computers/ZZZ"));
+    query = new ConstantScoreQuery(filter);
+    hits = searcher.search(query, null, 1000).scoreDocs;
+    assertEquals(0, hits.length);
+
+    // test zero-length prefix
+    filter = new PrefixFilter(new Term("category", ""));
+    query = new ConstantScoreQuery(filter);
+    hits = searcher.search(query, null, 1000).scoreDocs;
+    assertEquals(4, hits.length);
+
+    // test non-existent field
+    filter = new PrefixFilter(new Term("nonexistantfield", "/Computers"));
+    query = new ConstantScoreQuery(filter);
+    hits = searcher.search(query, null, 1000).scoreDocs;
+    assertEquals(0, hits.length);
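+    // Illustrative addition (not part of the original test): a PrefixQuery
+    // over the same prefix matches the same four documents, the difference
+    // being that its hits are scored rather than given one constant score.
+    Query scored = new PrefixQuery(new Term("category", "/Computers"));
+    assertEquals(4, searcher.search(scored, null, 1000).scoreDocs.length);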
+ + writer.close(); + searcher.close(); + reader.close(); + directory.close(); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestPrefixInBooleanQuery.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestPrefixInBooleanQuery.java new file mode 100644 index 0000000..619b676 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestPrefixInBooleanQuery.java @@ -0,0 +1,117 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util.LuceneTestCase; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.Directory; +import org.junit.AfterClass; +import org.junit.BeforeClass; + +/** + * https://issues.apache.org/jira/browse/LUCENE-1974 + * + * represent the bug of + * + * BooleanScorer.score(Collector collector, int max, int firstDocID) + * + * Line 273, end=8192, subScorerDocID=11378, then more got false? 
+ * + */ +public class TestPrefixInBooleanQuery extends LuceneTestCase { + + private static final String FIELD = "name"; + private static Directory directory; + private static IndexReader reader; + private static IndexSearcher searcher; + + @BeforeClass + public static void beforeClass() throws Exception { + directory = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, directory); + + Document doc = new Document(); + Field field = newField(FIELD, "meaninglessnames", Field.Store.NO, + Field.Index.NOT_ANALYZED_NO_NORMS); + doc.add(field); + + for (int i = 0; i < 5137; ++i) { + writer.addDocument(doc); + } + + field.setValue("tangfulin"); + writer.addDocument(doc); + + field.setValue("meaninglessnames"); + for (int i = 5138; i < 11377; ++i) { + writer.addDocument(doc); + } + + field.setValue("tangfulin"); + writer.addDocument(doc); + + reader = writer.getReader(); + searcher = newSearcher(reader); + writer.close(); + } + + @AfterClass + public static void afterClass() throws Exception { + searcher.close(); + searcher = null; + reader.close(); + reader = null; + directory.close(); + directory = null; + } + + public void testPrefixQuery() throws Exception { + Query query = new PrefixQuery(new Term(FIELD, "tang")); + assertEquals("Number of matched documents", 2, + searcher.search(query, null, 1000).totalHits); + } + public void testTermQuery() throws Exception { + Query query = new TermQuery(new Term(FIELD, "tangfulin")); + assertEquals("Number of matched documents", 2, + searcher.search(query, null, 1000).totalHits); + } + public void testTermBooleanQuery() throws Exception { + BooleanQuery query = new BooleanQuery(); + query.add(new TermQuery(new Term(FIELD, "tangfulin")), + BooleanClause.Occur.SHOULD); + query.add(new TermQuery(new Term(FIELD, "notexistnames")), + BooleanClause.Occur.SHOULD); + assertEquals("Number of matched documents", 2, + searcher.search(query, null, 1000).totalHits); + + } + public void testPrefixBooleanQuery() throws Exception { + BooleanQuery query = new BooleanQuery(); + query.add(new PrefixQuery(new Term(FIELD, "tang")), + BooleanClause.Occur.SHOULD); + query.add(new TermQuery(new Term(FIELD, "notexistnames")), + BooleanClause.Occur.SHOULD); + assertEquals("Number of matched documents", 2, + searcher.search(query, null, 1000).totalHits); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestPrefixQuery.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestPrefixQuery.java new file mode 100644 index 0000000..6533815 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestPrefixQuery.java @@ -0,0 +1,60 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; + +/** + * Tests {@link PrefixQuery} class. + * + */ +public class TestPrefixQuery extends LuceneTestCase { + public void testPrefixQuery() throws Exception { + Directory directory = newDirectory(); + + String[] categories = new String[] {"/Computers", + "/Computers/Mac", + "/Computers/Windows"}; + RandomIndexWriter writer = new RandomIndexWriter(random, directory); + for (int i = 0; i < categories.length; i++) { + Document doc = new Document(); + doc.add(newField("category", categories[i], Field.Store.YES, Field.Index.NOT_ANALYZED)); + writer.addDocument(doc); + } + IndexReader reader = writer.getReader(); + + PrefixQuery query = new PrefixQuery(new Term("category", "/Computers")); + IndexSearcher searcher = newSearcher(reader); + ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals("All documents in /Computers category and below", 3, hits.length); + + query = new PrefixQuery(new Term("category", "/Computers/Mac")); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals("One in /Computers/Mac", 1, hits.length); + writer.close(); + searcher.close(); + reader.close(); + directory.close(); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestPrefixRandom.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestPrefixRandom.java new file mode 100644 index 0000000..8d705d5 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestPrefixRandom.java @@ -0,0 +1,147 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; + +/** + * Create an index with random unicode terms + * Generates random prefix queries, and validates against a simple impl. 
+ */ +public class TestPrefixRandom extends LuceneTestCase { + private IndexSearcher searcher; + private IndexReader reader; + private Directory dir; + + @Override + public void setUp() throws Exception { + super.setUp(); + dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, dir, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.KEYWORD, false)) + .setMaxBufferedDocs(_TestUtil.nextInt(random, 50, 1000))); + + Document doc = new Document(); + Field bogus1 = newField("bogus", "", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS); + Field field = newField("field", "", Field.Store.NO, Field.Index.NOT_ANALYZED); + Field bogus2 = newField("zbogus", "", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS); + doc.add(field); + doc.add(bogus1); + doc.add(bogus2); + + int num = atLeast(2000); + + for (int i = 0; i < num; i++) { + field.setValue(_TestUtil.randomUnicodeString(random, 10)); + bogus1.setValue(_TestUtil.randomUnicodeString(random, 10)); + bogus2.setValue(_TestUtil.randomUnicodeString(random, 10)); + writer.addDocument(doc); + } + reader = writer.getReader(); + searcher = newSearcher(reader); + writer.close(); + } + + @Override + public void tearDown() throws Exception { + reader.close(); + searcher.close(); + dir.close(); + super.tearDown(); + } + + /** a stupid prefix query that just blasts thru the terms */ + private class DumbPrefixQuery extends MultiTermQuery { + private final Term prefix; + + DumbPrefixQuery(Term term) { + super(); + prefix = term; + } + + @Override + protected FilteredTermEnum getEnum(IndexReader reader) throws IOException { + return new SimplePrefixTermEnum(reader, prefix); + } + + private class SimplePrefixTermEnum extends FilteredTermEnum { + private final Term prefix; + private boolean endEnum; + + private SimplePrefixTermEnum(IndexReader reader, Term prefix) throws IOException { + this.prefix = prefix; + setEnum(reader.terms(new Term(prefix.field(), ""))); + } + + @Override + protected boolean termCompare(Term term) { + if (term.field() == prefix.field()) { + return term.text().startsWith(prefix.text()); + } else { + endEnum = true; + return false; + } + } + + @Override + public float difference() { + return 1.0F; + } + + @Override + protected boolean endEnum() { + return endEnum; + } + } + + @Override + public String toString(String field) { + return field.toString() + ":" + prefix.toString(); + } + } + + /** test a bunch of random prefixes */ + public void testPrefixes() throws Exception { + int num = atLeast(1000); + for (int i = 0; i < num; i++) + assertSame(_TestUtil.randomUnicodeString(random, 5)); + } + + /** check that the # of hits is the same as from a very + * simple prefixquery implementation. 
+ */ + private void assertSame(String prefix) throws IOException { + PrefixQuery smart = new PrefixQuery(new Term("field", prefix)); + DumbPrefixQuery dumb = new DumbPrefixQuery(new Term("field", prefix)); + + TopDocs smartDocs = searcher.search(smart, 25); + TopDocs dumbDocs = searcher.search(dumb, 25); + CheckHits.checkEqual(smart, smartDocs.scoreDocs, dumbDocs.scoreDocs); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestQueryTermVector.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestQueryTermVector.java new file mode 100644 index 0000000..0c0cf85 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestQueryTermVector.java @@ -0,0 +1,53 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.analysis.MockAnalyzer; + +public class TestQueryTermVector extends LuceneTestCase { + + public void testConstructor() { + String [] queryTerm = {"foo", "bar", "foo", "again", "foo", "bar", "go", "go", "go"}; + //Items are sorted lexicographically + String [] gold = {"again", "bar", "foo", "go"}; + int [] goldFreqs = {1, 2, 3, 3}; + QueryTermVector result = new QueryTermVector(queryTerm); + String [] terms = result.getTerms(); + assertTrue(terms.length == 4); + int [] freq = result.getTermFrequencies(); + assertTrue(freq.length == 4); + checkGold(terms, gold, freq, goldFreqs); + result = new QueryTermVector(null); + assertTrue(result.getTerms().length == 0); + + result = new QueryTermVector("foo bar foo again foo bar go go go", new MockAnalyzer(random)); + terms = result.getTerms(); + assertTrue(terms.length == 4); + freq = result.getTermFrequencies(); + assertTrue(freq.length == 4); + checkGold(terms, gold, freq, goldFreqs); + } + + private void checkGold(String[] terms, String[] gold, int[] freq, int[] goldFreqs) { + for (int i = 0; i < terms.length; i++) { + assertTrue(terms[i].equals(gold[i])); + assertTrue(freq[i] == goldFreqs[i]); + } + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestQueryWrapperFilter.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestQueryWrapperFilter.java new file mode 100644 index 0000000..37d522c --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestQueryWrapperFilter.java @@ -0,0 +1,84 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field.Index; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; + +public class TestQueryWrapperFilter extends LuceneTestCase { + + public void testBasic() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, dir); + Document doc = new Document(); + doc.add(newField("field", "value", Store.NO, Index.ANALYZED)); + writer.addDocument(doc); + IndexReader reader = writer.getReader(); + writer.close(); + + TermQuery termQuery = new TermQuery(new Term("field", "value")); + + // should not throw exception with primitive query + QueryWrapperFilter qwf = new QueryWrapperFilter(termQuery); + + IndexSearcher searcher = newSearcher(reader); + TopDocs hits = searcher.search(new MatchAllDocsQuery(), qwf, 10); + assertEquals(1, hits.totalHits); + hits = searcher.search(new MatchAllDocsQuery(), new CachingWrapperFilter(qwf), 10); + assertEquals(1, hits.totalHits); + + // should not throw exception with complex primitive query + BooleanQuery booleanQuery = new BooleanQuery(); + booleanQuery.add(termQuery, Occur.MUST); + booleanQuery.add(new TermQuery(new Term("field", "missing")), + Occur.MUST_NOT); + qwf = new QueryWrapperFilter(termQuery); + + hits = searcher.search(new MatchAllDocsQuery(), qwf, 10); + assertEquals(1, hits.totalHits); + hits = searcher.search(new MatchAllDocsQuery(), new CachingWrapperFilter(qwf), 10); + assertEquals(1, hits.totalHits); + + // should not throw exception with non primitive Query (doesn't implement + // Query#createWeight) + qwf = new QueryWrapperFilter(new FuzzyQuery(new Term("field", "valu"))); + + hits = searcher.search(new MatchAllDocsQuery(), qwf, 10); + assertEquals(1, hits.totalHits); + hits = searcher.search(new MatchAllDocsQuery(), new CachingWrapperFilter(qwf), 10); + assertEquals(1, hits.totalHits); + + // test a query with no hits + termQuery = new TermQuery(new Term("field", "not_exist")); + qwf = new QueryWrapperFilter(termQuery); + hits = searcher.search(new MatchAllDocsQuery(), qwf, 10); + assertEquals(0, hits.totalHits); + hits = searcher.search(new MatchAllDocsQuery(), new CachingWrapperFilter(qwf), 10); + assertEquals(0, hits.totalHits); + searcher.close(); + reader.close(); + dir.close(); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java new file mode 100644 index 0000000..315b067 --- /dev/null +++ 
b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java @@ -0,0 +1,115 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.util.LuceneTestCase; + +public class TestScoreCachingWrappingScorer extends LuceneTestCase { + + private static final class SimpleScorer extends Scorer { + private int idx = 0; + private int doc = -1; + + public SimpleScorer() { + super((Similarity)null); + } + + @Override public float score() throws IOException { + // advance idx on purpose, so that consecutive calls to score will get + // different results. This is to emulate computation of a score. If + // ScoreCachingWrappingScorer is used, this should not be called more than + // once per document. + return idx == scores.length ? Float.NaN : scores[idx++]; + } + + @Override public int docID() { return doc; } + + @Override public int nextDoc() throws IOException { + return ++doc < scores.length ? doc : NO_MORE_DOCS; + } + + @Override public int advance(int target) throws IOException { + doc = target; + return doc < scores.length ? doc : NO_MORE_DOCS; + } + + } + + private static final class ScoreCachingCollector extends Collector { + + private int idx = 0; + private Scorer scorer; + float[] mscores; + + public ScoreCachingCollector(int numToCollect) { + mscores = new float[numToCollect]; + } + + @Override public void collect(int doc) throws IOException { + // just a sanity check to avoid IOOB. + if (idx == mscores.length) { + return; + } + + // just call score() a couple of times and record the score. + mscores[idx] = scorer.score(); + mscores[idx] = scorer.score(); + mscores[idx] = scorer.score(); + ++idx; + } + + @Override public void setNextReader(IndexReader reader, int docBase) + throws IOException { + } + + @Override public void setScorer(Scorer scorer) throws IOException { + this.scorer = new ScoreCachingWrappingScorer(scorer); + } + + @Override public boolean acceptsDocsOutOfOrder() { + return true; + } + + } + + private static final float[] scores = new float[] { 0.7767749f, 1.7839992f, + 8.9925785f, 7.9608946f, 0.07948637f, 2.6356435f, 7.4950366f, 7.1490803f, + 8.108544f, 4.961808f, 2.2423935f, 7.285586f, 4.6699767f }; + + public void testGetScores() throws Exception { + + Scorer s = new SimpleScorer(); + ScoreCachingCollector scc = new ScoreCachingCollector(scores.length); + scc.setScorer(s); + + // We need to iterate on the scorer so that its doc() advances. 
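+    // ScoreCachingWrappingScorer caches score() per document: collect() above
+    // calls scorer.score() three times per doc, but only the first call reaches
+    // the wrapped scorer, so SimpleScorer's idx advances once per document and
+    // mscores[i] ends up equal to scores[i].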
+ int doc; + while ((doc = s.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + scc.collect(doc); + } + + for (int i = 0; i < scores.length; i++) { + assertEquals(scores[i], scc.mscores[i], 0f); + } + + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestScorerPerf.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestScorerPerf.java new file mode 100755 index 0000000..9f71a23 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestScorerPerf.java @@ -0,0 +1,411 @@ +package org.apache.lucene.search; + +import org.apache.lucene.util.DocIdBitSet; +import org.apache.lucene.util.LuceneTestCase; + +import java.util.BitSet; +import java.io.IOException; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.store.Directory; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class TestScorerPerf extends LuceneTestCase { + boolean validate = true; // set to false when doing performance testing + + BitSet[] sets; + Term[] terms; + IndexSearcher s; + Directory d; + + public void createDummySearcher() throws Exception { + // Create a dummy index with nothing in it. + // This could possibly fail if Lucene starts checking for docid ranges... 
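+    // One empty document is enough: the scorer perf tests below run against
+    // artificial BitSet filters, so only the existence of a searcher matters,
+    // not the index contents.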
+ d = newDirectory(); + IndexWriter iw = new IndexWriter(d, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); + iw.addDocument(new Document()); + iw.close(); + s = new IndexSearcher(d, true); + } + + public void createRandomTerms(int nDocs, int nTerms, double power, Directory dir) throws Exception { + int[] freq = new int[nTerms]; + terms = new Term[nTerms]; + for (int i=0; i 0 && totHits > 0); + netSearch.addAndGet(totSearch); + } catch (Exception exc) { + failed.set(true); + throw new RuntimeException(exc); + } + } + }; + threads[threadID].setDaemon(true); + } + + for (Thread t : threads) { + t.start(); + } + + for (Thread t : threads) { + t.join(); + } + + if (VERBOSE) System.out.println(NUM_SEARCH_THREADS + " threads did " + netSearch.get() + " searches"); + + s.close(); + r.close(); + dir.close(); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestSetNorm.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestSetNorm.java new file mode 100644 index 0000000..60bb8ce --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestSetNorm.java @@ -0,0 +1,94 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.*; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.Directory; + +/** Document boost unit test. 
+ * + * + * @version $Revision: 1091277 $ + */ +public class TestSetNorm extends LuceneTestCase { + + public void testSetNorm() throws Exception { + Directory store = newDirectory(); + IndexWriter writer = new IndexWriter(store, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); + + // add the same document four times + Fieldable f1 = newField("field", "word", Field.Store.YES, Field.Index.ANALYZED); + Document d1 = new Document(); + d1.add(f1); + writer.addDocument(d1); + writer.addDocument(d1); + writer.addDocument(d1); + writer.addDocument(d1); + writer.close(); + + // reset the boost of each instance of this document + IndexReader reader = IndexReader.open(store, false); + reader.setNorm(0, "field", 1.0f); + reader.setNorm(1, "field", 2.0f); + reader.setNorm(2, "field", 4.0f); + reader.setNorm(3, "field", 16.0f); + reader.close(); + + // check that searches are ordered by this boost + final float[] scores = new float[4]; + + IndexSearcher is = new IndexSearcher(store, true); + is.search + (new TermQuery(new Term("field", "word")), + new Collector() { + private int base = 0; + private Scorer scorer; + @Override + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + } + @Override + public final void collect(int doc) throws IOException { + scores[doc + base] = scorer.score(); + } + @Override + public void setNextReader(IndexReader reader, int docBase) { + base = docBase; + } + @Override + public boolean acceptsDocsOutOfOrder() { + return true; + } + }); + is.close(); + float lastScore = 0.0f; + + for (int i = 0; i < 4; i++) { + assertTrue(scores[i] > lastScore); + lastScore = scores[i]; + } + store.close(); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestSimilarity.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestSimilarity.java new file mode 100644 index 0000000..f6fe932 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestSimilarity.java @@ -0,0 +1,179 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util.LuceneTestCase; +import java.io.IOException; +import java.util.Collection; + +import org.apache.lucene.index.FieldInvertState; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.Directory; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.search.Explanation.IDFExplanation; + +/** Similarity unit test. 
+ * + * + * @version $Revision: 1091277 $ + */ +public class TestSimilarity extends LuceneTestCase { + + public static class SimpleSimilarity extends Similarity { + @Override public float computeNorm(String field, FieldInvertState state) { return state.getBoost(); } + @Override public float queryNorm(float sumOfSquaredWeights) { return 1.0f; } + @Override public float tf(float freq) { return freq; } + @Override public float sloppyFreq(int distance) { return 2.0f; } + @Override public float idf(int docFreq, int numDocs) { return 1.0f; } + @Override public float coord(int overlap, int maxOverlap) { return 1.0f; } + @Override public IDFExplanation idfExplain(Collection terms, Searcher searcher) throws IOException { + return new IDFExplanation() { + @Override + public float getIdf() { + return 1.0f; + } + @Override + public String explain() { + return "Inexplicable"; + } + }; + } + } + + public void testSimilarity() throws Exception { + Directory store = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, store, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)) + .setSimilarity(new SimpleSimilarity())); + + Document d1 = new Document(); + d1.add(newField("field", "a c", Field.Store.YES, Field.Index.ANALYZED)); + + Document d2 = new Document(); + d2.add(newField("field", "a b c", Field.Store.YES, Field.Index.ANALYZED)); + + writer.addDocument(d1); + writer.addDocument(d2); + IndexReader reader = writer.getReader(); + writer.close(); + + IndexSearcher searcher = newSearcher(reader); + searcher.setSimilarity(new SimpleSimilarity()); + + Term a = new Term("field", "a"); + Term b = new Term("field", "b"); + Term c = new Term("field", "c"); + + searcher.search(new TermQuery(b), new Collector() { + private Scorer scorer; + @Override + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + } + @Override + public final void collect(int doc) throws IOException { + assertEquals(1.0f, scorer.score()); + } + @Override + public void setNextReader(IndexReader reader, int docBase) {} + @Override + public boolean acceptsDocsOutOfOrder() { + return true; + } + }); + + BooleanQuery bq = new BooleanQuery(); + bq.add(new TermQuery(a), BooleanClause.Occur.SHOULD); + bq.add(new TermQuery(b), BooleanClause.Occur.SHOULD); + //System.out.println(bq.toString("field")); + searcher.search(bq, new Collector() { + private int base = 0; + private Scorer scorer; + @Override + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + } + @Override + public final void collect(int doc) throws IOException { + //System.out.println("Doc=" + doc + " score=" + score); + assertEquals((float)doc+base+1, scorer.score()); + } + @Override + public void setNextReader(IndexReader reader, int docBase) { + base = docBase; + } + @Override + public boolean acceptsDocsOutOfOrder() { + return true; + } + }); + + PhraseQuery pq = new PhraseQuery(); + pq.add(a); + pq.add(c); + //System.out.println(pq.toString("field")); + searcher.search(pq, + new Collector() { + private Scorer scorer; + @Override + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + } + @Override + public final void collect(int doc) throws IOException { + //System.out.println("Doc=" + doc + " score=" + score); + assertEquals(1.0f, scorer.score()); + } + @Override + public void setNextReader(IndexReader reader, int docBase) {} + @Override + public boolean acceptsDocsOutOfOrder() { + return true; + } + }); + + pq.setSlop(2); + 
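+    // With a non-zero slop the sloppy phrase scorer is used; SimpleSimilarity
+    // returns 2.0f from sloppyFreq() regardless of distance and tf(freq)
+    // returns freq, so each match below is expected to score exactly 2.0f.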
//System.out.println(pq.toString("field")); + searcher.search(pq, new Collector() { + private Scorer scorer; + @Override + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + } + @Override + public final void collect(int doc) throws IOException { + //System.out.println("Doc=" + doc + " score=" + score); + assertEquals(2.0f, scorer.score()); + } + @Override + public void setNextReader(IndexReader reader, int docBase) {} + @Override + public boolean acceptsDocsOutOfOrder() { + return true; + } + }); + + searcher.close(); + reader.close(); + store.close(); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestSimpleExplanations.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestSimpleExplanations.java new file mode 100644 index 0000000..45aec07 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestSimpleExplanations.java @@ -0,0 +1,432 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.standard.StandardAnalyzer; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.search.spans.SpanNearQuery; +import org.apache.lucene.search.spans.SpanQuery; +import org.apache.lucene.search.spans.SpanTermQuery; +import org.apache.lucene.store.Directory; + + +/** + * TestExplanations subclass focusing on basic query types + */ +public class TestSimpleExplanations extends TestExplanations { + + // we focus on queries that don't rewrite to other queries. + // if we get those covered well, then the ones that rewrite should + // also be covered. 
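+  // qtest(...) is inherited from TestExplanations: it checks that the query
+  // matches exactly the listed document ids and that every hit's Explanation
+  // is consistent with its score.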
+ + + /* simple term tests */ + + public void testT1() throws Exception { + qtest("w1", new int[] { 0,1,2,3 }); + } + public void testT2() throws Exception { + qtest("w1^1000", new int[] { 0,1,2,3 }); + } + + /* MatchAllDocs */ + + public void testMA1() throws Exception { + qtest(new MatchAllDocsQuery(), new int[] { 0,1,2,3 }); + } + public void testMA2() throws Exception { + Query q=new MatchAllDocsQuery(); + q.setBoost(1000); + qtest(q, new int[] { 0,1,2,3 }); + } + + /* some simple phrase tests */ + + public void testP1() throws Exception { + qtest("\"w1 w2\"", new int[] { 0 }); + } + public void testP2() throws Exception { + qtest("\"w1 w3\"", new int[] { 1,3 }); + } + public void testP3() throws Exception { + qtest("\"w1 w2\"~1", new int[] { 0,1,2 }); + } + public void testP4() throws Exception { + qtest("\"w2 w3\"~1", new int[] { 0,1,2,3 }); + } + public void testP5() throws Exception { + qtest("\"w3 w2\"~1", new int[] { 1,3 }); + } + public void testP6() throws Exception { + qtest("\"w3 w2\"~2", new int[] { 0,1,3 }); + } + public void testP7() throws Exception { + qtest("\"w3 w2\"~3", new int[] { 0,1,2,3 }); + } + + /* some simple filtered query tests */ + + public void testFQ1() throws Exception { + qtest(new FilteredQuery(qp.parse("w1"), + new ItemizedFilter(new int[] {0,1,2,3})), + new int[] {0,1,2,3}); + } + public void testFQ2() throws Exception { + qtest(new FilteredQuery(qp.parse("w1"), + new ItemizedFilter(new int[] {0,2,3})), + new int[] {0,2,3}); + } + public void testFQ3() throws Exception { + qtest(new FilteredQuery(qp.parse("xx"), + new ItemizedFilter(new int[] {1,3})), + new int[] {3}); + } + public void testFQ4() throws Exception { + qtest(new FilteredQuery(qp.parse("xx^1000"), + new ItemizedFilter(new int[] {1,3})), + new int[] {3}); + } + public void testFQ6() throws Exception { + Query q = new FilteredQuery(qp.parse("xx"), + new ItemizedFilter(new int[] {1,3})); + q.setBoost(1000); + qtest(q, new int[] {3}); + } + + /* ConstantScoreQueries */ + + public void testCSQ1() throws Exception { + Query q = new ConstantScoreQuery(new ItemizedFilter(new int[] {0,1,2,3})); + qtest(q, new int[] {0,1,2,3}); + } + public void testCSQ2() throws Exception { + Query q = new ConstantScoreQuery(new ItemizedFilter(new int[] {1,3})); + qtest(q, new int[] {1,3}); + } + public void testCSQ3() throws Exception { + Query q = new ConstantScoreQuery(new ItemizedFilter(new int[] {0,2})); + q.setBoost(1000); + qtest(q, new int[] {0,2}); + } + + /* DisjunctionMaxQuery */ + + public void testDMQ1() throws Exception { + DisjunctionMaxQuery q = new DisjunctionMaxQuery(0.0f); + q.add(qp.parse("w1")); + q.add(qp.parse("w5")); + qtest(q, new int[] { 0,1,2,3 }); + } + public void testDMQ2() throws Exception { + DisjunctionMaxQuery q = new DisjunctionMaxQuery(0.5f); + q.add(qp.parse("w1")); + q.add(qp.parse("w5")); + qtest(q, new int[] { 0,1,2,3 }); + } + public void testDMQ3() throws Exception { + DisjunctionMaxQuery q = new DisjunctionMaxQuery(0.5f); + q.add(qp.parse("QQ")); + q.add(qp.parse("w5")); + qtest(q, new int[] { 0 }); + } + public void testDMQ4() throws Exception { + DisjunctionMaxQuery q = new DisjunctionMaxQuery(0.5f); + q.add(qp.parse("QQ")); + q.add(qp.parse("xx")); + qtest(q, new int[] { 2,3 }); + } + public void testDMQ5() throws Exception { + DisjunctionMaxQuery q = new DisjunctionMaxQuery(0.5f); + q.add(qp.parse("yy -QQ")); + q.add(qp.parse("xx")); + qtest(q, new int[] { 2,3 }); + } + public void testDMQ6() throws Exception { + DisjunctionMaxQuery q = new DisjunctionMaxQuery(0.5f); 
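+    // 0.5f is the tie-breaker multiplier: a DisjunctionMaxQuery scores a doc
+    // as the maximum of its matching clauses' scores plus 0.5 times the sum
+    // of the scores of the other matching clauses.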
+ q.add(qp.parse("-yy w3")); + q.add(qp.parse("xx")); + qtest(q, new int[] { 0,1,2,3 }); + } + public void testDMQ7() throws Exception { + DisjunctionMaxQuery q = new DisjunctionMaxQuery(0.5f); + q.add(qp.parse("-yy w3")); + q.add(qp.parse("w2")); + qtest(q, new int[] { 0,1,2,3 }); + } + public void testDMQ8() throws Exception { + DisjunctionMaxQuery q = new DisjunctionMaxQuery(0.5f); + q.add(qp.parse("yy w5^100")); + q.add(qp.parse("xx^100000")); + qtest(q, new int[] { 0,2,3 }); + } + public void testDMQ9() throws Exception { + DisjunctionMaxQuery q = new DisjunctionMaxQuery(0.5f); + q.add(qp.parse("yy w5^100")); + q.add(qp.parse("xx^0")); + qtest(q, new int[] { 0,2,3 }); + } + + /* MultiPhraseQuery */ + + public void testMPQ1() throws Exception { + MultiPhraseQuery q = new MultiPhraseQuery(); + q.add(ta(new String[] {"w1"})); + q.add(ta(new String[] {"w2","w3", "xx"})); + qtest(q, new int[] { 0,1,2,3 }); + } + public void testMPQ2() throws Exception { + MultiPhraseQuery q = new MultiPhraseQuery(); + q.add(ta(new String[] {"w1"})); + q.add(ta(new String[] {"w2","w3"})); + qtest(q, new int[] { 0,1,3 }); + } + public void testMPQ3() throws Exception { + MultiPhraseQuery q = new MultiPhraseQuery(); + q.add(ta(new String[] {"w1","xx"})); + q.add(ta(new String[] {"w2","w3"})); + qtest(q, new int[] { 0,1,2,3 }); + } + public void testMPQ4() throws Exception { + MultiPhraseQuery q = new MultiPhraseQuery(); + q.add(ta(new String[] {"w1"})); + q.add(ta(new String[] {"w2"})); + qtest(q, new int[] { 0 }); + } + public void testMPQ5() throws Exception { + MultiPhraseQuery q = new MultiPhraseQuery(); + q.add(ta(new String[] {"w1"})); + q.add(ta(new String[] {"w2"})); + q.setSlop(1); + qtest(q, new int[] { 0,1,2 }); + } + public void testMPQ6() throws Exception { + MultiPhraseQuery q = new MultiPhraseQuery(); + q.add(ta(new String[] {"w1","w3"})); + q.add(ta(new String[] {"w2"})); + q.setSlop(1); + qtest(q, new int[] { 0,1,2,3 }); + } + + /* some simple tests of boolean queries containing term queries */ + + public void testBQ1() throws Exception { + qtest("+w1 +w2", new int[] { 0,1,2,3 }); + } + public void testBQ2() throws Exception { + qtest("+yy +w3", new int[] { 2,3 }); + } + public void testBQ3() throws Exception { + qtest("yy +w3", new int[] { 0,1,2,3 }); + } + public void testBQ4() throws Exception { + qtest("w1 (-xx w2)", new int[] { 0,1,2,3 }); + } + public void testBQ5() throws Exception { + qtest("w1 (+qq w2)", new int[] { 0,1,2,3 }); + } + public void testBQ6() throws Exception { + qtest("w1 -(-qq w5)", new int[] { 1,2,3 }); + } + public void testBQ7() throws Exception { + qtest("+w1 +(qq (xx -w2) (+w3 +w4))", new int[] { 0 }); + } + public void testBQ8() throws Exception { + qtest("+w1 (qq (xx -w2) (+w3 +w4))", new int[] { 0,1,2,3 }); + } + public void testBQ9() throws Exception { + qtest("+w1 (qq (-xx w2) -(+w3 +w4))", new int[] { 0,1,2,3 }); + } + public void testBQ10() throws Exception { + qtest("+w1 +(qq (-xx w2) -(+w3 +w4))", new int[] { 1 }); + } + public void testBQ11() throws Exception { + qtest("w1 w2^1000.0", new int[] { 0,1,2,3 }); + } + public void testBQ14() throws Exception { + BooleanQuery q = new BooleanQuery(true); + q.add(qp.parse("QQQQQ"), BooleanClause.Occur.SHOULD); + q.add(qp.parse("w1"), BooleanClause.Occur.SHOULD); + qtest(q, new int[] { 0,1,2,3 }); + } + public void testBQ15() throws Exception { + BooleanQuery q = new BooleanQuery(true); + q.add(qp.parse("QQQQQ"), BooleanClause.Occur.MUST_NOT); + q.add(qp.parse("w1"), BooleanClause.Occur.SHOULD); + qtest(q, new 
int[] { 0,1,2,3 }); + } + public void testBQ16() throws Exception { + BooleanQuery q = new BooleanQuery(true); + q.add(qp.parse("QQQQQ"), BooleanClause.Occur.SHOULD); + q.add(qp.parse("w1 -xx"), BooleanClause.Occur.SHOULD); + qtest(q, new int[] { 0,1 }); + } + public void testBQ17() throws Exception { + BooleanQuery q = new BooleanQuery(true); + q.add(qp.parse("w2"), BooleanClause.Occur.SHOULD); + q.add(qp.parse("w1 -xx"), BooleanClause.Occur.SHOULD); + qtest(q, new int[] { 0,1,2,3 }); + } + public void testBQ19() throws Exception { + qtest("-yy w3", new int[] { 0,1 }); + } + + public void testBQ20() throws Exception { + BooleanQuery q = new BooleanQuery(); + q.setMinimumNumberShouldMatch(2); + q.add(qp.parse("QQQQQ"), BooleanClause.Occur.SHOULD); + q.add(qp.parse("yy"), BooleanClause.Occur.SHOULD); + q.add(qp.parse("zz"), BooleanClause.Occur.SHOULD); + q.add(qp.parse("w5"), BooleanClause.Occur.SHOULD); + q.add(qp.parse("w4"), BooleanClause.Occur.SHOULD); + + qtest(q, new int[] { 0,3 }); + + } + + public void testTermQueryMultiSearcherExplain() throws Exception { + // creating two directories for indices + Directory indexStoreA = newDirectory(); + Directory indexStoreB = newDirectory(); + + Document lDoc = new Document(); + lDoc.add(newField("handle", "1 2", Field.Store.YES, Field.Index.ANALYZED)); + Document lDoc2 = new Document(); + lDoc2.add(newField("handle", "1 2", Field.Store.YES, Field.Index.ANALYZED)); + Document lDoc3 = new Document(); + lDoc3.add(newField("handle", "1 2", Field.Store.YES, Field.Index.ANALYZED)); + + IndexWriter writerA = new IndexWriter(indexStoreA, newIndexWriterConfig( + TEST_VERSION_CURRENT, new StandardAnalyzer( + TEST_VERSION_CURRENT))); + IndexWriter writerB = new IndexWriter(indexStoreB, newIndexWriterConfig( + TEST_VERSION_CURRENT, new StandardAnalyzer( + TEST_VERSION_CURRENT))); + + writerA.addDocument(lDoc); + writerA.addDocument(lDoc2); + writerA.optimize(); + writerA.close(); + + writerB.addDocument(lDoc3); + writerB.close(); + + QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, "fulltext", new StandardAnalyzer(TEST_VERSION_CURRENT)); + Query query = parser.parse("handle:1"); + + Searcher[] searchers = new Searcher[2]; + searchers[0] = new IndexSearcher(indexStoreB, true); + searchers[1] = new IndexSearcher(indexStoreA, true); + Searcher mSearcher = new MultiSearcher(searchers); + ScoreDoc[] hits = mSearcher.search(query, null, 1000).scoreDocs; + + assertEquals(3, hits.length); + + Explanation explain = mSearcher.explain(query, hits[0].doc); + String exp = explain.toString(0); + assertTrue(exp, exp.indexOf("maxDocs=3") > -1); + assertTrue(exp, exp.indexOf("docFreq=3") > -1); + + query = parser.parse("handle:\"1 2\""); + hits = mSearcher.search(query, null, 1000).scoreDocs; + + assertEquals(3, hits.length); + + explain = mSearcher.explain(query, hits[0].doc); + exp = explain.toString(0); + assertTrue(exp, exp.indexOf("1=3") > -1); + assertTrue(exp, exp.indexOf("2=3") > -1); + + query = new SpanNearQuery(new SpanQuery[] { + new SpanTermQuery(new Term("handle", "1")), + new SpanTermQuery(new Term("handle", "2")) }, 0, true); + hits = mSearcher.search(query, null, 1000).scoreDocs; + + assertEquals(3, hits.length); + + explain = mSearcher.explain(query, hits[0].doc); + exp = explain.toString(0); + assertTrue(exp, exp.indexOf("1=3") > -1); + assertTrue(exp, exp.indexOf("2=3") > -1); + mSearcher.close(); + indexStoreA.close(); + indexStoreB.close(); + } + + /* BQ of TQ: using alt so some fields have zero boost and some don't */ + + public void 
testMultiFieldBQ1() throws Exception { + qtest("+w1 +alt:w2", new int[] { 0,1,2,3 }); + } + public void testMultiFieldBQ2() throws Exception { + qtest("+yy +alt:w3", new int[] { 2,3 }); + } + public void testMultiFieldBQ3() throws Exception { + qtest("yy +alt:w3", new int[] { 0,1,2,3 }); + } + public void testMultiFieldBQ4() throws Exception { + qtest("w1 (-xx alt:w2)", new int[] { 0,1,2,3 }); + } + public void testMultiFieldBQ5() throws Exception { + qtest("w1 (+alt:qq alt:w2)", new int[] { 0,1,2,3 }); + } + public void testMultiFieldBQ6() throws Exception { + qtest("w1 -(-alt:qq alt:w5)", new int[] { 1,2,3 }); + } + public void testMultiFieldBQ7() throws Exception { + qtest("+w1 +(alt:qq (alt:xx -alt:w2) (+alt:w3 +alt:w4))", new int[] { 0 }); + } + public void testMultiFieldBQ8() throws Exception { + qtest("+alt:w1 (qq (alt:xx -w2) (+alt:w3 +w4))", new int[] { 0,1,2,3 }); + } + public void testMultiFieldBQ9() throws Exception { + qtest("+w1 (alt:qq (-xx w2) -(+alt:w3 +w4))", new int[] { 0,1,2,3 }); + } + public void testMultiFieldBQ10() throws Exception { + qtest("+w1 +(alt:qq (-xx alt:w2) -(+alt:w3 +w4))", new int[] { 1 }); + } + + /* BQ of PQ: using alt so some fields have zero boost and some don't */ + + public void testMultiFieldBQofPQ1() throws Exception { + qtest("\"w1 w2\" alt:\"w1 w2\"", new int[] { 0 }); + } + public void testMultiFieldBQofPQ2() throws Exception { + qtest("\"w1 w3\" alt:\"w1 w3\"", new int[] { 1,3 }); + } + public void testMultiFieldBQofPQ3() throws Exception { + qtest("\"w1 w2\"~1 alt:\"w1 w2\"~1", new int[] { 0,1,2 }); + } + public void testMultiFieldBQofPQ4() throws Exception { + qtest("\"w2 w3\"~1 alt:\"w2 w3\"~1", new int[] { 0,1,2,3 }); + } + public void testMultiFieldBQofPQ5() throws Exception { + qtest("\"w3 w2\"~1 alt:\"w3 w2\"~1", new int[] { 1,3 }); + } + public void testMultiFieldBQofPQ6() throws Exception { + qtest("\"w3 w2\"~2 alt:\"w3 w2\"~2", new int[] { 0,1,3 }); + } + public void testMultiFieldBQofPQ7() throws Exception { + qtest("\"w3 w2\"~3 alt:\"w3 w2\"~3", new int[] { 0,1,2,3 }); + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestSimpleExplanationsOfNonMatches.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestSimpleExplanationsOfNonMatches.java new file mode 100644 index 0000000..ca1e3a7 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestSimpleExplanationsOfNonMatches.java @@ -0,0 +1,39 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + + + +/** + * subclass of TestSimpleExplanations that verifies non matches. 
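 * (CheckHits.checkNoMatchExplanations walks every other document in the index and
 * asserts that its explanation indicates a non-match.)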
+ */ +public class TestSimpleExplanationsOfNonMatches + extends TestSimpleExplanations { + + /** + * Overrides superclass to ignore matches and focus on non-matches + * + * @see CheckHits#checkNoMatchExplanations + */ + @Override + public void qtest(Query q, int[] expDocNrs) throws Exception { + CheckHits.checkNoMatchExplanations(q, FIELD, searcher, expDocNrs); + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestSloppyPhraseQuery.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestSloppyPhraseQuery.java new file mode 100755 index 0000000..2280cf0 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestSloppyPhraseQuery.java @@ -0,0 +1,155 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.store.Directory; + +public class TestSloppyPhraseQuery extends LuceneTestCase { + + private static final String S_1 = "A A A"; + private static final String S_2 = "A 1 2 3 A 4 5 6 A"; + + private static final Document DOC_1 = makeDocument("X " + S_1 + " Y"); + private static final Document DOC_2 = makeDocument("X " + S_2 + " Y"); + private static final Document DOC_3 = makeDocument("X " + S_1 + " A Y"); + private static final Document DOC_1_B = makeDocument("X " + S_1 + " Y N N N N " + S_1 + " Z"); + private static final Document DOC_2_B = makeDocument("X " + S_2 + " Y N N N N " + S_2 + " Z"); + private static final Document DOC_3_B = makeDocument("X " + S_1 + " A Y N N N N " + S_1 + " A Y"); + private static final Document DOC_4 = makeDocument("A A X A X B A X B B A A X B A A"); + + private static final PhraseQuery QUERY_1 = makePhraseQuery( S_1 ); + private static final PhraseQuery QUERY_2 = makePhraseQuery( S_2 ); + private static final PhraseQuery QUERY_4 = makePhraseQuery( "X A A"); + + /** + * Test DOC_4 and QUERY_4. + * QUERY_4 has a fuzzy (len=1) match to DOC_4, so all slop values > 0 should succeed. + * But only the 3rd sequence of A's in DOC_4 will do. + */ + public void testDoc4_Query4_All_Slops_Should_match() throws Exception { + for (int slop=0; slop<30; slop++) { + int numResultsExpected = slop<1 ? 0 : 1; + checkPhraseQuery(DOC_4, QUERY_4, slop, numResultsExpected); + } + } + + /** + * Test DOC_1 and QUERY_1. 
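 * (DOC_1 is "X A A A Y" and QUERY_1 is the phrase "A A A": the query terms already
 * sit side by side in the document, so no position moves are needed at any slop.)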
+ * QUERY_1 has an exact match to DOC_1, so all slop values should succeed. + * Before LUCENE-1310, a slop value of 1 did not succeed. + */ + public void testDoc1_Query1_All_Slops_Should_match() throws Exception { + for (int slop=0; slop<30; slop++) { + float score1 = checkPhraseQuery(DOC_1, QUERY_1, slop, 1); + float score2 = checkPhraseQuery(DOC_1_B, QUERY_1, slop, 1); + assertTrue("slop="+slop+" score2="+score2+" should be greater than score1 "+score1, score2>score1); + } + } + + /** + * Test DOC_2 and QUERY_1. + * 6 should be the minimum slop to make QUERY_1 match DOC_2. + * Before LUCENE-1310, 7 was the minimum. + */ + public void testDoc2_Query1_Slop_6_or_more_Should_match() throws Exception { + for (int slop=0; slop<30; slop++) { + int numResultsExpected = slop<6 ? 0 : 1; + float score1 = checkPhraseQuery(DOC_2, QUERY_1, slop, numResultsExpected); + if (numResultsExpected>0) { + float score2 = checkPhraseQuery(DOC_2_B, QUERY_1, slop, 1); + assertTrue("slop="+slop+" score2="+score2+" should be greater than score1 "+score1, score2>score1); + } + } + } + + /** + * Test DOC_2 and QUERY_2. + * QUERY_2 has an exact match to DOC_2, so all slop values should succeed. + * Before LUCENE-1310, 0 succeeded, 1 through 7 failed, and 8 or greater succeeded. + */ + public void testDoc2_Query2_All_Slops_Should_match() throws Exception { + for (int slop=0; slop<30; slop++) { + float score1 = checkPhraseQuery(DOC_2, QUERY_2, slop, 1); + float score2 = checkPhraseQuery(DOC_2_B, QUERY_2, slop, 1); + assertTrue("slop="+slop+" score2="+score2+" should be greater than score1 "+score1, score2>score1); + } + } + + /** + * Test DOC_3 and QUERY_1. + * QUERY_1 has an exact match to DOC_3, so all slop values should succeed. + */ + public void testDoc3_Query1_All_Slops_Should_match() throws Exception { + for (int slop=0; slop<30; slop++) { + float score1 = checkPhraseQuery(DOC_3, QUERY_1, slop, 1); + float score2 = checkPhraseQuery(DOC_3_B, QUERY_1, slop, 1); + assertTrue("slop="+slop+" score2="+score2+" should be greater than score1 "+score1, score2>score1); + } + } + + private float checkPhraseQuery(Document doc, PhraseQuery query, int slop, int expectedNumResults) throws Exception { + query.setSlop(slop); + + Directory ramDir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, ramDir, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)); + writer.addDocument(doc); + + IndexReader reader = writer.getReader(); + + IndexSearcher searcher = newSearcher(reader); + TopDocs td = searcher.search(query,null,10); + //System.out.println("slop: "+slop+" query: "+query+" doc: "+doc+" Expecting number of hits: "+expectedNumResults+" maxScore="+td.getMaxScore()); + assertEquals("slop: "+slop+" query: "+query+" doc: "+doc+" Wrong number of hits", expectedNumResults, td.totalHits); + + //QueryUtils.check(query,searcher); + writer.close(); + searcher.close(); + reader.close(); + ramDir.close(); + + return td.getMaxScore(); + } + + private static Document makeDocument(String docText) { + Document doc = new Document(); + Field f = new Field("f", docText, Field.Store.NO, Field.Index.ANALYZED); + f.setOmitNorms(true); + doc.add(f); + return doc; + } + + private static PhraseQuery makePhraseQuery(String terms) { + PhraseQuery query = new PhraseQuery(); + String[] t = terms.split(" +"); + for (int i=0; i<t.length; i++) { + query.add( new Term("f", t[i]) ); + } + return query; + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestSort.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestSort.java --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestSort.java +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Unit tests for sorting code. + * + * Created: Feb 17, 2004 4:55:10 PM + * + * @since lucene 1.4 + */ + +public class TestSort extends LuceneTestCase implements Serializable { + + private static final int NUM_STRINGS = atLeast(6000); + private IndexSearcher full;
+ private IndexSearcher searchX; + private IndexSearcher searchY; + private Query queryX; + private Query queryY; + private Query queryA; + private Query queryE; + private Query queryF; + private Query queryG; + private Sort sort; + + // document data: + // the tracer field is used to determine which document was hit + // the contents field is used to search and sort by relevance + // the int field to sort by int + // the float field to sort by float + // the string field to sort by string + // the i18n field includes accented characters for testing locale-specific sorting + private String[][] data = new String[][] { + // tracer contents int float string custom i18n long double, 'short', byte, 'custom parser encoding' + { "A", "x a", "5", "4f", "c", "A-3", "p\u00EAche", "10", "-4.0", "3", "126", "J"},//A, x + { "B", "y a", "5", "3.4028235E38", "i", "B-10", "HAT", "1000000000", "40.0", "24", "1", "I"},//B, y + { "C", "x a b c", "2147483647", "1.0", "j", "A-2", "p\u00E9ch\u00E9", "99999999", "40.00002343", "125", "15", "H"},//C, x + { "D", "y a b c", "-1", "0.0f", "a", "C-0", "HUT", String.valueOf(Long.MAX_VALUE), String.valueOf(Double.MIN_VALUE), String.valueOf(Short.MIN_VALUE), String.valueOf(Byte.MIN_VALUE), "G"},//D, y + { "E", "x a b c d", "5", "2f", "h", "B-8", "peach", String.valueOf(Long.MIN_VALUE), String.valueOf(Double.MAX_VALUE), String.valueOf(Short.MAX_VALUE), String.valueOf(Byte.MAX_VALUE), "F"},//E,x + { "F", "y a b c d", "2", "3.14159f", "g", "B-1", "H\u00C5T", "-44", "343.034435444", "-3", "0", "E"},//F,y + { "G", "x a b c d", "3", "-1.0", "f", "C-100", "sin", "323254543543", "4.043544", "5", "100", "D"},//G,x + { "H", "y a b c d", "0", "1.4E-45", "e", "C-88", "H\u00D8T", "1023423423005","4.043545", "10", "-50", "C"},//H,y + { "I", "x a b c d e f", "-2147483648", "1.0e+0", "d", "A-10", "s\u00EDn", "332422459999", "4.043546", "-340", "51", "B"},//I,x + { "J", "y a b c d e f", "4", ".5", "b", "C-7", "HOT", "34334543543", "4.0000220343", "300", "2", "A"},//J,y + { "W", "g", "1", null, null, null, null, null, null, null, null, null}, + { "X", "g", "1", "0.1", null, null, null, null, null, null, null, null}, + { "Y", "g", "1", "0.2", null, null, null, null, null, null, null, null}, + { "Z", "f g", null, null, null, null, null, null, null, null, null, null} + }; + + // the sort order of Ø versus U depends on the version of the rules being used + // for the inherited root locale: Ø's order isn't specified in Locale.US since + // it's not used in English.
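+ // (new Locale("") below picks up the root locale; when its collation rules put Ø
+ // before U, oStrokeFirst is true and testInternationalSort expects "BFJHD" instead
+ // of "BFJDH".)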
+ private boolean oStrokeFirst = Collator.getInstance(new Locale("")).compare("Ø", "U") < 0; + + // create an index of all the documents, or just the x, or just the y documents + private IndexSearcher getIndex (boolean even, boolean odd) + throws IOException { + Directory indexStore = newDirectory(); + dirs.add(indexStore); + RandomIndexWriter writer = new RandomIndexWriter(random, indexStore, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); + + for (int i=0; i dirs = new ArrayList(); + + @Override + public void tearDown() throws Exception { + full.reader.close(); + searchX.reader.close(); + searchY.reader.close(); + full.close(); + searchX.close(); + searchY.close(); + for (Directory dir : dirs) + dir.close(); + super.tearDown(); + } + + // test the sorts by score and document number + public void testBuiltInSorts() throws Exception { + sort = new Sort(); + assertMatches (full, queryX, sort, "ACEGI"); + assertMatches (full, queryY, sort, "BDFHJ"); + + sort.setSort(SortField.FIELD_DOC); + assertMatches (full, queryX, sort, "ACEGI"); + assertMatches (full, queryY, sort, "BDFHJ"); + } + + // test sorts where the type of field is specified + public void testTypedSort() throws Exception { + sort.setSort (new SortField ("int", SortField.INT), SortField.FIELD_DOC ); + assertMatches (full, queryX, sort, "IGAEC"); + assertMatches (full, queryY, sort, "DHFJB"); + + sort.setSort (new SortField ("float", SortField.FLOAT), SortField.FIELD_DOC ); + assertMatches (full, queryX, sort, "GCIEA"); + assertMatches (full, queryY, sort, "DHJFB"); + + sort.setSort (new SortField ("long", SortField.LONG), SortField.FIELD_DOC ); + assertMatches (full, queryX, sort, "EACGI"); + assertMatches (full, queryY, sort, "FBJHD"); + + sort.setSort (new SortField ("double", SortField.DOUBLE), SortField.FIELD_DOC ); + assertMatches (full, queryX, sort, "AGICE"); + assertMatches (full, queryY, sort, "DJHBF"); + + sort.setSort (new SortField ("byte", SortField.BYTE), SortField.FIELD_DOC ); + assertMatches (full, queryX, sort, "CIGAE"); + assertMatches (full, queryY, sort, "DHFBJ"); + + sort.setSort (new SortField ("short", SortField.SHORT), SortField.FIELD_DOC ); + assertMatches (full, queryX, sort, "IAGCE"); + assertMatches (full, queryY, sort, "DFHBJ"); + + sort.setSort (new SortField ("string", SortField.STRING), SortField.FIELD_DOC ); + assertMatches (full, queryX, sort, "AIGEC"); + assertMatches (full, queryY, sort, "DJHFB"); + } + + /** + * Test String sorting: small queue to many matches, multi field sort, reverse sort + */ + public void testStringSort() throws IOException, ParseException { + ScoreDoc[] result = null; + IndexSearcher searcher = getFullStrings(); + sort.setSort( + new SortField("string", SortField.STRING), + new SortField("string2", SortField.STRING, true), + SortField.FIELD_DOC ); + + result = searcher.search(new MatchAllDocsQuery(), null, 500, sort).scoreDocs; + + StringBuilder buff = new StringBuilder(); + int n = result.length; + String last = null; + String lastSub = null; + int lastDocId = 0; + boolean fail = false; + for (int x = 0; x < n; ++x) { + Document doc2 = searcher.doc(result[x].doc); + String[] v = doc2.getValues("tracer"); + String[] v2 = doc2.getValues("tracer2"); + for (int j = 0; j < v.length; ++j) { + if (last != null) { + int cmp = v[j].compareTo(last); + if (!(cmp >= 0)) { // ensure first field is in order + fail = true; + System.out.println("fail:" + v[j] + " < " + last); + } + if (cmp == 0) { // ensure second field is in 
reverse order + cmp = v2[j].compareTo(lastSub); + if (cmp > 0) { + fail = true; + System.out.println("rev field fail:" + v2[j] + " > " + lastSub); + } else if(cmp == 0) { // ensure docid is in order + if (result[x].doc < lastDocId) { + fail = true; + System.out.println("doc fail:" + result[x].doc + " < " + lastDocId); + } + } + } + } + last = v[j]; + lastSub = v2[j]; + lastDocId = result[x].doc; + buff.append(v[j] + "(" + v2[j] + ")(" + result[x].doc+") "); + } + } + if(fail) { + System.out.println("topn field1(field2)(docID):" + buff); + } + assertFalse("Found sort results out of order", fail); + searcher.close(); + } + + /** + * Test sorts where the type of field is specified and a custom field parser + * with a simple char encoding is used: each sorted string starts with a + * character from 'A' upward that every parser maps to a numeric value with a + * different "funny" algorithm per data type. + */ + public void testCustomFieldParserSort() throws Exception { + // since this test explicitly uses different parsers on the same fieldname + // we explicitly check/purge the FieldCache between each assertMatches + FieldCache fc = FieldCache.DEFAULT; + + + sort.setSort (new SortField ("parser", new FieldCache.IntParser(){ + public final int parseInt(final String val) { + return (val.charAt(0)-'A') * 123456; + } + }), SortField.FIELD_DOC ); + assertMatches (full, queryA, sort, "JIHGFEDCBA"); + assertSaneFieldCaches(getName() + " IntParser"); + fc.purgeAllCaches(); + + sort.setSort (new SortField ("parser", new FieldCache.FloatParser(){ + public final float parseFloat(final String val) { + return (float) Math.sqrt( val.charAt(0) ); + } + }), SortField.FIELD_DOC ); + assertMatches (full, queryA, sort, "JIHGFEDCBA"); + assertSaneFieldCaches(getName() + " FloatParser"); + fc.purgeAllCaches(); + + sort.setSort (new SortField ("parser", new FieldCache.LongParser(){ + public final long parseLong(final String val) { + return (val.charAt(0)-'A') * 1234567890L; + } + }), SortField.FIELD_DOC ); + assertMatches (full, queryA, sort, "JIHGFEDCBA"); + assertSaneFieldCaches(getName() + " LongParser"); + fc.purgeAllCaches(); + + sort.setSort (new SortField ("parser", new FieldCache.DoubleParser(){ + public final double parseDouble(final String val) { + return Math.pow( val.charAt(0), (val.charAt(0)-'A') ); + } + }), SortField.FIELD_DOC ); + assertMatches (full, queryA, sort, "JIHGFEDCBA"); + assertSaneFieldCaches(getName() + " DoubleParser"); + fc.purgeAllCaches(); + + sort.setSort (new SortField ("parser", new FieldCache.ByteParser(){ + public final byte parseByte(final String val) { + return (byte) (val.charAt(0)-'A'); + } + }), SortField.FIELD_DOC ); + assertMatches (full, queryA, sort, "JIHGFEDCBA"); + assertSaneFieldCaches(getName() + " ByteParser"); + fc.purgeAllCaches(); + + sort.setSort (new SortField ("parser", new FieldCache.ShortParser(){ + public final short parseShort(final String val) { + return (short) (val.charAt(0)-'A'); + } + }), SortField.FIELD_DOC ); + assertMatches (full, queryA, sort, "JIHGFEDCBA"); + assertSaneFieldCaches(getName() + " ShortParser"); + fc.purgeAllCaches(); + } + + // test sorts when there's nothing in the index + public void testEmptyIndex() throws Exception { + Searcher empty = getEmptyIndex(); + + sort = new Sort(); + assertMatches (empty, queryX, sort, ""); + + sort.setSort(SortField.FIELD_DOC); + assertMatches (empty, queryX, sort, ""); + + sort.setSort (new SortField ("int", SortField.INT), SortField.FIELD_DOC ); + assertMatches (empty, queryX, sort, ""); +
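+ // a reversed String sort with a docid tie-break must likewise come back empty: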
+ sort.setSort (new SortField ("string", SortField.STRING, true), SortField.FIELD_DOC ); + assertMatches (empty, queryX, sort, ""); + + sort.setSort (new SortField ("float", SortField.FLOAT), new SortField ("string", SortField.STRING) ); + assertMatches (empty, queryX, sort, ""); + } + + static class MyFieldComparator extends FieldComparator<Integer> { + int[] docValues; + int[] slotValues; + int bottomValue; + + MyFieldComparator(int numHits) { + slotValues = new int[numHits]; + } + + @Override + public void copy(int slot, int doc) { + slotValues[slot] = docValues[doc]; + } + + @Override + public int compare(int slot1, int slot2) { + // values are small enough that overflow won't happen + return slotValues[slot1] - slotValues[slot2]; + } + + @Override + public int compareBottom(int doc) { + return bottomValue - docValues[doc]; + } + + @Override + public void setBottom(int bottom) { + bottomValue = slotValues[bottom]; + } + + private static final FieldCache.IntParser testIntParser = new FieldCache.IntParser() { + public final int parseInt(final String val) { + return (val.charAt(0)-'A') * 123456; + } + }; + + @Override + public void setNextReader(IndexReader reader, int docBase) throws IOException { + docValues = FieldCache.DEFAULT.getInts(reader, "parser", testIntParser); + } + + @Override + public Integer value(int slot) { + return Integer.valueOf(slotValues[slot]); + } + } + + static class MyFieldComparatorSource extends FieldComparatorSource { + @Override + public FieldComparator<Integer> newComparator(String fieldname, int numHits, int sortPos, boolean reversed) { + return new MyFieldComparator(numHits); + } + } + + // Test sorting w/ custom FieldComparator + public void testNewCustomFieldParserSort() throws Exception { + sort.setSort (new SortField ("parser", new MyFieldComparatorSource())); + assertMatches (full, queryA, sort, "JIHGFEDCBA"); + } + + // test sorts in reverse + public void testReverseSort() throws Exception { + sort.setSort (new SortField (null, SortField.SCORE, true), SortField.FIELD_DOC ); + assertMatches (full, queryX, sort, "IEGCA"); + assertMatches (full, queryY, sort, "JFHDB"); + + sort.setSort (new SortField (null, SortField.DOC, true)); + assertMatches (full, queryX, sort, "IGECA"); + assertMatches (full, queryY, sort, "JHFDB"); + + sort.setSort (new SortField ("int", SortField.INT, true) ); + assertMatches (full, queryX, sort, "CAEGI"); + assertMatches (full, queryY, sort, "BJFHD"); + + sort.setSort (new SortField ("float", SortField.FLOAT, true) ); + assertMatches (full, queryX, sort, "AECIG"); + assertMatches (full, queryY, sort, "BFJHD"); + + sort.setSort (new SortField ("string", SortField.STRING, true) ); + assertMatches (full, queryX, sort, "CEGIA"); + assertMatches (full, queryY, sort, "BFHJD"); + } + + // test sorting when the sort field is empty (undefined) for some of the documents + public void testEmptyFieldSort() throws Exception { + sort.setSort (new SortField ("string", SortField.STRING) ); + assertMatches (full, queryF, sort, "ZJI"); + + sort.setSort (new SortField ("string", SortField.STRING, true) ); + assertMatches (full, queryF, sort, "IJZ"); + + sort.setSort (new SortField ("i18n", Locale.ENGLISH)); + assertMatches (full, queryF, sort, "ZJI"); + + sort.setSort (new SortField ("i18n", Locale.ENGLISH, true)); + assertMatches (full, queryF, sort, "IJZ"); + + sort.setSort (new SortField ("int", SortField.INT) ); + assertMatches (full, queryF, sort, "IZJ"); + + sort.setSort (new SortField ("int", SortField.INT, true) ); + assertMatches (full, queryF, sort, "JZI"); + + sort.setSort (new SortField ("float", SortField.FLOAT) ); + assertMatches (full, queryF, sort, "ZJI"); + + // using a nonexistent field as first sort key shouldn't make a difference: + sort.setSort (new SortField ("nosuchfield", SortField.STRING), + new SortField ("float", SortField.FLOAT) ); + assertMatches (full, queryF, sort, "ZJI"); + + sort.setSort (new SortField ("float", SortField.FLOAT, true) ); + assertMatches (full, queryF, sort, "IJZ"); + + // When a field is null for both documents, the next SortField should be used. + sort.setSort (new SortField ("int", SortField.INT), + new SortField ("string", SortField.STRING), + new SortField ("float", SortField.FLOAT) ); + assertMatches (full, queryG, sort, "ZWXY"); + + // Reverse the last criterion to make sure the test didn't pass by chance + sort.setSort (new SortField ("int", SortField.INT), + new SortField ("string", SortField.STRING), + new SortField ("float", SortField.FLOAT, true) ); + assertMatches (full, queryG, sort, "ZYXW"); + + // Do the same for a MultiSearcher + Searcher multiSearcher=new MultiSearcher (new Searchable[] { full }); + + sort.setSort (new SortField ("int", SortField.INT), + new SortField ("string", SortField.STRING), + new SortField ("float", SortField.FLOAT) ); + assertMatches (multiSearcher, queryG, sort, "ZWXY"); + + sort.setSort (new SortField ("int", SortField.INT), + new SortField ("string", SortField.STRING), + new SortField ("float", SortField.FLOAT, true) ); + assertMatches (multiSearcher, queryG, sort, "ZYXW"); + // Don't close the multiSearcher; it would close the full searcher too! + + // Do the same for a ParallelMultiSearcher + ExecutorService exec = Executors.newFixedThreadPool(_TestUtil.nextInt(random, 2, 8)); + Searcher parallelSearcher=new ParallelMultiSearcher (exec, full); + + sort.setSort (new SortField ("int", SortField.INT), + new SortField ("string", SortField.STRING), + new SortField ("float", SortField.FLOAT) ); + assertMatches (parallelSearcher, queryG, sort, "ZWXY"); + + sort.setSort (new SortField ("int", SortField.INT), + new SortField ("string", SortField.STRING), + new SortField ("float", SortField.FLOAT, true) ); + assertMatches (parallelSearcher, queryG, sort, "ZYXW"); + parallelSearcher.close(); + exec.awaitTermination(1000, TimeUnit.MILLISECONDS); + } + + // test sorts using a series of fields + public void testSortCombos() throws Exception { + sort.setSort (new SortField ("int", SortField.INT), new SortField ("float", SortField.FLOAT) ); + assertMatches (full, queryX, sort, "IGEAC"); + + sort.setSort (new SortField ("int", SortField.INT, true), new SortField (null, SortField.DOC, true) ); + assertMatches (full, queryX, sort, "CEAGI"); + + sort.setSort (new SortField ("float", SortField.FLOAT), new SortField ("string", SortField.STRING) ); + assertMatches (full, queryX, sort, "GICEA"); + } + + // test using a Locale for sorting strings + public void testLocaleSort() throws Exception { + sort.setSort (new SortField ("string", Locale.US) ); + assertMatches (full, queryX, sort, "AIGEC"); + assertMatches (full, queryY, sort, "DJHFB"); + + sort.setSort (new SortField ("string", Locale.US, true) ); + assertMatches (full, queryX, sort, "CEGIA"); + assertMatches (full, queryY, sort, "BFHJD"); + } + + // test using various international locales with accented characters + // (which sort differently depending on locale) + public void testInternationalSort() throws Exception { + sort.setSort (new SortField ("i18n", Locale.US)); + assertMatches (full, queryY,
sort, oStrokeFirst ? "BFJHD" : "BFJDH"); + + sort.setSort (new SortField ("i18n", new Locale("sv", "se"))); + assertMatches (full, queryY, sort, "BJDFH"); + + sort.setSort (new SortField ("i18n", new Locale("da", "dk"))); + assertMatches (full, queryY, sort, "BJDHF"); + + sort.setSort (new SortField ("i18n", Locale.US)); + assertMatches (full, queryX, sort, "ECAGI"); + + sort.setSort (new SortField ("i18n", Locale.FRANCE)); + assertMatches (full, queryX, sort, "EACGI"); + } + + // Test the MultiSearcher's ability to preserve locale-sensitive ordering + // by wrapping it around a single searcher + public void testInternationalMultiSearcherSort() throws Exception { + Searcher multiSearcher = new MultiSearcher (new Searchable[] { full }); + + sort.setSort (new SortField ("i18n", new Locale("sv", "se"))); + assertMatches (multiSearcher, queryY, sort, "BJDFH"); + + sort.setSort (new SortField ("i18n", Locale.US)); + assertMatches (multiSearcher, queryY, sort, oStrokeFirst ? "BFJHD" : "BFJDH"); + + sort.setSort (new SortField ("i18n", new Locale("da", "dk"))); + assertMatches (multiSearcher, queryY, sort, "BJDHF"); + } + + // test a variety of sorts using more than one searcher + public void testMultiSort() throws Exception { + MultiSearcher searcher = new MultiSearcher (new Searchable[] { searchX, searchY }); + runMultiSorts(searcher, false); + } + + // test a variety of sorts using a parallel multisearcher + public void testParallelMultiSort() throws Exception { + ExecutorService exec = Executors.newFixedThreadPool(_TestUtil.nextInt(random, 2, 8)); + Searcher searcher = new ParallelMultiSearcher (exec, searchX, searchY); + runMultiSorts(searcher, false); + searcher.close(); + exec.awaitTermination(1000, TimeUnit.MILLISECONDS); + } + + // test that the relevancy scores are the same even if + // hits are sorted + public void testNormalizedScores() throws Exception { + + // capture relevancy scores + HashMap scoresX = getScores (full.search (queryX, null, 1000).scoreDocs, full); + HashMap scoresY = getScores (full.search (queryY, null, 1000).scoreDocs, full); + HashMap scoresA = getScores (full.search (queryA, null, 1000).scoreDocs, full); + + // we'll test searching locally, remote and multi + + MultiSearcher multi = new MultiSearcher (new Searchable[] { searchX, searchY }); + + // change sorting and make sure relevancy stays the same + + sort = new Sort(); + assertSameValues (scoresX, getScores (full.search (queryX, null, 1000, sort).scoreDocs, full)); + assertSameValues (scoresX, getScores (multi.search (queryX, null, 1000, sort).scoreDocs, multi)); + assertSameValues (scoresY, getScores (full.search (queryY, null, 1000, sort).scoreDocs, full)); + assertSameValues (scoresY, getScores (multi.search (queryY, null, 1000, sort).scoreDocs, multi)); + assertSameValues (scoresA, getScores (full.search (queryA, null, 1000, sort).scoreDocs, full)); + assertSameValues (scoresA, getScores (multi.search (queryA, null, 1000, sort).scoreDocs, multi)); + + sort.setSort(SortField.FIELD_DOC); + assertSameValues (scoresX, getScores (full.search (queryX, null, 1000, sort).scoreDocs, full)); + assertSameValues (scoresX, getScores (multi.search (queryX, null, 1000, sort).scoreDocs, multi)); + assertSameValues (scoresY, getScores (full.search (queryY, null, 1000, sort).scoreDocs, full)); + assertSameValues (scoresY, getScores (multi.search (queryY, null, 1000, sort).scoreDocs, multi)); + assertSameValues (scoresA, getScores (full.search (queryA, null, 1000, sort).scoreDocs, full)); + assertSameValues (scoresA, 
getScores (multi.search (queryA, null, 1000, sort).scoreDocs, multi)); + + sort.setSort (new SortField("int", SortField.INT)); + assertSameValues (scoresX, getScores (full.search (queryX, null, 1000, sort).scoreDocs, full)); + assertSameValues (scoresX, getScores (multi.search (queryX, null, 1000, sort).scoreDocs, multi)); + assertSameValues (scoresY, getScores (full.search (queryY, null, 1000, sort).scoreDocs, full)); + assertSameValues (scoresY, getScores (multi.search (queryY, null, 1000, sort).scoreDocs, multi)); + assertSameValues (scoresA, getScores (full.search (queryA, null, 1000, sort).scoreDocs, full)); + assertSameValues (scoresA, getScores (multi.search (queryA, null, 1000, sort).scoreDocs, multi)); + + sort.setSort (new SortField("float", SortField.FLOAT)); + assertSameValues (scoresX, getScores (full.search (queryX, null, 1000, sort).scoreDocs, full)); + assertSameValues (scoresX, getScores (multi.search (queryX, null, 1000, sort).scoreDocs, multi)); + assertSameValues (scoresY, getScores (full.search (queryY, null, 1000, sort).scoreDocs, full)); + assertSameValues (scoresY, getScores (multi.search (queryY, null, 1000, sort).scoreDocs, multi)); + assertSameValues (scoresA, getScores (full.search (queryA, null, 1000, sort).scoreDocs, full)); + assertSameValues (scoresA, getScores (multi.search (queryA, null, 1000, sort).scoreDocs, multi)); + + sort.setSort (new SortField("string", SortField.STRING)); + assertSameValues (scoresX, getScores (full.search (queryX, null, 1000, sort).scoreDocs, full)); + assertSameValues (scoresX, getScores (multi.search (queryX, null, 1000, sort).scoreDocs, multi)); + assertSameValues (scoresY, getScores (full.search (queryY, null, 1000, sort).scoreDocs, full)); + assertSameValues (scoresY, getScores (multi.search (queryY, null, 1000, sort).scoreDocs, multi)); + assertSameValues (scoresA, getScores (full.search (queryA, null, 1000, sort).scoreDocs, full)); + assertSameValues (scoresA, getScores (multi.search (queryA, null, 1000, sort).scoreDocs, multi)); + + sort.setSort (new SortField("int", SortField.INT),new SortField("float", SortField.FLOAT)); + assertSameValues (scoresX, getScores (full.search (queryX, null, 1000, sort).scoreDocs, full)); + assertSameValues (scoresX, getScores (multi.search (queryX, null, 1000, sort).scoreDocs, multi)); + assertSameValues (scoresY, getScores (full.search (queryY, null, 1000, sort).scoreDocs, full)); + assertSameValues (scoresY, getScores (multi.search (queryY, null, 1000, sort).scoreDocs, multi)); + assertSameValues (scoresA, getScores (full.search (queryA, null, 1000, sort).scoreDocs, full)); + assertSameValues (scoresA, getScores (multi.search (queryA, null, 1000, sort).scoreDocs, multi)); + + sort.setSort (new SortField ("int", SortField.INT, true), new SortField (null, SortField.DOC, true) ); + assertSameValues (scoresX, getScores (full.search (queryX, null, 1000, sort).scoreDocs, full)); + assertSameValues (scoresX, getScores (multi.search (queryX, null, 1000, sort).scoreDocs, multi)); + assertSameValues (scoresY, getScores (full.search (queryY, null, 1000, sort).scoreDocs, full)); + assertSameValues (scoresY, getScores (multi.search (queryY, null, 1000, sort).scoreDocs, multi)); + assertSameValues (scoresA, getScores (full.search (queryA, null, 1000, sort).scoreDocs, full)); + assertSameValues (scoresA, getScores (multi.search (queryA, null, 1000, sort).scoreDocs, multi)); + + sort.setSort (new SortField("int", SortField.INT),new SortField("string", SortField.STRING)); + assertSameValues (scoresX, 
getScores (full.search (queryX, null, 1000, sort).scoreDocs, full)); + assertSameValues (scoresX, getScores (multi.search (queryX, null, 1000, sort).scoreDocs, multi)); + assertSameValues (scoresY, getScores (full.search (queryY, null, 1000, sort).scoreDocs, full)); + assertSameValues (scoresY, getScores (multi.search (queryY, null, 1000, sort).scoreDocs, multi)); + assertSameValues (scoresA, getScores (full.search (queryA, null, 1000, sort).scoreDocs, full)); + assertSameValues (scoresA, getScores (multi.search (queryA, null, 1000, sort).scoreDocs, multi)); + + } + + public void testTopDocsScores() throws Exception { + + // There was previously a bug in FieldSortedHitQueue.maxscore when only a single + // doc was added. That is what the following tests for. + Sort sort = new Sort(); + int nDocs=10; + + // try to pick a query that will result in an unnormalized + // score greater than 1 to test for correct normalization + final TopDocs docs1 = full.search(queryE,null,nDocs,sort); + + // a filter that only allows through the first hit + Filter filt = new Filter() { + @Override + public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + BitSet bs = new BitSet(reader.maxDoc()); + bs.set(0, reader.maxDoc()); + bs.set(docs1.scoreDocs[0].doc); + return new DocIdBitSet(bs); + } + }; + + TopDocs docs2 = full.search(queryE, filt, nDocs, sort); + + assertEquals(docs1.scoreDocs[0].score, docs2.scoreDocs[0].score, 1e-6); + } + + public void testSortWithoutFillFields() throws Exception { + + // There was previously a bug in TopFieldCollector when fillFields was set + // to false - the same doc and score was set in ScoreDoc[] array. This test + // asserts that if fillFields is false, the documents are set properly. It + // does not use Searcher's default search methods (with Sort) since all set + // fillFields to true. + Sort[] sort = new Sort[] { new Sort(SortField.FIELD_DOC), new Sort() }; + for (int i = 0; i < sort.length; i++) { + Query q = new MatchAllDocsQuery(); + TopDocsCollector tdc = TopFieldCollector.create(sort[i], 10, false, + false, false, true); + + full.search(q, tdc); + + ScoreDoc[] sd = tdc.topDocs().scoreDocs; + for (int j = 1; j < sd.length; j++) { + assertTrue(sd[j].doc != sd[j - 1].doc); + } + + } + } + + public void testSortWithoutScoreTracking() throws Exception { + + // Two Sort criteria to instantiate the multi/single comparators. + Sort[] sort = new Sort[] {new Sort(SortField.FIELD_DOC), new Sort() }; + for (int i = 0; i < sort.length; i++) { + Query q = new MatchAllDocsQuery(); + TopDocsCollector tdc = TopFieldCollector.create(sort[i], 10, true, false, + false, true); + + full.search(q, tdc); + + TopDocs td = tdc.topDocs(); + ScoreDoc[] sd = td.scoreDocs; + for (int j = 0; j < sd.length; j++) { + assertTrue(Float.isNaN(sd[j].score)); + } + assertTrue(Float.isNaN(td.getMaxScore())); + } + } + + public void testSortWithScoreNoMaxScoreTracking() throws Exception { + + // Two Sort criteria to instantiate the multi/single comparators. 
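+ // TopFieldCollector.create's boolean arguments are, in order:
+ // fillFields, trackDocScores, trackMaxScore, docsScoredInOrder.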
+ Sort[] sort = new Sort[] {new Sort(SortField.FIELD_DOC), new Sort() }; + for (int i = 0; i < sort.length; i++) { + Query q = new MatchAllDocsQuery(); + TopDocsCollector tdc = TopFieldCollector.create(sort[i], 10, true, true, + false, true); + + full.search(q, tdc); + + TopDocs td = tdc.topDocs(); + ScoreDoc[] sd = td.scoreDocs; + for (int j = 0; j < sd.length; j++) { + assertTrue(!Float.isNaN(sd[j].score)); + } + assertTrue(Float.isNaN(td.getMaxScore())); + } + } + + // MultiComparatorScoringNoMaxScoreCollector + public void testSortWithScoreNoMaxScoreTrackingMulti() throws Exception { + + // Two Sort criteria to instantiate the multi/single comparators. + Sort[] sort = new Sort[] {new Sort(SortField.FIELD_DOC, SortField.FIELD_SCORE) }; + for (int i = 0; i < sort.length; i++) { + Query q = new MatchAllDocsQuery(); + TopDocsCollector tdc = TopFieldCollector.create(sort[i], 10, true, true, + false, true); + + full.search(q, tdc); + + TopDocs td = tdc.topDocs(); + ScoreDoc[] sd = td.scoreDocs; + for (int j = 0; j < sd.length; j++) { + assertTrue(!Float.isNaN(sd[j].score)); + } + assertTrue(Float.isNaN(td.getMaxScore())); + } + } + + public void testSortWithScoreAndMaxScoreTracking() throws Exception { + + // Two Sort criteria to instantiate the multi/single comparators. + Sort[] sort = new Sort[] {new Sort(SortField.FIELD_DOC), new Sort() }; + for (int i = 0; i < sort.length; i++) { + Query q = new MatchAllDocsQuery(); + TopDocsCollector tdc = TopFieldCollector.create(sort[i], 10, true, true, + true, true); + + full.search(q, tdc); + + TopDocs td = tdc.topDocs(); + ScoreDoc[] sd = td.scoreDocs; + for (int j = 0; j < sd.length; j++) { + assertTrue(!Float.isNaN(sd[j].score)); + } + assertTrue(!Float.isNaN(td.getMaxScore())); + } + } + + public void testOutOfOrderDocsScoringSort() throws Exception { + + // Two Sort criteria to instantiate the multi/single comparators. + Sort[] sort = new Sort[] {new Sort(SortField.FIELD_DOC), new Sort() }; + boolean[][] tfcOptions = new boolean[][] { + new boolean[] { false, false, false }, + new boolean[] { false, false, true }, + new boolean[] { false, true, false }, + new boolean[] { false, true, true }, + new boolean[] { true, false, false }, + new boolean[] { true, false, true }, + new boolean[] { true, true, false }, + new boolean[] { true, true, true }, + }; + String[] actualTFCClasses = new String[] { + "OutOfOrderOneComparatorNonScoringCollector", + "OutOfOrderOneComparatorScoringMaxScoreCollector", + "OutOfOrderOneComparatorScoringNoMaxScoreCollector", + "OutOfOrderOneComparatorScoringMaxScoreCollector", + "OutOfOrderOneComparatorNonScoringCollector", + "OutOfOrderOneComparatorScoringMaxScoreCollector", + "OutOfOrderOneComparatorScoringNoMaxScoreCollector", + "OutOfOrderOneComparatorScoringMaxScoreCollector" + }; + + BooleanQuery bq = new BooleanQuery(); + // Add a Query with SHOULD, since bw.scorer() returns BooleanScorer2 + // which delegates to BS if there are no mandatory clauses. + bq.add(new MatchAllDocsQuery(), Occur.SHOULD); + // Set minNrShouldMatch to 1 so that BQ will not optimize rewrite to return + // the clause instead of BQ. 
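+ // (with minNrShouldMatch left at 0, a one-clause BooleanQuery would rewrite to the
+ // bare MatchAllDocsQuery and the BooleanScorer2 path would never be exercised)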
+ bq.setMinimumNumberShouldMatch(1); + for (int i = 0; i < sort.length; i++) { + for (int j = 0; j < tfcOptions.length; j++) { + TopDocsCollector tdc = TopFieldCollector.create(sort[i], 10, + tfcOptions[j][0], tfcOptions[j][1], tfcOptions[j][2], false); + + assertTrue(tdc.getClass().getName().endsWith("$"+actualTFCClasses[j])); + + full.search(bq, tdc); + + TopDocs td = tdc.topDocs(); + ScoreDoc[] sd = td.scoreDocs; + assertEquals(10, sd.length); + } + } + } + + // OutOfOrderMulti*Collector + public void testOutOfOrderDocsScoringSortMulti() throws Exception { + + // Two Sort criteria to instantiate the multi/single comparators. + Sort[] sort = new Sort[] {new Sort(SortField.FIELD_DOC, SortField.FIELD_SCORE) }; + boolean[][] tfcOptions = new boolean[][] { + new boolean[] { false, false, false }, + new boolean[] { false, false, true }, + new boolean[] { false, true, false }, + new boolean[] { false, true, true }, + new boolean[] { true, false, false }, + new boolean[] { true, false, true }, + new boolean[] { true, true, false }, + new boolean[] { true, true, true }, + }; + String[] actualTFCClasses = new String[] { + "OutOfOrderMultiComparatorNonScoringCollector", + "OutOfOrderMultiComparatorScoringMaxScoreCollector", + "OutOfOrderMultiComparatorScoringNoMaxScoreCollector", + "OutOfOrderMultiComparatorScoringMaxScoreCollector", + "OutOfOrderMultiComparatorNonScoringCollector", + "OutOfOrderMultiComparatorScoringMaxScoreCollector", + "OutOfOrderMultiComparatorScoringNoMaxScoreCollector", + "OutOfOrderMultiComparatorScoringMaxScoreCollector" + }; + + BooleanQuery bq = new BooleanQuery(); + // Add a Query with SHOULD, since bw.scorer() returns BooleanScorer2 + // which delegates to BS if there are no mandatory clauses. + bq.add(new MatchAllDocsQuery(), Occur.SHOULD); + // Set minNrShouldMatch to 1 so that BQ will not optimize rewrite to return + // the clause instead of BQ. + bq.setMinimumNumberShouldMatch(1); + for (int i = 0; i < sort.length; i++) { + for (int j = 0; j < tfcOptions.length; j++) { + TopDocsCollector tdc = TopFieldCollector.create(sort[i], 10, + tfcOptions[j][0], tfcOptions[j][1], tfcOptions[j][2], false); + + assertTrue(tdc.getClass().getName().endsWith("$"+actualTFCClasses[j])); + + full.search(bq, tdc); + + TopDocs td = tdc.topDocs(); + ScoreDoc[] sd = td.scoreDocs; + assertEquals(10, sd.length); + } + } + } + + public void testSortWithScoreAndMaxScoreTrackingNoResults() throws Exception { + + // Two Sort criteria to instantiate the multi/single comparators. + Sort[] sort = new Sort[] {new Sort(SortField.FIELD_DOC), new Sort() }; + for (int i = 0; i < sort.length; i++) { + TopDocsCollector tdc = TopFieldCollector.create(sort[i], 10, true, true, true, true); + TopDocs td = tdc.topDocs(); + assertEquals(0, td.totalHits); + assertTrue(Float.isNaN(td.getMaxScore())); + } + } + + // runs a variety of sorts useful for multisearchers + private void runMultiSorts(Searcher multi, boolean isFull) throws Exception { + sort.setSort(SortField.FIELD_DOC); + String expected = isFull ? "ABCDEFGHIJ" : "ACEGIBDFHJ"; + assertMatches(multi, queryA, sort, expected); + + sort.setSort(new SortField ("int", SortField.INT)); + expected = isFull ? "IDHFGJABEC" : "IDHFGJAEBC"; + assertMatches(multi, queryA, sort, expected); + + sort.setSort(new SortField ("int", SortField.INT), SortField.FIELD_DOC); + expected = isFull ? "IDHFGJABEC" : "IDHFGJAEBC"; + assertMatches(multi, queryA, sort, expected); + + sort.setSort(new SortField("int", SortField.INT)); + expected = isFull ? "IDHFGJABEC" : "IDHFGJAEBC"; + assertMatches(multi, queryA, sort, expected); + + sort.setSort(new SortField ("float", SortField.FLOAT), SortField.FIELD_DOC); + assertMatches(multi, queryA, sort, "GDHJCIEFAB"); + + sort.setSort(new SortField("float", SortField.FLOAT)); + assertMatches(multi, queryA, sort, "GDHJCIEFAB"); + + sort.setSort(new SortField("string", SortField.STRING)); + assertMatches(multi, queryA, sort, "DJAIHGFEBC"); + + sort.setSort(new SortField("int", SortField.INT, true)); + expected = isFull ? "CABEJGFHDI" : "CAEBJGFHDI"; + assertMatches(multi, queryA, sort, expected); + + sort.setSort(new SortField("float", SortField.FLOAT, true)); + assertMatches(multi, queryA, sort, "BAFECIJHDG"); + + sort.setSort(new SortField("string", SortField.STRING, true)); + assertMatches(multi, queryA, sort, "CBEFGHIAJD"); + + sort.setSort(new SortField("int", SortField.INT),new SortField("float", SortField.FLOAT)); + assertMatches(multi, queryA, sort, "IDHFGJEABC"); + + sort.setSort(new SortField("float", SortField.FLOAT),new SortField("string", SortField.STRING)); + assertMatches(multi, queryA, sort, "GDHJICEFAB"); + + sort.setSort(new SortField ("int", SortField.INT)); + assertMatches(multi, queryF, sort, "IZJ"); + + sort.setSort(new SortField ("int", SortField.INT, true)); + assertMatches(multi, queryF, sort, "JZI"); + + sort.setSort(new SortField ("float", SortField.FLOAT)); + assertMatches(multi, queryF, sort, "ZJI"); + + sort.setSort(new SortField ("string", SortField.STRING)); + assertMatches(multi, queryF, sort, "ZJI"); + + sort.setSort(new SortField ("string", SortField.STRING, true)); + assertMatches(multi, queryF, sort, "IJZ"); + + // up to this point, all of the searches should have "sane" + // FieldCache behavior, and should have reused the cache in several cases + assertSaneFieldCaches(getName() + " various"); + // next we'll check Locale based (String[]) for 'string', so purge first + FieldCache.DEFAULT.purgeAllCaches(); + + sort.setSort(new SortField ("string", Locale.US) ); + assertMatches(multi, queryA, sort, "DJAIHGFEBC"); + + sort.setSort(new SortField ("string", Locale.US, true) ); + assertMatches(multi, queryA, sort, "CBEFGHIAJD"); + + sort.setSort(new SortField ("string", Locale.UK) ); + assertMatches(multi, queryA, sort, "DJAIHGFEBC"); + + assertSaneFieldCaches(getName() + " Locale.US + Locale.UK"); + FieldCache.DEFAULT.purgeAllCaches(); + + } + + // make sure the documents returned by the search match the expected list + private void assertMatches(Searcher searcher, Query query, Sort sort, + String expectedResult) throws IOException { + //ScoreDoc[] result = searcher.search (query, null, 1000, sort).scoreDocs; + TopDocs hits = searcher.search (query, null, Math.max(1, expectedResult.length()), sort); + ScoreDoc[] result = hits.scoreDocs; + assertEquals(hits.totalHits, expectedResult.length()); + StringBuilder buff = new StringBuilder(10); + int n = result.length; + for (int i=0; i<n; ++i) { + Document doc = searcher.doc(result[i].doc); + String[] v = doc.getValues("tracer"); + for (int j=0; j<v.length; ++j) { + buff.append (v[j]); + } + } + assertEquals (expectedResult, buff.toString()); + } + + private HashMap<String,Float> getScores (ScoreDoc[] hits, Searcher searcher) + throws IOException { + HashMap<String,Float> scoreMap = new HashMap<String,Float>(); + int n = hits.length; + for (int i=0; i<n; ++i) { + Document doc = searcher.doc(hits[i].doc); + String[] v = doc.getValues("tracer"); + assertEquals (v.length, 1); + scoreMap.put (v[0], Float.valueOf(hits[i].score)); + } + return scoreMap; + } + + private <K,V> void assertSameValues (HashMap<K,V> m1, HashMap<K,V> m2) { + int n = m1.size(); + int m = m2.size(); + assertEquals (n, m); + Iterator<K> iter = m1.keySet().iterator(); + while (iter.hasNext()) { + K key = iter.next(); + V o1 = m1.get(key); + V o2 = m2.get(key); + if (o1 instanceof Float) { + assertEquals(((Float)o1).floatValue(), ((Float)o2).floatValue(), 1e-6); + } else { + assertEquals (m1.get(key), m2.get(key)); + } + } + } + + public void
testEmptyStringVsNullStringSort() throws Exception { + Directory dir = newDirectory(); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random))); + Document doc = new Document(); + doc.add(newField("f", "", Field.Store.NO, Field.Index.NOT_ANALYZED)); + doc.add(newField("t", "1", Field.Store.NO, Field.Index.NOT_ANALYZED)); + w.addDocument(doc); + w.commit(); + doc = new Document(); + doc.add(newField("t", "1", Field.Store.NO, Field.Index.NOT_ANALYZED)); + w.addDocument(doc); + + IndexReader r = IndexReader.open(w, true); + w.close(); + IndexSearcher s = newSearcher(r); + TopDocs hits = s.search(new TermQuery(new Term("t", "1")), null, 10, new Sort(new SortField("f", SortField.STRING))); + assertEquals(2, hits.totalHits); + // null sorts first + assertEquals(1, hits.scoreDocs[0].doc); + assertEquals(0, hits.scoreDocs[1].doc); + s.close(); + r.close(); + dir.close(); + } + + public void testLUCENE2142() throws IOException { + Directory indexStore = newDirectory(); + IndexWriter writer = new IndexWriter(indexStore, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random))); + for (int i=0; i<5; i++) { + Document doc = new Document(); + doc.add (new Field ("string", "a"+i, Field.Store.NO, Field.Index.NOT_ANALYZED)); + doc.add (new Field ("string", "b"+i, Field.Store.NO, Field.Index.NOT_ANALYZED)); + writer.addDocument (doc); + } + writer.optimize(); // enforce one segment to have a higher unique term count in all cases + writer.close(); + sort.setSort( + new SortField("string", SortField.STRING), + SortField.FIELD_DOC ); + // this should not throw AIOOBE or RuntimeEx + IndexSearcher searcher = new IndexSearcher(indexStore, true); + searcher.search(new MatchAllDocsQuery(), null, 500, sort); + searcher.close(); + indexStore.close(); + } + + public void testCountingCollector() throws Exception { + Directory indexStore = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, indexStore); + for (int i=0; i<5; i++) { + Document doc = new Document(); + doc.add (new Field ("string", "a"+i, Field.Store.NO, Field.Index.NOT_ANALYZED)); + doc.add (new Field ("string", "b"+i, Field.Store.NO, Field.Index.NOT_ANALYZED)); + writer.addDocument (doc); + } + IndexReader reader = writer.getReader(); + writer.close(); + + IndexSearcher searcher = newSearcher(reader); + TotalHitCountCollector c = new TotalHitCountCollector(); + searcher.search(new MatchAllDocsQuery(), null, c); + assertEquals(5, c.getTotalHits()); + searcher.close(); + reader.close(); + indexStore.close(); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestSpanQueryFilter.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestSpanQueryFilter.java new file mode 100644 index 0000000..f555dc2 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestSpanQueryFilter.java @@ -0,0 +1,81 @@ +package org.apache.lucene.search; + +/** + * Copyright 2004 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.List; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.spans.SpanTermQuery; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.English; +import org.apache.lucene.util.LuceneTestCase; + +public class TestSpanQueryFilter extends LuceneTestCase { + + public void testFilterWorks() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); + for (int i = 0; i < 500; i++) { + Document document = new Document(); + document.add(newField("field", English.intToEnglish(i) + " equals " + English.intToEnglish(i), + Field.Store.NO, Field.Index.ANALYZED)); + writer.addDocument(document); + } + IndexReader reader = writer.getReader(); + writer.close(); + + SpanTermQuery query = new SpanTermQuery(new Term("field", English.intToEnglish(10).trim())); + SpanQueryFilter filter = new SpanQueryFilter(query); + SpanFilterResult result = filter.bitSpans(reader); + DocIdSet docIdSet = result.getDocIdSet(); + assertTrue("docIdSet is null and it shouldn't be", docIdSet != null); + assertContainsDocId("docIdSet doesn't contain docId 10", docIdSet, 10); + List<SpanFilterResult.PositionInfo> spans = result.getPositions(); + assertTrue("spans is null and it shouldn't be", spans != null); + int size = getDocIdSetSize(docIdSet); + assertTrue("spans Size: " + spans.size() + " is not: " + size, spans.size() == size); + for (final SpanFilterResult.PositionInfo info: spans) { + assertTrue("info is null and it shouldn't be", info != null); + //The doc should indicate the bit is on + assertContainsDocId("docIdSet doesn't contain docId " + info.getDoc(), docIdSet, info.getDoc()); + //There should be two positions in each + assertTrue("info.getPositions() Size: " + info.getPositions().size() + " is not: " + 2, info.getPositions().size() == 2); + } + reader.close(); + dir.close(); + } + + int getDocIdSetSize(DocIdSet docIdSet) throws Exception { + int size = 0; + DocIdSetIterator it = docIdSet.iterator(); + while (it.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { + size++; + } + return size; + } + + public void assertContainsDocId(String msg, DocIdSet docIdSet, int docId) throws Exception { + DocIdSetIterator it = docIdSet.iterator(); + assertTrue(msg, it.advance(docId) != DocIdSetIterator.NO_MORE_DOCS); + assertTrue(msg, it.docID() == docId); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestSubScorerFreqs.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestSubScorerFreqs.java new file mode 100644 index 0000000..46d7ed9 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestSubScorerFreqs.java @@ -0,0 +1,226 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import java.io.*;
+import java.util.*;
+import org.apache.lucene.document.*;
+import org.apache.lucene.index.*;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.Scorer.ScorerVisitor;
+import org.apache.lucene.store.*;
+import org.apache.lucene.util.*;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class TestSubScorerFreqs extends LuceneTestCase {
+
+  private static Directory dir;
+  private static IndexSearcher s;
+
+  @BeforeClass
+  public static void makeIndex() throws Exception {
+    dir = new RAMDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(
+        random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
+    // make sure we have more than one segment occasionally
+    int num = atLeast(31);
+    for (int i = 0; i < num; i++) {
+      Document doc = new Document();
+      doc.add(newField("f", "a b c d b c d c d d", Field.Store.NO,
+          Field.Index.ANALYZED));
+      w.addDocument(doc);
+
+      doc = new Document();
+      doc.add(newField("f", "a b c d", Field.Store.NO, Field.Index.ANALYZED));
+      w.addDocument(doc);
+    }
+
+    s = newSearcher(w.getReader());
+    w.close();
+  }
+
+  @AfterClass
+  public static void finish() throws Exception {
+    s.getIndexReader().close();
+    s.close();
+    s = null;
+    dir.close();
+    dir = null;
+  }
+
+  private static class CountingCollector extends Collector {
+    private final Collector other;
+    private int docBase;
+
+    public final Map<Integer, Map<Query, Float>> docCounts = new HashMap<Integer, Map<Query, Float>>();
+
+    private final Map<Query, Scorer> subScorers = new HashMap<Query, Scorer>();
+    private final ScorerVisitor<Query, Query, Scorer> visitor = new MockScorerVisitor();
+    private final EnumSet<Occur> collect;
+
+    private class MockScorerVisitor extends ScorerVisitor<Query, Query, Scorer> {
+
+      @Override
+      public void visitOptional(Query parent, Query child, Scorer scorer) {
+        if (collect.contains(Occur.SHOULD))
+          subScorers.put(child, scorer);
+      }
+
+      @Override
+      public void visitProhibited(Query parent, Query child, Scorer scorer) {
+        if (collect.contains(Occur.MUST_NOT))
+          subScorers.put(child, scorer);
+      }
+
+      @Override
+      public void visitRequired(Query parent, Query child, Scorer scorer) {
+        if (collect.contains(Occur.MUST))
+          subScorers.put(child, scorer);
+      }
+
+    }
+
+    public CountingCollector(Collector other) {
+      this(other, EnumSet.allOf(Occur.class));
+    }
+
+    public CountingCollector(Collector other, EnumSet<Occur> collect) {
+      this.other = other;
+      this.collect = collect;
+    }
+
+    @Override
+    public void setScorer(Scorer scorer) throws IOException {
+      other.setScorer(scorer);
+      scorer.visitScorers(visitor);
+    }
+
+    @Override
+    public void collect(int doc) throws IOException {
+      final Map<Query, Float> freqs = new HashMap<Query, Float>();
+      for (Map.Entry<Query, Scorer> ent : subScorers.entrySet()) {
+        Scorer value = ent.getValue();
+        int matchId = value.docID();
+        freqs.put(ent.getKey(), matchId == doc ?
+            value.freq() : 0.0f);
+      }
+      docCounts.put(doc + docBase, freqs);
+      other.collect(doc);
+    }
+
+    @Override
+    public void setNextReader(IndexReader reader, int docBase)
+        throws IOException {
+      this.docBase = docBase;
+      other.setNextReader(reader, docBase);
+    }
+
+    @Override
+    public boolean acceptsDocsOutOfOrder() {
+      return other.acceptsDocsOutOfOrder();
+    }
+  }
+
+  private static final float FLOAT_TOLERANCE = 0.00001F;
+
+  @Test
+  public void testTermQuery() throws Exception {
+    TermQuery q = new TermQuery(new Term("f", "d"));
+    CountingCollector c = new CountingCollector(TopScoreDocCollector.create(10,
+        true));
+    s.search(q, null, c);
+    final int maxDocs = s.maxDoc();
+    assertEquals(maxDocs, c.docCounts.size());
+    for (int i = 0; i < maxDocs; i++) {
+      Map<Query, Float> doc0 = c.docCounts.get(i);
+      assertEquals(1, doc0.size());
+      assertEquals(4.0F, doc0.get(q), FLOAT_TOLERANCE);
+
+      Map<Query, Float> doc1 = c.docCounts.get(++i);
+      assertEquals(1, doc1.size());
+      assertEquals(1.0F, doc1.get(q), FLOAT_TOLERANCE);
+    }
+  }
+
+  @SuppressWarnings("unchecked")
+  @Test
+  public void testBooleanQuery() throws Exception {
+    TermQuery aQuery = new TermQuery(new Term("f", "a"));
+    TermQuery dQuery = new TermQuery(new Term("f", "d"));
+    TermQuery cQuery = new TermQuery(new Term("f", "c"));
+    TermQuery yQuery = new TermQuery(new Term("f", "y"));
+
+    BooleanQuery query = new BooleanQuery();
+    BooleanQuery inner = new BooleanQuery();
+
+    inner.add(cQuery, Occur.SHOULD);
+    inner.add(yQuery, Occur.MUST_NOT);
+    query.add(inner, Occur.MUST);
+    query.add(aQuery, Occur.MUST);
+    query.add(dQuery, Occur.MUST);
+    EnumSet<Occur>[] occurList = new EnumSet[] {EnumSet.of(Occur.MUST), EnumSet.of(Occur.MUST, Occur.SHOULD)};
+    for (EnumSet<Occur> occur : occurList) {
+      CountingCollector c = new CountingCollector(TopScoreDocCollector.create(
+          10, true), occur);
+      s.search(query, null, c);
+      final int maxDocs = s.maxDoc();
+      assertEquals(maxDocs, c.docCounts.size());
+      boolean includeOptional = occur.contains(Occur.SHOULD);
+      for (int i = 0; i < maxDocs; i++) {
+        Map<Query, Float> doc0 = c.docCounts.get(i);
+        assertEquals(includeOptional ? 5 : 4, doc0.size());
+        assertEquals(1.0F, doc0.get(aQuery), FLOAT_TOLERANCE);
+        assertEquals(4.0F, doc0.get(dQuery), FLOAT_TOLERANCE);
+        if (includeOptional)
+          assertEquals(3.0F, doc0.get(cQuery), FLOAT_TOLERANCE);
+
+        Map<Query, Float> doc1 = c.docCounts.get(++i);
+        assertEquals(includeOptional ?
+            5 : 4, doc1.size());
+        assertEquals(1.0F, doc1.get(aQuery), FLOAT_TOLERANCE);
+        assertEquals(1.0F, doc1.get(dQuery), FLOAT_TOLERANCE);
+        if (includeOptional)
+          assertEquals(1.0F, doc1.get(cQuery), FLOAT_TOLERANCE);
+
+      }
+    }
+  }
+
+  @Test
+  public void testPhraseQuery() throws Exception {
+    PhraseQuery q = new PhraseQuery();
+    q.add(new Term("f", "b"));
+    q.add(new Term("f", "c"));
+    CountingCollector c = new CountingCollector(TopScoreDocCollector.create(10,
+        true));
+    s.search(q, null, c);
+    final int maxDocs = s.maxDoc();
+    assertEquals(maxDocs, c.docCounts.size());
+    for (int i = 0; i < maxDocs; i++) {
+      Map<Query, Float> doc0 = c.docCounts.get(i);
+      assertEquals(1, doc0.size());
+      assertEquals(2.0F, doc0.get(q), FLOAT_TOLERANCE);
+
+      Map<Query, Float> doc1 = c.docCounts.get(++i);
+      assertEquals(1, doc1.size());
+      assertEquals(1.0F, doc1.get(q), FLOAT_TOLERANCE);
+    }
+
+  }
+}
diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestTermRangeFilter.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestTermRangeFilter.java
new file mode 100644
index 0000000..03b2b06
--- /dev/null
+++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestTermRangeFilter.java
@@ -0,0 +1,488 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.text.Collator;
+import java.util.Locale;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.store.Directory;
+import org.junit.Test;
+
+/**
+ * A basic 'positive' Unit test class for the TermRangeFilter class.
+ *
+ *
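Taken together, the CountingCollector above wraps any ordinary Collector and, through ScorerVisitor, records each subquery's freq() at every collected doc. A condensed usage sketch built only from the classes shown above (`q` stands in for any of the queries used in the tests):

    CountingCollector c = new CountingCollector(TopScoreDocCollector.create(10, true));
    s.search(q, null, c);
    // per-document view: which subquery matched how often in doc 0
    Map<Query, Float> freqsForDoc0 = c.docCounts.get(0);
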

+ * NOTE: at the moment, this class only tests for 'positive' results, it does + * not verify the results to ensure there are no 'false positives', nor does it + * adequately test 'negative' results. It also does not test that garbage in + * results in an Exception. + */ +public class TestTermRangeFilter extends BaseTestRangeFilter { + + @Test + public void testRangeFilterId() throws IOException { + + IndexReader reader = signedIndexReader; + IndexSearcher search = newSearcher(reader); + + int medId = ((maxId - minId) / 2); + + String minIP = pad(minId); + String maxIP = pad(maxId); + String medIP = pad(medId); + + int numDocs = reader.numDocs(); + + assertEquals("num of docs", numDocs, 1 + maxId - minId); + + ScoreDoc[] result; + Query q = new TermQuery(new Term("body", "body")); + + // test id, bounded on both ends + + result = search.search(q, new TermRangeFilter("id", minIP, maxIP, T, T), + numDocs).scoreDocs; + assertEquals("find all", numDocs, result.length); + + result = search.search(q, new TermRangeFilter("id", minIP, maxIP, T, F), + numDocs).scoreDocs; + assertEquals("all but last", numDocs - 1, result.length); + + result = search.search(q, new TermRangeFilter("id", minIP, maxIP, F, T), + numDocs).scoreDocs; + assertEquals("all but first", numDocs - 1, result.length); + + result = search.search(q, new TermRangeFilter("id", minIP, maxIP, F, F), + numDocs).scoreDocs; + assertEquals("all but ends", numDocs - 2, result.length); + + result = search.search(q, new TermRangeFilter("id", medIP, maxIP, T, T), + numDocs).scoreDocs; + assertEquals("med and up", 1 + maxId - medId, result.length); + + result = search.search(q, new TermRangeFilter("id", minIP, medIP, T, T), + numDocs).scoreDocs; + assertEquals("up to med", 1 + medId - minId, result.length); + + // unbounded id + + result = search.search(q, new TermRangeFilter("id", minIP, null, T, F), + numDocs).scoreDocs; + assertEquals("min and up", numDocs, result.length); + + result = search.search(q, new TermRangeFilter("id", null, maxIP, F, T), + numDocs).scoreDocs; + assertEquals("max and down", numDocs, result.length); + + result = search.search(q, new TermRangeFilter("id", minIP, null, F, F), + numDocs).scoreDocs; + assertEquals("not min, but up", numDocs - 1, result.length); + + result = search.search(q, new TermRangeFilter("id", null, maxIP, F, F), + numDocs).scoreDocs; + assertEquals("not max, but down", numDocs - 1, result.length); + + result = search.search(q, new TermRangeFilter("id", medIP, maxIP, T, F), + numDocs).scoreDocs; + assertEquals("med and up, not max", maxId - medId, result.length); + + result = search.search(q, new TermRangeFilter("id", minIP, medIP, F, T), + numDocs).scoreDocs; + assertEquals("not min, up to med", medId - minId, result.length); + + // very small sets + + result = search.search(q, new TermRangeFilter("id", minIP, minIP, F, F), + numDocs).scoreDocs; + assertEquals("min,min,F,F", 0, result.length); + result = search.search(q, new TermRangeFilter("id", medIP, medIP, F, F), + numDocs).scoreDocs; + assertEquals("med,med,F,F", 0, result.length); + result = search.search(q, new TermRangeFilter("id", maxIP, maxIP, F, F), + numDocs).scoreDocs; + assertEquals("max,max,F,F", 0, result.length); + + result = search.search(q, new TermRangeFilter("id", minIP, minIP, T, T), + numDocs).scoreDocs; + assertEquals("min,min,T,T", 1, result.length); + result = search.search(q, new TermRangeFilter("id", null, minIP, F, T), + numDocs).scoreDocs; + assertEquals("nul,min,F,T", 1, result.length); + + result = search.search(q, 
new TermRangeFilter("id", maxIP, maxIP, T, T), + numDocs).scoreDocs; + assertEquals("max,max,T,T", 1, result.length); + result = search.search(q, new TermRangeFilter("id", maxIP, null, T, F), + numDocs).scoreDocs; + assertEquals("max,nul,T,T", 1, result.length); + + result = search.search(q, new TermRangeFilter("id", medIP, medIP, T, T), + numDocs).scoreDocs; + assertEquals("med,med,T,T", 1, result.length); + + search.close(); + } + + @Test + public void testRangeFilterIdCollating() throws IOException { + + IndexReader reader = signedIndexReader; + IndexSearcher search = newSearcher(reader); + + Collator c = Collator.getInstance(Locale.ENGLISH); + + int medId = ((maxId - minId) / 2); + + String minIP = pad(minId); + String maxIP = pad(maxId); + String medIP = pad(medId); + + int numDocs = reader.numDocs(); + + assertEquals("num of docs", numDocs, 1 + maxId - minId); + + Query q = new TermQuery(new Term("body", "body")); + + // test id, bounded on both ends + int numHits = search.search(q, new TermRangeFilter("id", minIP, maxIP, T, + T, c), 1000).totalHits; + assertEquals("find all", numDocs, numHits); + + numHits = search.search(q, + new TermRangeFilter("id", minIP, maxIP, T, F, c), 1000).totalHits; + assertEquals("all but last", numDocs - 1, numHits); + + numHits = search.search(q, + new TermRangeFilter("id", minIP, maxIP, F, T, c), 1000).totalHits; + assertEquals("all but first", numDocs - 1, numHits); + + numHits = search.search(q, + new TermRangeFilter("id", minIP, maxIP, F, F, c), 1000).totalHits; + assertEquals("all but ends", numDocs - 2, numHits); + + numHits = search.search(q, + new TermRangeFilter("id", medIP, maxIP, T, T, c), 1000).totalHits; + assertEquals("med and up", 1 + maxId - medId, numHits); + + numHits = search.search(q, + new TermRangeFilter("id", minIP, medIP, T, T, c), 1000).totalHits; + assertEquals("up to med", 1 + medId - minId, numHits); + + // unbounded id + + numHits = search.search(q, new TermRangeFilter("id", minIP, null, T, F, c), + 1000).totalHits; + assertEquals("min and up", numDocs, numHits); + + numHits = search.search(q, new TermRangeFilter("id", null, maxIP, F, T, c), + 1000).totalHits; + assertEquals("max and down", numDocs, numHits); + + numHits = search.search(q, new TermRangeFilter("id", minIP, null, F, F, c), + 1000).totalHits; + assertEquals("not min, but up", numDocs - 1, numHits); + + numHits = search.search(q, new TermRangeFilter("id", null, maxIP, F, F, c), + 1000).totalHits; + assertEquals("not max, but down", numDocs - 1, numHits); + + numHits = search.search(q, + new TermRangeFilter("id", medIP, maxIP, T, F, c), 1000).totalHits; + assertEquals("med and up, not max", maxId - medId, numHits); + + numHits = search.search(q, + new TermRangeFilter("id", minIP, medIP, F, T, c), 1000).totalHits; + assertEquals("not min, up to med", medId - minId, numHits); + + // very small sets + + numHits = search.search(q, + new TermRangeFilter("id", minIP, minIP, F, F, c), 1000).totalHits; + assertEquals("min,min,F,F", 0, numHits); + numHits = search.search(q, + new TermRangeFilter("id", medIP, medIP, F, F, c), 1000).totalHits; + assertEquals("med,med,F,F", 0, numHits); + numHits = search.search(q, + new TermRangeFilter("id", maxIP, maxIP, F, F, c), 1000).totalHits; + assertEquals("max,max,F,F", 0, numHits); + + numHits = search.search(q, + new TermRangeFilter("id", minIP, minIP, T, T, c), 1000).totalHits; + assertEquals("min,min,T,T", 1, numHits); + numHits = search.search(q, new TermRangeFilter("id", null, minIP, F, T, c), + 1000).totalHits; + 
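A reading aid for the flag pairs exercised above (T and F are the boolean constants from BaseTestRangeFilter; this is the interval convention, not additional API):

    Filter closed   = new TermRangeFilter("id", minIP, maxIP, T, T); // [min, max]
    Filter halfOpen = new TermRangeFilter("id", minIP, maxIP, T, F); // [min, max)
    Filter open     = new TermRangeFilter("id", minIP, maxIP, F, F); // (min, max)
    Filter tailOnly = new TermRangeFilter("id", minIP, null, F, F);  // (min, *): null leaves that end unbounded
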
assertEquals("nul,min,F,T", 1, numHits); + + numHits = search.search(q, + new TermRangeFilter("id", maxIP, maxIP, T, T, c), 1000).totalHits; + assertEquals("max,max,T,T", 1, numHits); + numHits = search.search(q, new TermRangeFilter("id", maxIP, null, T, F, c), + 1000).totalHits; + assertEquals("max,nul,T,T", 1, numHits); + + numHits = search.search(q, + new TermRangeFilter("id", medIP, medIP, T, T, c), 1000).totalHits; + assertEquals("med,med,T,T", 1, numHits); + + search.close(); + } + + @Test + public void testRangeFilterRand() throws IOException { + + IndexReader reader = signedIndexReader; + IndexSearcher search = newSearcher(reader); + + String minRP = pad(signedIndexDir.minR); + String maxRP = pad(signedIndexDir.maxR); + + int numDocs = reader.numDocs(); + + assertEquals("num of docs", numDocs, 1 + maxId - minId); + + ScoreDoc[] result; + Query q = new TermQuery(new Term("body", "body")); + + // test extremes, bounded on both ends + + result = search.search(q, new TermRangeFilter("rand", minRP, maxRP, T, T), + numDocs).scoreDocs; + assertEquals("find all", numDocs, result.length); + + result = search.search(q, new TermRangeFilter("rand", minRP, maxRP, T, F), + numDocs).scoreDocs; + assertEquals("all but biggest", numDocs - 1, result.length); + + result = search.search(q, new TermRangeFilter("rand", minRP, maxRP, F, T), + numDocs).scoreDocs; + assertEquals("all but smallest", numDocs - 1, result.length); + + result = search.search(q, new TermRangeFilter("rand", minRP, maxRP, F, F), + numDocs).scoreDocs; + assertEquals("all but extremes", numDocs - 2, result.length); + + // unbounded + + result = search.search(q, new TermRangeFilter("rand", minRP, null, T, F), + numDocs).scoreDocs; + assertEquals("smallest and up", numDocs, result.length); + + result = search.search(q, new TermRangeFilter("rand", null, maxRP, F, T), + numDocs).scoreDocs; + assertEquals("biggest and down", numDocs, result.length); + + result = search.search(q, new TermRangeFilter("rand", minRP, null, F, F), + numDocs).scoreDocs; + assertEquals("not smallest, but up", numDocs - 1, result.length); + + result = search.search(q, new TermRangeFilter("rand", null, maxRP, F, F), + numDocs).scoreDocs; + assertEquals("not biggest, but down", numDocs - 1, result.length); + + // very small sets + + result = search.search(q, new TermRangeFilter("rand", minRP, minRP, F, F), + numDocs).scoreDocs; + assertEquals("min,min,F,F", 0, result.length); + result = search.search(q, new TermRangeFilter("rand", maxRP, maxRP, F, F), + numDocs).scoreDocs; + assertEquals("max,max,F,F", 0, result.length); + + result = search.search(q, new TermRangeFilter("rand", minRP, minRP, T, T), + numDocs).scoreDocs; + assertEquals("min,min,T,T", 1, result.length); + result = search.search(q, new TermRangeFilter("rand", null, minRP, F, T), + numDocs).scoreDocs; + assertEquals("nul,min,F,T", 1, result.length); + + result = search.search(q, new TermRangeFilter("rand", maxRP, maxRP, T, T), + numDocs).scoreDocs; + assertEquals("max,max,T,T", 1, result.length); + result = search.search(q, new TermRangeFilter("rand", maxRP, null, T, F), + numDocs).scoreDocs; + assertEquals("max,nul,T,T", 1, result.length); + + search.close(); + } + + @Test + public void testRangeFilterRandCollating() throws IOException { + + // using the unsigned index because collation seems to ignore hyphens + IndexReader reader = unsignedIndexReader; + IndexSearcher search = newSearcher(reader); + + Collator c = Collator.getInstance(Locale.ENGLISH); + + String minRP = pad(unsignedIndexDir.minR); + 
String maxRP = pad(unsignedIndexDir.maxR); + + int numDocs = reader.numDocs(); + + assertEquals("num of docs", numDocs, 1 + maxId - minId); + + Query q = new TermQuery(new Term("body", "body")); + + // test extremes, bounded on both ends + + int numHits = search.search(q, new TermRangeFilter("rand", minRP, maxRP, T, + T, c), 1000).totalHits; + assertEquals("find all", numDocs, numHits); + + numHits = search.search(q, new TermRangeFilter("rand", minRP, maxRP, T, F, + c), 1000).totalHits; + assertEquals("all but biggest", numDocs - 1, numHits); + + numHits = search.search(q, new TermRangeFilter("rand", minRP, maxRP, F, T, + c), 1000).totalHits; + assertEquals("all but smallest", numDocs - 1, numHits); + + numHits = search.search(q, new TermRangeFilter("rand", minRP, maxRP, F, F, + c), 1000).totalHits; + assertEquals("all but extremes", numDocs - 2, numHits); + + // unbounded + + numHits = search.search(q, + new TermRangeFilter("rand", minRP, null, T, F, c), 1000).totalHits; + assertEquals("smallest and up", numDocs, numHits); + + numHits = search.search(q, + new TermRangeFilter("rand", null, maxRP, F, T, c), 1000).totalHits; + assertEquals("biggest and down", numDocs, numHits); + + numHits = search.search(q, + new TermRangeFilter("rand", minRP, null, F, F, c), 1000).totalHits; + assertEquals("not smallest, but up", numDocs - 1, numHits); + + numHits = search.search(q, + new TermRangeFilter("rand", null, maxRP, F, F, c), 1000).totalHits; + assertEquals("not biggest, but down", numDocs - 1, numHits); + + // very small sets + + numHits = search.search(q, new TermRangeFilter("rand", minRP, minRP, F, F, + c), 1000).totalHits; + assertEquals("min,min,F,F", 0, numHits); + numHits = search.search(q, new TermRangeFilter("rand", maxRP, maxRP, F, F, + c), 1000).totalHits; + assertEquals("max,max,F,F", 0, numHits); + + numHits = search.search(q, new TermRangeFilter("rand", minRP, minRP, T, T, + c), 1000).totalHits; + assertEquals("min,min,T,T", 1, numHits); + numHits = search.search(q, + new TermRangeFilter("rand", null, minRP, F, T, c), 1000).totalHits; + assertEquals("nul,min,F,T", 1, numHits); + + numHits = search.search(q, new TermRangeFilter("rand", maxRP, maxRP, T, T, + c), 1000).totalHits; + assertEquals("max,max,T,T", 1, numHits); + numHits = search.search(q, + new TermRangeFilter("rand", maxRP, null, T, F, c), 1000).totalHits; + assertEquals("max,nul,T,T", 1, numHits); + + search.close(); + } + + @Test + public void testFarsi() throws Exception { + + /* build an index */ + Directory farsiIndex = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, farsiIndex); + Document doc = new Document(); + doc.add(newField("content", "\u0633\u0627\u0628", Field.Store.YES, + Field.Index.NOT_ANALYZED)); + doc + .add(newField("body", "body", Field.Store.YES, + Field.Index.NOT_ANALYZED)); + writer.addDocument(doc); + + IndexReader reader = writer.getReader(); + writer.close(); + + IndexSearcher search = newSearcher(reader); + Query q = new TermQuery(new Term("body", "body")); + + // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in + // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi + // characters properly. 
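The comment above is the crux of the Farsi cases that follow: the collator's order, not the raw code point order, decides range membership. A minimal check of that premise (assuming the JDK's Arabic RuleBasedCollator behaves as the comment describes):

    Collator collator = Collator.getInstance(new Locale("ar"));
    // collated: U+0698 sorts before U+0633, so [U+062F..U+0698] excludes U+0633
    assert collator.compare("\u0698", "\u0633") < 0;
    // raw code point order says the opposite
    assert "\u0698".compareTo("\u0633") > 0;
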
+ Collator collator = Collator.getInstance(new Locale("ar")); + + // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi + // orders the U+0698 character before the U+0633 character, so the single + // index Term below should NOT be returned by a TermRangeFilter with a Farsi + // Collator (or an Arabic one for the case when Farsi is not supported). + int numHits = search.search(q, new TermRangeFilter("content", "\u062F", + "\u0698", T, T, collator), 1000).totalHits; + assertEquals("The index Term should not be included.", 0, numHits); + + numHits = search.search(q, new TermRangeFilter("content", "\u0633", + "\u0638", T, T, collator), 1000).totalHits; + assertEquals("The index Term should be included.", 1, numHits); + search.close(); + reader.close(); + farsiIndex.close(); + } + + @Test + public void testDanish() throws Exception { + + /* build an index */ + Directory danishIndex = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, danishIndex); + // Danish collation orders the words below in the given order + // (example taken from TestSort.testInternationalSort() ). + String[] words = {"H\u00D8T", "H\u00C5T", "MAND"}; + for (int docnum = 0; docnum < words.length; ++docnum) { + Document doc = new Document(); + doc.add(newField("content", words[docnum], Field.Store.YES, + Field.Index.NOT_ANALYZED)); + doc.add(newField("body", "body", Field.Store.YES, + Field.Index.NOT_ANALYZED)); + writer.addDocument(doc); + } + IndexReader reader = writer.getReader(); + writer.close(); + + IndexSearcher search = newSearcher(reader); + Query q = new TermQuery(new Term("body", "body")); + + Collator collator = Collator.getInstance(new Locale("da", "dk")); + + // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ], + // but Danish collation does. + int numHits = search.search(q, new TermRangeFilter("content", "H\u00D8T", + "MAND", F, F, collator), 1000).totalHits; + assertEquals("The index Term should be included.", 1, numHits); + + numHits = search.search(q, new TermRangeFilter("content", "H\u00C5T", + "MAND", F, F, collator), 1000).totalHits; + assertEquals("The index Term should not be included.", 0, numHits); + search.close(); + reader.close(); + danishIndex.close(); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestTermRangeQuery.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestTermRangeQuery.java new file mode 100644 index 0000000..bb11751 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestTermRangeQuery.java @@ -0,0 +1,410 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.store.Directory; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; + +import org.apache.lucene.util.LuceneTestCase; +import java.io.IOException; +import java.io.Reader; +import java.util.Locale; +import java.util.Set; +import java.util.HashSet; +import java.util.Arrays; +import java.text.Collator; + + +public class TestTermRangeQuery extends LuceneTestCase { + + private int docCount = 0; + private Directory dir; + + @Override + public void setUp() throws Exception { + super.setUp(); + dir = newDirectory(); + } + + @Override + public void tearDown() throws Exception { + dir.close(); + super.tearDown(); + } + + public void testExclusive() throws Exception { + Query query = new TermRangeQuery("content", "A", "C", false, false); + initializeIndex(new String[] {"A", "B", "C", "D"}); + IndexSearcher searcher = new IndexSearcher(dir, true); + ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals("A,B,C,D, only B in range", 1, hits.length); + searcher.close(); + + initializeIndex(new String[] {"A", "B", "D"}); + searcher = new IndexSearcher(dir, true); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals("A,B,D, only B in range", 1, hits.length); + searcher.close(); + + addDoc("C"); + searcher = new IndexSearcher(dir, true); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals("C added, still only B in range", 1, hits.length); + searcher.close(); + } + + public void testInclusive() throws Exception { + Query query = new TermRangeQuery("content", "A", "C", true, true); + + initializeIndex(new String[]{"A", "B", "C", "D"}); + IndexSearcher searcher = new IndexSearcher(dir, true); + ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals("A,B,C,D - A,B,C in range", 3, hits.length); + searcher.close(); + + initializeIndex(new String[]{"A", "B", "D"}); + searcher = new IndexSearcher(dir, true); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals("A,B,D - A and B in range", 2, hits.length); + searcher.close(); + + addDoc("C"); + searcher = new IndexSearcher(dir, true); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals("C added - A, B, C in range", 3, hits.length); + searcher.close(); + } + + /** This test should not be here, but it tests the fuzzy query rewrite mode (TOP_TERMS_SCORING_BOOLEAN_REWRITE) + * with constant score and checks, that only the lower end of terms is put into the range */ + public void testTopTermsRewrite() throws Exception { + initializeIndex(new String[]{"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K"}); + + IndexSearcher searcher = new IndexSearcher(dir, true); + TermRangeQuery query = new TermRangeQuery("content", "B", "J", true, true); + checkBooleanTerms(searcher, query, "B", "C", "D", "E", "F", "G", "H", "I", "J"); + + final int savedClauseCount = BooleanQuery.getMaxClauseCount(); + try { + BooleanQuery.setMaxClauseCount(3); + checkBooleanTerms(searcher, query, "B", "C", "D"); + } finally { + BooleanQuery.setMaxClauseCount(savedClauseCount); + } + searcher.close(); + } + + 
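checkBooleanTerms below pins the rewrite to MultiTermQuery.TopTermsScoringBooleanQueryRewrite, whose size cap interacts with BooleanQuery.getMaxClauseCount(): the effective clause budget is the smaller of the two, filled from the low end of the term range, as asserted above. A condensed sketch of that interaction:

    TermRangeQuery query = new TermRangeQuery("content", "B", "J", true, true);
    query.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(50));
    BooleanQuery.setMaxClauseCount(3);                        // budget shrinks from 50 to 3
    BooleanQuery bq = (BooleanQuery) searcher.rewrite(query); // clauses: B, C, D
    // (the test restores the previous cap in a finally block)
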
private void checkBooleanTerms(Searcher searcher, TermRangeQuery query, String... terms) throws IOException {
+    query.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(50));
+    final BooleanQuery bq = (BooleanQuery) searcher.rewrite(query);
+    final Set<String> allowedTerms = new HashSet<String>(Arrays.asList(terms));
+    assertEquals(allowedTerms.size(), bq.clauses().size());
+    for (BooleanClause c : bq.clauses()) {
+      assertTrue(c.getQuery() instanceof TermQuery);
+      final TermQuery tq = (TermQuery) c.getQuery();
+      final String term = tq.getTerm().text();
+      assertTrue("invalid term: "+ term, allowedTerms.contains(term));
+      allowedTerms.remove(term); // remove to fail on double terms
+    }
+    assertEquals(0, allowedTerms.size());
+  }
+
+  public void testEqualsHashcode() {
+    Query query = new TermRangeQuery("content", "A", "C", true, true);
+
+    query.setBoost(1.0f);
+    Query other = new TermRangeQuery("content", "A", "C", true, true);
+    other.setBoost(1.0f);
+
+    assertEquals("query equals itself is true", query, query);
+    assertEquals("equivalent queries are equal", query, other);
+    assertEquals("hashcode must return same value when equals is true", query.hashCode(), other.hashCode());
+
+    other.setBoost(2.0f);
+    assertFalse("Different boost queries are not equal", query.equals(other));
+
+    other = new TermRangeQuery("notcontent", "A", "C", true, true);
+    assertFalse("Different fields are not equal", query.equals(other));
+
+    other = new TermRangeQuery("content", "X", "C", true, true);
+    assertFalse("Different lower terms are not equal", query.equals(other));
+
+    other = new TermRangeQuery("content", "A", "Z", true, true);
+    assertFalse("Different upper terms are not equal", query.equals(other));
+
+    query = new TermRangeQuery("content", null, "C", true, true);
+    other = new TermRangeQuery("content", null, "C", true, true);
+    assertEquals("equivalent queries with null lowerterms are equal()", query, other);
+    assertEquals("hashcode must return same value when equals is true", query.hashCode(), other.hashCode());
+
+    query = new TermRangeQuery("content", "C", null, true, true);
+    other = new TermRangeQuery("content", "C", null, true, true);
+    assertEquals("equivalent queries with null upperterms are equal()", query, other);
+    assertEquals("hashcode returns same value", query.hashCode(), other.hashCode());
+
+    query = new TermRangeQuery("content", null, "C", true, true);
+    other = new TermRangeQuery("content", "C", null, true, true);
+    assertFalse("queries with different upper and lower terms are not equal", query.equals(other));
+
+    query = new TermRangeQuery("content", "A", "C", false, false);
+    other = new TermRangeQuery("content", "A", "C", true, true);
+    assertFalse("queries with different inclusive are not equal", query.equals(other));
+
+    query = new TermRangeQuery("content", "A", "C", false, false);
+    other = new TermRangeQuery("content", "A", "C", false, false, Collator.getInstance());
+    assertFalse("a query with a collator is not equal to one without", query.equals(other));
+  }
+
+  public void testExclusiveCollating() throws Exception {
+    Query query = new TermRangeQuery("content", "A", "C", false, false, Collator.getInstance(Locale.ENGLISH));
+    initializeIndex(new String[] {"A", "B", "C", "D"});
+    IndexSearcher searcher = new IndexSearcher(dir, true);
+    ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
+    assertEquals("A,B,C,D, only B in range", 1, hits.length);
+    searcher.close();
+
+    initializeIndex(new String[] {"A", "B", "D"});
+    searcher = new IndexSearcher(dir,
true); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals("A,B,D, only B in range", 1, hits.length); + searcher.close(); + + addDoc("C"); + searcher = new IndexSearcher(dir, true); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals("C added, still only B in range", 1, hits.length); + searcher.close(); + } + + public void testInclusiveCollating() throws Exception { + Query query = new TermRangeQuery("content", "A", "C",true, true, Collator.getInstance(Locale.ENGLISH)); + + initializeIndex(new String[]{"A", "B", "C", "D"}); + IndexSearcher searcher = new IndexSearcher(dir, true); + ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals("A,B,C,D - A,B,C in range", 3, hits.length); + searcher.close(); + + initializeIndex(new String[]{"A", "B", "D"}); + searcher = new IndexSearcher(dir, true); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals("A,B,D - A and B in range", 2, hits.length); + searcher.close(); + + addDoc("C"); + searcher = new IndexSearcher(dir, true); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals("C added - A, B, C in range", 3, hits.length); + searcher.close(); + } + + public void testFarsi() throws Exception { + // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in + // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi + // characters properly. + Collator collator = Collator.getInstance(new Locale("ar")); + Query query = new TermRangeQuery("content", "\u062F", "\u0698", true, true, collator); + // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi + // orders the U+0698 character before the U+0633 character, so the single + // index Term below should NOT be returned by a TermRangeQuery with a Farsi + // Collator (or an Arabic one for the case when Farsi is not supported). + initializeIndex(new String[]{ "\u0633\u0627\u0628"}); + IndexSearcher searcher = new IndexSearcher(dir, true); + ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals("The index Term should not be included.", 0, hits.length); + + query = new TermRangeQuery("content", "\u0633", "\u0638",true, true, collator); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals("The index Term should be included.", 1, hits.length); + searcher.close(); + } + + public void testDanish() throws Exception { + Collator collator = Collator.getInstance(new Locale("da", "dk")); + // Danish collation orders the words below in the given order (example taken + // from TestSort.testInternationalSort() ). + String[] words = { "H\u00D8T", "H\u00C5T", "MAND" }; + Query query = new TermRangeQuery("content", "H\u00D8T", "MAND", false, false, collator); + + // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ], + // but Danish collation does. 
+ initializeIndex(words); + IndexSearcher searcher = new IndexSearcher(dir, true); + ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals("The index Term should be included.", 1, hits.length); + + query = new TermRangeQuery("content", "H\u00C5T", "MAND", false, false, collator); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals("The index Term should not be included.", 0, hits.length); + searcher.close(); + } + + private static class SingleCharAnalyzer extends Analyzer { + + private static class SingleCharTokenizer extends Tokenizer { + char[] buffer = new char[1]; + boolean done = false; + CharTermAttribute termAtt; + + public SingleCharTokenizer(Reader r) { + super(r); + termAtt = addAttribute(CharTermAttribute.class); + } + + @Override + public boolean incrementToken() throws IOException { + if (done) + return false; + else { + int count = input.read(buffer); + clearAttributes(); + done = true; + if (count == 1) { + termAtt.copyBuffer(buffer, 0, 1); + } + return true; + } + } + + @Override + public final void reset(Reader reader) throws IOException { + super.reset(reader); + done = false; + } + } + + @Override + public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException { + Tokenizer tokenizer = (Tokenizer) getPreviousTokenStream(); + if (tokenizer == null) { + tokenizer = new SingleCharTokenizer(reader); + setPreviousTokenStream(tokenizer); + } else + tokenizer.reset(reader); + return tokenizer; + } + + @Override + public TokenStream tokenStream(String fieldName, Reader reader) { + return new SingleCharTokenizer(reader); + } + } + + private void initializeIndex(String[] values) throws IOException { + initializeIndex(values, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)); + } + + private void initializeIndex(String[] values, Analyzer analyzer) throws IOException { + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, analyzer).setOpenMode(OpenMode.CREATE)); + for (int i = 0; i < values.length; i++) { + insertDoc(writer, values[i]); + } + writer.close(); + } + + // shouldnt create an analyzer for every doc? 
+  private void addDoc(String content) throws IOException {
+    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setOpenMode(OpenMode.APPEND));
+    insertDoc(writer, content);
+    writer.close();
+  }
+
+  private void insertDoc(IndexWriter writer, String content) throws IOException {
+    Document doc = new Document();
+
+    doc.add(newField("id", "id" + docCount, Field.Store.YES, Field.Index.NOT_ANALYZED));
+    doc.add(newField("content", content, Field.Store.NO, Field.Index.ANALYZED));
+
+    writer.addDocument(doc);
+    docCount++;
+  }
+
+  // LUCENE-38
+  public void testExclusiveLowerNull() throws Exception {
+    Analyzer analyzer = new SingleCharAnalyzer();
+    //http://issues.apache.org/jira/browse/LUCENE-38
+    Query query = new TermRangeQuery("content", null, "C",
+                                 false, false);
+    initializeIndex(new String[] {"A", "B", "", "C", "D"}, analyzer);
+    IndexSearcher searcher = new IndexSearcher(dir, true);
+    int numHits = searcher.search(query, null, 1000).totalHits;
+    // When Lucene-38 is fixed, use the assert on the next line:
+    assertEquals("A,B,<empty string>,C,D => A, B & <empty string> are in range", 3, numHits);
+    // until Lucene-38 is fixed, use this assert:
+    //assertEquals("A,B,<empty string>,C,D => A, B & <empty string> are in range", 2, hits.length());
+
+    searcher.close();
+    initializeIndex(new String[] {"A", "B", "", "D"}, analyzer);
+    searcher = new IndexSearcher(dir, true);
+    numHits = searcher.search(query, null, 1000).totalHits;
+    // When Lucene-38 is fixed, use the assert on the next line:
+    assertEquals("A,B,<empty string>,D => A, B & <empty string> are in range", 3, numHits);
+    // until Lucene-38 is fixed, use this assert:
+    //assertEquals("A,B,<empty string>,D => A, B & <empty string> are in range", 2, hits.length());
+    searcher.close();
+    addDoc("C");
+    searcher = new IndexSearcher(dir, true);
+    numHits = searcher.search(query, null, 1000).totalHits;
+    // When Lucene-38 is fixed, use the assert on the next line:
+    assertEquals("C added, still A, B & <empty string> are in range", 3, numHits);
+    // until Lucene-38 is fixed, use this assert
+    //assertEquals("C added, still A, B & <empty string> are in range", 2, hits.length());
+    searcher.close();
+  }
+
+  // LUCENE-38
+  public void testInclusiveLowerNull() throws Exception {
+    //http://issues.apache.org/jira/browse/LUCENE-38
+    Analyzer analyzer = new SingleCharAnalyzer();
+    Query query = new TermRangeQuery("content", null, "C", true, true);
+    initializeIndex(new String[]{"A", "B", "","C", "D"}, analyzer);
+    IndexSearcher searcher = new IndexSearcher(dir, true);
+    int numHits = searcher.search(query, null, 1000).totalHits;
+    // When Lucene-38 is fixed, use the assert on the next line:
+    assertEquals("A,B,<empty string>,C,D => A,B,<empty string>,C in range", 4, numHits);
+    // until Lucene-38 is fixed, use this assert
+    //assertEquals("A,B,<empty string>,C,D => A,B,<empty string>,C in range", 3, hits.length());
+    searcher.close();
+    initializeIndex(new String[]{"A", "B", "", "D"}, analyzer);
+    searcher = new IndexSearcher(dir, true);
+    numHits = searcher.search(query, null, 1000).totalHits;
+    // When Lucene-38 is fixed, use the assert on the next line:
+    assertEquals("A,B,<empty string>,D - A, B and <empty string> in range", 3, numHits);
+    // until Lucene-38 is fixed, use this assert
+    //assertEquals("A,B,<empty string>,D => A, B and <empty string> in range", 2, hits.length());
+    searcher.close();
+    addDoc("C");
+    searcher = new IndexSearcher(dir, true);
+    numHits = searcher.search(query, null, 1000).totalHits;
+    // When Lucene-38 is fixed, use the assert on the next line:
+    assertEquals("C added => A,B,<empty string>,C in range", 4, numHits);
+    // until Lucene-38 is fixed, use this assert
+    //assertEquals("C added => 
A,B,,C in range", 3, hits.length()); + searcher.close(); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestTermScorer.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestTermScorer.java new file mode 100644 index 0000000..778d9ed --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestTermScorer.java @@ -0,0 +1,182 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; + +public class TestTermScorer extends LuceneTestCase { + protected Directory directory; + private static final String FIELD = "field"; + + protected String[] values = new String[] {"all", "dogs dogs", "like", + "playing", "fetch", "all"}; + protected IndexSearcher indexSearcher; + protected IndexReader indexReader; + + @Override + public void setUp() throws Exception { + super.setUp(); + directory = newDirectory(); + + RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); + for (int i = 0; i < values.length; i++) { + Document doc = new Document(); + doc + .add(newField(FIELD, values[i], Field.Store.YES, + Field.Index.ANALYZED)); + writer.addDocument(doc); + } + writer.optimize(); + indexReader = writer.getReader(); + writer.close(); + indexSearcher = newSearcher(indexReader); + } + + @Override + public void tearDown() throws Exception { + indexSearcher.close(); + indexReader.close(); + directory.close(); + super.tearDown(); + } + + public void test() throws IOException { + + Term allTerm = new Term(FIELD, "all"); + TermQuery termQuery = new TermQuery(allTerm); + + Weight weight = indexSearcher.createNormalizedWeight(termQuery); + IndexReader sub = indexSearcher.getIndexReader().getSequentialSubReaders() == null ? 
+        indexSearcher.getIndexReader() : indexSearcher.getIndexReader().getSequentialSubReaders()[0];
+    Scorer ts = weight.scorer(sub, true, true);
+    // we have 2 documents with the term all in them, one document for all the
+    // other values
+    final List<TestHit> docs = new ArrayList<TestHit>();
+    // must call next first
+
+    ts.score(new Collector() {
+      private int base = 0;
+      private Scorer scorer;
+
+      @Override
+      public void setScorer(Scorer scorer) throws IOException {
+        this.scorer = scorer;
+      }
+
+      @Override
+      public void collect(int doc) throws IOException {
+        float score = scorer.score();
+        doc = doc + base;
+        docs.add(new TestHit(doc, score));
+        assertTrue("score " + score + " is not greater than 0", score > 0);
+        assertTrue("Doc: " + doc + " does not equal 0 or doc does not equal 5",
+            doc == 0 || doc == 5);
+      }
+
+      @Override
+      public void setNextReader(IndexReader reader, int docBase) {
+        base = docBase;
+      }
+
+      @Override
+      public boolean acceptsDocsOutOfOrder() {
+        return true;
+      }
+    });
+    assertTrue("docs Size: " + docs.size() + " is not: " + 2, docs.size() == 2);
+    TestHit doc0 = docs.get(0);
+    TestHit doc5 = docs.get(1);
+    // The scores should be the same
+    assertTrue(doc0.score + " does not equal: " + doc5.score,
+        doc0.score == doc5.score);
+    /*
+     * Score should be (based on Default Sim.; all floats are approximate):
+     * tf = 1
+     * numDocs = 6
+     * docFreq(all) = 2
+     * idf = ln(6/3) + 1 = 1.693147
+     * idf ^ 2 = 2.8667
+     * boost = 1
+     * lengthNorm = 1 //there is 1 term in every document
+     * coord = 1
+     * sumOfSquaredWeights = (idf * boost) ^ 2 = 1.693147 ^ 2 = 2.8667
+     * queryNorm = 1 / (sumOfSquaredWeights)^0.5 = 1 /(1.693147) = 0.590
+     *
+     * score = 1 * 2.8667 * 1 * 1 * 0.590 = 1.69
+     */
+    assertTrue(doc0.score + " does not equal: " + 1.6931472f,
+        doc0.score == 1.6931472f);
+  }
+
+  public void testNext() throws Exception {
+
+    Term allTerm = new Term(FIELD, "all");
+    TermQuery termQuery = new TermQuery(allTerm);
+
+    Weight weight = indexSearcher.createNormalizedWeight(termQuery);
+
+    IndexReader sub = indexSearcher.getIndexReader().getSequentialSubReaders() == null ?
+        indexSearcher.getIndexReader() : indexSearcher.getIndexReader().getSequentialSubReaders()[0];
+    Scorer ts = weight.scorer(sub, true, true);
+    assertTrue("next did not return a doc",
+        ts.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+    assertTrue("score is not correct", ts.score() == 1.6931472f);
+    assertTrue("next did not return a doc",
+        ts.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+    assertTrue("score is not correct", ts.score() == 1.6931472f);
+    assertTrue("next returned a doc and it should not have",
+        ts.nextDoc() == DocIdSetIterator.NO_MORE_DOCS);
+  }
+
+  public void testAdvance() throws Exception {
+
+    Term allTerm = new Term(FIELD, "all");
+    TermQuery termQuery = new TermQuery(allTerm);
+
+    Weight weight = indexSearcher.createNormalizedWeight(termQuery);
+
+    IndexReader sub = indexSearcher.getIndexReader().getSequentialSubReaders() == null ?
+ indexSearcher.getIndexReader() : indexSearcher.getIndexReader().getSequentialSubReaders()[0]; + + Scorer ts = weight.scorer(sub, true, true); + assertTrue("Didn't skip", ts.advance(3) != DocIdSetIterator.NO_MORE_DOCS); + // The next doc should be doc 5 + assertTrue("doc should be number 5", ts.docID() == 5); + } + + private class TestHit { + public int doc; + public float score; + + public TestHit(int doc, float score) { + this.doc = doc; + this.score = score; + } + + @Override + public String toString() { + return "TestHit{" + "doc=" + doc + ", score=" + score + "}"; + } + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestTermVectors.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestTermVectors.java new file mode 100644 index 0000000..e83f045 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestTermVectors.java @@ -0,0 +1,446 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.*; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.English; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.SortedSet; + +public class TestTermVectors extends LuceneTestCase { + private IndexSearcher searcher; + private IndexReader reader; + private Directory directory; + + @Override + public void setUp() throws Exception { + super.setUp(); + directory = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.SIMPLE, true)).setMergePolicy(newLogMergePolicy())); + //writer.setUseCompoundFile(true); + //writer.infoStream = System.out; + for (int i = 0; i < 1000; i++) { + Document doc = new Document(); + Field.TermVector termVector; + int mod3 = i % 3; + int mod2 = i % 2; + if (mod2 == 0 && mod3 == 0){ + termVector = Field.TermVector.WITH_POSITIONS_OFFSETS; + } + else if (mod2 == 0){ + termVector = Field.TermVector.WITH_POSITIONS; + } + else if (mod3 == 0){ + termVector = Field.TermVector.WITH_OFFSETS; + } + else { + termVector = Field.TermVector.YES; + } + doc.add(new Field("field", English.intToEnglish(i), + Field.Store.YES, Field.Index.ANALYZED, termVector)); + writer.addDocument(doc); + } + reader = writer.getReader(); + writer.close(); + searcher = newSearcher(reader); + } + + @Override + public void tearDown() throws Exception { + searcher.close(); + reader.close(); + directory.close(); + super.tearDown(); + } + + public void test() { + assertTrue(searcher != null); + } + + public void testTermVectors() { + Query query = new TermQuery(new Term("field", "seventy")); + try { + ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(100, hits.length); + + for (int i = 0; i < hits.length; i++) + { + TermFreqVector [] vector = searcher.reader.getTermFreqVectors(hits[i].doc); + assertTrue(vector != null); + assertTrue(vector.length == 1); + } + } catch (IOException e) { + assertTrue(false); + } + } + + public void testTermVectorsFieldOrder() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, dir, new MockAnalyzer(random, MockTokenizer.SIMPLE, true)); + Document doc = new Document(); + doc.add(new Field("c", "some content here", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); + doc.add(new Field("a", "some content here", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); + doc.add(new Field("b", "some content here", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); + doc.add(new Field("x", "some content here", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); + writer.addDocument(doc); + IndexReader reader = writer.getReader(); + writer.close(); + TermFreqVector[] v = reader.getTermFreqVectors(0); + assertEquals(4, v.length); + String[] expectedFields = new String[]{"a", "b", "c", "x"}; + int[] expectedPositions = new int[]{1, 2, 0}; + for(int i=0;i 0); + + for (int j = 0; j < terms.length; j++) { + int [] positions = posVec.getTermPositions(j); + TermVectorOffsetInfo [] 
offsets = posVec.getOffsets(j);
+
+            if(shouldBePosVector){
+              assertTrue(positions != null);
+              assertTrue(positions.length > 0);
+            }
+            else
+              assertTrue(positions == null);
+
+            if(shouldBeOffVector){
+              assertTrue(offsets != null);
+              assertTrue(offsets.length > 0);
+            }
+            else
+              assertTrue(offsets == null);
+          }
+        }
+        else{
+          try{
+            assertTrue(false);
+          }
+          catch(ClassCastException ignore){
+            TermFreqVector freqVec = vector[0];
+            String [] terms = freqVec.getTerms();
+            assertTrue(terms != null && terms.length > 0);
+          }
+
+        }
+
+      }
+  }
+
+  public void testTermOffsetVectors() {
+    Query query = new TermQuery(new Term("field", "fifty"));
+    try {
+      ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
+      assertEquals(100, hits.length);
+
+      for (int i = 0; i < hits.length; i++)
+      {
+        TermFreqVector [] vector = searcher.reader.getTermFreqVectors(hits[i].doc);
+        assertTrue(vector != null);
+        assertTrue(vector.length == 1);
+
+        //assertTrue();
+      }
+    } catch (IOException e) {
+      assertTrue(false);
+    }
+  }
+
+  public void testKnownSetOfDocuments() throws IOException {
+    String test1 = "eating chocolate in a computer lab"; //6 terms
+    String test2 = "computer in a computer lab"; //5 terms
+    String test3 = "a chocolate lab grows old"; //5 terms
+    String test4 = "eating chocolate with a chocolate lab in an old chocolate colored computer lab"; //13 terms
+    Map<String,Integer> test4Map = new HashMap<String,Integer>();
+    test4Map.put("chocolate", Integer.valueOf(3));
+    test4Map.put("lab", Integer.valueOf(2));
+    test4Map.put("eating", Integer.valueOf(1));
+    test4Map.put("computer", Integer.valueOf(1));
+    test4Map.put("with", Integer.valueOf(1));
+    test4Map.put("a", Integer.valueOf(1));
+    test4Map.put("colored", Integer.valueOf(1));
+    test4Map.put("in", Integer.valueOf(1));
+    test4Map.put("an", Integer.valueOf(1));
+    test4Map.put("computer", Integer.valueOf(1));
+    test4Map.put("old", Integer.valueOf(1));
+
+    Document testDoc1 = new Document();
+    setupDoc(testDoc1, test1);
+    Document testDoc2 = new Document();
+    setupDoc(testDoc2, test2);
+    Document testDoc3 = new Document();
+    setupDoc(testDoc3, test3);
+    Document testDoc4 = new Document();
+    setupDoc(testDoc4, test4);
+
+    Directory dir = newDirectory();
+
+    RandomIndexWriter writer = new RandomIndexWriter(random, dir,
+        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.SIMPLE, true))
+        .setOpenMode(OpenMode.CREATE).setMergePolicy(newLogMergePolicy()));
+    writer.addDocument(testDoc1);
+    writer.addDocument(testDoc2);
+    writer.addDocument(testDoc3);
+    writer.addDocument(testDoc4);
+    IndexReader reader = writer.getReader();
+    writer.close();
+    IndexSearcher knownSearcher = newSearcher(reader);
+    TermEnum termEnum = knownSearcher.reader.terms();
+    TermDocs termDocs = knownSearcher.reader.termDocs();
+    //System.out.println("Terms: " + termEnum.size() + " Orig Len: " + termArray.length);
+
+    //Similarity sim = knownSearcher.getSimilarity();
+    while (termEnum.next() == true)
+    {
+      Term term = termEnum.term();
+      //System.out.println("Term: " + term);
+      termDocs.seek(term);
+      while (termDocs.next())
+      {
+        int docId = termDocs.doc();
+        int freq = termDocs.freq();
+        //System.out.println("Doc Id: " + docId + " freq " + freq);
+        TermFreqVector vector = knownSearcher.reader.getTermFreqVector(docId, "field");
+        //float tf = sim.tf(freq);
+        //float idf = sim.idf(knownSearcher.docFreq(term), knownSearcher.maxDoc());
+        //float qNorm = sim.queryNorm()
+        //This is fine since we don't have stop words
+        //float lNorm = sim.lengthNorm("field", vector.getTerms().length);
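        // For reference, the 3.x DefaultSimilarity would combine the factors
        // being printed above roughly as (assumed formulas, not evaluated here):
        //   tf(freq)   = sqrt(freq)
        //   idf(df, n) = ln(n / (df + 1)) + 1
        //   score      ~ queryNorm * coord * sum( tf * idf^2 * boost * lengthNorm )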
+        //float coord = sim.coord()
+        //System.out.println("TF: " + tf + " IDF: " + idf + " LenNorm: " + lNorm);
+        assertTrue(vector != null);
+        String[] vTerms = vector.getTerms();
+        int [] freqs = vector.getTermFrequencies();
+        for (int i = 0; i < vTerms.length; i++)
+        {
+          if (term.text().equals(vTerms[i]))
+          {
+            assertTrue(freqs[i] == freq);
+          }
+        }
+
+      }
+      //System.out.println("--------");
+    }
+    Query query = new TermQuery(new Term("field", "chocolate"));
+    ScoreDoc[] hits = knownSearcher.search(query, null, 1000).scoreDocs;
+    //doc 3 should be the first hit b/c it is the shortest match
+    assertTrue(hits.length == 3);
+    /*System.out.println("Hit 0: " + hits.id(0) + " Score: " + hits.score(0) + " String: " + hits.doc(0).toString());
+    System.out.println("Explain: " + knownSearcher.explain(query, hits.id(0)));
+    System.out.println("Hit 1: " + hits.id(1) + " Score: " + hits.score(1) + " String: " + hits.doc(1).toString());
+    System.out.println("Explain: " + knownSearcher.explain(query, hits.id(1)));
+    System.out.println("Hit 2: " + hits.id(2) + " Score: " + hits.score(2) + " String: " + hits.doc(2).toString());
+    System.out.println("Explain: " + knownSearcher.explain(query, hits.id(2)));*/
+    assertTrue(hits[0].doc == 2);
+    assertTrue(hits[1].doc == 3);
+    assertTrue(hits[2].doc == 0);
+    TermFreqVector vector = knownSearcher.reader.getTermFreqVector(hits[1].doc, "field");
+    assertTrue(vector != null);
+    //System.out.println("Vector: " + vector);
+    String[] terms = vector.getTerms();
+    int [] freqs = vector.getTermFrequencies();
+    assertTrue(terms != null && terms.length == 10);
+    for (int i = 0; i < terms.length; i++) {
+      String term = terms[i];
+      //System.out.println("Term: " + term);
+      int freq = freqs[i];
+      assertTrue(test4.indexOf(term) != -1);
+      Integer freqInt = test4Map.get(term);
+      assertTrue(freqInt != null);
+      assertTrue(freqInt.intValue() == freq);
+    }
+    SortedTermVectorMapper mapper = new SortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
+    knownSearcher.reader.getTermFreqVector(hits[1].doc, mapper);
+    SortedSet<TermVectorEntry> vectorEntrySet = mapper.getTermVectorEntrySet();
+    assertTrue("mapper.getTermVectorEntrySet() Size: " + vectorEntrySet.size() + " is not: " + 10, vectorEntrySet.size() == 10);
+    TermVectorEntry last = null;
+    for (final TermVectorEntry tve : vectorEntrySet) {
+      if (tve != null && last != null)
+      {
+        assertTrue("terms are not properly sorted", last.getFrequency() >= tve.getFrequency());
+        Integer expectedFreq = test4Map.get(tve.getTerm());
+        //we expect double the expectedFreq, since there are two fields with the exact same text and we are collapsing all fields
+        assertTrue("Frequency is not correct:", tve.getFrequency() == 2*expectedFreq.intValue());
+      }
+      last = tve;
+
+    }
+
+    FieldSortedTermVectorMapper fieldMapper = new FieldSortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
+    knownSearcher.reader.getTermFreqVector(hits[1].doc, fieldMapper);
+    Map<String,SortedSet<TermVectorEntry>> map = fieldMapper.getFieldToTerms();
+    assertTrue("map Size: " + map.size() + " is not: " + 2, map.size() == 2);
+    vectorEntrySet = map.get("field");
+    assertTrue("vectorEntrySet is null and it shouldn't be", vectorEntrySet != null);
+    assertTrue("vectorEntrySet Size: " + vectorEntrySet.size() + " is not: " + 10, vectorEntrySet.size() == 10);
+    knownSearcher.close();
+    reader.close();
+    dir.close();
+  }
+
+  private void setupDoc(Document doc, String text)
+  {
+    doc.add(new Field("field2", text, Field.Store.YES,
+        Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
+    doc.add(new 
Field("field", text, Field.Store.YES, + Field.Index.ANALYZED, Field.TermVector.YES)); + //System.out.println("Document: " + doc); + } + + // Test only a few docs having vectors + public void testRareVectors() throws IOException { + RandomIndexWriter writer = new RandomIndexWriter(random, directory, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.SIMPLE, true)) + .setOpenMode(OpenMode.CREATE)); + writer.w.setInfoStream(VERBOSE ? System.out : null); + if (VERBOSE) { + System.out.println("TEST: now add non-vectors"); + } + for (int i = 0; i < 100; i++) { + Document doc = new Document(); + doc.add(new Field("field", English.intToEnglish(i), + Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO)); + writer.addDocument(doc); + } + if (VERBOSE) { + System.out.println("TEST: now add vectors"); + } + for(int i=0;i<10;i++) { + Document doc = new Document(); + doc.add(new Field("field", English.intToEnglish(100+i), + Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); + writer.addDocument(doc); + } + + if (VERBOSE) { + System.out.println("TEST: now getReader"); + } + IndexReader reader = writer.getReader(); + writer.close(); + searcher = newSearcher(reader); + + Query query = new TermQuery(new Term("field", "hundred")); + ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(10, hits.length); + for (int i = 0; i < hits.length; i++) { + + TermFreqVector [] vector = searcher.reader.getTermFreqVectors(hits[i].doc); + assertTrue(vector != null); + assertTrue(vector.length == 1); + } + reader.close(); + } + + + // In a single doc, for the same field, mix the term + // vectors up + public void testMixedVectrosVectors() throws IOException { + RandomIndexWriter writer = new RandomIndexWriter(random, directory, + newIndexWriterConfig(TEST_VERSION_CURRENT, + new MockAnalyzer(random, MockTokenizer.SIMPLE, true)).setOpenMode(OpenMode.CREATE)); + Document doc = new Document(); + doc.add(new Field("field", "one", + Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO)); + doc.add(new Field("field", "one", + Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES)); + doc.add(new Field("field", "one", + Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS)); + doc.add(new Field("field", "one", + Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS)); + doc.add(new Field("field", "one", + Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); + writer.addDocument(doc); + IndexReader reader = writer.getReader(); + writer.close(); + + searcher = newSearcher(reader); + + Query query = new TermQuery(new Term("field", "one")); + ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals(1, hits.length); + + TermFreqVector [] vector = searcher.reader.getTermFreqVectors(hits[0].doc); + assertTrue(vector != null); + assertTrue(vector.length == 1); + TermPositionVector tfv = (TermPositionVector) vector[0]; + assertTrue(tfv.getField().equals("field")); + String[] terms = tfv.getTerms(); + assertEquals(1, terms.length); + assertEquals(terms[0], "one"); + assertEquals(5, tfv.getTermFrequencies()[0]); + + int[] positions = tfv.getTermPositions(0); + assertEquals(5, positions.length); + for(int i=0;i<5;i++) + assertEquals(i, positions[i]); + TermVectorOffsetInfo[] offsets = tfv.getOffsets(0); + assertEquals(5, offsets.length); + for(int i=0;i<5;i++) { + assertEquals(4*i, offsets[i].getStartOffset()); + assertEquals(4*i+3, 
offsets[i].getEndOffset()); + } + reader.close(); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestThreadSafe.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestThreadSafe.java new file mode 100755 index 0000000..110bb94 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestThreadSafe.java @@ -0,0 +1,154 @@ +package org.apache.lucene.search; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.store.Directory; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.*; + +import java.util.Random; +import java.util.List; +import java.util.concurrent.atomic.AtomicBoolean; +import java.io.IOException; + +public class TestThreadSafe extends LuceneTestCase { + Directory dir1; + + IndexReader ir1; + + class Thr extends Thread { + final int iter; + final Random rand; + final AtomicBoolean failed; + + // pass in random in case we want to make things reproducable + public Thr(int iter, Random rand, AtomicBoolean failed) { + this.iter = iter; + this.rand = rand; + this.failed = failed; + } + + @Override + public void run() { + try { + for (int i=0; i fields = doc.getFields(); + for (final Fieldable f : fields ) { + validateField(f); + } + + } + + } + + + void validateField(Fieldable f) { + String val = f.stringValue(); + if (!val.startsWith("^") || !val.endsWith("$")) { + throw new RuntimeException("Invalid field:" + f.toString() + " val=" +val); + } + } + + String[] words = "now is the time for all good men to come to the aid of their country".split(" "); + + void buildDir(Directory dir, int nDocs, int maxFields, int maxFieldLen) throws IOException { + IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(10)); + for (int j=0; j2000). + // but this is not a real failure, just noise. 
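// Aside: a minimal sketch of the API under test here, TimeLimitingCollector
// (illustrative only; an IndexSearcher `searcher` and a Query `query` are
// assumed to exist):
//
//   Collector top = TopScoreDocCollector.create(10, true);
//   TimeLimitingCollector tlc = new TimeLimitingCollector(top, 1000); // ~1s budget
//   tlc.setGreedy(false); // stop before collecting the doc that exceeds the budget
//   try {
//     searcher.search(query, tlc);
//   } catch (TimeLimitingCollector.TimeExceededException e) {
//     // e.getTimeAllowed(), e.getTimeElapsed() and e.getLastDocCollected()
//     // report the budget, the time actually spent, and where collection stopped
//   }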
+ private static final double MULTI_THREAD_SLACK = 7; + + private static final int N_DOCS = 3000; + private static final int N_THREADS = 50; + + private Searcher searcher; + private Directory directory; + private IndexReader reader; + + private final String FIELD_NAME = "body"; + private Query query; + + /** + * initializes searcher with a document set + */ + @Override + public void setUp() throws Exception { + super.setUp(); + final String docText[] = { + "docThatNeverMatchesSoWeCanRequireLastDocCollectedToBeGreaterThanZero", + "one blah three", + "one foo three multiOne", + "one foobar three multiThree", + "blueberry pancakes", + "blueberry pie", + "blueberry strudel", + "blueberry pizza", + }; + directory = newDirectory(); + RandomIndexWriter iw = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); + + for (int i=0; i 0!", exceptionDoc > 0 ); + if (greedy) { + assertTrue("greedy="+greedy+" exceptionDoc="+exceptionDoc+" != lastCollected="+lastCollected, exceptionDoc==lastCollected); + assertTrue("greedy, but no hits found!", myHc.hitCount() > 0 ); + } else { + assertTrue("greedy="+greedy+" exceptionDoc="+exceptionDoc+" not > lastCollected="+lastCollected, exceptionDoc>lastCollected); + } + + // verify that elapsed time at exception is within valid limits + assertEquals( timoutException.getTimeAllowed(), TIME_ALLOWED); + // a) Not too early + assertTrue ( "elapsed="+timoutException.getTimeElapsed()+" <= (allowed-resolution)="+(TIME_ALLOWED-TimeLimitingCollector.getResolution()), + timoutException.getTimeElapsed() > TIME_ALLOWED-TimeLimitingCollector.getResolution()); + // b) Not too late. + // This part is problematic in a busy test system, so we just print a warning. + // We already verified that a timeout occurred, we just can't be picky about how long it took. + if (timoutException.getTimeElapsed() > maxTime(multiThreaded)) { + System.out.println("Informative: timeout exceeded (no action required: most probably just " + + " because the test machine is slower than usual): " + + "lastDoc="+exceptionDoc+ + " ,&& allowed="+timoutException.getTimeAllowed() + + " ,&& elapsed="+timoutException.getTimeElapsed() + + " >= " + maxTimeStr(multiThreaded)); + } + } + + private long maxTime(boolean multiThreaded) { + long res = 2 * TimeLimitingCollector.getResolution() + TIME_ALLOWED + SLOW_DOWN; // some slack for less noise in this test + if (multiThreaded) { + res *= MULTI_THREAD_SLACK; // larger slack + } + return res; + } + + private String maxTimeStr(boolean multiThreaded) { + String s = + "( " + + "2*resolution + TIME_ALLOWED + SLOW_DOWN = " + + "2*" + TimeLimitingCollector.getResolution() + " + " + TIME_ALLOWED + " + " + SLOW_DOWN + + ")"; + if (multiThreaded) { + s = MULTI_THREAD_SLACK + " * "+s; + } + return maxTime(multiThreaded) + " = " + s; + } + + /** + * Test timeout behavior when resolution is modified. 
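 * The resolution is the period at which TimeLimitingCollector's timer
 * thread advances its clock, so a timeout can only be detected to within
 * roughly one resolution step; e.g. (illustrative):
 *   TimeLimitingCollector.setResolution(5); // check about every 5 ms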
+ */ + public void testModifyResolution() { + try { + // increase and test + long resolution = 20 * TimeLimitingCollector.DEFAULT_RESOLUTION; //400 + TimeLimitingCollector.setResolution(resolution); + assertEquals(resolution, TimeLimitingCollector.getResolution()); + doTestTimeout(false,true); + // decrease much and test + resolution = 5; + TimeLimitingCollector.setResolution(resolution); + assertEquals(resolution, TimeLimitingCollector.getResolution()); + doTestTimeout(false,true); + // return to default and test + resolution = TimeLimitingCollector.DEFAULT_RESOLUTION; + TimeLimitingCollector.setResolution(resolution); + assertEquals(resolution, TimeLimitingCollector.getResolution()); + doTestTimeout(false,true); + } finally { + TimeLimitingCollector.setResolution(TimeLimitingCollector.DEFAULT_RESOLUTION); + } + } + + /** + * Test correctness with multiple searching threads. + */ + public void testSearchMultiThreaded() throws Exception { + doTestMultiThreads(false); + } + + /** + * Test correctness with multiple searching threads. + */ + public void testTimeoutMultiThreaded() throws Exception { + doTestMultiThreads(true); + } + + private void doTestMultiThreads(final boolean withTimeout) throws Exception { + Thread [] threadArray = new Thread[N_THREADS]; + final BitSet success = new BitSet(N_THREADS); + for( int i = 0; i < threadArray.length; ++i ) { + final int num = i; + threadArray[num] = new Thread() { + @Override + public void run() { + if (withTimeout) { + doTestTimeout(true,true); + } else { + doTestSearch(); + } + synchronized(success) { + success.set(num); + } + } + }; + } + for( int i = 0; i < threadArray.length; ++i ) { + threadArray[i].start(); + } + for( int i = 0; i < threadArray.length; ++i ) { + threadArray[i].join(); + } + assertEquals("some threads failed!", N_THREADS,success.cardinality()); + } + + // counting collector that can slow down at collect(). 
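// Aside: a Collector receives segment-relative doc ids; setNextReader(reader,
// docBase) announces each segment's offset, and docBase + doc yields the
// index-wide id, as in the hit collector below. The same contract in its
// smallest form (an illustrative sketch, not used by the tests):
private static final class MinimalCollector extends Collector {
  private int docBase;
  private int count;
  @Override public void setScorer(Scorer scorer) { /* scores not needed */ }
  @Override public void collect(int doc) { count++; /* index-wide id: docBase + doc */ }
  @Override public void setNextReader(IndexReader reader, int base) { docBase = base; }
  @Override public boolean acceptsDocsOutOfOrder() { return true; }
  public int count() { return count; }
}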
+ private class MyHitCollector extends Collector { + private final BitSet bits = new BitSet(); + private int slowdown = 0; + private int lastDocCollected = -1; + private int docBase = 0; + + /** + * amount of time to wait on each collect to simulate a long iteration + */ + public void setSlowDown( int milliseconds ) { + slowdown = milliseconds; + } + + public int hitCount() { + return bits.cardinality(); + } + + public int getLastDocCollected() { + return lastDocCollected; + } + + @Override + public void setScorer(Scorer scorer) throws IOException { + // scorer is not needed + } + + @Override + public void collect(final int doc) throws IOException { + int docId = doc + docBase; + if( slowdown > 0 ) { + try { + Thread.sleep(slowdown); + } catch (InterruptedException ie) { + throw new ThreadInterruptedException(ie); + } + } + assert docId >= 0: " base=" + docBase + " doc=" + doc; + bits.set( docId ); + lastDocCollected = docId; + } + + @Override + public void setNextReader(IndexReader reader, int base) { + docBase = base; + } + + @Override + public boolean acceptsDocsOutOfOrder() { + return false; + } + + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestTopDocsCollector.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestTopDocsCollector.java new file mode 100644 index 0000000..c1e7ab9 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestTopDocsCollector.java @@ -0,0 +1,211 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; + +import org.apache.lucene.document.Document; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; + +public class TestTopDocsCollector extends LuceneTestCase { + + private static final class MyTopsDocCollector extends TopDocsCollector { + + private int idx = 0; + private int base = 0; + + public MyTopsDocCollector(int size) { + super(new HitQueue(size, false)); + } + + @Override + protected TopDocs newTopDocs(ScoreDoc[] results, int start) { + if (results == null) { + return EMPTY_TOPDOCS; + } + + float maxScore = Float.NaN; + if (start == 0) { + maxScore = results[0].score; + } else { + for (int i = pq.size(); i > 1; i--) { pq.pop(); } + maxScore = pq.pop().score; + } + + return new TopDocs(totalHits, results, maxScore); + } + + @Override + public void collect(int doc) throws IOException { + ++totalHits; + pq.insertWithOverflow(new ScoreDoc(doc + base, scores[idx++])); + } + + @Override + public void setNextReader(IndexReader reader, int docBase) + throws IOException { + base = docBase; + } + + @Override + public void setScorer(Scorer scorer) throws IOException { + // Don't do anything. Assign scores in random + } + + @Override + public boolean acceptsDocsOutOfOrder() { + return true; + } + + } + + // Scores array to be used by MyTopDocsCollector. If it is changed, MAX_SCORE + // must also change. + private static final float[] scores = new float[] { + 0.7767749f, 1.7839992f, 8.9925785f, 7.9608946f, 0.07948637f, 2.6356435f, + 7.4950366f, 7.1490803f, 8.108544f, 4.961808f, 2.2423935f, 7.285586f, 4.6699767f, + 2.9655676f, 6.953706f, 5.383931f, 6.9916306f, 8.365894f, 7.888485f, 8.723962f, + 3.1796896f, 0.39971232f, 1.3077754f, 6.8489285f, 9.17561f, 5.060466f, 7.9793315f, + 8.601509f, 4.1858315f, 0.28146625f + }; + + private static final float MAX_SCORE = 9.17561f; + + private Directory dir; + private IndexReader reader; + + private TopDocsCollector doSearch(int numResults) throws IOException { + Query q = new MatchAllDocsQuery(); + IndexSearcher searcher = newSearcher(reader); + TopDocsCollector tdc = new MyTopsDocCollector(numResults); + searcher.search(q, tdc); + searcher.close(); + return tdc; + } + + @Override + public void setUp() throws Exception { + super.setUp(); + + // populate an index with 30 documents, this should be enough for the test. + // The documents have no content - the test uses MatchAllDocsQuery(). 
+ dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, dir); + for (int i = 0; i < 30; i++) { + writer.addDocument(new Document()); + } + reader = writer.getReader(); + writer.close(); + } + + @Override + public void tearDown() throws Exception { + reader.close(); + dir.close(); + dir = null; + super.tearDown(); + } + + public void testInvalidArguments() throws Exception { + int numResults = 5; + TopDocsCollector tdc = doSearch(numResults); + + // start < 0 + assertEquals(0, tdc.topDocs(-1).scoreDocs.length); + + // start > pq.size() + assertEquals(0, tdc.topDocs(numResults + 1).scoreDocs.length); + + // start == pq.size() + assertEquals(0, tdc.topDocs(numResults).scoreDocs.length); + + // howMany < 0 + assertEquals(0, tdc.topDocs(0, -1).scoreDocs.length); + + // howMany == 0 + assertEquals(0, tdc.topDocs(0, 0).scoreDocs.length); + + } + + public void testZeroResults() throws Exception { + TopDocsCollector tdc = new MyTopsDocCollector(5); + assertEquals(0, tdc.topDocs(0, 1).scoreDocs.length); + } + + public void testFirstResultsPage() throws Exception { + TopDocsCollector tdc = doSearch(15); + assertEquals(10, tdc.topDocs(0, 10).scoreDocs.length); + } + + public void testSecondResultsPages() throws Exception { + TopDocsCollector tdc = doSearch(15); + // ask for more results than are available + assertEquals(5, tdc.topDocs(10, 10).scoreDocs.length); + + // ask for 5 results (exactly what there should be + tdc = doSearch(15); + assertEquals(5, tdc.topDocs(10, 5).scoreDocs.length); + + // ask for less results than there are + tdc = doSearch(15); + assertEquals(4, tdc.topDocs(10, 4).scoreDocs.length); + } + + public void testGetAllResults() throws Exception { + TopDocsCollector tdc = doSearch(15); + assertEquals(15, tdc.topDocs().scoreDocs.length); + } + + public void testGetResultsFromStart() throws Exception { + TopDocsCollector tdc = doSearch(15); + // should bring all results + assertEquals(15, tdc.topDocs(0).scoreDocs.length); + + tdc = doSearch(15); + // get the last 5 only. + assertEquals(5, tdc.topDocs(10).scoreDocs.length); + } + + public void testMaxScore() throws Exception { + // ask for all results + TopDocsCollector tdc = doSearch(15); + TopDocs td = tdc.topDocs(); + assertEquals(MAX_SCORE, td.getMaxScore(), 0f); + + // ask for 5 last results + tdc = doSearch(15); + td = tdc.topDocs(10); + assertEquals(MAX_SCORE, td.getMaxScore(), 0f); + } + + // This does not test the PQ's correctness, but whether topDocs() + // implementations return the results in decreasing score order. + public void testResultsOrder() throws Exception { + TopDocsCollector tdc = doSearch(15); + ScoreDoc[] sd = tdc.topDocs().scoreDocs; + + assertEquals(MAX_SCORE, sd[0].score, 0f); + for (int i = 1; i < sd.length; i++) { + assertTrue(sd[i - 1].score >= sd[i].score); + } + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestTopDocsMerge.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestTopDocsMerge.java new file mode 100644 index 0000000..63cc50b --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestTopDocsMerge.java @@ -0,0 +1,273 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.NumericField; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.ReaderUtil; +import org.apache.lucene.util._TestUtil; + +public class TestTopDocsMerge extends LuceneTestCase { + + private static class ShardSearcher { + private final IndexSearcher subSearcher; + + public ShardSearcher(IndexReader subReader) { + this.subSearcher = new IndexSearcher(subReader); + } + + public void search(Weight weight, Collector collector) throws IOException { + subSearcher.search(weight, null, collector); + } + + public TopDocs search(Weight weight, int topN) throws IOException { + return subSearcher.search(weight, null, topN); + } + + @Override + public String toString() { + return "ShardSearcher(" + subSearcher + ")"; + } + } + + public void testSort() throws Exception { + + IndexReader reader = null; + Directory dir = null; + + final int numDocs = atLeast(1000); + //final int numDocs = atLeast(50); + + final String[] tokens = new String[] {"a", "b", "c", "d", "e"}; + + if (VERBOSE) { + System.out.println("TEST: make index"); + } + + { + dir = newDirectory(); + final RandomIndexWriter w = new RandomIndexWriter(random, dir); + // w.setDoRandomOptimize(false); + + // w.w.getConfig().setMaxBufferedDocs(atLeast(100)); + + final String[] content = new String[atLeast(20)]; + + for(int contentIDX=0;contentIDX docFieldLocs = new ArrayList(); + if (hits instanceof TopFieldDocs) { + TopFieldDocs fieldHits = (TopFieldDocs) hits; + for(int fieldIDX=0;fieldIDX tdc = TopScoreDocCollector.create(3, inOrder[i]); + assertEquals("org.apache.lucene.search.TopScoreDocCollector$" + actualTSDCClass[i], tdc.getClass().getName()); + + searcher.search(new MatchAllDocsQuery(), tdc); + + ScoreDoc[] sd = tdc.topDocs().scoreDocs; + assertEquals(3, sd.length); + for (int j = 0; j < sd.length; j++) { + assertEquals("expected doc Id " + j + " found " + sd[j].doc, j, sd[j].doc); + } + } + writer.close(); + searcher.close(); + reader.close(); + dir.close(); + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestWildcard.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestWildcard.java new file mode 100644 index 0000000..e211257 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestWildcard.java @@ -0,0 +1,342 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.Field.Index; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.queryParser.QueryParser; + +import java.io.IOException; + +/** + * TestWildcard tests the '*' and '?' wildcard characters. + */ +public class TestWildcard + extends LuceneTestCase { + + @Override + public void setUp() throws Exception { + super.setUp(); + } + + public void testEquals() { + WildcardQuery wq1 = new WildcardQuery(new Term("field", "b*a")); + WildcardQuery wq2 = new WildcardQuery(new Term("field", "b*a")); + WildcardQuery wq3 = new WildcardQuery(new Term("field", "b*a")); + + // reflexive? + assertEquals(wq1, wq2); + assertEquals(wq2, wq1); + + // transitive? + assertEquals(wq2, wq3); + assertEquals(wq1, wq3); + + assertFalse(wq1.equals(null)); + + FuzzyQuery fq = new FuzzyQuery(new Term("field", "b*a")); + assertFalse(wq1.equals(fq)); + assertFalse(fq.equals(wq1)); + } + + /** + * Tests if a WildcardQuery that has no wildcard in the term is rewritten to a single + * TermQuery. The boost should be preserved, and the rewrite should return + * a ConstantScoreQuery if the WildcardQuery had a ConstantScore rewriteMethod. 
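 * For example, mirroring the assertions in the test body (searcher as
 * created below):
 *   MultiTermQuery wq = new WildcardQuery(new Term("field", "nowildcard"));
 *   wq.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
 *   Query q = searcher.rewrite(wq); // a TermQuery carrying wq's boost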
+ */ + public void testTermWithoutWildcard() throws IOException { + Directory indexStore = getIndexStore("field", new String[]{"nowildcard", "nowildcardx"}); + IndexSearcher searcher = new IndexSearcher(indexStore, true); + + MultiTermQuery wq = new WildcardQuery(new Term("field", "nowildcard")); + assertMatches(searcher, wq, 1); + + wq.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); + wq.setBoost(0.1F); + Query q = searcher.rewrite(wq); + assertTrue(q instanceof TermQuery); + assertEquals(q.getBoost(), wq.getBoost()); + + wq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); + wq.setBoost(0.2F); + q = searcher.rewrite(wq); + assertTrue(q instanceof ConstantScoreQuery); + assertEquals(q.getBoost(), wq.getBoost()); + + wq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT); + wq.setBoost(0.3F); + q = searcher.rewrite(wq); + assertTrue(q instanceof ConstantScoreQuery); + assertEquals(q.getBoost(), wq.getBoost()); + + wq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE); + wq.setBoost(0.4F); + q = searcher.rewrite(wq); + assertTrue(q instanceof ConstantScoreQuery); + assertEquals(q.getBoost(), wq.getBoost()); + searcher.close(); + indexStore.close(); + } + + /** + * Tests if a WildcardQuery with an empty term is rewritten to an empty BooleanQuery + */ + public void testEmptyTerm() throws IOException { + Directory indexStore = getIndexStore("field", new String[]{"nowildcard", "nowildcardx"}); + IndexSearcher searcher = new IndexSearcher(indexStore, true); + + MultiTermQuery wq = new WildcardQuery(new Term("field", "")); + wq.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); + assertMatches(searcher, wq, 0); + Query q = searcher.rewrite(wq); + assertTrue(q instanceof BooleanQuery); + assertEquals(0, ((BooleanQuery) q).clauses().size()); + searcher.close(); + indexStore.close(); + } + + /** + * Tests if a WildcardQuery that has only a trailing * in the term is + * rewritten to a single PrefixQuery. The boost and rewriteMethod should be + * preserved. + */ + public void testPrefixTerm() throws IOException { + Directory indexStore = getIndexStore("field", new String[]{"prefix", "prefixx"}); + IndexSearcher searcher = new IndexSearcher(indexStore, true); + + MultiTermQuery wq = new WildcardQuery(new Term("field", "prefix*")); + assertMatches(searcher, wq, 2); + assertTrue(wq.getEnum(searcher.getIndexReader()) instanceof PrefixTermEnum); + + searcher.close(); + indexStore.close(); + } + + /** + * Tests Wildcard queries with an asterisk. 
+ */ + public void testAsterisk() + throws IOException { + Directory indexStore = getIndexStore("body", new String[] + {"metal", "metals"}); + IndexSearcher searcher = new IndexSearcher(indexStore, true); + Query query1 = new TermQuery(new Term("body", "metal")); + Query query2 = new WildcardQuery(new Term("body", "metal*")); + Query query3 = new WildcardQuery(new Term("body", "m*tal")); + Query query4 = new WildcardQuery(new Term("body", "m*tal*")); + Query query5 = new WildcardQuery(new Term("body", "m*tals")); + + BooleanQuery query6 = new BooleanQuery(); + query6.add(query5, BooleanClause.Occur.SHOULD); + + BooleanQuery query7 = new BooleanQuery(); + query7.add(query3, BooleanClause.Occur.SHOULD); + query7.add(query5, BooleanClause.Occur.SHOULD); + + // Queries do not automatically lower-case search terms: + Query query8 = new WildcardQuery(new Term("body", "M*tal*")); + + assertMatches(searcher, query1, 1); + assertMatches(searcher, query2, 2); + assertMatches(searcher, query3, 1); + assertMatches(searcher, query4, 2); + assertMatches(searcher, query5, 1); + assertMatches(searcher, query6, 1); + assertMatches(searcher, query7, 2); + assertMatches(searcher, query8, 0); + assertMatches(searcher, new WildcardQuery(new Term("body", "*tall")), 0); + assertMatches(searcher, new WildcardQuery(new Term("body", "*tal")), 1); + assertMatches(searcher, new WildcardQuery(new Term("body", "*tal*")), 2); + searcher.close(); + indexStore.close(); + } + + /** + * LUCENE-2620 + */ + public void testLotsOfAsterisks() + throws IOException { + Directory indexStore = getIndexStore("body", new String[] + {"metal", "metals"}); + IndexSearcher searcher = new IndexSearcher(indexStore, true); + StringBuilder term = new StringBuilder(); + term.append("m"); + for (int i = 0; i < 512; i++) + term.append("*"); + term.append("tal"); + Query query3 = new WildcardQuery(new Term("body", term.toString())); + + assertMatches(searcher, query3, 1); + searcher.close(); + indexStore.close(); + } + + /** + * Tests Wildcard queries with a question mark. + * + * @throws IOException if an error occurs + */ + public void testQuestionmark() + throws IOException { + Directory indexStore = getIndexStore("body", new String[] + {"metal", "metals", "mXtals", "mXtXls"}); + IndexSearcher searcher = new IndexSearcher(indexStore, true); + Query query1 = new WildcardQuery(new Term("body", "m?tal")); + Query query2 = new WildcardQuery(new Term("body", "metal?")); + Query query3 = new WildcardQuery(new Term("body", "metals?")); + Query query4 = new WildcardQuery(new Term("body", "m?t?ls")); + Query query5 = new WildcardQuery(new Term("body", "M?t?ls")); + Query query6 = new WildcardQuery(new Term("body", "meta??")); + + assertMatches(searcher, query1, 1); + assertMatches(searcher, query2, 1); + assertMatches(searcher, query3, 0); + assertMatches(searcher, query4, 3); + assertMatches(searcher, query5, 0); + assertMatches(searcher, query6, 1); // Query: 'meta??' 
matches 'metals' not 'metal' + searcher.close(); + indexStore.close(); + } + + private Directory getIndexStore(String field, String[] contents) + throws IOException { + Directory indexStore = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, indexStore); + for (int i = 0; i < contents.length; ++i) { + Document doc = new Document(); + doc.add(newField(field, contents[i], Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(doc); + } + writer.close(); + + return indexStore; + } + + private void assertMatches(IndexSearcher searcher, Query q, int expectedMatches) + throws IOException { + ScoreDoc[] result = searcher.search(q, null, 1000).scoreDocs; + assertEquals(expectedMatches, result.length); + } + + /** + * Test that wild card queries are parsed to the correct type and are searched correctly. + * This test looks at both parsing and execution of wildcard queries. + * Although placed here, it also tests prefix queries, verifying that + * prefix queries are not parsed into wild card queries, and viceversa. + * @throws Exception + */ + public void testParsingAndSearching() throws Exception { + String field = "content"; + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, field, new MockAnalyzer(random)); + qp.setAllowLeadingWildcard(true); + String docs[] = { + "\\ abcdefg1", + "\\79 hijklmn1", + "\\\\ opqrstu1", + }; + // queries that should find all docs + String matchAll[] = { + "*", "*1", "**1", "*?", "*?1", "?*1", "**", "***", "\\\\*" + }; + // queries that should find no docs + String matchNone[] = { + "a*h", "a?h", "*a*h", "?a", "a?", + }; + // queries that should be parsed to prefix queries + String matchOneDocPrefix[][] = { + {"a*", "ab*", "abc*", }, // these should find only doc 0 + {"h*", "hi*", "hij*", "\\\\7*"}, // these should find only doc 1 + {"o*", "op*", "opq*", "\\\\\\\\*"}, // these should find only doc 2 + }; + // queries that should be parsed to wildcard queries + String matchOneDocWild[][] = { + {"*a*", "*ab*", "*abc**", "ab*e*", "*g?", "*f?1", "abc**"}, // these should find only doc 0 + {"*h*", "*hi*", "*hij**", "hi*k*", "*n?", "*m?1", "hij**"}, // these should find only doc 1 + {"*o*", "*op*", "*opq**", "op*q*", "*u?", "*t?1", "opq**"}, // these should find only doc 2 + }; + + // prepare the index + Directory dir = newDirectory(); + RandomIndexWriter iw = new RandomIndexWriter(random, dir, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)) + .setMergePolicy(newLogMergePolicy())); + for (int i = 0; i < docs.length; i++) { + Document doc = new Document(); + doc.add(newField(field,docs[i],Store.NO,Index.ANALYZED)); + iw.addDocument(doc); + } + iw.close(); + + IndexSearcher searcher = new IndexSearcher(dir, true); + + // test queries that must find all + for (int i = 0; i < matchAll.length; i++) { + String qtxt = matchAll[i]; + Query q = qp.parse(qtxt); + if (VERBOSE) System.out.println("matchAll: qtxt="+qtxt+" q="+q+" "+q.getClass().getName()); + ScoreDoc[] hits = searcher.search(q, null, 1000).scoreDocs; + assertEquals(docs.length,hits.length); + } + + // test queries that must find none + for (int i = 0; i < matchNone.length; i++) { + String qtxt = matchNone[i]; + Query q = qp.parse(qtxt); + if (VERBOSE) System.out.println("matchNone: qtxt="+qtxt+" q="+q+" "+q.getClass().getName()); + ScoreDoc[] hits = searcher.search(q, null, 1000).scoreDocs; + assertEquals(0,hits.length); + } + + // test queries that must be prefix queries and must find only one doc + for (int i = 0; i < matchOneDocPrefix.length; i++) { + for 
(int j = 0; j < matchOneDocPrefix[i].length; j++) { + String qtxt = matchOneDocPrefix[i][j]; + Query q = qp.parse(qtxt); + if (VERBOSE) System.out.println("match 1 prefix: doc="+docs[i]+" qtxt="+qtxt+" q="+q+" "+q.getClass().getName()); + assertEquals(PrefixQuery.class, q.getClass()); + ScoreDoc[] hits = searcher.search(q, null, 1000).scoreDocs; + assertEquals(1,hits.length); + assertEquals(i,hits[0].doc); + } + } + + // test queries that must be wildcard queries and must find only one doc + for (int i = 0; i < matchOneDocPrefix.length; i++) { + for (int j = 0; j < matchOneDocWild[i].length; j++) { + String qtxt = matchOneDocWild[i][j]; + Query q = qp.parse(qtxt); + if (VERBOSE) System.out.println("match 1 wild: doc="+docs[i]+" qtxt="+qtxt+" q="+q+" "+q.getClass().getName()); + assertEquals(WildcardQuery.class, q.getClass()); + ScoreDoc[] hits = searcher.search(q, null, 1000).scoreDocs; + assertEquals(1,hits.length); + assertEquals(i,hits[0].doc); + } + } + + searcher.close(); + dir.close(); + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestWildcardRandom.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestWildcardRandom.java new file mode 100644 index 0000000..ae91efd --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/TestWildcardRandom.java @@ -0,0 +1,139 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.text.DecimalFormat; +import java.text.DecimalFormatSymbols; +import java.text.NumberFormat; +import java.util.Locale; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; + +/** + * Create an index with terms from 000-999. + * Generates random wildcards according to patterns, + * and validates the correct number of hits are returned. 
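 * Each 'N' in a pattern is filled with a random digit, so e.g. "N?N" might
 * become "3?7", which must match exactly 10 of the indexed terms 000-999,
 * and "??N" exactly 100.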
+ */ +public class TestWildcardRandom extends LuceneTestCase { + private IndexSearcher searcher; + private IndexReader reader; + private Directory dir; + + @Override + public void setUp() throws Exception { + super.setUp(); + dir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, dir, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)) + .setMaxBufferedDocs(_TestUtil.nextInt(random, 50, 1000))); + + Document doc = new Document(); + Field bogus1 = newField("bogus1", "", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS); + Field field = newField("field", "", Field.Store.NO, Field.Index.ANALYZED_NO_NORMS); + Field bogus2 = newField("zbogus2", "", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS); + doc.add(field); + doc.add(bogus1); + doc.add(bogus2); + + NumberFormat df = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ENGLISH)); + for (int i = 0; i < 1000; i++) { + field.setValue(df.format(i)); + bogus1.setValue(_TestUtil.randomUnicodeString(random, 10)); + bogus2.setValue(_TestUtil.randomUnicodeString(random, 10)); + writer.addDocument(doc); + } + + reader = writer.getReader(); + searcher = newSearcher(reader); + writer.close(); + } + + private char N() { + return (char) (0x30 + random.nextInt(10)); + } + + private String fillPattern(String wildcardPattern) { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < wildcardPattern.length(); i++) { + switch(wildcardPattern.charAt(i)) { + case 'N': + sb.append(N()); + break; + default: + sb.append(wildcardPattern.charAt(i)); + } + } + return sb.toString(); + } + + private void assertPatternHits(String pattern, int numHits) throws Exception { + // TODO: run with different rewrites + Query wq = new WildcardQuery(new Term("field", fillPattern(pattern))); + TopDocs docs = searcher.search(wq, 25); + assertEquals("Incorrect hits for pattern: " + pattern, numHits, docs.totalHits); + } + + @Override + public void tearDown() throws Exception { + searcher.close(); + reader.close(); + dir.close(); + super.tearDown(); + } + + public void testWildcards() throws Exception {; + int num = atLeast(1); + for (int i = 0; i < num; i++) { + assertPatternHits("NNN", 1); + assertPatternHits("?NN", 10); + assertPatternHits("N?N", 10); + assertPatternHits("NN?", 10); + } + + for (int i = 0; i < num; i++) { + assertPatternHits("??N", 100); + assertPatternHits("N??", 100); + assertPatternHits("???", 1000); + + assertPatternHits("NN*", 10); + assertPatternHits("N*", 100); + assertPatternHits("*", 1000); + + assertPatternHits("*NN", 10); + assertPatternHits("*N", 100); + + assertPatternHits("N*N", 10); + + // combo of ? and * operators + assertPatternHits("?N*", 100); + assertPatternHits("N?*", 100); + + assertPatternHits("*N?", 100); + assertPatternHits("*??", 1000); + assertPatternHits("*?N", 100); + } + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/function/FunctionTestSetup.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/function/FunctionTestSetup.java new file mode 100755 index 0000000..2b5d017 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/function/FunctionTestSetup.java @@ -0,0 +1,167 @@ +package org.apache.lucene.search.function; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.Fieldable; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; +import org.junit.AfterClass; +import org.junit.Ignore; + +/** + * Setup for function tests + */ +@Ignore +public abstract class FunctionTestSetup extends LuceneTestCase { + + /** + * Actual score computation order is slightly different than assumptios + * this allows for a small amount of variation + */ + protected static float TEST_SCORE_TOLERANCE_DELTA = 0.001f; + + protected static final int N_DOCS = 17; // select a primary number > 2 + + protected static final String ID_FIELD = "id"; + protected static final String TEXT_FIELD = "text"; + protected static final String INT_FIELD = "iii"; + protected static final String FLOAT_FIELD = "fff"; + + private static final String DOC_TEXT_LINES[] = { + "Well, this is just some plain text we use for creating the ", + "test documents. It used to be a text from an online collection ", + "devoted to first aid, but if there was there an (online) lawyers ", + "first aid collection with legal advices, \"it\" might have quite ", + "probably advised one not to include \"it\"'s text or the text of ", + "any other online collection in one's code, unless one has money ", + "that one don't need and one is happy to donate for lawyers ", + "charity. Anyhow at some point, rechecking the usage of this text, ", + "it became uncertain that this text is free to use, because ", + "the web site in the disclaimer of he eBook containing that text ", + "was not responding anymore, and at the same time, in projGut, ", + "searching for first aid no longer found that eBook as well. ", + "So here we are, with a perhaps much less interesting ", + "text for the test, but oh much much safer. ", + }; + + protected static Directory dir; + protected static Analyzer anlzr; + + @AfterClass + public static void afterClassFunctionTestSetup() throws Exception { + dir.close(); + dir = null; + anlzr = null; + } + + protected static void createIndex(boolean doMultiSegment) throws Exception { + if (VERBOSE) { + System.out.println("TEST: setUp"); + } + // prepare a small index with just a few documents. + dir = newDirectory(); + anlzr = new MockAnalyzer(random); + IndexWriterConfig iwc = newIndexWriterConfig( TEST_VERSION_CURRENT, anlzr).setMergePolicy(newLogMergePolicy()); + RandomIndexWriter iw = new RandomIndexWriter(random, dir, iwc); + if (doMultiSegment) { + iw.w.setMaxBufferedDocs(_TestUtil.nextInt(random, 2, 7)); + } + + iw.w.setInfoStream(VERBOSE ? 
System.out : null); + // add docs not exactly in natural ID order, to verify we do check the order of docs by scores + int remaining = N_DOCS; + boolean done[] = new boolean[N_DOCS]; + int i = 0; + while (remaining > 0) { + if (done[i]) { + throw new Exception("to set this test correctly N_DOCS=" + N_DOCS + " must be primary and greater than 2!"); + } + addDoc(iw, i); + done[i] = true; + i = (i + 4) % N_DOCS; + remaining --; + } + if (!doMultiSegment) { + if (VERBOSE) { + System.out.println("TEST: setUp optimize"); + } + iw.optimize(); + } + iw.close(); + if (VERBOSE) { + System.out.println("TEST: setUp done close"); + } + } + + private static void addDoc(RandomIndexWriter iw, int i) throws Exception { + Document d = new Document(); + Fieldable f; + int scoreAndID = i + 1; + + f = newField(ID_FIELD, id2String(scoreAndID), Field.Store.YES, Field.Index.NOT_ANALYZED); // for debug purposes + f.setOmitNorms(true); + d.add(f); + + f = newField(TEXT_FIELD, "text of doc" + scoreAndID + textLine(i), Field.Store.NO, Field.Index.ANALYZED); // for regular search + f.setOmitNorms(true); + d.add(f); + + f = newField(INT_FIELD, "" + scoreAndID, Field.Store.NO, Field.Index.NOT_ANALYZED); // for function scoring + f.setOmitNorms(true); + d.add(f); + + f = newField(FLOAT_FIELD, scoreAndID + ".000", Field.Store.NO, Field.Index.NOT_ANALYZED); // for function scoring + f.setOmitNorms(true); + d.add(f); + + iw.addDocument(d); + log("added: " + d); + } + + // 17 --> ID00017 + protected static String id2String(int scoreAndID) { + String s = "000000000" + scoreAndID; + int n = ("" + N_DOCS).length() + 3; + int k = s.length() - n; + return "ID" + s.substring(k); + } + + // some text line for regular search + private static String textLine(int docNum) { + return DOC_TEXT_LINES[docNum % DOC_TEXT_LINES.length]; + } + + // extract expected doc score from its ID Field: "ID7" --> 7.0 + protected static float expectedFieldScore(String docIDFieldVal) { + return Float.parseFloat(docIDFieldVal.substring(2)); + } + + // debug messages (change DBG to true for anything to print) + protected static void log(Object o) { + if (VERBOSE) { + System.out.println(o.toString()); + } + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/function/JustCompileSearchSpans.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/function/JustCompileSearchSpans.java new file mode 100644 index 0000000..a85f040 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/function/JustCompileSearchSpans.java @@ -0,0 +1,96 @@ +package org.apache.lucene.search.function; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.FieldCache; + +import java.io.IOException; + +/** + * Holds all implementations of classes in the o.a.l.s.function package as a + * back-compatibility test. It does not run any tests per-se, however if + * someone adds a method to an interface or abstract method to an abstract + * class, one of the implementations here will fail to compile and so we know + * back-compat policy was violated. + */ +final class JustCompileSearchFunction { + + private static final String UNSUPPORTED_MSG = "unsupported: used for back-compat testing only !"; + + static final class JustCompileDocValues extends DocValues { + @Override + public float floatVal(int doc) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public String toString(int doc) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + + static final class JustCompileFieldCacheSource extends FieldCacheSource { + + public JustCompileFieldCacheSource(String field) { + super(field); + } + + @Override + public boolean cachedFieldSourceEquals(FieldCacheSource other) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public int cachedFieldSourceHashCode() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public DocValues getCachedFieldValues(FieldCache cache, String field, + IndexReader reader) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + + static final class JustCompileValueSource extends ValueSource { + @Override + public String description() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public boolean equals(Object o) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public DocValues getValues(IndexReader reader) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public int hashCode() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/function/TestCustomScoreQuery.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/function/TestCustomScoreQuery.java new file mode 100755 index 0000000..bf3a554 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/function/TestCustomScoreQuery.java @@ -0,0 +1,349 @@ +package org.apache.lucene.search.function; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.queryParser.ParseException; +import org.apache.lucene.search.*; +import org.junit.BeforeClass; +import org.junit.Test; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; + +/** + * Test CustomScoreQuery search. + */ +public class TestCustomScoreQuery extends FunctionTestSetup { + + @BeforeClass + public static void beforeClass() throws Exception { + createIndex(true); + } + + /** + * Test that CustomScoreQuery of Type.BYTE returns the expected scores. + */ + @Test + public void testCustomScoreByte() throws Exception, ParseException { + // INT field values are small enough to be parsed as byte + doTestCustomScore(INT_FIELD, FieldScoreQuery.Type.BYTE, 1.0); + doTestCustomScore(INT_FIELD, FieldScoreQuery.Type.BYTE, 2.0); + } + + /** + * Test that CustomScoreQuery of Type.SHORT returns the expected scores. + */ + @Test + public void testCustomScoreShort() throws Exception, ParseException { + // INT field values are small enough to be parsed as short + doTestCustomScore(INT_FIELD, FieldScoreQuery.Type.SHORT, 1.0); + doTestCustomScore(INT_FIELD, FieldScoreQuery.Type.SHORT, 3.0); + } + + /** + * Test that CustomScoreQuery of Type.INT returns the expected scores. + */ + @Test + public void testCustomScoreInt() throws Exception, ParseException { + doTestCustomScore(INT_FIELD, FieldScoreQuery.Type.INT, 1.0); + doTestCustomScore(INT_FIELD, FieldScoreQuery.Type.INT, 4.0); + } + + /** + * Test that CustomScoreQuery of Type.FLOAT returns the expected scores. + */ + @Test + public void testCustomScoreFloat() throws Exception, ParseException { + // INT field can be parsed as float + doTestCustomScore(INT_FIELD, FieldScoreQuery.Type.FLOAT, 1.0); + doTestCustomScore(INT_FIELD, FieldScoreQuery.Type.FLOAT, 5.0); + // same values, but in float format + doTestCustomScore(FLOAT_FIELD, FieldScoreQuery.Type.FLOAT, 1.0); + doTestCustomScore(FLOAT_FIELD, FieldScoreQuery.Type.FLOAT, 6.0); + } + + // must have static class otherwise serialization tests fail + private static class CustomAddQuery extends CustomScoreQuery { + // constructor + CustomAddQuery(Query q, ValueSourceQuery qValSrc) { + super(q, qValSrc); + } + + /*(non-Javadoc) @see org.apache.lucene.search.function.CustomScoreQuery#name() */ + @Override + public String name() { + return "customAdd"; + } + + @Override + protected CustomScoreProvider getCustomScoreProvider(IndexReader reader) { + return new CustomScoreProvider(reader) { + @Override + public float customScore(int doc, float subQueryScore, float valSrcScore) { + return subQueryScore + valSrcScore; + } + + @Override + public Explanation customExplain(int doc, Explanation subQueryExpl, Explanation valSrcExpl) { + float valSrcScore = valSrcExpl == null ? 
0 : valSrcExpl.getValue(); + Explanation exp = new Explanation(valSrcScore + subQueryExpl.getValue(), "custom score: sum of:"); + exp.addDetail(subQueryExpl); + if (valSrcExpl != null) { + exp.addDetail(valSrcExpl); + } + return exp; + } + }; + } + } + + // must have static class otherwise serialization tests fail + private static class CustomMulAddQuery extends CustomScoreQuery { + // constructor + CustomMulAddQuery(Query q, ValueSourceQuery qValSrc1, ValueSourceQuery qValSrc2) { + super(q, new ValueSourceQuery[]{qValSrc1, qValSrc2}); + } + + /*(non-Javadoc) @see org.apache.lucene.search.function.CustomScoreQuery#name() */ + @Override + public String name() { + return "customMulAdd"; + } + + @Override + protected CustomScoreProvider getCustomScoreProvider(IndexReader reader) { + return new CustomScoreProvider(reader) { + @Override + public float customScore(int doc, float subQueryScore, float valSrcScores[]) { + if (valSrcScores.length == 0) { + return subQueryScore; + } + if (valSrcScores.length == 1) { + return subQueryScore + valSrcScores[0]; + // confirm that skipping beyond the last doc, on the + // previous reader, hits NO_MORE_DOCS + } + return (subQueryScore + valSrcScores[0]) * valSrcScores[1]; // we know there are two + } + + @Override + public Explanation customExplain(int doc, Explanation subQueryExpl, Explanation valSrcExpls[]) { + if (valSrcExpls.length == 0) { + return subQueryExpl; + } + Explanation exp = new Explanation(valSrcExpls[0].getValue() + subQueryExpl.getValue(), "sum of:"); + exp.addDetail(subQueryExpl); + exp.addDetail(valSrcExpls[0]); + if (valSrcExpls.length == 1) { + exp.setDescription("CustomMulAdd, sum of:"); + return exp; + } + Explanation exp2 = new Explanation(valSrcExpls[1].getValue() * exp.getValue(), "custom score: product of:"); + exp2.addDetail(valSrcExpls[1]); + exp2.addDetail(exp); + return exp2; + } + }; + } + } + + private final class CustomExternalQuery extends CustomScoreQuery { + + @Override + protected CustomScoreProvider getCustomScoreProvider(IndexReader reader) throws IOException { + final int[] values = FieldCache.DEFAULT.getInts(reader, INT_FIELD); + return new CustomScoreProvider(reader) { + @Override + public float customScore(int doc, float subScore, float valSrcScore) throws IOException { + assertTrue(doc <= reader.maxDoc()); + return values[doc]; + } + }; + } + + public CustomExternalQuery(Query q) { + super(q); + } + } + + @Test + public void testCustomExternalQuery() throws Exception { + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, TEXT_FIELD,anlzr); + String qtxt = "first aid text"; // from the doc texts in FunctionQuerySetup. 
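// Aside: CustomScoreQuery re-scores the wrapped query's hits through a
// CustomScoreProvider; CustomExternalQuery above ignores the subquery score
// and returns the doc's FieldCache value for INT_FIELD instead, so the
// ranking checked below is driven purely by that stored value.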
+ Query q1 = qp.parse(qtxt); + + final Query q = new CustomExternalQuery(q1); + log(q); + + IndexSearcher s = new IndexSearcher(dir, true); + TopDocs hits = s.search(q, 1000); + assertEquals(N_DOCS, hits.totalHits); + for(int i=0;i 0); + assertEquals(s.search(q,1).totalHits, s.search(rewritten,1).totalHits); + + q = new TermRangeQuery(TEXT_FIELD, null, null, true, true); // everything + original = new CustomScoreQuery(q); + rewritten = (CustomScoreQuery) original.rewrite(s.getIndexReader()); + assertTrue("rewritten query should not be identical, as TermRangeQuery rewrites", original != rewritten); + assertTrue("no hits for query", s.search(rewritten,1).totalHits > 0); + assertEquals(s.search(q,1).totalHits, s.search(original,1).totalHits); + assertEquals(s.search(q,1).totalHits, s.search(rewritten,1).totalHits); + + s.close(); + } + + // Test that FieldScoreQuery returns docs with expected score. + private void doTestCustomScore(String field, FieldScoreQuery.Type tp, double dboost) throws Exception, ParseException { + float boost = (float) dboost; + IndexSearcher s = new IndexSearcher(dir, true); + FieldScoreQuery qValSrc = new FieldScoreQuery(field, tp); // a query that would score by the field + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, TEXT_FIELD, anlzr); + String qtxt = "first aid text"; // from the doc texts in FunctionQuerySetup. + + // regular (boolean) query. + Query q1 = qp.parse(qtxt); + log(q1); + + // custom query, that should score the same as q1. + Query q2CustomNeutral = new CustomScoreQuery(q1); + q2CustomNeutral.setBoost(boost); + log(q2CustomNeutral); + + // custom query, that should (by default) multiply the scores of q1 by that of the field + CustomScoreQuery q3CustomMul = new CustomScoreQuery(q1, qValSrc); + q3CustomMul.setStrict(true); + q3CustomMul.setBoost(boost); + log(q3CustomMul); + + // custom query, that should add the scores of q1 to that of the field + CustomScoreQuery q4CustomAdd = new CustomAddQuery(q1, qValSrc); + q4CustomAdd.setStrict(true); + q4CustomAdd.setBoost(boost); + log(q4CustomAdd); + + // custom query, that multiplies and adds the field score to that of q1 + CustomScoreQuery q5CustomMulAdd = new CustomMulAddQuery(q1, qValSrc, qValSrc); + q5CustomMulAdd.setStrict(true); + q5CustomMulAdd.setBoost(boost); + log(q5CustomMulAdd); + + // do al the searches + TopDocs td1 = s.search(q1, null, 1000); + TopDocs td2CustomNeutral = s.search(q2CustomNeutral, null, 1000); + TopDocs td3CustomMul = s.search(q3CustomMul, null, 1000); + TopDocs td4CustomAdd = s.search(q4CustomAdd, null, 1000); + TopDocs td5CustomMulAdd = s.search(q5CustomMulAdd, null, 1000); + + // put results in map so we can verify the scores although they have changed + Map h1 = topDocsToMap(td1); + Map h2CustomNeutral = topDocsToMap(td2CustomNeutral); + Map h3CustomMul = topDocsToMap(td3CustomMul); + Map h4CustomAdd = topDocsToMap(td4CustomAdd); + Map h5CustomMulAdd = topDocsToMap(td5CustomMulAdd); + + verifyResults(boost, s, + h1, h2CustomNeutral, h3CustomMul, h4CustomAdd, h5CustomMulAdd, + q1, q2CustomNeutral, q3CustomMul, q4CustomAdd, q5CustomMulAdd); + s.close(); + } + + // verify results are as expected. 
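// Aside: for each matching doc, the expected score algebra checked below is
// (with fieldScore decoded from the doc's ID field):
//   neutral: boost * score1
//   mul:     boost * fieldScore * score1
//   add:     boost * (fieldScore + score1)
//   mulAdd:  boost * fieldScore * (score1 + fieldScore)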
+ private void verifyResults(float boost, IndexSearcher s, + Map<Integer,Float> h1, Map<Integer,Float> h2customNeutral, Map<Integer,Float> h3CustomMul, Map<Integer,Float> h4CustomAdd, Map<Integer,Float> h5CustomMulAdd, + Query q1, Query q2, Query q3, Query q4, Query q5) throws Exception { + + // verify numbers of matches + log("#hits = "+h1.size()); + assertEquals("queries should have same #hits",h1.size(),h2customNeutral.size()); + assertEquals("queries should have same #hits",h1.size(),h3CustomMul.size()); + assertEquals("queries should have same #hits",h1.size(),h4CustomAdd.size()); + assertEquals("queries should have same #hits",h1.size(),h5CustomMulAdd.size()); + + QueryUtils.check(random, q1,s); + QueryUtils.check(random, q2,s); + QueryUtils.check(random, q3,s); + QueryUtils.check(random, q4,s); + QueryUtils.check(random, q5,s); + + // verify scores ratios + for (final Integer doc : h1.keySet()) { + + log("doc = "+doc); + + float fieldScore = expectedFieldScore(s.getIndexReader().document(doc).get(ID_FIELD)); + log("fieldScore = " + fieldScore); + assertTrue("fieldScore should not be 0", fieldScore > 0); + + float score1 = h1.get(doc); + logResult("score1=", s, q1, doc, score1); + + float score2 = h2customNeutral.get(doc); + logResult("score2=", s, q2, doc, score2); + assertEquals("same score (just boosted) for neutral", boost * score1, score2, TEST_SCORE_TOLERANCE_DELTA); + + float score3 = h3CustomMul.get(doc); + logResult("score3=", s, q3, doc, score3); + assertEquals("new score for custom mul", boost * fieldScore * score1, score3, TEST_SCORE_TOLERANCE_DELTA); + + float score4 = h4CustomAdd.get(doc); + logResult("score4=", s, q4, doc, score4); + assertEquals("new score for custom add", boost * (fieldScore + score1), score4, TEST_SCORE_TOLERANCE_DELTA); + + float score5 = h5CustomMulAdd.get(doc); + logResult("score5=", s, q5, doc, score5); + assertEquals("new score for custom mul add", boost * fieldScore * (score1 + fieldScore), score5, TEST_SCORE_TOLERANCE_DELTA); + } + } + + private void logResult(String msg, Searcher s, Query q, int doc, float score1) throws IOException { + log(msg+" "+score1); + log("Explain by: "+q); + log(s.explain(q,doc)); + } + + // since custom scoring modifies the order of docs, map results + // by doc ids so that we can later compare/verify them + private Map<Integer,Float> topDocsToMap(TopDocs td) { + Map<Integer,Float> h = new HashMap<Integer,Float>(); + for (int i=0; i<td.totalHits; i++) { + h.put(td.scoreDocs[i].doc, td.scoreDocs[i].score); + } + return h; + } +} + * Tests here create an index with a few documents, each having + * an int value indexed field and a float value indexed field. + * The values of these fields are later used for scoring. + *
<p>
+ * The rank tests use Hits to verify that docs are ordered (by score) as expected. + *
<p>
+ * The exact score tests use TopDocs to verify the exact score. + */ +public class TestFieldScoreQuery extends FunctionTestSetup { + + @BeforeClass + public static void beforeClass() throws Exception { + createIndex(true); + } + + /** Test that FieldScoreQuery of Type.BYTE returns docs in expected order. */ + @Test + public void testRankByte () throws Exception { + // INT field values are small enough to be parsed as byte + doTestRank(INT_FIELD,FieldScoreQuery.Type.BYTE); + } + + /** Test that FieldScoreQuery of Type.SHORT returns docs in expected order. */ + @Test + public void testRankShort () throws Exception { + // INT field values are small enough to be parsed as short + doTestRank(INT_FIELD,FieldScoreQuery.Type.SHORT); + } + + /** Test that FieldScoreQuery of Type.INT returns docs in expected order. */ + @Test + public void testRankInt () throws Exception { + doTestRank(INT_FIELD,FieldScoreQuery.Type.INT); + } + + /** Test that FieldScoreQuery of Type.FLOAT returns docs in expected order. */ + @Test + public void testRankFloat () throws Exception { + // INT field can be parsed as float + doTestRank(INT_FIELD,FieldScoreQuery.Type.FLOAT); + // same values, but in float format + doTestRank(FLOAT_FIELD,FieldScoreQuery.Type.FLOAT); + } + + // Test that FieldScoreQuery returns docs in expected order. + private void doTestRank (String field, FieldScoreQuery.Type tp) throws Exception { + IndexSearcher s = new IndexSearcher(dir, true); + Query q = new FieldScoreQuery(field,tp); + log("test: "+q); + QueryUtils.check(random, q,s); + ScoreDoc[] h = s.search(q, null, 1000).scoreDocs; + assertEquals("All docs should be matched!",N_DOCS,h.length); + String prevID = "ID"+(N_DOCS+1); // greater than all ids of docs in this test + for (int i=0; i<h.length; i++) { + String resID = s.doc(h[i].doc).get(ID_FIELD); + log(i+". score="+h[i].score+" - "+resID); + log(s.explain(q,h[i].doc)); + assertTrue("res id "+resID+" should be < prev res id "+prevID, resID.compareTo(prevID)<0); + prevID = resID; + } + s.close(); + } + + /** Test that FieldScoreQuery of Type.BYTE returns the expected scores. */ + @Test + public void testExactScoreByte () throws Exception { + // INT field values are small enough to be parsed as byte + doTestExactScore(INT_FIELD,FieldScoreQuery.Type.BYTE); + } + + /** Test that FieldScoreQuery of Type.SHORT returns the expected scores. */ + @Test + public void testExactScoreShort () throws Exception { + // INT field values are small enough to be parsed as short + doTestExactScore(INT_FIELD,FieldScoreQuery.Type.SHORT); + } + + /** Test that FieldScoreQuery of Type.INT returns the expected scores. */ + @Test + public void testExactScoreInt () throws Exception { + doTestExactScore(INT_FIELD,FieldScoreQuery.Type.INT); + } + + /** Test that FieldScoreQuery of Type.FLOAT returns the expected scores. */ + @Test + public void testExactScoreFloat () throws Exception { + // INT field can be parsed as float + doTestExactScore(INT_FIELD,FieldScoreQuery.Type.FLOAT); + // same values, but in float format + doTestExactScore(FLOAT_FIELD,FieldScoreQuery.Type.FLOAT); + } + + // Test that FieldScoreQuery returns docs with expected score. + private void doTestExactScore (String field, FieldScoreQuery.Type tp) throws Exception { + IndexSearcher s = new IndexSearcher(dir, true); + Query q = new FieldScoreQuery(field,tp); + TopDocs td = s.search(q, null, 1000); + assertEquals("All docs should be matched!",N_DOCS,td.totalHits); + ScoreDoc sd[] = td.scoreDocs; + for (int i=0; i<sd.length; i++) { + float score = sd[i].score; + String id = s.getIndexReader().document(sd[i].doc).get(ID_FIELD); + log("-------- "+i+". Explain doc "+id); + log(s.explain(q,sd[i].doc)); + float expectedScore = expectedFieldScore(id); // "ID7" --> 7.0 + assertEquals("score of " + id + " should be " + expectedScore + " != " + score, expectedScore, score, TEST_SCORE_TOLERANCE_DELTA); + } + s.close(); + } + + /** Test that FieldScoreQuery of Type.BYTE caches/reuses loaded values and consumes the proper RAM resources. */ + @Test + public void testCachingByte () throws Exception { + // INT field values are small enough to be parsed as byte + doTestCaching(INT_FIELD,FieldScoreQuery.Type.BYTE); + } + + /** Test that FieldScoreQuery of Type.SHORT caches/reuses loaded values and consumes the proper RAM resources. */ + @Test + public void testCachingShort () throws Exception { + // INT field values are small enough to be parsed as short + doTestCaching(INT_FIELD,FieldScoreQuery.Type.SHORT); + } + + /** Test that FieldScoreQuery of Type.INT caches/reuses loaded values and consumes the proper RAM resources. */ + @Test + public void testCachingInt () throws Exception { + doTestCaching(INT_FIELD,FieldScoreQuery.Type.INT); + } + + /** Test that FieldScoreQuery of Type.FLOAT caches/reuses loaded values and consumes the proper RAM resources. */ + @Test + public void testCachingFloat () throws Exception { + // INT field values can be parsed as float + doTestCaching(INT_FIELD,FieldScoreQuery.Type.FLOAT); + // same values, but in float format + doTestCaching(FLOAT_FIELD,FieldScoreQuery.Type.FLOAT); + } + + // Test that values loaded for FieldScoreQuery are cached properly and consume the proper RAM resources. 
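+ // (The FieldCache contract exercised here: loaded arrays are keyed by reader
+ // and field, so repeated loads against the same segment reader return the
+ // identical instance, e.g.
+ //   int[] a = FieldCache.DEFAULT.getInts(reader, INT_FIELD);
+ //   int[] b = FieldCache.DEFAULT.getInts(reader, INT_FIELD);
+ //   assert a == b; // same cached array for the lifetime of the reader
+ // while a new IndexSearcher over the same directory opens fresh readers and
+ // therefore reloads the values, as the tail of this method verifies.)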
+ private void doTestCaching (String field, FieldScoreQuery.Type tp) throws Exception { + // prepare expected array types for comparison + HashMap<FieldScoreQuery.Type,Object> expectedArrayTypes = new HashMap<FieldScoreQuery.Type,Object>(); + expectedArrayTypes.put(FieldScoreQuery.Type.BYTE, new byte[0]); + expectedArrayTypes.put(FieldScoreQuery.Type.SHORT, new short[0]); + expectedArrayTypes.put(FieldScoreQuery.Type.INT, new int[0]); + expectedArrayTypes.put(FieldScoreQuery.Type.FLOAT, new float[0]); + + IndexSearcher s = new IndexSearcher(dir, true); + Object[] innerArray = new Object[s.getIndexReader().getSequentialSubReaders().length]; + + boolean warned = false; // print warning once. + for (int i=0; i<10; i++) { + FieldScoreQuery q = new FieldScoreQuery(field,tp); + ScoreDoc[] h = s.search(q, null, 1000).scoreDocs; + assertEquals("All docs should be matched!",N_DOCS,h.length); + IndexReader[] readers = s.getIndexReader().getSequentialSubReaders(); + for (int j = 0; j < readers.length; j++) { + IndexReader reader = readers[j]; + try { + if (i == 0) { + innerArray[j] = q.valSrc.getValues(reader).getInnerArray(); + log(i + ". compare: " + innerArray[j].getClass() + " to " + + expectedArrayTypes.get(tp).getClass()); + assertEquals( + "field values should be cached in the correct array type!", + innerArray[j].getClass(), expectedArrayTypes.get(tp).getClass()); + } else { + log(i + ". compare: " + innerArray[j] + " to " + + q.valSrc.getValues(reader).getInnerArray()); + assertSame("field values should be cached and reused!", innerArray[j], + q.valSrc.getValues(reader).getInnerArray()); + } + } catch (UnsupportedOperationException e) { + if (!warned) { + System.err.println("WARNING: " + testName() + + " cannot fully test values of " + q); + warned = true; + } + } + } + } + s.close(); + // verify new values are reloaded (not reused) for a new reader + s = new IndexSearcher(dir, true); + FieldScoreQuery q = new FieldScoreQuery(field,tp); + ScoreDoc[] h = s.search(q, null, 1000).scoreDocs; + assertEquals("All docs should be matched!",N_DOCS,h.length); + IndexReader[] readers = s.getIndexReader().getSequentialSubReaders(); + for (int j = 0; j < readers.length; j++) { + IndexReader reader = readers[j]; + try { + log("compare: " + innerArray + " to " + + q.valSrc.getValues(reader).getInnerArray()); + assertNotSame( + "cached field values should not be reused if reader has changed!", + innerArray, q.valSrc.getValues(reader).getInnerArray()); + } catch (UnsupportedOperationException e) { + if (!warned) { + System.err.println("WARNING: " + testName() + + " cannot fully test values of " + q); + warned = true; + } + } + } + s.close(); + } + + private String testName() { + return getClass().getName()+"."+ getName(); + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/function/TestOrdValues.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/function/TestOrdValues.java new file mode 100644 index 0000000..2353c8c --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/function/TestOrdValues.java @@ -0,0 +1,266 @@ +package org.apache.lucene.search.function; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.*; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * Test search based on OrdFieldSource and ReverseOrdFieldSource. + *
<p>
+ * Tests here create an index with a few documents, each having + * an indexed "id" field. + * The ord values of this field are later used for scoring. + *
<p>
+ * The order tests use Hits to verify that docs are ordered as expected. + *
<p>
+ * The exact score tests use TopDocs to verify the exact score. + */ +public class TestOrdValues extends FunctionTestSetup { + + @BeforeClass + public static void beforeClass() throws Exception { + createIndex(false); + } + + /** + * Test OrdFieldSource + */ + @Test + public void testOrdFieldRank() throws CorruptIndexException, Exception { + doTestRank(ID_FIELD, true); + } + + /** + * Test ReverseOrdFieldSource + */ + @Test + public void testReverseOrdFieldRank() throws CorruptIndexException, Exception { + doTestRank(ID_FIELD, false); + } + + // Test that queries based on reverse/ordFieldScore score correctly + private void doTestRank(String field, boolean inOrder) throws CorruptIndexException, Exception { + IndexSearcher s = new IndexSearcher(dir, true); + ValueSource vs; + if (inOrder) { + vs = new OrdFieldSource(field); + } else { + vs = new ReverseOrdFieldSource(field); + } + + Query q = new ValueSourceQuery(vs); + log("test: " + q); + QueryUtils.check(random, q, s); + ScoreDoc[] h = s.search(q, null, 1000).scoreDocs; + assertEquals("All docs should be matched!", N_DOCS, h.length); + String prevID = inOrder + ? "IE" // greater than all ids of docs in this test ("ID0001", etc.) + : "IC"; // smaller than all ids of docs in this test ("ID0001", etc.) + + for (int i = 0; i < h.length; i++) { + String resID = s.doc(h[i].doc).get(ID_FIELD); + log(i + ". score=" + h[i].score + " - " + resID); + log(s.explain(q, h[i].doc)); + if (inOrder) { + assertTrue("res id " + resID + " should be < prev res id " + prevID, resID.compareTo(prevID) < 0); + } else { + assertTrue("res id " + resID + " should be > prev res id " + prevID, resID.compareTo(prevID) > 0); + } + prevID = resID; + } + s.close(); + } + + /** + * Test exact score for OrdFieldSource + */ + @Test + public void testOrdFieldExactScore() throws CorruptIndexException, Exception { + doTestExactScore(ID_FIELD, true); + } + + /** + * Test exact score for ReverseOrdFieldSource + */ + @Test + public void testReverseOrdFieldExactScore() throws CorruptIndexException, Exception { + doTestExactScore(ID_FIELD, false); + } + + + // Test that queries based on reverse/ordFieldScore return docs with expected score. + private void doTestExactScore(String field, boolean inOrder) throws CorruptIndexException, Exception { + IndexSearcher s = new IndexSearcher(dir, true); + ValueSource vs; + if (inOrder) { + vs = new OrdFieldSource(field); + } else { + vs = new ReverseOrdFieldSource(field); + } + Query q = new ValueSourceQuery(vs); + TopDocs td = s.search(q, null, 1000); + assertEquals("All docs should be matched!", N_DOCS, td.totalHits); + ScoreDoc sd[] = td.scoreDocs; + for (int i = 0; i < sd.length; i++) { + float score = sd[i].score; + String id = s.getIndexReader().document(sd[i].doc).get(ID_FIELD); + log("-------- " + i + ". Explain doc " + id); + log(s.explain(q, sd[i].doc)); + float expectedScore = N_DOCS - i; + assertEquals("score of result " + i + " should be " + expectedScore + " != " + score, expectedScore, score, TEST_SCORE_TOLERANCE_DELTA); + String expectedId = inOrder + ? 
id2String(N_DOCS - i) // in-order ==> larger values first + : id2String(i + 1); // reverse ==> smaller values first + assertTrue("id of result " + i + " should be " + expectedId + " != " + id, expectedId.equals(id)); + } + s.close(); + } + + /** + * Test caching OrdFieldSource + */ + @Test + public void testCachingOrd() throws CorruptIndexException, Exception { + doTestCaching(ID_FIELD, true); + } + + /** + * Test caching for ReverseOrdFieldSource + */ + @Test + public void testCachingReverseOrd() throws CorruptIndexException, Exception { + doTestCaching(ID_FIELD, false); + } + + // Test that the ord values are cached properly and consume the proper RAM resources. + private void doTestCaching(String field, boolean inOrder) throws CorruptIndexException, Exception { + IndexSearcher s = new IndexSearcher(dir, true); + Object innerArray = null; + + boolean warned = false; // print warning once + + for (int i = 0; i < 10; i++) { + ValueSource vs; + if (inOrder) { + vs = new OrdFieldSource(field); + } else { + vs = new ReverseOrdFieldSource(field); + } + ValueSourceQuery q = new ValueSourceQuery(vs); + ScoreDoc[] h = s.search(q, null, 1000).scoreDocs; + try { + assertEquals("All docs should be matched!", N_DOCS, h.length); + IndexReader[] readers = s.getIndexReader().getSequentialSubReaders(); + + for (IndexReader reader : readers) { + if (i == 0) { + innerArray = q.valSrc.getValues(reader).getInnerArray(); + } else { + log(i + ". compare: " + innerArray + " to " + q.valSrc.getValues(reader).getInnerArray()); + assertSame("field values should be cached and reused!", innerArray, q.valSrc.getValues(reader).getInnerArray()); + } + } + } catch (UnsupportedOperationException e) { + if (!warned) { + System.err.println("WARNING: " + testName() + " cannot fully test values of " + q); + warned = true; + } + } + } + + ValueSource vs; + ValueSourceQuery q; + ScoreDoc[] h; + + // verify that different values are loaded for a different field + String field2 = INT_FIELD; + assertFalse(field.equals(field2)); // otherwise this test is meaningless. 
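+ // (Ord values derive from the position of each doc's term in the field's
+ // sorted term dictionary, so switching to field2 switches dictionaries and
+ // the cache must hand back a different array, as asserted below.)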
+ if (inOrder) { + vs = new OrdFieldSource(field2); + } else { + vs = new ReverseOrdFieldSource(field2); + } + q = new ValueSourceQuery(vs); + h = s.search(q, null, 1000).scoreDocs; + assertEquals("All docs should be matched!", N_DOCS, h.length); + IndexReader[] readers = s.getIndexReader().getSequentialSubReaders(); + + for (IndexReader reader : readers) { + try { + log("compare (should differ): " + innerArray + " to " + + q.valSrc.getValues(reader).getInnerArray()); + assertNotSame( + "different values should be loaded for a different field!", + innerArray, q.valSrc.getValues(reader).getInnerArray()); + } catch (UnsupportedOperationException e) { + if (!warned) { + System.err.println("WARNING: " + testName() + + " cannot fully test values of " + q); + warned = true; + } + } + } + s.close(); + // verify new values are reloaded (not reused) for a new reader + s = new IndexSearcher(dir, true); + if (inOrder) { + vs = new OrdFieldSource(field); + } else { + vs = new ReverseOrdFieldSource(field); + } + q = new ValueSourceQuery(vs); + h = s.search(q, null, 1000).scoreDocs; + assertEquals("All docs should be matched!", N_DOCS, h.length); + readers = s.getIndexReader().getSequentialSubReaders(); + + for (IndexReader reader : readers) { + try { + log("compare (should differ): " + innerArray + " to " + + q.valSrc.getValues(reader).getInnerArray()); + assertNotSame( + "cached field values should not be reused if reader has changed!", + innerArray, q.valSrc.getValues(reader).getInnerArray()); + } catch (UnsupportedOperationException e) { + if (!warned) { + System.err.println("WARNING: " + testName() + + " cannot fully test values of " + q); + warned = true; + } + } + } + s.close(); + } + + private String testName() { + return getClass().getName() + "." + getName(); + } + + // LUCENE-1250 + public void testEqualsNull() throws Exception { + OrdFieldSource ofs = new OrdFieldSource("f"); + assertFalse(ofs.equals(null)); + + ReverseOrdFieldSource rofs = new ReverseOrdFieldSource("f"); + assertFalse(rofs.equals(null)); + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/payloads/PayloadHelper.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/payloads/PayloadHelper.java new file mode 100644 index 0000000..7612567 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/payloads/PayloadHelper.java @@ -0,0 +1,139 @@ +package org.apache.lucene.search.payloads; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.io.Reader; +import java.util.Random; + +import org.apache.lucene.analysis.*; +import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.Payload; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Similarity; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.MockDirectoryWrapper; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.English; + +import static org.apache.lucene.util.LuceneTestCase.TEST_VERSION_CURRENT; + +/** + * + * + **/ +public class PayloadHelper { + + private byte[] payloadField = new byte[]{1}; + private byte[] payloadMultiField1 = new byte[]{2}; + private byte[] payloadMultiField2 = new byte[]{4}; + public static final String NO_PAYLOAD_FIELD = "noPayloadField"; + public static final String MULTI_FIELD = "multiField"; + public static final String FIELD = "field"; + + public IndexReader reader; + + public final class PayloadAnalyzer extends Analyzer { + + + + @Override + public TokenStream tokenStream(String fieldName, Reader reader) { + TokenStream result = new LowerCaseTokenizer(TEST_VERSION_CURRENT, reader); + result = new PayloadFilter(result, fieldName); + return result; + } + } + + public final class PayloadFilter extends TokenFilter { + String fieldName; + int numSeen = 0; + PayloadAttribute payloadAtt; + + public PayloadFilter(TokenStream input, String fieldName) { + super(input); + this.fieldName = fieldName; + payloadAtt = addAttribute(PayloadAttribute.class); + } + + @Override + public boolean incrementToken() throws IOException { + + if (input.incrementToken()) { + if (fieldName.equals(FIELD)) + { + payloadAtt.setPayload(new Payload(payloadField)); + } + else if (fieldName.equals(MULTI_FIELD)) + { + if (numSeen % 2 == 0) + { + payloadAtt.setPayload(new Payload(payloadMultiField1)); + } + else + { + payloadAtt.setPayload(new Payload(payloadMultiField2)); + } + numSeen++; + } + return true; + } + return false; + } + } + + /** + * Sets up a RAMDirectory, and adds documents (using English.intToEnglish()) with two fields: field and multiField + * and analyzes them using the PayloadAnalyzer + * @param similarity The Similarity class to use in the Searcher + * @param numDocs The num docs to add + * @return An IndexSearcher + * @throws IOException + */ + // TODO: randomize + public IndexSearcher setUp(Random random, Similarity similarity, int numDocs) throws IOException { + Directory directory = new MockDirectoryWrapper(random, new RAMDirectory()); + PayloadAnalyzer analyzer = new PayloadAnalyzer(); + IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig( + TEST_VERSION_CURRENT, analyzer).setSimilarity(similarity)); + // writer.infoStream = System.out; + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + doc.add(new Field(FIELD, English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED)); + doc.add(new Field(MULTI_FIELD, English.intToEnglish(i) + " " + English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED)); + doc.add(new Field(NO_PAYLOAD_FIELD, English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(doc); + } + reader = IndexReader.open(writer, 
true); + writer.close(); + + IndexSearcher searcher = LuceneTestCase.newSearcher(reader); + searcher.setSimilarity(similarity); + return searcher; + } + + public void tearDown() throws Exception { + reader.close(); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java new file mode 100644 index 0000000..03b19a0 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java @@ -0,0 +1,345 @@ +package org.apache.lucene.search.payloads; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import java.io.IOException; +import java.io.Reader; +import java.util.Collection; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.LowerCaseTokenizer; +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.FieldInvertState; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Payload; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.DefaultSimilarity; +import org.apache.lucene.search.Explanation; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.QueryUtils; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.Searcher; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.spans.SpanQuery; +import org.apache.lucene.search.spans.SpanNearQuery; +import org.apache.lucene.search.spans.SpanTermQuery; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.English; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.search.Explanation.IDFExplanation; +import org.junit.AfterClass; +import org.junit.BeforeClass; + + +public class TestPayloadNearQuery extends LuceneTestCase { + private static IndexSearcher searcher; + private static IndexReader reader; + private static Directory directory; + private static BoostingSimilarity similarity = new BoostingSimilarity(); + private static byte[] payload2 = new byte[]{2}; + private static byte[] payload4 = new byte[]{4}; + + private static class PayloadAnalyzer extends Analyzer { + @Override + public TokenStream tokenStream(String fieldName, Reader reader) { + TokenStream result = new LowerCaseTokenizer(TEST_VERSION_CURRENT, reader); + result = new PayloadFilter(result, fieldName); + return result; + } + } + + private static class 
PayloadFilter extends TokenFilter { + String fieldName; + int numSeen = 0; + protected PayloadAttribute payAtt; + + public PayloadFilter(TokenStream input, String fieldName) { + super(input); + this.fieldName = fieldName; + payAtt = addAttribute(PayloadAttribute.class); + } + + @Override + public boolean incrementToken() throws IOException { + boolean result = false; + if (input.incrementToken() == true){ + if (numSeen % 2 == 0) { + payAtt.setPayload(new Payload(payload2)); + } else { + payAtt.setPayload(new Payload(payload4)); + } + numSeen++; + result = true; + } + return result; + } + } + + private PayloadNearQuery newPhraseQuery (String fieldName, String phrase, boolean inOrder, PayloadFunction function ) { + String[] words = phrase.split("[\\s]+"); + SpanQuery clauses[] = new SpanQuery[words.length]; + for (int i=0;i -1); + assertTrue(hits.scoreDocs[j].score + " explain value does not equal: " + 3, explain.getValue() == 3f); + } + } + public void testMaxFunction() throws IOException { + PayloadNearQuery query; + TopDocs hits; + + query = newPhraseQuery("field", "twenty two", true, new MaxPayloadFunction()); + QueryUtils.check(query); + // all 10 hits should have score = 4 (max payload value) + hits = searcher.search(query, null, 100); + assertTrue("hits is null and it shouldn't be", hits != null); + assertTrue("should be 10 hits", hits.totalHits == 10); + for (int j = 0; j < hits.scoreDocs.length; j++) { + ScoreDoc doc = hits.scoreDocs[j]; + assertTrue(doc.score + " does not equal: " + 4, doc.score == 4); + Explanation explain = searcher.explain(query, hits.scoreDocs[j].doc); + String exp = explain.toString(); + assertTrue(exp, exp.indexOf("MaxPayloadFunction") > -1); + assertTrue(hits.scoreDocs[j].score + " explain value does not equal: " + 4, explain.getValue() == 4f); + } + } + public void testMinFunction() throws IOException { + PayloadNearQuery query; + TopDocs hits; + + query = newPhraseQuery("field", "twenty two", true, new MinPayloadFunction()); + QueryUtils.check(query); + // all 10 hits should have score = 2 (min payload value) + hits = searcher.search(query, null, 100); + assertTrue("hits is null and it shouldn't be", hits != null); + assertTrue("should be 10 hits", hits.totalHits == 10); + for (int j = 0; j < hits.scoreDocs.length; j++) { + ScoreDoc doc = hits.scoreDocs[j]; + assertTrue(doc.score + " does not equal: " + 2, doc.score == 2); + Explanation explain = searcher.explain(query, hits.scoreDocs[j].doc); + String exp = explain.toString(); + assertTrue(exp, exp.indexOf("MinPayloadFunction") > -1); + assertTrue(hits.scoreDocs[j].score + " explain value does not equal: " + 2, explain.getValue() == 2f); + } + } + private SpanQuery[] getClauses() { + SpanNearQuery q1, q2; + q1 = spanNearQuery("field2", "twenty two"); + q2 = spanNearQuery("field2", "twenty three"); + SpanQuery[] clauses = new SpanQuery[2]; + clauses[0] = q1; + clauses[1] = q2; + return clauses; + } + private SpanNearQuery spanNearQuery(String fieldName, String words) { + String[] wordList = words.split("[\\s]+"); + SpanQuery clauses[] = new SpanQuery[wordList.length]; + for (int i=0;i terms, Searcher searcher) throws IOException { + return new IDFExplanation() { + @Override + public float getIdf() { + return 1.0f; + } + @Override + public String explain() { + return "Inexplicable"; + } + }; + } + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java 
b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java new file mode 100644 index 0000000..b8433a9 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java @@ -0,0 +1,339 @@ +package org.apache.lucene.search.payloads; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.English; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.QueryUtils; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.CheckHits; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.DefaultSimilarity; +import org.apache.lucene.search.spans.SpanTermQuery; +import org.apache.lucene.search.spans.Spans; +import org.apache.lucene.search.spans.TermSpans; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.LowerCaseTokenizer; +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; +import org.apache.lucene.index.FieldInvertState; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Payload; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.Directory; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; + +import java.io.Reader; +import java.io.IOException; + + +/** + * + * + **/ +public class TestPayloadTermQuery extends LuceneTestCase { + private IndexSearcher searcher; + private IndexReader reader; + private BoostingSimilarity similarity = new BoostingSimilarity(); + private byte[] payloadField = new byte[]{1}; + private byte[] payloadMultiField1 = new byte[]{2}; + private byte[] payloadMultiField2 = new byte[]{4}; + protected Directory directory; + + private class PayloadAnalyzer extends Analyzer { + + + @Override + public TokenStream tokenStream(String fieldName, Reader reader) { + TokenStream result = new LowerCaseTokenizer(TEST_VERSION_CURRENT, reader); + result = new PayloadFilter(result, fieldName); + return result; + } + } + + private class PayloadFilter extends TokenFilter { + String fieldName; + int numSeen = 0; + + PayloadAttribute payloadAtt; + + public PayloadFilter(TokenStream input, String fieldName) { + super(input); + this.fieldName = fieldName; + payloadAtt = addAttribute(PayloadAttribute.class); + } + + @Override + public boolean incrementToken() throws IOException { + boolean hasNext = input.incrementToken(); + if (hasNext) { + if 
(fieldName.equals("field")) { + payloadAtt.setPayload(new Payload(payloadField)); + } else if (fieldName.equals("multiField")) { + if (numSeen % 2 == 0) { + payloadAtt.setPayload(new Payload(payloadMultiField1)); + } else { + payloadAtt.setPayload(new Payload(payloadMultiField2)); + } + numSeen++; + } + return true; + } else { + return false; + } + } + } + + @Override + public void setUp() throws Exception { + super.setUp(); + directory = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, directory, + newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()) + .setSimilarity(similarity).setMergePolicy(newLogMergePolicy())); + //writer.infoStream = System.out; + for (int i = 0; i < 1000; i++) { + Document doc = new Document(); + Field noPayloadField = newField(PayloadHelper.NO_PAYLOAD_FIELD, English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED); + //noPayloadField.setBoost(0); + doc.add(noPayloadField); + doc.add(newField("field", English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED)); + doc.add(newField("multiField", English.intToEnglish(i) + " " + English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(doc); + } + reader = writer.getReader(); + writer.close(); + + searcher = newSearcher(reader); + searcher.setSimilarity(similarity); + } + + @Override + public void tearDown() throws Exception { + searcher.close(); + reader.close(); + directory.close(); + super.tearDown(); + } + + public void test() throws IOException { + PayloadTermQuery query = new PayloadTermQuery(new Term("field", "seventy"), + new MaxPayloadFunction()); + TopDocs hits = searcher.search(query, null, 100); + assertTrue("hits is null and it shouldn't be", hits != null); + assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100); + + //they should all have the exact same score, because they all contain seventy once, and we set + //all the other similarity factors to be 1 + + assertTrue(hits.getMaxScore() + " does not equal: " + 1, hits.getMaxScore() == 1); + for (int i = 0; i < hits.scoreDocs.length; i++) { + ScoreDoc doc = hits.scoreDocs[i]; + assertTrue(doc.score + " does not equal: " + 1, doc.score == 1); + } + CheckHits.checkExplanations(query, PayloadHelper.FIELD, searcher, true); + Spans spans = query.getSpans(searcher.getIndexReader()); + assertTrue("spans is null and it shouldn't be", spans != null); + assertTrue("spans is not an instanceof " + TermSpans.class, spans instanceof TermSpans); + /*float score = hits.score(0); + for (int i =1; i < hits.length(); i++) + { + assertTrue("scores are not equal and they should be", score == hits.score(i)); + }*/ + + } + + public void testQuery() { + PayloadTermQuery boostingFuncTermQuery = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"), + new MaxPayloadFunction()); + QueryUtils.check(boostingFuncTermQuery); + + SpanTermQuery spanTermQuery = new SpanTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy")); + + assertTrue(boostingFuncTermQuery.equals(spanTermQuery) == spanTermQuery.equals(boostingFuncTermQuery)); + + PayloadTermQuery boostingFuncTermQuery2 = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"), + new AveragePayloadFunction()); + + QueryUtils.checkUnequal(boostingFuncTermQuery, boostingFuncTermQuery2); + } + + public void testMultipleMatchesPerDoc() throws Exception { + PayloadTermQuery query = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"), + new MaxPayloadFunction()); + TopDocs hits = 
searcher.search(query, null, 100); + assertTrue("hits is null and it shouldn't be", hits != null); + assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100); + + //they should all have the exact same score, because they all contain seventy once, and we set + //all the other similarity factors to be 1 + + //System.out.println("Hash: " + seventyHash + " Twice Hash: " + 2*seventyHash); + assertTrue(hits.getMaxScore() + " does not equal: " + 4.0, hits.getMaxScore() == 4.0); + //there should be exactly 10 items that score a 4, all the rest should score a 2 + //The 10 items are: 70 + i*100 where i in [0-9] + int numTens = 0; + for (int i = 0; i < hits.scoreDocs.length; i++) { + ScoreDoc doc = hits.scoreDocs[i]; + if (doc.doc % 10 == 0) { + numTens++; + assertTrue(doc.score + " does not equal: " + 4.0, doc.score == 4.0); + } else { + assertTrue(doc.score + " does not equal: " + 2, doc.score == 2); + } + } + assertTrue(numTens + " does not equal: " + 10, numTens == 10); + CheckHits.checkExplanations(query, "field", searcher, true); + Spans spans = query.getSpans(searcher.getIndexReader()); + assertTrue("spans is null and it shouldn't be", spans != null); + assertTrue("spans is not an instanceof " + TermSpans.class, spans instanceof TermSpans); + //should be two matches per document + int count = 0; + //100 hits times 2 matches per hit, we should have 200 in count + while (spans.next()) { + count++; + } + assertTrue(count + " does not equal: " + 200, count == 200); + } + + //Set includeSpanScore to false, in which case just the payload score comes through. + public void testIgnoreSpanScorer() throws Exception { + PayloadTermQuery query = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"), + new MaxPayloadFunction(), false); + + IndexSearcher theSearcher = new IndexSearcher(directory, true); + theSearcher.setSimilarity(new FullSimilarity()); + TopDocs hits = theSearcher.search(query, null, 100); + assertTrue("hits is null and it shouldn't be", hits != null); + assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100); + + //they should all have the exact same score, because they all contain seventy once, and we set + //all the other similarity factors to be 1 + + //System.out.println("Hash: " + seventyHash + " Twice Hash: " + 2*seventyHash); + assertTrue(hits.getMaxScore() + " does not equal: " + 4.0, hits.getMaxScore() == 4.0); + //there should be exactly 10 items that score a 4, all the rest should score a 2 + //The 10 items are: 70 + i*100 where i in [0-9] + int numTens = 0; + for (int i = 0; i < hits.scoreDocs.length; i++) { + ScoreDoc doc = hits.scoreDocs[i]; + if (doc.doc % 10 == 0) { + numTens++; + assertTrue(doc.score + " does not equal: " + 4.0, doc.score == 4.0); + } else { + assertTrue(doc.score + " does not equal: " + 2, doc.score == 2); + } + } + assertTrue(numTens + " does not equal: " + 10, numTens == 10); + CheckHits.checkExplanations(query, "field", searcher, true); + Spans spans = query.getSpans(searcher.getIndexReader()); + assertTrue("spans is null and it shouldn't be", spans != null); + assertTrue("spans is not an instanceof " + TermSpans.class, spans instanceof TermSpans); + //should be two matches per document + int count = 0; + //100 hits times 2 matches per hit, we should have 200 in count + while (spans.next()) { + count++; + } + theSearcher.close(); + } + + public void testNoMatch() throws Exception { + PayloadTermQuery query = new PayloadTermQuery(new Term(PayloadHelper.FIELD, "junk"), + new 
MaxPayloadFunction()); + TopDocs hits = searcher.search(query, null, 100); + assertTrue("hits is null and it shouldn't be", hits != null); + assertTrue("hits Size: " + hits.totalHits + " is not: " + 0, hits.totalHits == 0); + + } + + public void testNoPayload() throws Exception { + PayloadTermQuery q1 = new PayloadTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "zero"), + new MaxPayloadFunction()); + PayloadTermQuery q2 = new PayloadTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "foo"), + new MaxPayloadFunction()); + BooleanClause c1 = new BooleanClause(q1, BooleanClause.Occur.MUST); + BooleanClause c2 = new BooleanClause(q2, BooleanClause.Occur.MUST_NOT); + BooleanQuery query = new BooleanQuery(); + query.add(c1); + query.add(c2); + TopDocs hits = searcher.search(query, null, 100); + assertTrue("hits is null and it shouldn't be", hits != null); + assertTrue("hits Size: " + hits.totalHits + " is not: " + 1, hits.totalHits == 1); + int[] results = new int[1]; + results[0] = 0;//hits.scoreDocs[0].doc; + CheckHits.checkHitCollector(random, query, PayloadHelper.NO_PAYLOAD_FIELD, searcher, results); + } + + // must be static for weight serialization tests + static class BoostingSimilarity extends DefaultSimilarity { + + // TODO: Remove warning after API has been finalized + @Override + public float scorePayload(int docId, String fieldName, int start, int end, byte[] payload, int offset, int length) { + //we know it is size 4 here, so ignore the offset/length + return payload[0]; + } + + //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + //Make everything else 1 so we see the effect of the payload + //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + @Override + public float computeNorm(String fieldName, FieldInvertState state) { + return state.getBoost(); + } + + @Override + public float queryNorm(float sumOfSquaredWeights) { + return 1; + } + + @Override + public float sloppyFreq(int distance) { + return 1; + } + + @Override + public float coord(int overlap, int maxOverlap) { + return 1; + } + + @Override + public float idf(int docFreq, int numDocs) { + return 1; + } + + @Override + public float tf(float freq) { + return freq == 0 ? 0 : 1; + } + } + + static class FullSimilarity extends DefaultSimilarity{ + public float scorePayload(int docId, String fieldName, byte[] payload, int offset, int length) { + //we know it is size 4 here, so ignore the offset/length + return payload[0]; + } + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java new file mode 100644 index 0000000..c687faf --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java @@ -0,0 +1,147 @@ +package org.apache.lucene.search.spans; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Collection; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.Weight; +import org.apache.lucene.search.Similarity; + +/** + * Holds all implementations of classes in the o.a.l.s.spans package as a + * back-compatibility test. It does not run any tests per-se, however if + * someone adds a method to an interface or abstract method to an abstract + * class, one of the implementations here will fail to compile and so we know + * back-compat policy was violated. + */ +final class JustCompileSearchSpans { + + private static final String UNSUPPORTED_MSG = "unsupported: used for back-compat testing only !"; + + static final class JustCompileSpans extends Spans { + + @Override + public int doc() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public int end() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public boolean next() throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public boolean skipTo(int target) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public int start() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public Collection<byte[]> getPayload() throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public boolean isPayloadAvailable() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + + static final class JustCompileSpanQuery extends SpanQuery { + + @Override + public String getField() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public Spans getSpans(IndexReader reader) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public String toString(String field) { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + + static final class JustCompilePayloadSpans extends Spans { + + @Override + public Collection<byte[]> getPayload() throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public boolean isPayloadAvailable() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public int doc() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public int end() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public boolean next() throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public boolean skipTo(int target) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + @Override + public int start() { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + + } + + static final class JustCompileSpanScorer extends SpanScorer { + + protected JustCompileSpanScorer(Spans spans, Weight weight, + Similarity similarity, byte[] norms) throws IOException { + super(spans, weight, similarity, norms); + } + + @Override + 
protected boolean setFreqCurrentDoc() throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED_MSG); + } + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/spans/TestBasics.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/spans/TestBasics.java new file mode 100644 index 0000000..a8a86f6 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/spans/TestBasics.java @@ -0,0 +1,632 @@ +package org.apache.lucene.search.spans; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.io.Reader; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.LowerCaseTokenizer; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Payload; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.CheckHits; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.QueryUtils; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.English; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * Tests basic search capabilities. + * + *
<p>
 Uses a collection of 1000 documents, each the English rendition of their + * document number. For example, the document numbered 333 has text "three + * hundred thirty three". + * + *
<p>
Tests are each a single query, and its hits are checked to ensure that + * all and only the correct documents are returned, thus providing end-to-end + * testing of the indexing and search code. + * + */ +public class TestBasics extends LuceneTestCase { + private static IndexSearcher searcher; + private static IndexReader reader; + private static Directory directory; + + static final class SimplePayloadFilter extends TokenFilter { + String fieldName; + int pos; + final PayloadAttribute payloadAttr; + final CharTermAttribute termAttr; + + public SimplePayloadFilter(TokenStream input, String fieldName) { + super(input); + this.fieldName = fieldName; + pos = 0; + payloadAttr = input.addAttribute(PayloadAttribute.class); + termAttr = input.addAttribute(CharTermAttribute.class); + } + + @Override + public boolean incrementToken() throws IOException { + if (input.incrementToken()) { + payloadAttr.setPayload(new Payload(("pos: " + pos).getBytes())); + pos++; + return true; + } else { + return false; + } + } + + @Override + public void reset() throws IOException { + super.reset(); + pos = 0; + } + } + + static final Analyzer simplePayloadAnalyzer = new Analyzer() { + + @Override + public TokenStream tokenStream(String fieldName, Reader reader) { + return new SimplePayloadFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, reader), fieldName); + } + + }; + + @BeforeClass + public static void beforeClass() throws Exception { + directory = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, directory, + newIndexWriterConfig(TEST_VERSION_CURRENT, simplePayloadAnalyzer) + .setMaxBufferedDocs(_TestUtil.nextInt(random, 50, 1000)).setMergePolicy(newLogMergePolicy())); + //writer.infoStream = System.out; + for (int i = 0; i < 2000; i++) { + Document doc = new Document(); + doc.add(newField("field", English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(doc); + } + reader = writer.getReader(); + searcher = newSearcher(reader); + writer.close(); + } + + @AfterClass + public static void afterClass() throws Exception { + searcher.close(); + reader.close(); + directory.close(); + searcher = null; + reader = null; + directory = null; + } + + @Test + public void testTerm() throws Exception { + Query query = new TermQuery(new Term("field", "seventy")); + checkHits(query, new int[] + {70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 170, 171, 172, 173, 174, 175, + 176, 177, 178, 179, 270, 271, 272, 273, 274, 275, 276, 277, 278, + 279, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 470, 471, + 472, 473, 474, 475, 476, 477, 478, 479, 570, 571, 572, 573, 574, + 575, 576, 577, 578, 579, 670, 671, 672, 673, 674, 675, 676, 677, + 678, 679, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 870, + 871, 872, 873, 874, 875, 876, 877, 878, 879, 970, 971, 972, 973, + 974, 975, 976, 977, 978, 979, 1070, 1071, 1072, 1073, 1074, 1075, + 1076, 1077, 1078, 1079, 1170, 1171, 1172, 1173, 1174, 1175, 1176, + 1177, 1178, 1179, 1270, 1271, 1272, 1273, 1274, 1275, 1276, 1277, + 1278, 1279, 1370, 1371, 1372, 1373, 1374, 1375, 1376, 1377, 1378, + 1379, 1470, 1471, 1472, 1473, 1474, 1475, 1476, 1477, 1478, 1479, + 1570, 1571, 1572, 1573, 1574, 1575, 1576, 1577, 1578, 1579, 1670, + 1671, 1672, 1673, 1674, 1675, 1676, 1677, 1678, 1679, 1770, 1771, + 1772, 1773, 1774, 1775, 1776, 1777, 1778, 1779, 1870, 1871, 1872, + 1873, 1874, 1875, 1876, 1877, + 1878, 1879, 1970, 1971, 1972, 1973, 1974, 1975, 1976, 1977, 1978, + 1979}); + } + + @Test + public void testTerm2() throws Exception { + Query query 
= new TermQuery(new Term("field", "seventish")); + checkHits(query, new int[] {}); + } + + @Test + public void testPhrase() throws Exception { + PhraseQuery query = new PhraseQuery(); + query.add(new Term("field", "seventy")); + query.add(new Term("field", "seven")); + checkHits(query, new int[] + {77, 177, 277, 377, 477, 577, 677, 777, 877, + 977, 1077, 1177, 1277, 1377, 1477, 1577, 1677, 1777, 1877, 1977}); + } + + @Test + public void testPhrase2() throws Exception { + PhraseQuery query = new PhraseQuery(); + query.add(new Term("field", "seventish")); + query.add(new Term("field", "sevenon")); + checkHits(query, new int[] {}); + } + + @Test + public void testBoolean() throws Exception { + BooleanQuery query = new BooleanQuery(); + query.add(new TermQuery(new Term("field", "seventy")), BooleanClause.Occur.MUST); + query.add(new TermQuery(new Term("field", "seven")), BooleanClause.Occur.MUST); + checkHits(query, new int[] + {77, 177, 277, 377, 477, 577, 677, 770, 771, 772, 773, 774, 775, 776, 777, + 778, 779, 877, 977, 1077, 1177, 1277, 1377, 1477, 1577, 1677, + 1770, 1771, 1772, 1773, 1774, 1775, 1776, 1777, 1778, 1779, 1877, + 1977}); + } + + @Test + public void testBoolean2() throws Exception { + BooleanQuery query = new BooleanQuery(); + query.add(new TermQuery(new Term("field", "sevento")), BooleanClause.Occur.MUST); + query.add(new TermQuery(new Term("field", "sevenly")), BooleanClause.Occur.MUST); + checkHits(query, new int[] {}); + } + + @Test + public void testSpanNearExact() throws Exception { + SpanTermQuery term1 = new SpanTermQuery(new Term("field", "seventy")); + SpanTermQuery term2 = new SpanTermQuery(new Term("field", "seven")); + SpanNearQuery query = new SpanNearQuery(new SpanQuery[] {term1, term2}, + 0, true); + checkHits(query, new int[] + {77, 177, 277, 377, 477, 577, 677, 777, 877, 977, 1077, 1177, 1277, 1377, 1477, 1577, 1677, 1777, 1877, 1977}); + + assertTrue(searcher.explain(query, 77).getValue() > 0.0f); + assertTrue(searcher.explain(query, 977).getValue() > 0.0f); + + QueryUtils.check(term1); + QueryUtils.check(term2); + QueryUtils.checkUnequal(term1,term2); + } + + public void testSpanTermQuery() throws Exception { + SpanTermQuery term1 = new SpanTermQuery(new Term("field", "seventy")); + checkHits(term1, new int[] + { 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 170, + 171, 172, 173, 174, 175, 176, 177, 178, 179, 270, 271, 272, 273, 274, + 275, 276, 277, 278, 279, 370, 371, 372, 373, 374, 375, 376, 377, 378, + 379, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 570, 571, 572, + 573, 574, 575, 576, 577, 578, 579, 670, 671, 672, 673, 674, 675, 676, + 677, 678, 679, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 870, + 871, 872, 873, 874, 875, 876, 877, 878, 879, 970, 971, 972, 973, 974, + 975, 976, 977, 978, 979, 1070, 1071, 1072, 1073, 1074, 1075, 1076, + 1077, 1078, 1079, 1170, 1270, 1370, 1470, 1570, 1670, 1770, 1870, 1970, + 1171, 1172, 1173, 1174, 1175, 1176, 1177, 1178, 1179, 1271, 1272, 1273, + 1274, 1275, 1276, 1277, 1278, 1279, 1371, 1372, 1373, 1374, 1375, 1376, + 1377, 1378, 1379, 1471, 1472, 1473, 1474, 1475, 1476, 1477, 1478, 1479, + 1571, 1572, 1573, 1574, 1575, 1576, 1577, 1578, 1579, 1671, 1672, 1673, + 1674, 1675, 1676, 1677, 1678, 1679, 1771, 1772, 1773, 1774, 1775, 1776, + 1777, 1778, 1779, 1871, 1872, 1873, 1874, 1875, 1876, 1877, 1878, 1879, + 1971, 1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979 }); + } + + @Test + public void testSpanNearUnordered() throws Exception { + SpanTermQuery term1 = new SpanTermQuery(new Term("field", "nine")); + 
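+ // (slop 4, unordered: "nine" and "six" may appear in either order with at
+ // most four positions between them, so both "six hundred nine" (609) and
+ // "nine hundred six" (906) qualify below.)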
SpanTermQuery term2 = new SpanTermQuery(new Term("field", "six")); + SpanNearQuery query = new SpanNearQuery(new SpanQuery[] {term1, term2}, + 4, false); + + checkHits(query, new int[] + {609, 629, 639, 649, 659, 669, 679, 689, 699, 906, 926, 936, 946, 956, + 966, 976, 986, 996, 1609, 1629, 1639, 1649, 1659, 1669, + 1679, 1689, 1699, 1906, 1926, 1936, 1946, 1956, 1966, 1976, 1986, + 1996}); + } + + @Test + public void testSpanNearOrdered() throws Exception { + SpanTermQuery term1 = new SpanTermQuery(new Term("field", "nine")); + SpanTermQuery term2 = new SpanTermQuery(new Term("field", "six")); + SpanNearQuery query = new SpanNearQuery(new SpanQuery[] {term1, term2}, + 4, true); + checkHits(query, new int[] + {906, 926, 936, 946, 956, 966, 976, 986, 996, 1906, 1926, 1936, 1946, 1956, 1966, 1976, 1986, 1996}); + } + + @Test + public void testSpanNot() throws Exception { + SpanTermQuery term1 = new SpanTermQuery(new Term("field", "eight")); + SpanTermQuery term2 = new SpanTermQuery(new Term("field", "one")); + SpanNearQuery near = new SpanNearQuery(new SpanQuery[] {term1, term2}, + 4, true); + SpanTermQuery term3 = new SpanTermQuery(new Term("field", "forty")); + SpanNotQuery query = new SpanNotQuery(near, term3); + + checkHits(query, new int[] + {801, 821, 831, 851, 861, 871, 881, 891, 1801, 1821, 1831, 1851, 1861, 1871, 1881, 1891}); + + assertTrue(searcher.explain(query, 801).getValue() > 0.0f); + assertTrue(searcher.explain(query, 891).getValue() > 0.0f); + } + + @Test + public void testSpanWithMultipleNotSingle() throws Exception { + SpanTermQuery term1 = new SpanTermQuery(new Term("field", "eight")); + SpanTermQuery term2 = new SpanTermQuery(new Term("field", "one")); + SpanNearQuery near = new SpanNearQuery(new SpanQuery[] {term1, term2}, + 4, true); + SpanTermQuery term3 = new SpanTermQuery(new Term("field", "forty")); + + SpanOrQuery or = new SpanOrQuery(term3); + + SpanNotQuery query = new SpanNotQuery(near, or); + + checkHits(query, new int[] + {801, 821, 831, 851, 861, 871, 881, 891, + 1801, 1821, 1831, 1851, 1861, 1871, 1881, 1891}); + + assertTrue(searcher.explain(query, 801).getValue() > 0.0f); + assertTrue(searcher.explain(query, 891).getValue() > 0.0f); + } + + @Test + public void testSpanWithMultipleNotMany() throws Exception { + SpanTermQuery term1 = new SpanTermQuery(new Term("field", "eight")); + SpanTermQuery term2 = new SpanTermQuery(new Term("field", "one")); + SpanNearQuery near = new SpanNearQuery(new SpanQuery[] {term1, term2}, + 4, true); + SpanTermQuery term3 = new SpanTermQuery(new Term("field", "forty")); + SpanTermQuery term4 = new SpanTermQuery(new Term("field", "sixty")); + SpanTermQuery term5 = new SpanTermQuery(new Term("field", "eighty")); + + SpanOrQuery or = new SpanOrQuery(term3, term4, term5); + + SpanNotQuery query = new SpanNotQuery(near, or); + + checkHits(query, new int[] + {801, 821, 831, 851, 871, 891, 1801, 1821, 1831, 1851, 1871, 1891}); + + assertTrue(searcher.explain(query, 801).getValue() > 0.0f); + assertTrue(searcher.explain(query, 891).getValue() > 0.0f); + } + + @Test + public void testNpeInSpanNearWithSpanNot() throws Exception { + SpanTermQuery term1 = new SpanTermQuery(new Term("field", "eight")); + SpanTermQuery term2 = new SpanTermQuery(new Term("field", "one")); + SpanNearQuery near = new SpanNearQuery(new SpanQuery[] {term1, term2}, + 4, true); + SpanTermQuery hun = new SpanTermQuery(new Term("field", "hundred")); + SpanTermQuery term3 = new SpanTermQuery(new Term("field", "forty")); + SpanNearQuery exclude = new 
SpanNearQuery(new SpanQuery[] {hun, term3}, + 1, true); + + SpanNotQuery query = new SpanNotQuery(near, exclude); + + checkHits(query, new int[] + {801, 821, 831, 851, 861, 871, 881, 891, + 1801, 1821, 1831, 1851, 1861, 1871, 1881, 1891}); + + assertTrue(searcher.explain(query, 801).getValue() > 0.0f); + assertTrue(searcher.explain(query, 891).getValue() > 0.0f); + } + + @Test + public void testNpeInSpanNearInSpanFirstInSpanNot() throws Exception { + int n = 5; + SpanTermQuery hun = new SpanTermQuery(new Term("field", "hundred")); + SpanTermQuery term40 = new SpanTermQuery(new Term("field", "forty")); + SpanTermQuery term40c = (SpanTermQuery)term40.clone(); + + SpanFirstQuery include = new SpanFirstQuery(term40, n); + SpanNearQuery near = new SpanNearQuery(new SpanQuery[]{hun, term40c}, + n-1, true); + SpanFirstQuery exclude = new SpanFirstQuery(near, n-1); + SpanNotQuery q = new SpanNotQuery(include, exclude); + + checkHits(q, new int[]{40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, + 1049, 1140, 1141, 1142, 1143, 1144, 1145, 1146, 1147, 1148, 1149, 1240, 1241, 1242, 1243, 1244, + 1245, 1246, 1247, 1248, 1249, 1340, 1341, 1342, 1343, 1344, 1345, 1346, 1347, 1348, 1349, 1440, 1441, 1442, + 1443, 1444, 1445, 1446, 1447, 1448, 1449, 1540, 1541, 1542, 1543, 1544, 1545, 1546, 1547, 1548, 1549, 1640, + 1641, 1642, 1643, 1644, 1645, 1646, 1647, + 1648, 1649, 1740, 1741, 1742, 1743, 1744, 1745, 1746, 1747, 1748, 1749, 1840, 1841, 1842, 1843, 1844, 1845, 1846, + 1847, 1848, 1849, 1940, 1941, 1942, 1943, 1944, 1945, 1946, 1947, 1948, 1949}); + } + + @Test + public void testSpanFirst() throws Exception { + SpanTermQuery term1 = new SpanTermQuery(new Term("field", "five")); + SpanFirstQuery query = new SpanFirstQuery(term1, 1); + + checkHits(query, new int[] + {5, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, + 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, + 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, + 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, + 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, + 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, + 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, + 598, 599}); + + assertTrue(searcher.explain(query, 5).getValue() > 0.0f); + assertTrue(searcher.explain(query, 599).getValue() > 0.0f); + + } + + @Test + public void testSpanPositionRange() throws Exception { + SpanPositionRangeQuery query; + SpanTermQuery term1 = new SpanTermQuery(new Term("field", "five")); + query = new SpanPositionRangeQuery(term1, 1, 2); + checkHits(query, new int[] + {25,35, 45, 55, 65, 75, 85, 95}); + assertTrue(searcher.explain(query, 25).getValue() > 0.0f); + assertTrue(searcher.explain(query, 95).getValue() > 0.0f); + + query = new SpanPositionRangeQuery(term1, 0, 1); + checkHits(query, new int[] + {5, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, + 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, + 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, + 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, + 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, + 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, + 578, 579, 580, 581, 582, 583, 584, + 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, + 598, 599}); + + query = new SpanPositionRangeQuery(term1, 6, 7); + 
checkHits(query, new int[]{}); + } + + @Test + public void testSpanPayloadCheck() throws Exception { + SpanTermQuery term1 = new SpanTermQuery(new Term("field", "five")); + Payload pay = new Payload(("pos: " + 5).getBytes()); + SpanQuery query = new SpanPayloadCheckQuery(term1, Collections.singletonList(pay.getData())); + checkHits(query, new int[] + {1125, 1135, 1145, 1155, 1165, 1175, 1185, 1195, 1225, 1235, 1245, 1255, 1265, 1275, 1285, 1295, 1325, 1335, 1345, 1355, 1365, 1375, 1385, 1395, 1425, 1435, 1445, 1455, 1465, 1475, 1485, 1495, 1525, 1535, 1545, 1555, 1565, 1575, 1585, 1595, 1625, 1635, 1645, 1655, 1665, 1675, 1685, 1695, 1725, 1735, 1745, 1755, 1765, 1775, 1785, 1795, 1825, 1835, 1845, 1855, 1865, 1875, 1885, 1895, 1925, 1935, 1945, 1955, 1965, 1975, 1985, 1995}); + assertTrue(searcher.explain(query, 1125).getValue() > 0.0f); + + SpanTermQuery term2 = new SpanTermQuery(new Term("field", "hundred")); + SpanNearQuery snq; + SpanQuery[] clauses; + List list; + Payload pay2; + clauses = new SpanQuery[2]; + clauses[0] = term1; + clauses[1] = term2; + snq = new SpanNearQuery(clauses, 0, true); + pay = new Payload(("pos: " + 0).getBytes()); + pay2 = new Payload(("pos: " + 1).getBytes()); + list = new ArrayList(); + list.add(pay.getData()); + list.add(pay2.getData()); + query = new SpanNearPayloadCheckQuery(snq, list); + checkHits(query, new int[] + {500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599}); + clauses = new SpanQuery[3]; + clauses[0] = term1; + clauses[1] = term2; + clauses[2] = new SpanTermQuery(new Term("field", "five")); + snq = new SpanNearQuery(clauses, 0, true); + pay = new Payload(("pos: " + 0).getBytes()); + pay2 = new Payload(("pos: " + 1).getBytes()); + Payload pay3 = new Payload(("pos: " + 2).getBytes()); + list = new ArrayList(); + list.add(pay.getData()); + list.add(pay2.getData()); + list.add(pay3.getData()); + query = new SpanNearPayloadCheckQuery(snq, list); + checkHits(query, new int[] + {505}); + } + + public void testComplexSpanChecks() throws Exception { + SpanTermQuery one = new SpanTermQuery(new Term("field", "one")); + SpanTermQuery thous = new SpanTermQuery(new Term("field", "thousand")); + //should be one position in between + SpanTermQuery hundred = new SpanTermQuery(new Term("field", "hundred")); + SpanTermQuery three = new SpanTermQuery(new Term("field", "three")); + + SpanNearQuery oneThous = new SpanNearQuery(new SpanQuery[]{one, thous}, 0, true); + SpanNearQuery hundredThree = new SpanNearQuery(new SpanQuery[]{hundred, three}, 0, true); + SpanNearQuery oneThousHunThree = new SpanNearQuery(new SpanQuery[]{oneThous, hundredThree}, 1, true); + SpanQuery query; + //this one's too small + query = new SpanPositionRangeQuery(oneThousHunThree, 1, 2); + checkHits(query, new int[]{}); + //this one's just right + query = new SpanPositionRangeQuery(oneThousHunThree, 0, 6); + checkHits(query, new int[]{1103, 1203,1303,1403,1503,1603,1703,1803,1903}); + + Collection payloads = new ArrayList(); + Payload pay = new Payload(("pos: " + 0).getBytes()); + Payload pay2 = new Payload(("pos: " + 1).getBytes()); + Payload pay3 
= new Payload(("pos: " + 3).getBytes()); + Payload pay4 = new Payload(("pos: " + 4).getBytes()); + payloads.add(pay.getData()); + payloads.add(pay2.getData()); + payloads.add(pay3.getData()); + payloads.add(pay4.getData()); + query = new SpanNearPayloadCheckQuery(oneThousHunThree, payloads); + checkHits(query, new int[]{1103, 1203,1303,1403,1503,1603,1703,1803,1903}); + + } + + + @Test + public void testSpanOr() throws Exception { + SpanTermQuery term1 = new SpanTermQuery(new Term("field", "thirty")); + SpanTermQuery term2 = new SpanTermQuery(new Term("field", "three")); + SpanNearQuery near1 = new SpanNearQuery(new SpanQuery[] {term1, term2}, + 0, true); + SpanTermQuery term3 = new SpanTermQuery(new Term("field", "forty")); + SpanTermQuery term4 = new SpanTermQuery(new Term("field", "seven")); + SpanNearQuery near2 = new SpanNearQuery(new SpanQuery[] {term3, term4}, + 0, true); + + SpanOrQuery query = new SpanOrQuery(near1, near2); + + checkHits(query, new int[] + {33, 47, 133, 147, 233, 247, 333, 347, 433, 447, 533, 547, 633, 647, 733, + 747, 833, 847, 933, 947, 1033, 1047, 1133, 1147, 1233, 1247, 1333, + 1347, 1433, 1447, 1533, 1547, 1633, 1647, 1733, 1747, 1833, 1847, 1933, 1947}); + + assertTrue(searcher.explain(query, 33).getValue() > 0.0f); + assertTrue(searcher.explain(query, 947).getValue() > 0.0f); + } + + @Test + public void testSpanExactNested() throws Exception { + SpanTermQuery term1 = new SpanTermQuery(new Term("field", "three")); + SpanTermQuery term2 = new SpanTermQuery(new Term("field", "hundred")); + SpanNearQuery near1 = new SpanNearQuery(new SpanQuery[] {term1, term2}, + 0, true); + SpanTermQuery term3 = new SpanTermQuery(new Term("field", "thirty")); + SpanTermQuery term4 = new SpanTermQuery(new Term("field", "three")); + SpanNearQuery near2 = new SpanNearQuery(new SpanQuery[] {term3, term4}, + 0, true); + + SpanNearQuery query = new SpanNearQuery(new SpanQuery[] {near1, near2}, + 0, true); + + checkHits(query, new int[] {333, 1333}); + + assertTrue(searcher.explain(query, 333).getValue() > 0.0f); + } + + @Test + public void testSpanNearOr() throws Exception { + + SpanTermQuery t1 = new SpanTermQuery(new Term("field","six")); + SpanTermQuery t3 = new SpanTermQuery(new Term("field","seven")); + + SpanTermQuery t5 = new SpanTermQuery(new Term("field","seven")); + SpanTermQuery t6 = new SpanTermQuery(new Term("field","six")); + + SpanOrQuery to1 = new SpanOrQuery(t1, t3); + SpanOrQuery to2 = new SpanOrQuery(t5, t6); + + SpanNearQuery query = new SpanNearQuery(new SpanQuery[] {to1, to2}, + 10, true); + + checkHits(query, new int[] + {606, 607, 626, 627, 636, 637, 646, 647, 656, 657, 666, 667, 676, 677, + 686, 687, 696, 697, 706, 707, 726, 727, 736, 737, 746, 747, 756, + 757, 766, 767, 776, 777, 786, 787, 796, 797, 1606, 1607, 1626, + 1627, 1636, 1637, 1646, 1647, 1656, 1657, 1666, 1667, 1676, 1677, + 1686, 1687, 1696, 1697, 1706, 1707, 1726, 1727, 1736, 1737, + 1746, 1747, 1756, 1757, 1766, 1767, 1776, 1777, 1786, 1787, 1796, + 1797}); + } + + @Test + public void testSpanComplex1() throws Exception { + + SpanTermQuery t1 = new SpanTermQuery(new Term("field","six")); + SpanTermQuery t2 = new SpanTermQuery(new Term("field","hundred")); + SpanNearQuery tt1 = new SpanNearQuery(new SpanQuery[] {t1, t2}, 0,true); + + SpanTermQuery t3 = new SpanTermQuery(new Term("field","seven")); + SpanTermQuery t4 = new SpanTermQuery(new Term("field","hundred")); + SpanNearQuery tt2 = new SpanNearQuery(new SpanQuery[] {t3, t4}, 0,true); + + SpanTermQuery t5 = new SpanTermQuery(new 
Term("field","seven")); + SpanTermQuery t6 = new SpanTermQuery(new Term("field","six")); + + SpanOrQuery to1 = new SpanOrQuery(tt1, tt2); + SpanOrQuery to2 = new SpanOrQuery(t5, t6); + + SpanNearQuery query = new SpanNearQuery(new SpanQuery[] {to1, to2}, + 100, true); + + checkHits(query, new int[] + {606, 607, 626, 627, 636, 637, 646, 647, 656, 657, 666, 667, 676, 677, 686, 687, 696, + 697, 706, 707, 726, 727, 736, 737, 746, 747, 756, 757, + 766, 767, 776, 777, 786, 787, 796, 797, 1606, 1607, 1626, 1627, 1636, 1637, 1646, + 1647, 1656, 1657, + 1666, 1667, 1676, 1677, 1686, 1687, 1696, 1697, 1706, 1707, 1726, 1727, 1736, 1737, + 1746, 1747, 1756, 1757, 1766, 1767, 1776, 1777, 1786, 1787, 1796, 1797}); + } + + @Test + public void testSpansSkipTo() throws Exception { + SpanTermQuery t1 = new SpanTermQuery(new Term("field", "seventy")); + SpanTermQuery t2 = new SpanTermQuery(new Term("field", "seventy")); + Spans s1 = t1.getSpans(searcher.getIndexReader()); + Spans s2 = t2.getSpans(searcher.getIndexReader()); + + assertTrue(s1.next()); + assertTrue(s2.next()); + + boolean hasMore = true; + + do { + hasMore = skipToAccoringToJavaDocs(s1, s1.doc()); + assertEquals(hasMore, s2.skipTo(s2.doc())); + assertEquals(s1.doc(), s2.doc()); + } while (hasMore); + } + + /** Skips to the first match beyond the current, whose document number is + * greater than or equal to target.
<p>Returns true iff there is such
+ * a match. <p>Behaves as if written: <pre>
+   *   boolean skipTo(int target) {
+   *     do {
+   *       if (!next())
+   *         return false;
+   *     } while (target > doc());
+   *     return true;
+   *   }
+   * </pre>
+ */ + private boolean skipToAccoringToJavaDocs(Spans s, int target) + throws Exception { + do { + if (!s.next()) + return false; + } while (target > s.doc()); + return true; + + } + + private void checkHits(Query query, int[] results) throws IOException { + CheckHits.checkHits(random, query, "field", searcher, results); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java new file mode 100644 index 0000000..4bae375 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java @@ -0,0 +1,352 @@ +package org.apache.lucene.search.spans; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.HashSet; +import java.util.Set; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.CheckHits; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.QueryUtils; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; +import org.junit.AfterClass; +import org.junit.BeforeClass; + +public class TestFieldMaskingSpanQuery extends LuceneTestCase { + + protected static Document doc(Field[] fields) { + Document doc = new Document(); + for (int i = 0; i < fields.length; i++) { + doc.add(fields[i]); + } + return doc; + } + + protected static Field field(String name, String value) { + return newField(name, value, Field.Store.NO, Field.Index.ANALYZED); + } + + protected static IndexSearcher searcher; + protected static Directory directory; + protected static IndexReader reader; + + @BeforeClass + public static void beforeClass() throws Exception { + directory = newDirectory(); + RandomIndexWriter writer= new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); + + writer.addDocument(doc(new Field[] { field("id", "0") + , + field("gender", "male"), + field("first", "james"), + field("last", "jones") })); + + writer.addDocument(doc(new Field[] { field("id", "1") + , + field("gender", "male"), + field("first", "james"), + field("last", "smith") + , + field("gender", "female"), + field("first", "sally"), + field("last", "jones") })); + + writer.addDocument(doc(new Field[] { field("id", "2") + , + field("gender", "female"), + field("first", "greta"), + field("last", "jones") + , 
+ field("gender", "female"), + field("first", "sally"), + field("last", "smith") + , + field("gender", "male"), + field("first", "james"), + field("last", "jones") })); + + writer.addDocument(doc(new Field[] { field("id", "3") + , + field("gender", "female"), + field("first", "lisa"), + field("last", "jones") + , + field("gender", "male"), + field("first", "bob"), + field("last", "costas") })); + + writer.addDocument(doc(new Field[] { field("id", "4") + , + field("gender", "female"), + field("first", "sally"), + field("last", "smith") + , + field("gender", "female"), + field("first", "linda"), + field("last", "dixit") + , + field("gender", "male"), + field("first", "bubba"), + field("last", "jones") })); + reader = writer.getReader(); + writer.close(); + searcher = newSearcher(reader); + } + + @AfterClass + public static void afterClass() throws Exception { + searcher.close(); + searcher = null; + reader.close(); + reader = null; + directory.close(); + directory = null; + } + + protected void check(SpanQuery q, int[] docs) throws Exception { + CheckHits.checkHitCollector(random, q, null, searcher, docs); + } + + public void testRewrite0() throws Exception { + SpanQuery q = new FieldMaskingSpanQuery + (new SpanTermQuery(new Term("last", "sally")) , "first"); + q.setBoost(8.7654321f); + SpanQuery qr = (SpanQuery) searcher.rewrite(q); + + QueryUtils.checkEqual(q, qr); + + Set terms = new HashSet(); + qr.extractTerms(terms); + assertEquals(1, terms.size()); + } + + public void testRewrite1() throws Exception { + // mask an anon SpanQuery class that rewrites to something else. + SpanQuery q = new FieldMaskingSpanQuery + (new SpanTermQuery(new Term("last", "sally")) { + @Override + public Query rewrite(IndexReader reader) { + return new SpanOrQuery(new SpanQuery[] { + new SpanTermQuery(new Term("first", "sally")), + new SpanTermQuery(new Term("first", "james")) }); + } + }, "first"); + + SpanQuery qr = (SpanQuery) searcher.rewrite(q); + + QueryUtils.checkUnequal(q, qr); + + Set terms = new HashSet(); + qr.extractTerms(terms); + assertEquals(2, terms.size()); + } + + public void testRewrite2() throws Exception { + SpanQuery q1 = new SpanTermQuery(new Term("last", "smith")); + SpanQuery q2 = new SpanTermQuery(new Term("last", "jones")); + SpanQuery q = new SpanNearQuery(new SpanQuery[] + { q1, new FieldMaskingSpanQuery(q2, "last")}, 1, true ); + Query qr = searcher.rewrite(q); + + QueryUtils.checkEqual(q, qr); + + HashSet set = new HashSet(); + qr.extractTerms(set); + assertEquals(2, set.size()); + } + + public void testEquality1() { + SpanQuery q1 = new FieldMaskingSpanQuery + (new SpanTermQuery(new Term("last", "sally")) , "first"); + SpanQuery q2 = new FieldMaskingSpanQuery + (new SpanTermQuery(new Term("last", "sally")) , "first"); + SpanQuery q3 = new FieldMaskingSpanQuery + (new SpanTermQuery(new Term("last", "sally")) , "XXXXX"); + SpanQuery q4 = new FieldMaskingSpanQuery + (new SpanTermQuery(new Term("last", "XXXXX")) , "first"); + SpanQuery q5 = new FieldMaskingSpanQuery + (new SpanTermQuery(new Term("xXXX", "sally")) , "first"); + QueryUtils.checkEqual(q1, q2); + QueryUtils.checkUnequal(q1, q3); + QueryUtils.checkUnequal(q1, q4); + QueryUtils.checkUnequal(q1, q5); + + SpanQuery qA = new FieldMaskingSpanQuery + (new SpanTermQuery(new Term("last", "sally")) , "first"); + qA.setBoost(9f); + SpanQuery qB = new FieldMaskingSpanQuery + (new SpanTermQuery(new Term("last", "sally")) , "first"); + QueryUtils.checkUnequal(qA, qB); + qB.setBoost(9f); + QueryUtils.checkEqual(qA, qB); + + } + + 
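+ // A minimal sketch (variable names illustrative, mirroring the "first"/"last"
+ // fields indexed in beforeClass() above) of the pattern the remaining tests
+ // exercise: a FieldMaskingSpanQuery reports its wrapped spans under another
+ // field name, so span queries over distinct fields can be composed into a
+ // single SpanNearQuery, which otherwise requires every clause to target the
+ // same field.
+ //
+ //   SpanQuery james  = new SpanTermQuery(new Term("first", "james"));
+ //   SpanQuery jones  = new SpanTermQuery(new Term("last", "jones"));
+ //   SpanQuery masked = new FieldMaskingSpanQuery(jones, "first");
+ //   SpanQuery near   = new SpanNearQuery(new SpanQuery[] { james, masked }, -1, false);
+ 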
public void testNoop0() throws Exception { + SpanQuery q1 = new SpanTermQuery(new Term("last", "sally")); + SpanQuery q = new FieldMaskingSpanQuery(q1, "first"); + check(q, new int[] { /* :EMPTY: */ }); + } + public void testNoop1() throws Exception { + SpanQuery q1 = new SpanTermQuery(new Term("last", "smith")); + SpanQuery q2 = new SpanTermQuery(new Term("last", "jones")); + SpanQuery q = new SpanNearQuery(new SpanQuery[] + { q1, new FieldMaskingSpanQuery(q2, "last")}, 0, true ); + check(q, new int[] { 1, 2 }); + q = new SpanNearQuery(new SpanQuery[] + { new FieldMaskingSpanQuery(q1, "last"), + new FieldMaskingSpanQuery(q2, "last")}, 0, true ); + check(q, new int[] { 1, 2 }); + } + + public void testSimple1() throws Exception { + SpanQuery q1 = new SpanTermQuery(new Term("first", "james")); + SpanQuery q2 = new SpanTermQuery(new Term("last", "jones")); + SpanQuery q = new SpanNearQuery(new SpanQuery[] + { q1, new FieldMaskingSpanQuery(q2, "first")}, -1, false ); + check(q, new int[] { 0, 2 }); + q = new SpanNearQuery(new SpanQuery[] + { new FieldMaskingSpanQuery(q2, "first"), q1}, -1, false ); + check(q, new int[] { 0, 2 }); + q = new SpanNearQuery(new SpanQuery[] + { q2, new FieldMaskingSpanQuery(q1, "last")}, -1, false ); + check(q, new int[] { 0, 2 }); + q = new SpanNearQuery(new SpanQuery[] + { new FieldMaskingSpanQuery(q1, "last"), q2}, -1, false ); + check(q, new int[] { 0, 2 }); + + } + + public void testSimple2() throws Exception { + SpanQuery q1 = new SpanTermQuery(new Term("gender", "female")); + SpanQuery q2 = new SpanTermQuery(new Term("last", "smith")); + SpanQuery q = new SpanNearQuery(new SpanQuery[] + { q1, new FieldMaskingSpanQuery(q2, "gender")}, -1, false ); + check(q, new int[] { 2, 4 }); + q = new SpanNearQuery(new SpanQuery[] + { new FieldMaskingSpanQuery(q1, "id"), + new FieldMaskingSpanQuery(q2, "id") }, -1, false ); + check(q, new int[] { 2, 4 }); + } + + public void testSpans0() throws Exception { + SpanQuery q1 = new SpanTermQuery(new Term("gender", "female")); + SpanQuery q2 = new SpanTermQuery(new Term("first", "james")); + SpanQuery q = new SpanOrQuery(new SpanQuery[] + { q1, new FieldMaskingSpanQuery(q2, "gender")}); + check(q, new int[] { 0, 1, 2, 3, 4 }); + + Spans span = q.getSpans(searcher.getIndexReader()); + + assertEquals(true, span.next()); + assertEquals(s(0,0,1), s(span)); + + assertEquals(true, span.next()); + assertEquals(s(1,0,1), s(span)); + + assertEquals(true, span.next()); + assertEquals(s(1,1,2), s(span)); + + assertEquals(true, span.next()); + assertEquals(s(2,0,1), s(span)); + + assertEquals(true, span.next()); + assertEquals(s(2,1,2), s(span)); + + assertEquals(true, span.next()); + assertEquals(s(2,2,3), s(span)); + + assertEquals(true, span.next()); + assertEquals(s(3,0,1), s(span)); + + assertEquals(true, span.next()); + assertEquals(s(4,0,1), s(span)); + + assertEquals(true, span.next()); + assertEquals(s(4,1,2), s(span)); + + assertEquals(false, span.next()); + } + + public void testSpans1() throws Exception { + SpanQuery q1 = new SpanTermQuery(new Term("first", "sally")); + SpanQuery q2 = new SpanTermQuery(new Term("first", "james")); + SpanQuery qA = new SpanOrQuery(new SpanQuery[] { q1, q2 }); + SpanQuery qB = new FieldMaskingSpanQuery(qA, "id"); + + check(qA, new int[] { 0, 1, 2, 4 }); + check(qB, new int[] { 0, 1, 2, 4 }); + + Spans spanA = qA.getSpans(searcher.getIndexReader()); + Spans spanB = qB.getSpans(searcher.getIndexReader()); + + while (spanA.next()) { + assertTrue("spanB not still going", spanB.next()); + 
assertEquals("spanA not equal spanB", s(spanA), s(spanB)); + } + assertTrue("spanB still going even tough spanA is done", !(spanB.next())); + + } + + public void testSpans2() throws Exception { + SpanQuery qA1 = new SpanTermQuery(new Term("gender", "female")); + SpanQuery qA2 = new SpanTermQuery(new Term("first", "james")); + SpanQuery qA = new SpanOrQuery(new SpanQuery[] + { qA1, new FieldMaskingSpanQuery(qA2, "gender")}); + SpanQuery qB = new SpanTermQuery(new Term("last", "jones")); + SpanQuery q = new SpanNearQuery(new SpanQuery[] + { new FieldMaskingSpanQuery(qA, "id"), + new FieldMaskingSpanQuery(qB, "id") }, -1, false ); + check(q, new int[] { 0, 1, 2, 3 }); + + Spans span = q.getSpans(searcher.getIndexReader()); + + assertEquals(true, span.next()); + assertEquals(s(0,0,1), s(span)); + + assertEquals(true, span.next()); + assertEquals(s(1,1,2), s(span)); + + assertEquals(true, span.next()); + assertEquals(s(2,0,1), s(span)); + + assertEquals(true, span.next()); + assertEquals(s(2,2,3), s(span)); + + assertEquals(true, span.next()); + assertEquals(s(3,0,1), s(span)); + + assertEquals(false, span.next()); + } + + public String s(Spans span) { + return s(span.doc(), span.start(), span.end()); + } + public String s(int doc, int start, int end) { + return "s(" + doc + "," + start + "," + end +")"; + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java new file mode 100644 index 0000000..d3fe2db --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java @@ -0,0 +1,187 @@ +package org.apache.lucene.search.spans; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.analysis.MockAnalyzer; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.search.CheckHits; +import org.apache.lucene.search.Explanation; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Weight; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; + +public class TestNearSpansOrdered extends LuceneTestCase { + protected IndexSearcher searcher; + protected Directory directory; + protected IndexReader reader; + + public static final String FIELD = "field"; + public static final QueryParser qp = + new QueryParser(TEST_VERSION_CURRENT, FIELD, new MockAnalyzer(random)); + + @Override + public void tearDown() throws Exception { + searcher.close(); + reader.close(); + directory.close(); + super.tearDown(); + } + + @Override + public void setUp() throws Exception { + super.setUp(); + directory = newDirectory(); + RandomIndexWriter writer= new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); + for (int i = 0; i < docFields.length; i++) { + Document doc = new Document(); + doc.add(newField(FIELD, docFields[i], Field.Store.NO, Field.Index.ANALYZED)); + writer.addDocument(doc); + } + reader = writer.getReader(); + writer.close(); + searcher = newSearcher(reader); + } + + protected String[] docFields = { + "w1 w2 w3 w4 w5", + "w1 w3 w2 w3 zz", + "w1 xx w2 yy w3", + "w1 w3 xx w2 yy w3 zz" + }; + + protected SpanNearQuery makeQuery(String s1, String s2, String s3, + int slop, boolean inOrder) { + return new SpanNearQuery + (new SpanQuery[] { + new SpanTermQuery(new Term(FIELD, s1)), + new SpanTermQuery(new Term(FIELD, s2)), + new SpanTermQuery(new Term(FIELD, s3)) }, + slop, + inOrder); + } + protected SpanNearQuery makeQuery() { + return makeQuery("w1","w2","w3",1,true); + } + + public void testSpanNearQuery() throws Exception { + SpanNearQuery q = makeQuery(); + CheckHits.checkHits(random, q, FIELD, searcher, new int[] {0,1}); + } + + public String s(Spans span) { + return s(span.doc(), span.start(), span.end()); + } + public String s(int doc, int start, int end) { + return "s(" + doc + "," + start + "," + end +")"; + } + + public void testNearSpansNext() throws Exception { + SpanNearQuery q = makeQuery(); + Spans span = q.getSpans(searcher.getIndexReader()); + assertEquals(true, span.next()); + assertEquals(s(0,0,3), s(span)); + assertEquals(true, span.next()); + assertEquals(s(1,0,4), s(span)); + assertEquals(false, span.next()); + } + + /** + * test does not imply that skipTo(doc+1) should work exactly the + * same as next -- it's only applicable in this case since we know doc + * does not contain more than one span + */ + public void testNearSpansSkipToLikeNext() throws Exception { + SpanNearQuery q = makeQuery(); + Spans span = q.getSpans(searcher.getIndexReader()); + assertEquals(true, span.skipTo(0)); + assertEquals(s(0,0,3), s(span)); + assertEquals(true, span.skipTo(1)); + assertEquals(s(1,0,4), s(span)); + assertEquals(false, span.skipTo(2)); + } + + public void testNearSpansNextThenSkipTo() throws Exception { + SpanNearQuery q = makeQuery(); + Spans span = q.getSpans(searcher.getIndexReader()); + 
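+ // next() lands on the first span in doc 0; skipTo(1) must then advance straight
+ // to the first span with doc >= 1, and a final next() exhausts the enumeration.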
assertEquals(true, span.next()); + assertEquals(s(0,0,3), s(span)); + assertEquals(true, span.skipTo(1)); + assertEquals(s(1,0,4), s(span)); + assertEquals(false, span.next()); + } + + public void testNearSpansNextThenSkipPast() throws Exception { + SpanNearQuery q = makeQuery(); + Spans span = q.getSpans(searcher.getIndexReader()); + assertEquals(true, span.next()); + assertEquals(s(0,0,3), s(span)); + assertEquals(false, span.skipTo(2)); + } + + public void testNearSpansSkipPast() throws Exception { + SpanNearQuery q = makeQuery(); + Spans span = q.getSpans(searcher.getIndexReader()); + assertEquals(false, span.skipTo(2)); + } + + public void testNearSpansSkipTo0() throws Exception { + SpanNearQuery q = makeQuery(); + Spans span = q.getSpans(searcher.getIndexReader()); + assertEquals(true, span.skipTo(0)); + assertEquals(s(0,0,3), s(span)); + } + + public void testNearSpansSkipTo1() throws Exception { + SpanNearQuery q = makeQuery(); + Spans span = q.getSpans(searcher.getIndexReader()); + assertEquals(true, span.skipTo(1)); + assertEquals(s(1,0,4), s(span)); + } + + /** + * not a direct test of NearSpans, but a demonstration of how/when + * this causes problems + */ + public void testSpanNearScorerSkipTo1() throws Exception { + SpanNearQuery q = makeQuery(); + Weight w = searcher.createNormalizedWeight(q); + Scorer s = w.scorer(searcher.getIndexReader(), true, false); + assertEquals(1, s.advance(1)); + } + + /** + * not a direct test of NearSpans, but a demonstration of how/when + * this causes problems + */ + public void testSpanNearScorerExplain() throws Exception { + SpanNearQuery q = makeQuery(); + Explanation e = searcher.explain(q, 1); + assertTrue("Scorer explanation value for doc#1 isn't positive: " + + e.toString(), + 0.0f < e.getValue()); + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java new file mode 100644 index 0000000..d2793e6 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java @@ -0,0 +1,543 @@ +package org.apache.lucene.search.spans; + +/** + * Copyright 2004 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.io.Reader; +import java.io.StringReader; +import java.util.Collection; +import java.util.HashSet; +import java.util.Set; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.LowerCaseTokenizer; +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Payload; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.DefaultSimilarity; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.payloads.PayloadHelper; +import org.apache.lucene.search.payloads.PayloadSpanUtil; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.LockObtainFailedException; +import org.apache.lucene.util.LuceneTestCase; + +public class TestPayloadSpans extends LuceneTestCase { + private IndexSearcher searcher; + private Similarity similarity = new DefaultSimilarity(); + protected IndexReader indexReader; + private IndexReader closeIndexReader; + private Directory directory; + + @Override + public void setUp() throws Exception { + super.setUp(); + PayloadHelper helper = new PayloadHelper(); + searcher = helper.setUp(random, similarity, 1000); + indexReader = searcher.getIndexReader(); + } + + public void testSpanTermQuery() throws Exception { + SpanTermQuery stq; + Spans spans; + stq = new SpanTermQuery(new Term(PayloadHelper.FIELD, "seventy")); + spans = stq.getSpans(indexReader); + assertTrue("spans is null and it shouldn't be", spans != null); + checkSpans(spans, 100, 1, 1, 1); + + stq = new SpanTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "seventy")); + spans = stq.getSpans(indexReader); + assertTrue("spans is null and it shouldn't be", spans != null); + checkSpans(spans, 100, 0, 0, 0); + } + + public void testSpanFirst() throws IOException { + + SpanQuery match; + SpanFirstQuery sfq; + match = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one")); + sfq = new SpanFirstQuery(match, 2); + Spans spans = sfq.getSpans(indexReader); + checkSpans(spans, 109, 1, 1, 1); + //Test more complicated subclause + SpanQuery[] clauses = new SpanQuery[2]; + clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one")); + clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "hundred")); + match = new SpanNearQuery(clauses, 0, true); + sfq = new SpanFirstQuery(match, 2); + checkSpans(sfq.getSpans(indexReader), 100, 2, 1, 1); + + match = new SpanNearQuery(clauses, 0, false); + sfq = new SpanFirstQuery(match, 2); + checkSpans(sfq.getSpans(indexReader), 100, 2, 1, 1); + + } + + public void testSpanNot() throws Exception { + SpanQuery[] clauses = new SpanQuery[2]; + clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one")); + clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "three")); + SpanQuery spq = new SpanNearQuery(clauses, 5, true); + SpanNotQuery snq = new SpanNotQuery(spq, new SpanTermQuery(new 
Term(PayloadHelper.FIELD, "two"))); + + + + Directory directory = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, directory, + newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()).setSimilarity(similarity)); + + Document doc = new Document(); + doc.add(newField(PayloadHelper.FIELD, "one two three one four three", + Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(doc); + IndexReader reader = writer.getReader(); + writer.close(); + + checkSpans(snq.getSpans(reader), 1,new int[]{2}); + reader.close(); + directory.close(); + } + + public void testNestedSpans() throws Exception { + SpanTermQuery stq; + Spans spans; + IndexSearcher searcher = getSearcher(); + stq = new SpanTermQuery(new Term(PayloadHelper.FIELD, "mark")); + spans = stq.getSpans(searcher.getIndexReader()); + assertTrue("spans is null and it shouldn't be", spans != null); + checkSpans(spans, 0, null); + + + SpanQuery[] clauses = new SpanQuery[3]; + clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "rr")); + clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "yy")); + clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "xx")); + SpanNearQuery spanNearQuery = new SpanNearQuery(clauses, 12, false); + + spans = spanNearQuery.getSpans(searcher.getIndexReader()); + assertTrue("spans is null and it shouldn't be", spans != null); + checkSpans(spans, 2, new int[]{3,3}); + + + clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "xx")); + clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "rr")); + clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "yy")); + + spanNearQuery = new SpanNearQuery(clauses, 6, true); + + + spans = spanNearQuery.getSpans(searcher.getIndexReader()); + assertTrue("spans is null and it shouldn't be", spans != null); + checkSpans(spans, 1, new int[]{3}); + + clauses = new SpanQuery[2]; + + clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "xx")); + clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "rr")); + + spanNearQuery = new SpanNearQuery(clauses, 6, true); + + // xx within 6 of rr + + SpanQuery[] clauses2 = new SpanQuery[2]; + + clauses2[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "yy")); + clauses2[1] = spanNearQuery; + + SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses2, 6, false); + + // yy within 6 of xx within 6 of rr + + spans = nestedSpanNearQuery.getSpans(searcher.getIndexReader()); + assertTrue("spans is null and it shouldn't be", spans != null); + checkSpans(spans, 2, new int[]{3,3}); + searcher.close(); + closeIndexReader.close(); + directory.close(); + } + + public void testFirstClauseWithoutPayload() throws Exception { + Spans spans; + IndexSearcher searcher = getSearcher(); + + SpanQuery[] clauses = new SpanQuery[3]; + clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "nopayload")); + clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "qq")); + clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "ss")); + + SpanNearQuery spanNearQuery = new SpanNearQuery(clauses, 6, true); + + SpanQuery[] clauses2 = new SpanQuery[2]; + + clauses2[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "pp")); + clauses2[1] = spanNearQuery; + + SpanNearQuery snq = new SpanNearQuery(clauses2, 6, false); + + SpanQuery[] clauses3 = new SpanQuery[2]; + + clauses3[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "np")); + clauses3[1] = snq; + + SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses3, 6, false); + + spans = 
nestedSpanNearQuery.getSpans(searcher.getIndexReader()); + assertTrue("spans is null and it shouldn't be", spans != null); + checkSpans(spans, 1, new int[]{3}); + searcher.close(); + closeIndexReader.close(); + directory.close(); + } + + public void testHeavilyNestedSpanQuery() throws Exception { + Spans spans; + IndexSearcher searcher = getSearcher(); + + SpanQuery[] clauses = new SpanQuery[3]; + clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one")); + clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "two")); + clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "three")); + + SpanNearQuery spanNearQuery = new SpanNearQuery(clauses, 5, true); + + clauses = new SpanQuery[3]; + clauses[0] = spanNearQuery; + clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "five")); + clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "six")); + + SpanNearQuery spanNearQuery2 = new SpanNearQuery(clauses, 6, true); + + SpanQuery[] clauses2 = new SpanQuery[2]; + clauses2[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "eleven")); + clauses2[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "ten")); + SpanNearQuery spanNearQuery3 = new SpanNearQuery(clauses2, 2, false); + + SpanQuery[] clauses3 = new SpanQuery[3]; + clauses3[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "nine")); + clauses3[1] = spanNearQuery2; + clauses3[2] = spanNearQuery3; + + SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses3, 6, false); + + spans = nestedSpanNearQuery.getSpans(searcher.getIndexReader()); + assertTrue("spans is null and it shouldn't be", spans != null); + checkSpans(spans, 2, new int[]{8, 8}); + searcher.close(); + closeIndexReader.close(); + directory.close(); + } + + public void testShrinkToAfterShortestMatch() throws CorruptIndexException, + LockObtainFailedException, IOException { + Directory directory = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, directory, + newIndexWriterConfig(TEST_VERSION_CURRENT, new TestPayloadAnalyzer())); + + Document doc = new Document(); + doc.add(new Field("content", new StringReader("a b c d e f g h i j a k"))); + writer.addDocument(doc); + + IndexReader reader = writer.getReader(); + IndexSearcher is = newSearcher(reader); + writer.close(); + + SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a")); + SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k")); + SpanQuery[] sqs = { stq1, stq2 }; + SpanNearQuery snq = new SpanNearQuery(sqs, 1, true); + Spans spans = snq.getSpans(is.getIndexReader()); + + TopDocs topDocs = is.search(snq, 1); + Set payloadSet = new HashSet(); + for (int i = 0; i < topDocs.scoreDocs.length; i++) { + while (spans.next()) { + Collection payloads = spans.getPayload(); + + for (final byte [] payload : payloads) { + payloadSet.add(new String(payload)); + } + } + } + assertEquals(2, payloadSet.size()); + assertTrue(payloadSet.contains("a:Noise:10")); + assertTrue(payloadSet.contains("k:Noise:11")); + is.close(); + reader.close(); + directory.close(); + } + + public void testShrinkToAfterShortestMatch2() throws CorruptIndexException, + LockObtainFailedException, IOException { + Directory directory = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, directory, + newIndexWriterConfig(TEST_VERSION_CURRENT, new TestPayloadAnalyzer())); + + Document doc = new Document(); + doc.add(new Field("content", new StringReader("a b a d k f a h i k a k"))); + writer.addDocument(doc); + IndexReader reader = writer.getReader(); + 
IndexSearcher is = newSearcher(reader); + writer.close(); + + SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a")); + SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k")); + SpanQuery[] sqs = { stq1, stq2 }; + SpanNearQuery snq = new SpanNearQuery(sqs, 0, true); + Spans spans = snq.getSpans(is.getIndexReader()); + + TopDocs topDocs = is.search(snq, 1); + Set payloadSet = new HashSet(); + for (int i = 0; i < topDocs.scoreDocs.length; i++) { + while (spans.next()) { + Collection payloads = spans.getPayload(); + for (final byte[] payload : payloads) { + payloadSet.add(new String(payload)); + } + } + } + assertEquals(2, payloadSet.size()); + assertTrue(payloadSet.contains("a:Noise:10")); + assertTrue(payloadSet.contains("k:Noise:11")); + is.close(); + reader.close(); + directory.close(); + } + + public void testShrinkToAfterShortestMatch3() throws CorruptIndexException, + LockObtainFailedException, IOException { + Directory directory = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, directory, + newIndexWriterConfig(TEST_VERSION_CURRENT, new TestPayloadAnalyzer())); + + Document doc = new Document(); + doc.add(new Field("content", new StringReader("j k a l f k k p a t a k l k t a"))); + writer.addDocument(doc); + IndexReader reader = writer.getReader(); + IndexSearcher is = newSearcher(reader); + writer.close(); + + SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a")); + SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k")); + SpanQuery[] sqs = { stq1, stq2 }; + SpanNearQuery snq = new SpanNearQuery(sqs, 0, true); + Spans spans = snq.getSpans(is.getIndexReader()); + + TopDocs topDocs = is.search(snq, 1); + Set payloadSet = new HashSet(); + for (int i = 0; i < topDocs.scoreDocs.length; i++) { + while (spans.next()) { + Collection payloads = spans.getPayload(); + + for (final byte [] payload : payloads) { + payloadSet.add(new String(payload)); + } + } + } + assertEquals(2, payloadSet.size()); + if(VERBOSE) { + for (final String payload : payloadSet) + System.out.println("match:" + payload); + + } + assertTrue(payloadSet.contains("a:Noise:10")); + assertTrue(payloadSet.contains("k:Noise:11")); + is.close(); + reader.close(); + directory.close(); + } + + public void testPayloadSpanUtil() throws Exception { + Directory directory = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random, directory, + newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()).setSimilarity(similarity)); + + Document doc = new Document(); + doc.add(newField(PayloadHelper.FIELD,"xx rr yy mm pp", Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(doc); + + IndexReader reader = writer.getReader(); + writer.close(); + IndexSearcher searcher = newSearcher(reader); + + PayloadSpanUtil psu = new PayloadSpanUtil(searcher.getIndexReader()); + + Collection payloads = psu.getPayloadsForQuery(new TermQuery(new Term(PayloadHelper.FIELD, "rr"))); + if(VERBOSE) + System.out.println("Num payloads:" + payloads.size()); + for (final byte [] bytes : payloads) { + if(VERBOSE) + System.out.println(new String(bytes)); + } + searcher.close(); + reader.close(); + directory.close(); + } + + private void checkSpans(Spans spans, int expectedNumSpans, int expectedNumPayloads, + int expectedPayloadLength, int expectedFirstByte) throws IOException { + assertTrue("spans is null and it shouldn't be", spans != null); + //each position match should have a span associated with it, since there is just one underlying term query, there should + 
//only be one entry in the span + int seen = 0; + while (spans.next() == true) + { + //if we expect payloads, then isPayloadAvailable should be true + if (expectedNumPayloads > 0) { + assertTrue("isPayloadAvailable is not returning the correct value: " + spans.isPayloadAvailable() + + " and it should be: " + (expectedNumPayloads > 0), + spans.isPayloadAvailable() == true); + } else { + assertTrue("isPayloadAvailable should be false", spans.isPayloadAvailable() == false); + } + //See payload helper, for the PayloadHelper.FIELD field, there is a single byte payload at every token + if (spans.isPayloadAvailable()) { + Collection payload = spans.getPayload(); + assertTrue("payload Size: " + payload.size() + " is not: " + expectedNumPayloads, payload.size() == expectedNumPayloads); + for (final byte [] thePayload : payload) { + assertTrue("payload[0] Size: " + thePayload.length + " is not: " + expectedPayloadLength, + thePayload.length == expectedPayloadLength); + assertTrue(thePayload[0] + " does not equal: " + expectedFirstByte, thePayload[0] == expectedFirstByte); + + } + + } + seen++; + } + assertTrue(seen + " does not equal: " + expectedNumSpans, seen == expectedNumSpans); + } + + private IndexSearcher getSearcher() throws Exception { + directory = newDirectory(); + String[] docs = new String[]{"xx rr yy mm pp","xx yy mm rr pp", "nopayload qq ss pp np", "one two three four five six seven eight nine ten eleven", "nine one two three four five six seven eight eleven ten"}; + RandomIndexWriter writer = new RandomIndexWriter(random, directory, + newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()).setSimilarity(similarity)); + + Document doc = null; + for(int i = 0; i < docs.length; i++) { + doc = new Document(); + String docText = docs[i]; + doc.add(newField(PayloadHelper.FIELD,docText, Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(doc); + } + + closeIndexReader = writer.getReader(); + writer.close(); + + IndexSearcher searcher = newSearcher(closeIndexReader); + return searcher; + } + + private void checkSpans(Spans spans, int numSpans, int[] numPayloads) throws IOException { + int cnt = 0; + + while (spans.next() == true) { + if(VERBOSE) + System.out.println("\nSpans Dump --"); + if (spans.isPayloadAvailable()) { + Collection payload = spans.getPayload(); + if(VERBOSE) + System.out.println("payloads for span:" + payload.size()); + for (final byte [] bytes : payload) { + if(VERBOSE) + System.out.println("doc:" + spans.doc() + " s:" + spans.start() + " e:" + spans.end() + " " + + new String(bytes)); + } + + assertEquals(numPayloads[cnt],payload.size()); + } else { + assertFalse("Expected spans:" + numPayloads[cnt] + " found: 0",numPayloads.length > 0 && numPayloads[cnt] > 0 ); + } + cnt++; + } + + assertEquals(numSpans, cnt); + } + + final class PayloadAnalyzer extends Analyzer { + + @Override + public TokenStream tokenStream(String fieldName, Reader reader) { + TokenStream result = new LowerCaseTokenizer(TEST_VERSION_CURRENT, reader); + result = new PayloadFilter(result, fieldName); + return result; + } + } + + final class PayloadFilter extends TokenFilter { + String fieldName; + int numSeen = 0; + Set entities = new HashSet(); + Set nopayload = new HashSet(); + int pos; + PayloadAttribute payloadAtt; + CharTermAttribute termAtt; + PositionIncrementAttribute posIncrAtt; + + public PayloadFilter(TokenStream input, String fieldName) { + super(input); + this.fieldName = fieldName; + pos = 0; + entities.add("xx"); + entities.add("one"); + nopayload.add("nopayload"); + 
nopayload.add("np"); + termAtt = addAttribute(CharTermAttribute.class); + posIncrAtt = addAttribute(PositionIncrementAttribute.class); + payloadAtt = addAttribute(PayloadAttribute.class); + } + + @Override + public boolean incrementToken() throws IOException { + if (input.incrementToken()) { + String token = termAtt.toString(); + + if (!nopayload.contains(token)) { + if (entities.contains(token)) { + payloadAtt.setPayload(new Payload((token + ":Entity:"+ pos ).getBytes())); + } else { + payloadAtt.setPayload(new Payload((token + ":Noise:" + pos ).getBytes())); + } + } + pos += posIncrAtt.getPositionIncrement(); + return true; + } + return false; + } + } + + public final class TestPayloadAnalyzer extends Analyzer { + + @Override + public TokenStream tokenStream(String fieldName, Reader reader) { + TokenStream result = new LowerCaseTokenizer(TEST_VERSION_CURRENT, reader); + result = new PayloadFilter(result, fieldName); + return result; + } + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/spans/TestSpanExplanations.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/spans/TestSpanExplanations.java new file mode 100644 index 0000000..c5da9d5 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/spans/TestSpanExplanations.java @@ -0,0 +1,177 @@ +package org.apache.lucene.search.spans; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.search.*; + + +/** + * TestExplanations subclass focusing on span queries + */ +public class TestSpanExplanations extends TestExplanations { + + /* simple SpanTermQueries */ + + public void testST1() throws Exception { + SpanQuery q = st("w1"); + qtest(q, new int[] {0,1,2,3}); + } + public void testST2() throws Exception { + SpanQuery q = st("w1"); + q.setBoost(1000); + qtest(q, new int[] {0,1,2,3}); + } + public void testST4() throws Exception { + SpanQuery q = st("xx"); + qtest(q, new int[] {2,3}); + } + public void testST5() throws Exception { + SpanQuery q = st("xx"); + q.setBoost(1000); + qtest(q, new int[] {2,3}); + } + + /* some SpanFirstQueries */ + + public void testSF1() throws Exception { + SpanQuery q = sf(("w1"),1); + qtest(q, new int[] {0,1,2,3}); + } + public void testSF2() throws Exception { + SpanQuery q = sf(("w1"),1); + q.setBoost(1000); + qtest(q, new int[] {0,1,2,3}); + } + public void testSF4() throws Exception { + SpanQuery q = sf(("xx"),2); + qtest(q, new int[] {2}); + } + public void testSF5() throws Exception { + SpanQuery q = sf(("yy"),2); + qtest(q, new int[] { }); + } + public void testSF6() throws Exception { + SpanQuery q = sf(("yy"),4); + q.setBoost(1000); + qtest(q, new int[] {2}); + } + + /* some SpanOrQueries */ + + public void testSO1() throws Exception { + SpanQuery q = sor("w1","QQ"); + qtest(q, new int[] {0,1,2,3}); + } + public void testSO2() throws Exception { + SpanQuery q = sor("w1","w3","zz"); + qtest(q, new int[] {0,1,2,3}); + } + public void testSO3() throws Exception { + SpanQuery q = sor("w5","QQ","yy"); + qtest(q, new int[] {0,2,3}); + } + public void testSO4() throws Exception { + SpanQuery q = sor("w5","QQ","yy"); + qtest(q, new int[] {0,2,3}); + } + + + + /* some SpanNearQueries */ + + public void testSNear1() throws Exception { + SpanQuery q = snear("w1","QQ",100,true); + qtest(q, new int[] {}); + } + public void testSNear2() throws Exception { + SpanQuery q = snear("w1","xx",100,true); + qtest(q, new int[] {2,3}); + } + public void testSNear3() throws Exception { + SpanQuery q = snear("w1","xx",0,true); + qtest(q, new int[] {2}); + } + public void testSNear4() throws Exception { + SpanQuery q = snear("w1","xx",1,true); + qtest(q, new int[] {2,3}); + } + public void testSNear5() throws Exception { + SpanQuery q = snear("xx","w1",0,false); + qtest(q, new int[] {2}); + } + + public void testSNear6() throws Exception { + SpanQuery q = snear("w1","w2","QQ",100,true); + qtest(q, new int[] {}); + } + public void testSNear7() throws Exception { + SpanQuery q = snear("w1","xx","w2",100,true); + qtest(q, new int[] {2,3}); + } + public void testSNear8() throws Exception { + SpanQuery q = snear("w1","xx","w2",0,true); + qtest(q, new int[] {2}); + } + public void testSNear9() throws Exception { + SpanQuery q = snear("w1","xx","w2",1,true); + qtest(q, new int[] {2,3}); + } + public void testSNear10() throws Exception { + SpanQuery q = snear("xx","w1","w2",0,false); + qtest(q, new int[] {2}); + } + public void testSNear11() throws Exception { + SpanQuery q = snear("w1","w2","w3",1,true); + qtest(q, new int[] {0,1}); + } + + + /* some SpanNotQueries */ + + public void testSNot1() throws Exception { + SpanQuery q = snot(sf("w1",10),st("QQ")); + qtest(q, new int[] {0,1,2,3}); + } + public void testSNot2() throws Exception { + SpanQuery q = snot(sf("w1",10),st("QQ")); + q.setBoost(1000); + qtest(q, new int[] {0,1,2,3}); + } + public void testSNot4() throws Exception { + SpanQuery q = snot(sf("w1",10),st("xx")); + qtest(q, 
new int[] {0,1,2,3}); + } + public void testSNot5() throws Exception { + SpanQuery q = snot(sf("w1",10),st("xx")); + q.setBoost(1000); + qtest(q, new int[] {0,1,2,3}); + } + public void testSNot7() throws Exception { + SpanQuery f = snear("w1","w3",10,true); + f.setBoost(1000); + SpanQuery q = snot(f, st("xx")); + qtest(q, new int[] {0,1,3}); + } + public void testSNot10() throws Exception { + SpanQuery t = st("xx"); + t.setBoost(10000); + SpanQuery q = snot(snear("w1","w3",10,true), t); + qtest(q, new int[] {0,1,3}); + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/spans/TestSpanExplanationsOfNonMatches.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/spans/TestSpanExplanationsOfNonMatches.java new file mode 100644 index 0000000..ed6406d --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/spans/TestSpanExplanationsOfNonMatches.java @@ -0,0 +1,41 @@ +package org.apache.lucene.search.spans; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements.  See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.  You may obtain a copy of the License at + * + *     http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +import org.apache.lucene.search.Query; +import org.apache.lucene.search.CheckHits; + + +/** + * subclass of TestSpanExplanations that verifies non-matches. + */ +public class TestSpanExplanationsOfNonMatches + extends TestSpanExplanations { + + /** + * Overrides superclass to ignore matches and focus on non-matches + * + * @see CheckHits#checkNoMatchExplanations + */ + @Override + public void qtest(Query q, int[] expDocNrs) throws Exception { + CheckHits.checkNoMatchExplanations(q, FIELD, searcher, expDocNrs); + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/spans/TestSpanFirstQuery.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/spans/TestSpanFirstQuery.java new file mode 100644 index 0000000..f91998d --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/spans/TestSpanFirstQuery.java @@ -0,0 +1,63 @@ +package org.apache.lucene.search.spans; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements.  See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.  You may obtain a copy of the License at + * + *     http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.StopAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; + +public class TestSpanFirstQuery extends LuceneTestCase { + public void testStartPositions() throws Exception { + Directory dir = newDirectory(); + + // mimic StopAnalyzer + Analyzer analyzer = new StopAnalyzer(TEST_VERSION_CURRENT); + + RandomIndexWriter writer = new RandomIndexWriter(random, dir, analyzer); + Document doc = new Document(); + doc.add(newField("field", "the quick brown fox", Field.Index.ANALYZED)); + writer.addDocument(doc); + Document doc2 = new Document(); + doc2.add(newField("field", "quick brown fox", Field.Index.ANALYZED)); + writer.addDocument(doc2); + + IndexReader reader = writer.getReader(); + IndexSearcher searcher = newSearcher(reader); + + // user queries on "starts-with quick" + SpanQuery sfq = new SpanFirstQuery(new SpanTermQuery(new Term("field", "quick")), 1); + assertEquals(1, searcher.search(sfq, 10).totalHits); + + // user queries on "starts-with the quick" + SpanQuery include = new SpanFirstQuery(new SpanTermQuery(new Term("field", "quick")), 2); + sfq = new SpanNotQuery(include, sfq); + assertEquals(1, searcher.search(sfq, 10).totalHits); + + writer.close(); + searcher.close(); + reader.close(); + dir.close(); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/spans/TestSpanMultiTermQueryWrapper.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/spans/TestSpanMultiTermQueryWrapper.java new file mode 100644 index 0000000..9470788 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/spans/TestSpanMultiTermQueryWrapper.java @@ -0,0 +1,100 @@ +package org.apache.lucene.search.spans; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Searcher; +import org.apache.lucene.search.WildcardQuery; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; + +/** + * Tests for {@link SpanMultiTermQueryWrapper}, wrapping a few MultiTermQueries. + */ +public class TestSpanMultiTermQueryWrapper extends LuceneTestCase { + private Directory directory; + private IndexReader reader; + private Searcher searcher; + + @Override + public void setUp() throws Exception { + super.setUp(); + directory = newDirectory(); + RandomIndexWriter iw = new RandomIndexWriter(random, directory); + Document doc = new Document(); + Field field = newField("field", "", Field.Store.NO, Field.Index.ANALYZED); + doc.add(field); + + field.setValue("quick brown fox"); + iw.addDocument(doc); + field.setValue("jumps over lazy broun dog"); + iw.addDocument(doc); + field.setValue("jumps over extremely very lazy broxn dog"); + iw.addDocument(doc); + reader = iw.getReader(); + iw.close(); + searcher = newSearcher(reader); + } + + @Override + public void tearDown() throws Exception { + searcher.close(); + reader.close(); + directory.close(); + super.tearDown(); + } + + public void testWildcard() throws Exception { + WildcardQuery wq = new WildcardQuery(new Term("field", "bro?n")); + SpanQuery swq = new SpanMultiTermQueryWrapper<WildcardQuery>(wq); + // will only match quick brown fox + SpanFirstQuery sfq = new SpanFirstQuery(swq, 2); + assertEquals(1, searcher.search(sfq, 10).totalHits); + } + + public void testPrefix() throws Exception { + WildcardQuery wq = new WildcardQuery(new Term("field", "extrem*")); + SpanQuery swq = new SpanMultiTermQueryWrapper<WildcardQuery>(wq); + // will only match "jumps over extremely very lazy broxn dog" + SpanFirstQuery sfq = new SpanFirstQuery(swq, 3); + assertEquals(1, searcher.search(sfq, 10).totalHits); + } + + public void testFuzzy() throws Exception { + FuzzyQuery fq = new FuzzyQuery(new Term("field", "broan")); + SpanQuery sfq = new SpanMultiTermQueryWrapper<FuzzyQuery>(fq); + // will not match quick brown fox + SpanPositionRangeQuery sprq = new SpanPositionRangeQuery(sfq, 3, 6); + assertEquals(2, searcher.search(sprq, 10).totalHits); + } + + public void testFuzzy2() throws Exception { + // maximum of 1 term expansion + FuzzyQuery fq = new FuzzyQuery(new Term("field", "broan"), 0.5f, 0, 1); + SpanQuery sfq = new SpanMultiTermQueryWrapper<FuzzyQuery>(fq); + // will only match jumps over lazy broun dog + SpanPositionRangeQuery sprq = new SpanPositionRangeQuery(sfq, 0, 100); + assertEquals(1, searcher.search(sprq, 10).totalHits); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/spans/TestSpans.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/spans/TestSpans.java new file mode 100644 index 0000000..6610124 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/spans/TestSpans.java @@ -0,0 +1,492 @@ +package org.apache.lucene.search.spans; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements.  See the NOTICE file distributed with + * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.CheckHits; +import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.DefaultSimilarity; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.Searcher; +import org.apache.lucene.store.Directory; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.util.LuceneTestCase; +import java.io.IOException; + +public class TestSpans extends LuceneTestCase { + private IndexSearcher searcher; + private IndexReader reader; + private Directory directory; + + public static final String field = "field"; + + @Override + public void setUp() throws Exception { + super.setUp(); + directory = newDirectory(); + RandomIndexWriter writer= new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); + for (int i = 0; i < docFields.length; i++) { + Document doc = new Document(); + doc.add(newField(field, docFields[i], Field.Store.YES, Field.Index.ANALYZED)); + writer.addDocument(doc); + } + reader = writer.getReader(); + writer.close(); + searcher = newSearcher(reader); + } + + @Override + public void tearDown() throws Exception { + searcher.close(); + reader.close(); + directory.close(); + super.tearDown(); + } + + private String[] docFields = { + "w1 w2 w3 w4 w5", + "w1 w3 w2 w3", + "w1 xx w2 yy w3", + "w1 w3 xx w2 yy w3", + "u2 u2 u1", + "u2 xx u2 u1", + "u2 u2 xx u1", + "u2 xx u2 yy u1", + "u2 xx u1 u2", + "u2 u1 xx u2", + "u1 u2 xx u2", + "t1 t2 t1 t3 t2 t3" + }; + + public SpanTermQuery makeSpanTermQuery(String text) { + return new SpanTermQuery(new Term(field, text)); + } + + private void checkHits(Query query, int[] results) throws IOException { + CheckHits.checkHits(random, query, field, searcher, results); + } + + private void orderedSlopTest3SQ( + SpanQuery q1, + SpanQuery q2, + SpanQuery q3, + int slop, + int[] expectedDocs) throws IOException { + boolean ordered = true; + SpanNearQuery snq = new SpanNearQuery( new SpanQuery[]{q1,q2,q3}, slop, ordered); + checkHits(snq, expectedDocs); + } + + public void orderedSlopTest3(int slop, int[] expectedDocs) throws IOException { + orderedSlopTest3SQ( + makeSpanTermQuery("w1"), + makeSpanTermQuery("w2"), + makeSpanTermQuery("w3"), + slop, + expectedDocs); + } + + public void orderedSlopTest3Equal(int slop, int[] expectedDocs) throws IOException { + orderedSlopTest3SQ( + makeSpanTermQuery("w1"), + 
makeSpanTermQuery("w3"), + makeSpanTermQuery("w3"), + slop, + expectedDocs); + } + + public void orderedSlopTest1Equal(int slop, int[] expectedDocs) throws IOException { + orderedSlopTest3SQ( + makeSpanTermQuery("u2"), + makeSpanTermQuery("u2"), + makeSpanTermQuery("u1"), + slop, + expectedDocs); + } + + public void testSpanNearOrdered01() throws Exception { + orderedSlopTest3(0, new int[] {0}); + } + + public void testSpanNearOrdered02() throws Exception { + orderedSlopTest3(1, new int[] {0,1}); + } + + public void testSpanNearOrdered03() throws Exception { + orderedSlopTest3(2, new int[] {0,1,2}); + } + + public void testSpanNearOrdered04() throws Exception { + orderedSlopTest3(3, new int[] {0,1,2,3}); + } + + public void testSpanNearOrdered05() throws Exception { + orderedSlopTest3(4, new int[] {0,1,2,3}); + } + + public void testSpanNearOrderedEqual01() throws Exception { + orderedSlopTest3Equal(0, new int[] {}); + } + + public void testSpanNearOrderedEqual02() throws Exception { + orderedSlopTest3Equal(1, new int[] {1}); + } + + public void testSpanNearOrderedEqual03() throws Exception { + orderedSlopTest3Equal(2, new int[] {1}); + } + + public void testSpanNearOrderedEqual04() throws Exception { + orderedSlopTest3Equal(3, new int[] {1,3}); + } + + public void testSpanNearOrderedEqual11() throws Exception { + orderedSlopTest1Equal(0, new int[] {4}); + } + + public void testSpanNearOrderedEqual12() throws Exception { + orderedSlopTest1Equal(0, new int[] {4}); + } + + public void testSpanNearOrderedEqual13() throws Exception { + orderedSlopTest1Equal(1, new int[] {4,5,6}); + } + + public void testSpanNearOrderedEqual14() throws Exception { + orderedSlopTest1Equal(2, new int[] {4,5,6,7}); + } + + public void testSpanNearOrderedEqual15() throws Exception { + orderedSlopTest1Equal(3, new int[] {4,5,6,7}); + } + + public void testSpanNearOrderedOverlap() throws Exception { + boolean ordered = true; + int slop = 1; + SpanNearQuery snq = new SpanNearQuery( + new SpanQuery[] { + makeSpanTermQuery("t1"), + makeSpanTermQuery("t2"), + makeSpanTermQuery("t3") }, + slop, + ordered); + Spans spans = snq.getSpans(searcher.getIndexReader()); + + assertTrue("first range", spans.next()); + assertEquals("first doc", 11, spans.doc()); + assertEquals("first start", 0, spans.start()); + assertEquals("first end", 4, spans.end()); + + assertTrue("second range", spans.next()); + assertEquals("second doc", 11, spans.doc()); + assertEquals("second start", 2, spans.start()); + assertEquals("second end", 6, spans.end()); + + assertFalse("third range", spans.next()); + } + + + public void testSpanNearUnOrdered() throws Exception { + + //See http://www.gossamer-threads.com/lists/lucene/java-dev/52270 for discussion about this test + SpanNearQuery snq; + snq = new SpanNearQuery( + new SpanQuery[] { + makeSpanTermQuery("u1"), + makeSpanTermQuery("u2") }, + 0, + false); + Spans spans = snq.getSpans(searcher.getIndexReader()); + assertTrue("Does not have next and it should", spans.next()); + assertEquals("doc", 4, spans.doc()); + assertEquals("start", 1, spans.start()); + assertEquals("end", 3, spans.end()); + + assertTrue("Does not have next and it should", spans.next()); + assertEquals("doc", 5, spans.doc()); + assertEquals("start", 2, spans.start()); + assertEquals("end", 4, spans.end()); + + assertTrue("Does not have next and it should", spans.next()); + assertEquals("doc", 8, spans.doc()); + assertEquals("start", 2, spans.start()); + assertEquals("end", 4, spans.end()); + + assertTrue("Does not have next and it 
should", spans.next()); + assertEquals("doc", 9, spans.doc()); + assertEquals("start", 0, spans.start()); + assertEquals("end", 2, spans.end()); + + assertTrue("Does not have next and it should", spans.next()); + assertEquals("doc", 10, spans.doc()); + assertEquals("start", 0, spans.start()); + assertEquals("end", 2, spans.end()); + assertTrue("Has next and it shouldn't: " + spans.doc(), spans.next() == false); + + SpanNearQuery u1u2 = new SpanNearQuery(new SpanQuery[]{makeSpanTermQuery("u1"), + makeSpanTermQuery("u2")}, 0, false); + snq = new SpanNearQuery( + new SpanQuery[] { + u1u2, + makeSpanTermQuery("u2") + }, + 1, + false); + spans = snq.getSpans(searcher.getIndexReader()); + assertTrue("Does not have next and it should", spans.next()); + assertEquals("doc", 4, spans.doc()); + assertEquals("start", 0, spans.start()); + assertEquals("end", 3, spans.end()); + + assertTrue("Does not have next and it should", spans.next()); + //unordered spans can be subsets + assertEquals("doc", 4, spans.doc()); + assertEquals("start", 1, spans.start()); + assertEquals("end", 3, spans.end()); + + assertTrue("Does not have next and it should", spans.next()); + assertEquals("doc", 5, spans.doc()); + assertEquals("start", 0, spans.start()); + assertEquals("end", 4, spans.end()); + + assertTrue("Does not have next and it should", spans.next()); + assertEquals("doc", 5, spans.doc()); + assertEquals("start", 2, spans.start()); + assertEquals("end", 4, spans.end()); + + assertTrue("Does not have next and it should", spans.next()); + assertEquals("doc", 8, spans.doc()); + assertEquals("start", 0, spans.start()); + assertEquals("end", 4, spans.end()); + + + assertTrue("Does not have next and it should", spans.next()); + assertEquals("doc", 8, spans.doc()); + assertEquals("start", 2, spans.start()); + assertEquals("end", 4, spans.end()); + + assertTrue("Does not have next and it should", spans.next()); + assertEquals("doc", 9, spans.doc()); + assertEquals("start", 0, spans.start()); + assertEquals("end", 2, spans.end()); + + assertTrue("Does not have next and it should", spans.next()); + assertEquals("doc", 9, spans.doc()); + assertEquals("start", 0, spans.start()); + assertEquals("end", 4, spans.end()); + + assertTrue("Does not have next and it should", spans.next()); + assertEquals("doc", 10, spans.doc()); + assertEquals("start", 0, spans.start()); + assertEquals("end", 2, spans.end()); + + assertTrue("Has next and it shouldn't", spans.next() == false); + } + + + + private Spans orSpans(String[] terms) throws Exception { + SpanQuery[] sqa = new SpanQuery[terms.length]; + for (int i = 0; i < terms.length; i++) { + sqa[i] = makeSpanTermQuery(terms[i]); + } + return (new SpanOrQuery(sqa)).getSpans(searcher.getIndexReader()); + } + + private void tstNextSpans(Spans spans, int doc, int start, int end) + throws Exception { + assertTrue("next", spans.next()); + assertEquals("doc", doc, spans.doc()); + assertEquals("start", start, spans.start()); + assertEquals("end", end, spans.end()); + } + + public void testSpanOrEmpty() throws Exception { + Spans spans = orSpans(new String[0]); + assertFalse("empty next", spans.next()); + + SpanOrQuery a = new SpanOrQuery( new SpanQuery[0] ); + SpanOrQuery b = new SpanOrQuery( new SpanQuery[0] ); + assertTrue("empty should equal", a.equals(b)); + } + + public void testSpanOrSingle() throws Exception { + Spans spans = orSpans(new String[] {"w5"}); + tstNextSpans(spans, 0, 4, 5); + assertFalse("final next", spans.next()); + } + + public void testSpanOrMovesForward() throws Exception 
{ + Spans spans = orSpans(new String[] {"w1", "xx"}); + + spans.next(); + int doc = spans.doc(); + assertEquals(0, doc); + + spans.skipTo(0); + doc = spans.doc(); + + // LUCENE-1583: + // according to Spans, a skipTo to the same doc or less + // should still call next() on the underlying Spans + assertEquals(1, doc); + + } + + public void testSpanOrDouble() throws Exception { + Spans spans = orSpans(new String[] {"w5", "yy"}); + tstNextSpans(spans, 0, 4, 5); + tstNextSpans(spans, 2, 3, 4); + tstNextSpans(spans, 3, 4, 5); + tstNextSpans(spans, 7, 3, 4); + assertFalse("final next", spans.next()); + } + + public void testSpanOrDoubleSkip() throws Exception { + Spans spans = orSpans(new String[] {"w5", "yy"}); + assertTrue("initial skipTo", spans.skipTo(3)); + assertEquals("doc", 3, spans.doc()); + assertEquals("start", 4, spans.start()); + assertEquals("end", 5, spans.end()); + tstNextSpans(spans, 7, 3, 4); + assertFalse("final next", spans.next()); + } + + public void testSpanOrUnused() throws Exception { + Spans spans = orSpans(new String[] {"w5", "unusedTerm", "yy"}); + tstNextSpans(spans, 0, 4, 5); + tstNextSpans(spans, 2, 3, 4); + tstNextSpans(spans, 3, 4, 5); + tstNextSpans(spans, 7, 3, 4); + assertFalse("final next", spans.next()); + } + + public void testSpanOrTripleSameDoc() throws Exception { + Spans spans = orSpans(new String[] {"t1", "t2", "t3"}); + tstNextSpans(spans, 11, 0, 1); + tstNextSpans(spans, 11, 1, 2); + tstNextSpans(spans, 11, 2, 3); + tstNextSpans(spans, 11, 3, 4); + tstNextSpans(spans, 11, 4, 5); + tstNextSpans(spans, 11, 5, 6); + assertFalse("final next", spans.next()); + } + + public void testSpanScorerZeroSloppyFreq() throws Exception { + boolean ordered = true; + int slop = 1; + + final Similarity sim = new DefaultSimilarity() { + @Override + public float sloppyFreq(int distance) { + return 0.0f; + } + }; + + SpanNearQuery snq = new SpanNearQuery( + new SpanQuery[] { + makeSpanTermQuery("t1"), + makeSpanTermQuery("t2") }, + slop, + ordered) { + @Override + public Similarity getSimilarity(Searcher s) { + return sim; + } + }; + + Scorer spanScorer = searcher.createNormalizedWeight(snq).scorer(searcher.getIndexReader(), true, false); + + assertTrue("first doc", spanScorer.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); + assertEquals("first doc number", spanScorer.docID(), 11); + float score = spanScorer.score(); + assertTrue("first doc score should be zero, " + score, score == 0.0f); + assertTrue("no second doc", spanScorer.nextDoc() == DocIdSetIterator.NO_MORE_DOCS); + } + + // LUCENE-1404 + private void addDoc(IndexWriter writer, String id, String text) throws IOException { + final Document doc = new Document(); + doc.add( newField("id", id, Field.Store.YES, Field.Index.NOT_ANALYZED) ); + doc.add( newField("text", text, Field.Store.YES, Field.Index.ANALYZED) ); + writer.addDocument(doc); + } + + // LUCENE-1404 + private int hitCount(Searcher searcher, String word) throws Throwable { + return searcher.search(new TermQuery(new Term("text", word)), 10).totalHits; + } + + // LUCENE-1404 + private SpanQuery createSpan(String value) { + return new SpanTermQuery(new Term("text", value)); + } + + // LUCENE-1404 + private SpanQuery createSpan(int slop, boolean ordered, SpanQuery[] clauses) { + return new SpanNearQuery(clauses, slop, ordered); + } + + // LUCENE-1404 + private SpanQuery createSpan(int slop, boolean ordered, String term1, String term2) { + return createSpan(slop, ordered, new SpanQuery[] {createSpan(term1), createSpan(term2)}); + } + + // LUCENE-1404 + public 
void testNPESpanQuery() throws Throwable { + final Directory dir = newDirectory(); + final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random))); + + // Add documents + addDoc(writer, "1", "the big dogs went running to the market"); + addDoc(writer, "2", "the cat chased the mouse, then the cat ate the mouse quickly"); + + // Commit + writer.close(); + + // Get searcher + final IndexReader reader = IndexReader.open(dir, true); + final IndexSearcher searcher = newSearcher(reader); + + // Control (make sure docs indexed) + assertEquals(2, hitCount(searcher, "the")); + assertEquals(1, hitCount(searcher, "cat")); + assertEquals(1, hitCount(searcher, "dogs")); + assertEquals(0, hitCount(searcher, "rabbit")); + + // This throws exception (it shouldn't) + assertEquals(1, + searcher.search(createSpan(0, true, + new SpanQuery[] {createSpan(4, false, "chased", "cat"), + createSpan("ate")}), 10).totalHits); + searcher.close(); + reader.close(); + dir.close(); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/spans/TestSpansAdvanced.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/spans/TestSpansAdvanced.java new file mode 100644 index 0000000..ce5c95f --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/spans/TestSpansAdvanced.java @@ -0,0 +1,175 @@ +package org.apache.lucene.search.spans; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.util.LuceneTestCase; + +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.*; +import org.apache.lucene.store.Directory; + +/******************************************************************************* + * Tests the span query bug in Lucene. It demonstrates that SpanTermQuerys don't + * work correctly in a BooleanQuery. + * + */ +public class TestSpansAdvanced extends LuceneTestCase { + + // location to the index + protected Directory mDirectory; + protected IndexReader reader; + protected IndexSearcher searcher; + + // field names in the index + private final static String FIELD_ID = "ID"; + protected final static String FIELD_TEXT = "TEXT"; + + /** + * Initializes the tests by adding 4 identical documents to the index. 
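+ * (Since the four documents hold identical text, a given query should score them identically.)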
+ */ + @Override + public void setUp() throws Exception { + super.setUp(); + // create test index + mDirectory = newDirectory(); + final RandomIndexWriter writer = new RandomIndexWriter(random, + mDirectory, newIndexWriterConfig(TEST_VERSION_CURRENT, + new StandardAnalyzer(TEST_VERSION_CURRENT)).setMergePolicy(newLogMergePolicy())); + + addDocument(writer, "1", "I think it should work."); + addDocument(writer, "2", "I think it should work."); + addDocument(writer, "3", "I think it should work."); + addDocument(writer, "4", "I think it should work."); + reader = writer.getReader(); + writer.close(); + searcher = newSearcher(reader); + } + + @Override + public void tearDown() throws Exception { + searcher.close(); + reader.close(); + mDirectory.close(); + mDirectory = null; + super.tearDown(); + } + + /** + * Adds the document to the index. + * + * @param writer the Lucene index writer + * @param id the unique id of the document + * @param text the text of the document + * @throws IOException + */ + protected void addDocument(final RandomIndexWriter writer, final String id, + final String text) throws IOException { + + final Document document = new Document(); + document.add(newField(FIELD_ID, id, Field.Store.YES, + Field.Index.NOT_ANALYZED)); + document.add(newField(FIELD_TEXT, text, Field.Store.YES, + Field.Index.ANALYZED)); + writer.addDocument(document); + } + + /** + * Tests two span queries. + * + * @throws IOException + */ + public void testBooleanQueryWithSpanQueries() throws IOException { + + doTestBooleanQueryWithSpanQueries(searcher, 0.3884282f); + } + + /** + * Tests two span queries. + * + * @throws IOException + */ + protected void doTestBooleanQueryWithSpanQueries(IndexSearcher s, + final float expectedScore) throws IOException { + + final Query spanQuery = new SpanTermQuery(new Term(FIELD_TEXT, "work")); + final BooleanQuery query = new BooleanQuery(); + query.add(spanQuery, BooleanClause.Occur.MUST); + query.add(spanQuery, BooleanClause.Occur.MUST); + final String[] expectedIds = new String[] {"1", "2", "3", "4"}; + final float[] expectedScores = new float[] {expectedScore, expectedScore, + expectedScore, expectedScore}; + assertHits(s, query, "two span queries", expectedIds, expectedScores); + } + + /** + * Checks to see if the hits are what we expected. 
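+ * Scores are compared within a small tolerance and cross-checked against explain() output.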
+ * + * @param query the query to execute + * @param description the description of the search + * @param expectedIds the expected document ids of the hits + * @param expectedScores the expected scores of the hits + * + * @throws IOException + */ + protected static void assertHits(Searcher s, Query query, + final String description, final String[] expectedIds, + final float[] expectedScores) throws IOException { + QueryUtils.check(random, query, s); + + final float tolerance = 1e-5f; + + // Hits hits = searcher.search(query); + // hits normalizes and throws things off if one score is greater than 1.0 + TopDocs topdocs = s.search(query, null, 10000); + + /***** + * // display the hits System.out.println(hits.length() + + * " hits for search: \"" + description + '\"'); for (int i = 0; i < + * hits.length(); i++) { System.out.println(" " + FIELD_ID + ':' + + * hits.doc(i).get(FIELD_ID) + " (score:" + hits.score(i) + ')'); } + *****/ + + // did we get the hits we expected + assertEquals(expectedIds.length, topdocs.totalHits); + for (int i = 0; i < topdocs.totalHits; i++) { + // System.out.println(i + " exp: " + expectedIds[i]); + // System.out.println(i + " field: " + hits.doc(i).get(FIELD_ID)); + + int id = topdocs.scoreDocs[i].doc; + float score = topdocs.scoreDocs[i].score; + Document doc = s.doc(id); + assertEquals(expectedIds[i], doc.get(FIELD_ID)); + boolean scoreEq = Math.abs(expectedScores[i] - score) < tolerance; + if (!scoreEq) { + System.out.println(i + " warning, expected score: " + expectedScores[i] + + ", actual " + score); + System.out.println(s.explain(query, id)); + } + assertEquals(expectedScores[i], score, tolerance); + assertEquals(s.explain(query, id).getValue(), score, tolerance); + } + } + +} \ No newline at end of file diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/spans/TestSpansAdvanced2.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/spans/TestSpansAdvanced2.java new file mode 100644 index 0000000..27c3ca1 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/spans/TestSpansAdvanced2.java @@ -0,0 +1,124 @@ +package org.apache.lucene.search.spans; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.search.*; + +/******************************************************************************* + * Some expanded tests to make sure my patch doesn't break other SpanTermQuery + * functionality. 
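+ * (Note: the expected scores in these tests reflect the post-LUCENE-413 scoring; the pre-LUCENE-413 values are kept in inline comments.)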
+ * + */ +public class TestSpansAdvanced2 extends TestSpansAdvanced { + IndexSearcher searcher2; + IndexReader reader2; + + /** + * Initializes the tests by adding documents to the index. + */ + @Override + public void setUp() throws Exception { + super.setUp(); + + // create test index + final RandomIndexWriter writer = new RandomIndexWriter(random, mDirectory, + newIndexWriterConfig(TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT)) + .setOpenMode(OpenMode.APPEND).setMergePolicy(newLogMergePolicy())); + addDocument(writer, "A", "Should we, could we, would we?"); + addDocument(writer, "B", "It should. Should it?"); + addDocument(writer, "C", "It shouldn't."); + addDocument(writer, "D", "Should we, should we, should we."); + reader2 = writer.getReader(); + writer.close(); + + // re-open the searcher since we added more docs + searcher2 = newSearcher(reader2); + } + + @Override + public void tearDown() throws Exception { + searcher2.close(); + reader2.close(); + super.tearDown(); + } + + /** + * Verifies that the index has the correct number of documents. + * + * @throws Exception + */ + public void testVerifyIndex() throws Exception { + final IndexReader reader = IndexReader.open(mDirectory, true); + assertEquals(8, reader.numDocs()); + reader.close(); + } + + /** + * Tests a single span query that matches multiple documents. + * + * @throws IOException + */ + public void testSingleSpanQuery() throws IOException { + + final Query spanQuery = new SpanTermQuery(new Term(FIELD_TEXT, "should")); + final String[] expectedIds = new String[] {"B", "D", "1", "2", "3", "4", + "A"}; + final float[] expectedScores = new float[] {0.625f, 0.45927936f, + 0.35355338f, 0.35355338f, 0.35355338f, 0.35355338f, 0.26516503f,}; + assertHits(searcher2, spanQuery, "single span query", expectedIds, + expectedScores); + } + + /** + * Tests a single span query that matches multiple documents. + * + * @throws IOException + */ + public void testMultipleDifferentSpanQueries() throws IOException { + + final Query spanQuery1 = new SpanTermQuery(new Term(FIELD_TEXT, "should")); + final Query spanQuery2 = new SpanTermQuery(new Term(FIELD_TEXT, "we")); + final BooleanQuery query = new BooleanQuery(); + query.add(spanQuery1, BooleanClause.Occur.MUST); + query.add(spanQuery2, BooleanClause.Occur.MUST); + final String[] expectedIds = new String[] {"D", "A"}; + // these values were pre LUCENE-413 + // final float[] expectedScores = new float[] { 0.93163157f, 0.20698164f }; + final float[] expectedScores = new float[] {1.0191123f, 0.93163157f}; + assertHits(searcher2, query, "multiple different span queries", + expectedIds, expectedScores); + } + + /** + * Tests two span queries. + * + * @throws IOException + */ + @Override + public void testBooleanQueryWithSpanQueries() throws IOException { + + doTestBooleanQueryWithSpanQueries(searcher2, 0.73500174f); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/store/TestBufferedIndexInput.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/store/TestBufferedIndexInput.java new file mode 100755 index 0000000..0a0bc45 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/store/TestBufferedIndexInput.java @@ -0,0 +1,381 @@ +package org.apache.lucene.store; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.store.NIOFSDirectory.NIOFSIndexInput; +import org.apache.lucene.store.SimpleFSDirectory.SimpleFSIndexInput; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; +import org.apache.lucene.util.ArrayUtil; + +public class TestBufferedIndexInput extends LuceneTestCase { + + private static void writeBytes(File aFile, long size) throws IOException{ + OutputStream stream = null; + try { + stream = new FileOutputStream(aFile); + for (int i = 0; i < size; i++) { + stream.write(byten(i)); + } + stream.flush(); + } finally { + if (stream != null) { + stream.close(); + } + } + } + + private static final long TEST_FILE_LENGTH = 100*1024; + + // Call readByte() repeatedly, past the buffer boundary, and see that it + // is working as expected. + // Our input comes from a dynamically generated/ "file" - see + // MyBufferedIndexInput below. + public void testReadByte() throws Exception { + MyBufferedIndexInput input = new MyBufferedIndexInput(); + for (int i = 0; i < BufferedIndexInput.BUFFER_SIZE * 10; i++) { + assertEquals(input.readByte(), byten(i)); + } + } + + // Call readBytes() repeatedly, with various chunk sizes (from 1 byte to + // larger than the buffer size), and see that it returns the bytes we expect. + // Our input comes from a dynamically generated "file" - + // see MyBufferedIndexInput below. + public void testReadBytes() throws Exception { + MyBufferedIndexInput input = new MyBufferedIndexInput(); + runReadBytes(input, BufferedIndexInput.BUFFER_SIZE, random); + + // This tests the workaround code for LUCENE-1566 where readBytesInternal + // provides a workaround for a JVM Bug that incorrectly raises a OOM Error + // when a large byte buffer is passed to a file read. + // NOTE: this does only test the chunked reads and NOT if the Bug is triggered. 
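+ // (A deliberately tiny chunk size of 10 bytes is used for both runs below, so the chunked-read path is exercised many times even on this small test file.)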
+      //final int tmpFileSize = 1024 * 1024 * 5; + final int inputBufferSize = 128; + File tmpInputFile = _TestUtil.createTempFile("IndexInput", "tmpFile", TEMP_DIR); + tmpInputFile.deleteOnExit(); + writeBytes(tmpInputFile, TEST_FILE_LENGTH); + + // run test with chunk size of 10 bytes + runReadBytesAndClose(new SimpleFSIndexInput(tmpInputFile, + inputBufferSize, 10), inputBufferSize, random); + + // run test with chunk size of 10 bytes + runReadBytesAndClose(new NIOFSIndexInput(tmpInputFile, + inputBufferSize, 10), inputBufferSize, random); + } + + private void runReadBytesAndClose(IndexInput input, int bufferSize, Random r) + throws IOException { + try { + runReadBytes(input, bufferSize, r); + } finally { + input.close(); + } + } + + private void runReadBytes(IndexInput input, int bufferSize, Random r) + throws IOException { + + int pos = 0; + // gradually increasing size: + for (int size = 1; size < bufferSize * 10; size = size + size / 200 + 1) { + checkReadBytes(input, size, pos); + pos += size; + if (pos >= TEST_FILE_LENGTH) { + // wrap + pos = 0; + input.seek(0L); + } + } + // wildly fluctuating size: + for (long i = 0; i < 100; i++) { + final int size = r.nextInt(10000); + checkReadBytes(input, 1+size, pos); + pos += 1+size; + if (pos >= TEST_FILE_LENGTH) { + // wrap + pos = 0; + input.seek(0L); + } + } + // constant small size (7 bytes): + for (int i = 0; i < bufferSize; i++) { + checkReadBytes(input, 7, pos); + pos += 7; + if (pos >= TEST_FILE_LENGTH) { + // wrap + pos = 0; + input.seek(0L); + } + } + } + + private byte[] buffer = new byte[10]; + + private void checkReadBytes(IndexInput input, int size, int pos) throws IOException{ + // Just to see that "offset" is treated properly in readBytes(), we + // add an arbitrary offset at the beginning of the array + int offset = size % 10; // arbitrary + buffer = ArrayUtil.grow(buffer, offset+size); + assertEquals(pos, input.getFilePointer()); + long left = TEST_FILE_LENGTH - input.getFilePointer(); + if (left <= 0) { + return; + } else if (left < size) { + size = (int) left; + } + input.readBytes(buffer, offset, size); + assertEquals(pos+size, input.getFilePointer()); + for(int i=0; i<size; i++) { + assertEquals(byten(pos+i), buffer[offset+i]); + } + } + + private static class MockFSDirectory extends Directory { + + List<IndexInput> allIndexInputs = new ArrayList<IndexInput>(); + + Random rand; + + private Directory dir; + + public MockFSDirectory(File path, Random rand) throws IOException { + this.rand = rand; + lockFactory = NoLockFactory.getNoLockFactory(); + dir = new SimpleFSDirectory(path, null); + } + + @Override + public IndexInput openInput(String name) throws IOException { + return openInput(name, BufferedIndexInput.BUFFER_SIZE); + } + + public void tweakBufferSizes() { + //int count = 0; + for (final IndexInput ip : allIndexInputs) { + BufferedIndexInput bii = (BufferedIndexInput) ip; + int bufferSize = 1024+Math.abs(rand.nextInt() % 32768); + bii.setBufferSize(bufferSize); + //count++; + } + //System.out.println("tweak'd " + count + " buffer sizes"); + } + + @Override + public IndexInput openInput(String name, int bufferSize) throws IOException { + // Make random changes to buffer size + bufferSize = 1+Math.abs(rand.nextInt() % 10); + IndexInput f = dir.openInput(name, bufferSize); + allIndexInputs.add(f); + return f; + } + + @Override + public IndexOutput createOutput(String name) throws IOException { + return dir.createOutput(name); + } + + @Override + public void close() throws IOException { + dir.close(); + } + + @Override + public void deleteFile(String name) + throws IOException + { + dir.deleteFile(name); + }
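+ // note: the remaining Directory overrides below simply delegate to the wrapped SimpleFSDirectory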
+ @Override + @Deprecated + /* @deprecated Lucene never uses this API; it will be + * removed in 4.0. */ + public void touchFile(String name) + throws IOException + { + dir.touchFile(name); + } + @Override + public long fileModified(String name) + throws IOException + { + return dir.fileModified(name); + } + @Override + public boolean fileExists(String name) + throws IOException + { + return dir.fileExists(name); + } + @Override + public String[] listAll() + throws IOException + { + return dir.listAll(); + } + + @Override + public long fileLength(String name) throws IOException { + return dir.fileLength(name); + } + + + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/store/TestCopyBytes.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/store/TestCopyBytes.java new file mode 100644 index 0000000..08f41e4 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/store/TestCopyBytes.java @@ -0,0 +1,107 @@ +package org.apache.lucene.store; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements.  See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.  You may obtain a copy of the License at + * + *     http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; + +import org.junit.Test; + +public class TestCopyBytes extends LuceneTestCase { + + private byte value(int idx) { + return (byte) ((idx%256) * (1+(idx/256))); + } + + + @Test + public void testCopyBytes() throws Exception { + int num = atLeast(10); + for(int iter=0;iter<num;iter++) { + Directory dir = newDirectory(); + + // make random file + IndexOutput out = dir.createOutput("test"); + byte[] bytes = new byte[_TestUtil.nextInt(random, 1, 77777)]; + final int size = _TestUtil.nextInt(random, 1, 1777777); + int upto = 0; + int byteUpto = 0; + while(upto < size) { + final int chunk = Math.min(bytes.length, size-upto); + for(int byteIdx=0;byteIdx<chunk;byteIdx++) { + bytes[byteIdx] = value(byteUpto++); + } + out.writeBytes(bytes, 0, chunk); + upto += chunk; + } + out.close(); + assertEquals(size, dir.fileLength("test")); + + // copy from test -> test2 + final IndexInput in = dir.openInput("test"); + + out = dir.createOutput("test2"); + + upto = 0; + while(upto < size) { + if (random.nextBoolean()) { + out.writeByte(in.readByte()); + upto++; + } else { + final int chunk = Math.min(_TestUtil.nextInt(random, 1, bytes.length), size-upto); + out.copyBytes(in, chunk); + upto += chunk; + } + } + assertEquals(size, upto); + out.close(); + in.close(); + + // verify + IndexInput in2 = dir.openInput("test2"); + upto = 0; + while(upto < size) { + if (random.nextBoolean()) { + final byte v = in2.readByte(); + assertEquals(value(upto), v); + upto++; + } else { + final int limit = Math.min(_TestUtil.nextInt(random, 1, bytes.length), size-upto); + in2.readBytes(bytes, 0, limit); + for(int byteIdx=0;byteIdx<limit;byteIdx++) { + assertEquals(value(upto), bytes[byteIdx]); + upto++; + } + } + } + in2.close(); + + dir.close(); + } + } +} +public class TestFileSwitchDirectory extends LuceneTestCase { + + public void testBasic() throws IOException { + Set<String> fileExtensions = new HashSet<String>(); + fileExtensions.add(IndexFileNames.FIELDS_EXTENSION); + fileExtensions.add(IndexFileNames.FIELDS_INDEX_EXTENSION); + + MockDirectoryWrapper primaryDir = new MockDirectoryWrapper(random, new RAMDirectory()); + primaryDir.setCheckIndexOnClose(false); // only part of an index + MockDirectoryWrapper secondaryDir = new MockDirectoryWrapper(random, new RAMDirectory()); + secondaryDir.setCheckIndexOnClose(false); // only part of an index + + FileSwitchDirectory fsd = new FileSwitchDirectory(fileExtensions, primaryDir, secondaryDir, true);
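+ // the stored-fields files (.fdt/.fdx) should land in primaryDir; everything else goes to secondaryDir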
IndexWriter writer = new IndexWriter( + fsd, + new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)). + setMergePolicy(newLogMergePolicy(false)) + ); + TestIndexWriterReader.createIndexNoClose(true, "ram", writer); + IndexReader reader = IndexReader.open(writer, true); + assertEquals(100, reader.maxDoc()); + writer.commit(); + // we should see only fdx,fdt files here + String[] files = primaryDir.listAll(); + assertTrue(files.length > 0); + for (int x=0; x < files.length; x++) { + String ext = FileSwitchDirectory.getExtension(files[x]); + assertTrue(fileExtensions.contains(ext)); + } + files = secondaryDir.listAll(); + assertTrue(files.length > 0); + // we should not see fdx,fdt files here + for (int x=0; x < files.length; x++) { + String ext = FileSwitchDirectory.getExtension(files[x]); + assertFalse(fileExtensions.contains(ext)); + } + reader.close(); + writer.close(); + + files = fsd.listAll(); + for(int i=0;i<files.length;i++) { + assertNotNull(files[i]); + } + fsd.close(); + } +} +public class TestHugeRamFile extends LuceneTestCase { + + private static final long MAX_VALUE = (long) 2 * (long) Integer.MAX_VALUE; + + /** Fake a huge ram file by using the same byte buffer for all + * buffers under maxint. */ + private static class DenseRAMFile extends RAMFile { + private long capacity = 0; + private HashMap<Integer,byte[]> singleBuffers = new HashMap<Integer,byte[]>(); + @Override + protected byte[] newBuffer(int size) { + capacity += size; + if (capacity <= MAX_VALUE) { + // below maxint we reuse buffers + byte buf[] = singleBuffers.get(Integer.valueOf(size)); + if (buf==null) { + buf = new byte[size]; + //System.out.println("allocate: "+size); + singleBuffers.put(Integer.valueOf(size),buf); + } + return buf; + } + //System.out.println("allocate: "+size); System.out.flush(); + return new byte[size]; + } + } + + /** Test huge RAMFile with more than Integer.MAX_VALUE bytes. (LUCENE-957) */ + public void testHugeFile() throws IOException { + DenseRAMFile f = new DenseRAMFile(); + // output part + RAMOutputStream out = new RAMOutputStream(f); + byte b1[] = new byte[RAMOutputStream.BUFFER_SIZE]; + byte b2[] = new byte[RAMOutputStream.BUFFER_SIZE / 3]; + for (int i = 0; i < b1.length; i++) { + b1[i] = (byte) (i & 0x0007F); + } + for (int i = 0; i < b2.length; i++) { + b2[i] = (byte) (i & 0x0003F); + } + long n = 0; + assertEquals("output length must match",n,out.length()); + while (n <= MAX_VALUE - b1.length) { + out.writeBytes(b1,0,b1.length); + out.flush(); + n += b1.length; + assertEquals("output length must match",n,out.length()); + } + //System.out.println("after writing b1's, length = "+out.length()+" (MAX_VALUE="+MAX_VALUE+")"); + int m = b2.length; + long L = 12; + for (int j=0; j<L; j++) { + out.writeBytes(b2,0,m); + out.flush(); + n += m; + assertEquals("output length must match",n,out.length()); + } + out.close(); + } +} +public class TestLockFactory extends LuceneTestCase { + + // Verify: we can provide our own LockFactory implementation, the right + // methods are called at the right time, locks are created, etc. + public void testCustomLockFactory() throws IOException { + Directory dir = new MockDirectoryWrapper(random, new RAMDirectory()); + MockLockFactory lf = new MockLockFactory(); + dir.setLockFactory(lf); + + // Lock prefix should have been set: + assertTrue("lock prefix was not set by the RAMDirectory", lf.lockPrefixSet); + + IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); + + // add 100 documents (so that commit lock is used) + for (int i = 0; i < 100; i++) { + addDoc(writer); + } + + // Both write lock and commit lock should have been created: + assertEquals("# of unique locks created (after instantiating IndexWriter)", + 1, lf.locksCreated.size()); + assertTrue("# calls to makeLock is 0 (after instantiating IndexWriter)", + lf.makeLockCount >= 1); + + for(final String lockName : lf.locksCreated.keySet()) { + MockLockFactory.MockLock lock = (MockLockFactory.MockLock) lf.locksCreated.get(lockName); + assertTrue("# calls to Lock.obtain is 0 (after instantiating IndexWriter)", + lock.lockAttempts > 0); + } + + writer.close(); + } + + // Verify: we can use the NoLockFactory with RAMDirectory w/ no + // exceptions raised: + // Verify: NoLockFactory allows two IndexWriters + public void testRAMDirectoryNoLocking() throws IOException { + Directory dir = new MockDirectoryWrapper(random, new RAMDirectory()); + dir.setLockFactory(NoLockFactory.getNoLockFactory()); + + assertTrue("RAMDirectory.setLockFactory did not take", + NoLockFactory.class.isInstance(dir.getLockFactory())); + + IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); + writer.commit(); // required so the second open succeeds + // Create a 2nd IndexWriter.
This is normally not allowed but it should run through since we're not + // using any locks: + IndexWriter writer2 = null; + try { + writer2 = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); + } catch (Exception e) { + e.printStackTrace(System.out); + fail("Should not have hit an IOException with no locking"); + } + + writer.close(); + if (writer2 != null) { + writer2.close(); + } + } + + // Verify: SingleInstanceLockFactory is the default lock for RAMDirectory + // Verify: RAMDirectory does basic locking correctly (can't create two IndexWriters) + public void testDefaultRAMDirectory() throws IOException { + Directory dir = new RAMDirectory(); + + assertTrue("RAMDirectory did not use correct LockFactory: got " + dir.getLockFactory(), + SingleInstanceLockFactory.class.isInstance(dir.getLockFactory())); + + IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); + + // Create a 2nd IndexWriter. This should fail: + IndexWriter writer2 = null; + try { + writer2 = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND)); + fail("Should have hit an IOException with two IndexWriters on default SingleInstanceLockFactory"); + } catch (IOException e) { + } + + writer.close(); + if (writer2 != null) { + writer2.close(); + } + } + + public void testSimpleFSLockFactory() throws IOException { + // test string file instantiation + new SimpleFSLockFactory("test"); + } + + // Verify: do stress test, by opening IndexReaders and + // IndexWriters over & over in 2 threads and making sure + // no unexpected exceptions are raised: + public void testStressLocks() throws Exception { + _testStressLocks(null, _TestUtil.getTempDir("index.TestLockFactory6")); + } + + // Verify: do stress test, by opening IndexReaders and + // IndexWriters over & over in 2 threads and making sure + // no unexpected exceptions are raised, but use + // NativeFSLockFactory: + public void testStressLocksNativeFSLockFactory() throws Exception { + File dir = _TestUtil.getTempDir("index.TestLockFactory7"); + _testStressLocks(new NativeFSLockFactory(dir), dir); + } + + public void _testStressLocks(LockFactory lockFactory, File indexDir) throws Exception { + Directory dir = newFSDirectory(indexDir, lockFactory); + + // First create a 1 doc index: + IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE)); + addDoc(w); + w.close(); + + WriterThread writer = new WriterThread(100, dir); + SearcherThread searcher = new SearcherThread(100, dir); + writer.start(); + searcher.start(); + + while(writer.isAlive() || searcher.isAlive()) { + Thread.sleep(1000); + } + + assertTrue("IndexWriter hit unexpected exceptions", !writer.hitException); + assertTrue("IndexSearcher hit unexpected exceptions", !searcher.hitException); + + dir.close(); + // Cleanup + _TestUtil.rmDir(indexDir); + } + + // Verify: NativeFSLockFactory works correctly + public void testNativeFSLockFactory() throws IOException { + + NativeFSLockFactory f = new NativeFSLockFactory(TEMP_DIR); + + f.setLockPrefix("test"); + Lock l = f.makeLock("commit"); + Lock l2 = f.makeLock("commit"); + + assertTrue("failed to obtain lock", l.obtain()); + assertTrue("succeeded in obtaining lock twice", !l2.obtain()); + l.release(); + + assertTrue("failed to obtain 2nd lock after first one was freed", l2.obtain()); + l2.release(); + + 
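+ // (note: l and l2 are two separate Lock instances made for the same "commit" lock name, so isLocked() should agree across both)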
// Make sure we can obtain first one again, test isLocked(): + assertTrue("failed to obtain lock", l.obtain()); + assertTrue(l.isLocked()); + assertTrue(l2.isLocked()); + l.release(); + assertFalse(l.isLocked()); + assertFalse(l2.isLocked()); + } + + + // Verify: NativeFSLockFactory works correctly if the lock file exists + public void testNativeFSLockFactoryLockExists() throws IOException { + + File lockFile = new File(TEMP_DIR, "test.lock"); + lockFile.createNewFile(); + + Lock l = new NativeFSLockFactory(TEMP_DIR).makeLock("test.lock"); + assertTrue("failed to obtain lock", l.obtain()); + l.release(); + assertFalse("failed to release lock", l.isLocked()); + if (lockFile.exists()) { + lockFile.delete(); + } + } + + public void testNativeFSLockReleaseByOtherLock() throws IOException { + + NativeFSLockFactory f = new NativeFSLockFactory(TEMP_DIR); + + f.setLockPrefix("test"); + Lock l = f.makeLock("commit"); + Lock l2 = f.makeLock("commit"); + + assertTrue("failed to obtain lock", l.obtain()); + try { + assertTrue(l2.isLocked()); + l2.release(); + fail("should not have reached here. LockReleaseFailedException should have been thrown"); + } catch (LockReleaseFailedException e) { + // expected + } finally { + l.release(); + } + } + + // Verify: NativeFSLockFactory assigns null as lockPrefix if the lockDir is inside directory + public void testNativeFSLockFactoryPrefix() throws IOException { + File fdir1 = _TestUtil.getTempDir("TestLockFactory.8"); + File fdir2 = _TestUtil.getTempDir("TestLockFactory.8.Lockdir"); + Directory dir1 = newFSDirectory(fdir1, new NativeFSLockFactory(fdir1)); + // same directory, but locks are stored somewhere else; the prefix of the lock factory should be != null + Directory dir2 = newFSDirectory(fdir1, new NativeFSLockFactory(fdir2)); + + String prefix1 = dir1.getLockFactory().getLockPrefix(); + assertNull("Lock prefix for lockDir same as directory should be null", prefix1); + + String prefix2 = dir2.getLockFactory().getLockPrefix(); + assertNotNull("Lock prefix for lockDir outside of directory should be not null", prefix2); + + dir1.close(); + dir2.close(); + + _TestUtil.rmDir(fdir1); + _TestUtil.rmDir(fdir2); + } + + // Verify: default LockFactory has no prefix (ie + // write.lock is stored in index): + public void testDefaultFSLockFactoryPrefix() throws IOException { + // Make sure we get null prefix, which won't happen if setLockFactory is ever called.
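+ // (verified below for SimpleFSDirectory, MMapDirectory and NIOFSDirectory in turn)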
+    File dirName = _TestUtil.getTempDir("TestLockFactory.10"); + + Directory dir = new SimpleFSDirectory(dirName); + assertNull("Default lock prefix should be null", dir.getLockFactory().getLockPrefix()); + dir.close(); + + dir = new MMapDirectory(dirName); + assertNull("Default lock prefix should be null", dir.getLockFactory().getLockPrefix()); + dir.close(); + + dir = new NIOFSDirectory(dirName); + assertNull("Default lock prefix should be null", dir.getLockFactory().getLockPrefix()); + dir.close(); + + _TestUtil.rmDir(dirName); + } + + private class WriterThread extends Thread { + private Directory dir; + private int numIteration; + public boolean hitException = false; + public WriterThread(int numIteration, Directory dir) { + this.numIteration = numIteration; + this.dir = dir; + } + @Override + public void run() { + IndexWriter writer = null; + for(int i=0;i<this.numIteration;i++) { + + public class MockLockFactory extends LockFactory { + + public boolean lockPrefixSet; + public Map<String,Lock> locksCreated = Collections.synchronizedMap(new HashMap<String,Lock>()); + public int makeLockCount = 0; + + @Override + public void setLockPrefix(String lockPrefix) { + super.setLockPrefix(lockPrefix); + lockPrefixSet = true; + } + + @Override + synchronized public Lock makeLock(String lockName) { + Lock lock = new MockLock(); + locksCreated.put(lockName, lock); + makeLockCount++; + return lock; + } + + @Override + public void clearLock(String specificLockName) {} + + public class MockLock extends Lock { + public int lockAttempts; + + @Override + public boolean obtain() { + lockAttempts++; + return true; + } + @Override + public void release() { + // do nothing + } + @Override + public boolean isLocked() { + return false; + } + } + } + + private void addDoc(IndexWriter writer) throws IOException { + Document doc = new Document(); + doc.add(newField("content", "aaa", Field.Store.NO, Field.Index.ANALYZED)); + writer.addDocument(doc); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/store/TestMultiMMap.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/store/TestMultiMMap.java new file mode 100644 index 0000000..7669bb1 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/store/TestMultiMMap.java @@ -0,0 +1,148 @@ +package org.apache.lucene.store; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements.  See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.  You may obtain a copy of the License at + * + *     http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.File; +import java.util.Random; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; + +/** + * Tests MMapDirectory's MultiMMapIndexInput + * <p>
+ * Because Java's ByteBuffer uses an int to address the + * values, it's necessary to access a file > + * Integer.MAX_VALUE in size using multiple byte buffers. + */ +public class TestMultiMMap extends LuceneTestCase { + File workDir; + + @Override + public void setUp() throws Exception { + super.setUp(); + assumeTrue("test requires a jre that supports unmapping", MMapDirectory.UNMAP_SUPPORTED); + workDir = _TestUtil.getTempDir("TestMultiMMap"); + workDir.mkdirs(); + } + + public void testSeekZero() throws Exception { + for (int i = 0; i < 31; i++) { + MMapDirectory mmapDir = new MMapDirectory(_TestUtil.getTempDir("testSeekZero")); + mmapDir.setMaxChunkSize(1< currentSize); + if (currentSize > 0) { + copyCost += currentSize; + double copyCostPerElement = ((double) copyCost)/currentSize; + assertTrue("cost " + copyCostPerElement, copyCostPerElement < 10.0); + } + currentSize = nextSize; + } + } + + public void testMaxSize() { + // intentionally pass invalid elemSizes: + for(int elemSize=0;elemSize<10;elemSize++) { + assertEquals(Integer.MAX_VALUE, ArrayUtil.oversize(Integer.MAX_VALUE, elemSize)); + assertEquals(Integer.MAX_VALUE, ArrayUtil.oversize(Integer.MAX_VALUE-1, elemSize)); + } + } + + public void testInvalidElementSizes() { + int num = atLeast(10000); + for (int iter = 0; iter < num; iter++) { + final int minTargetSize = random.nextInt(Integer.MAX_VALUE); + final int elemSize = random.nextInt(11); + final int v = ArrayUtil.oversize(minTargetSize, elemSize); + assertTrue(v >= minTargetSize); + } + } + + public void testParseInt() throws Exception { + int test; + try { + test = ArrayUtil.parseInt("".toCharArray()); + assertTrue(false); + } catch (NumberFormatException e) { + //expected + } + try { + test = ArrayUtil.parseInt("foo".toCharArray()); + assertTrue(false); + } catch (NumberFormatException e) { + //expected + } + try { + test = ArrayUtil.parseInt(String.valueOf(Long.MAX_VALUE).toCharArray()); + assertTrue(false); + } catch (NumberFormatException e) { + //expected + } + try { + test = ArrayUtil.parseInt("0.34".toCharArray()); + assertTrue(false); + } catch (NumberFormatException e) { + //expected + } + + try { + test = ArrayUtil.parseInt("1".toCharArray()); + assertTrue(test + " does not equal: " + 1, test == 1); + test = ArrayUtil.parseInt("-10000".toCharArray()); + assertTrue(test + " does not equal: " + -10000, test == -10000); + test = ArrayUtil.parseInt("1923".toCharArray()); + assertTrue(test + " does not equal: " + 1923, test == 1923); + test = ArrayUtil.parseInt("-1".toCharArray()); + assertTrue(test + " does not equal: " + -1, test == -1); + test = ArrayUtil.parseInt("foo 1923 bar".toCharArray(), 4, 4); + assertTrue(test + " does not equal: " + 1923, test == 1923); + } catch (NumberFormatException e) { + e.printStackTrace(); + assertTrue(false); + } + + } + + + private Integer[] createRandomArray(int maxSize) { + final Integer[] a = new Integer[random.nextInt(maxSize) + 1]; + for (int i = 0; i < a.length; i++) { + a[i] = Integer.valueOf(random.nextInt(a.length)); + } + return a; + } + + public void testQuickSort() { + int num = atLeast(50); + for (int i = 0; i < num; i++) { + Integer[] a1 = createRandomArray(1000), a2 = a1.clone(); + ArrayUtil.quickSort(a1); + Arrays.sort(a2); + assertArrayEquals(a2, a1); + + a1 = createRandomArray(1000); + a2 = a1.clone(); + ArrayUtil.quickSort(a1, Collections.reverseOrder()); + Arrays.sort(a2, Collections.reverseOrder()); + assertArrayEquals(a2, a1); + // reverse back, so we can test that completely backwards sorted array (worst 
case) is working: + ArrayUtil.quickSort(a1); + Arrays.sort(a2); + assertArrayEquals(a2, a1); + } + } + + private Integer[] createSparseRandomArray(int maxSize) { + final Integer[] a = new Integer[random.nextInt(maxSize) + 1]; + for (int i = 0; i < a.length; i++) { + a[i] = Integer.valueOf(random.nextInt(2)); + } + return a; + } + + // This is a test for LUCENE-3054 (without the merge sort fallback it fails with a stack overflow in most cases) + public void testQuickToMergeSortFallback() { + int num = atLeast(50); + for (int i = 0; i < num; i++) { + Integer[] a1 = createSparseRandomArray(40000), a2 = a1.clone(); + ArrayUtil.quickSort(a1); + Arrays.sort(a2); + assertArrayEquals(a2, a1); + } + } + + public void testMergeSort() { + int num = atLeast(50); + for (int i = 0; i < num; i++) { + Integer[] a1 = createRandomArray(1000), a2 = a1.clone(); + ArrayUtil.mergeSort(a1); + Arrays.sort(a2); + assertArrayEquals(a2, a1); + + a1 = createRandomArray(1000); + a2 = a1.clone(); + ArrayUtil.mergeSort(a1, Collections.reverseOrder()); + Arrays.sort(a2, Collections.reverseOrder()); + assertArrayEquals(a2, a1); + // reverse back, so we can test that completely backwards sorted array (worst case) is working: + ArrayUtil.mergeSort(a1); + Arrays.sort(a2); + assertArrayEquals(a2, a1); + } + } + + public void testInsertionSort() { + for (int i = 0, c = atLeast(500); i < c; i++) { + Integer[] a1 = createRandomArray(30), a2 = a1.clone(); + ArrayUtil.insertionSort(a1); + Arrays.sort(a2); + assertArrayEquals(a2, a1); + + a1 = createRandomArray(30); + a2 = a1.clone(); + ArrayUtil.insertionSort(a1, Collections.reverseOrder()); + Arrays.sort(a2, Collections.reverseOrder()); + assertArrayEquals(a2, a1); + // reverse back, so we can test that completely backwards sorted array (worst case) is working: + ArrayUtil.insertionSort(a1); + Arrays.sort(a2); + assertArrayEquals(a2, a1); + } + } + + static class Item implements Comparable<Item> { + final int val, order; + + Item(int val, int order) { + this.val = val; + this.order = order; + } + + public int compareTo(Item other) { + return this.order - other.order; + } + + @Override + public String toString() { + return Integer.toString(val); + } + } + + public void testMergeSortStability() { + Item[] items = new Item[100]; + for (int i = 0; i < items.length; i++) { + // half of the items have a value but the same order. The values of these items are sorted, + // so they should always be in order after sorting. + // The other half has a defined order, but no value (-1); they should appear after + // all of the above when sorted. + final boolean equal = random.nextBoolean(); + items[i] = new Item(equal ? (i+1) : -1, equal ? 
0 : (random.nextInt(1000)+1)); + } + + if (VERBOSE) System.out.println("Before: " + Arrays.toString(items)); + // if you replace this with ArrayUtil.quickSort(), test should fail: + ArrayUtil.mergeSort(items); + if (VERBOSE) System.out.println("Sorted: " + Arrays.toString(items)); + + Item last = items[0]; + for (int i = 1; i < items.length; i++) { + final Item act = items[i]; + if (act.order == 0) { + // order of "equal" items should not be mixed up + assertTrue(act.val > last.val); + } + assertTrue(act.order >= last.order); + last = act; + } + } + + // should produce no exceptions + public void testEmptyArraySort() { + Integer[] a = new Integer[0]; + ArrayUtil.quickSort(a); + ArrayUtil.mergeSort(a); + ArrayUtil.insertionSort(a); + ArrayUtil.quickSort(a, Collections.reverseOrder()); + ArrayUtil.mergeSort(a, Collections.reverseOrder()); + ArrayUtil.insertionSort(a, Collections.reverseOrder()); + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestAttributeSource.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestAttributeSource.java new file mode 100644 index 0000000..cd3c580 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestAttributeSource.java @@ -0,0 +1,256 @@ +package org.apache.lucene.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + import org.apache.lucene.analysis.Token; + import org.apache.lucene.analysis.tokenattributes.*; + + import java.util.Iterator; + import java.util.HashMap; + import java.util.Map; + + public class TestAttributeSource extends LuceneTestCase { + + public void testCaptureState() { + // init a first instance + AttributeSource src = new AttributeSource(); + CharTermAttribute termAtt = src.addAttribute(CharTermAttribute.class); + TypeAttribute typeAtt = src.addAttribute(TypeAttribute.class); + termAtt.append("TestTerm"); + typeAtt.setType("TestType"); + final int hashCode = src.hashCode(); + + AttributeSource.State state = src.captureState(); + + // modify the attributes + termAtt.setEmpty().append("AnotherTestTerm"); + typeAtt.setType("AnotherTestType"); + assertTrue("Hash code should be different", hashCode != src.hashCode()); + + src.restoreState(state); + assertEquals("TestTerm", termAtt.toString()); + assertEquals("TestType", typeAtt.type()); + assertEquals("Hash code should be equal after restore", hashCode, src.hashCode()); + + // restore into an exact configured copy + AttributeSource copy = new AttributeSource(); + copy.addAttribute(CharTermAttribute.class); + copy.addAttribute(TypeAttribute.class); + copy.restoreState(state); + assertEquals("Both AttributeSources should have same hashCode after restore", src.hashCode(), copy.hashCode()); + assertEquals("Both AttributeSources should be equal after restore", src, copy); + + // init a second instance (with attributes in different order and one additional attribute) + AttributeSource src2 = new AttributeSource(); + typeAtt = src2.addAttribute(TypeAttribute.class); + FlagsAttribute flagsAtt = src2.addAttribute(FlagsAttribute.class); + termAtt = src2.addAttribute(CharTermAttribute.class); + flagsAtt.setFlags(12345); + + src2.restoreState(state); + assertEquals("TestTerm", termAtt.toString()); + assertEquals("TestType", typeAtt.type()); + assertEquals("FlagsAttribute should not be touched", 12345, flagsAtt.getFlags()); + + // init a third instance missing one Attribute + AttributeSource src3 = new AttributeSource(); + termAtt = src3.addAttribute(CharTermAttribute.class); + try { + src3.restoreState(state); + fail("The third instance is missing the TypeAttribute, so restoreState() should throw IllegalArgumentException"); + } catch (IllegalArgumentException iae) { + // pass + } + } + + public void testCloneAttributes() { + final AttributeSource src = new AttributeSource(); + final FlagsAttribute flagsAtt = src.addAttribute(FlagsAttribute.class); + final TypeAttribute typeAtt = src.addAttribute(TypeAttribute.class); + flagsAtt.setFlags(1234); + typeAtt.setType("TestType"); + + final AttributeSource clone = src.cloneAttributes(); + final Iterator<Class<? extends Attribute>> it = clone.getAttributeClassesIterator(); + assertEquals("FlagsAttribute must be the first attribute", FlagsAttribute.class, it.next()); + assertEquals("TypeAttribute must be the second attribute", TypeAttribute.class, it.next()); + assertFalse("No more attributes", it.hasNext()); + + final FlagsAttribute flagsAtt2 = clone.getAttribute(FlagsAttribute.class); + final TypeAttribute typeAtt2 = clone.getAttribute(TypeAttribute.class); + assertNotSame("FlagsAttribute of original and clone must be different instances", flagsAtt2, flagsAtt); + assertNotSame("TypeAttribute of original and clone must be different instances", typeAtt2, typeAtt); + assertEquals("FlagsAttribute of original and clone must be equal", flagsAtt2, flagsAtt); + assertEquals("TypeAttribute of original and clone must be equal", typeAtt2, 
typeAtt); + + // test copy back + flagsAtt2.setFlags(4711); + typeAtt2.setType("OtherType"); + clone.copyTo(src); + assertEquals("FlagsAttribute of original must now contain updated flags", 4711, flagsAtt.getFlags()); + assertEquals("TypeAttribute of original must now contain updated type", "OtherType", typeAtt.type()); + // verify again: + assertNotSame("FlagsAttribute of original and clone must be different instances", flagsAtt2, flagsAtt); + assertNotSame("TypeAttribute of original and clone must be different instances", typeAtt2, typeAtt); + assertEquals("FlagsAttribute of original and clone must be equal", flagsAtt2, flagsAtt); + assertEquals("TypeAttribute of original and clone must be equal", typeAtt2, typeAtt); + } + + public void testToStringAndMultiAttributeImplementations() { + AttributeSource src = new AttributeSource(); + CharTermAttribute termAtt = src.addAttribute(CharTermAttribute.class); + TypeAttribute typeAtt = src.addAttribute(TypeAttribute.class); + termAtt.append("TestTerm"); + typeAtt.setType("TestType"); + assertEquals("Attributes should appear in original order", "("+termAtt.toString()+","+typeAtt.toString()+")", src.toString()); + Iterator<AttributeImpl> it = src.getAttributeImplsIterator(); + assertTrue("Iterator should have 2 attributes left", it.hasNext()); + assertSame("First AttributeImpl from iterator should be termAtt", termAtt, it.next()); + assertTrue("Iterator should have 1 attribute left", it.hasNext()); + assertSame("Second AttributeImpl from iterator should be typeAtt", typeAtt, it.next()); + assertFalse("Iterator should have 0 attributes left", it.hasNext()); + + src = new AttributeSource(); + src.addAttributeImpl(new Token()); + // this should not add a new attribute as Token implements CharTermAttribute, too + termAtt = src.addAttribute(CharTermAttribute.class); + assertTrue("CharTermAttribute should be implemented by Token", termAtt instanceof Token); + // get the Token attribute and check that it is the only one + it = src.getAttributeImplsIterator(); + Token tok = (Token) it.next(); + assertFalse("There should be only one attribute implementation instance", it.hasNext()); + + termAtt.setEmpty().append("TestTerm"); + assertEquals("Token should only be printed once", "("+tok.toString()+")", src.toString()); + } + + public void testDefaultAttributeFactory() throws Exception { + AttributeSource src = new AttributeSource(); + + assertTrue("CharTermAttribute is not implemented by CharTermAttributeImpl", + src.addAttribute(CharTermAttribute.class) instanceof CharTermAttributeImpl); + assertTrue("OffsetAttribute is not implemented by OffsetAttributeImpl", + src.addAttribute(OffsetAttribute.class) instanceof OffsetAttributeImpl); + assertTrue("FlagsAttribute is not implemented by FlagsAttributeImpl", + src.addAttribute(FlagsAttribute.class) instanceof FlagsAttributeImpl); + assertTrue("PayloadAttribute is not implemented by PayloadAttributeImpl", + src.addAttribute(PayloadAttribute.class) instanceof PayloadAttributeImpl); + assertTrue("PositionIncrementAttribute is not implemented by PositionIncrementAttributeImpl", + src.addAttribute(PositionIncrementAttribute.class) instanceof PositionIncrementAttributeImpl); + assertTrue("TypeAttribute is not implemented by TypeAttributeImpl", + src.addAttribute(TypeAttribute.class) instanceof TypeAttributeImpl); + } + + @SuppressWarnings("unchecked") + public void testInvalidArguments() throws Exception { + try { + AttributeSource src = new AttributeSource(); + src.addAttribute(Token.class); + fail("Should throw 
IllegalArgumentException"); + } catch (IllegalArgumentException iae) {} + + try { + AttributeSource src = new AttributeSource(Token.TOKEN_ATTRIBUTE_FACTORY); + src.addAttribute(Token.class); + fail("Should throw IllegalArgumentException"); + } catch (IllegalArgumentException iae) {} + + try { + AttributeSource src = new AttributeSource(); + // break this by unsafe cast + src.addAttribute((Class) Iterator.class); + fail("Should throw IllegalArgumentException"); + } catch (IllegalArgumentException iae) {} + } + + public void testLUCENE_3042() throws Exception { + final AttributeSource src1 = new AttributeSource(); + src1.addAttribute(CharTermAttribute.class).append("foo"); + int hash1 = src1.hashCode(); // this triggers a cached state + final AttributeSource src2 = new AttributeSource(src1); + src2.addAttribute(TypeAttribute.class).setType("bar"); + assertTrue("The hashCode is identical, so the captured state was preserved.", hash1 != src1.hashCode()); + assertEquals(src2.hashCode(), src1.hashCode()); + } + + // this class is included in external class check, so no assertion errors occur + @Deprecated + static class TestAttributeImpl extends AttributeImpl implements FlagsAttribute { + + private int flags = 0; + + public int getFlags() { return flags; } + public void setFlags(int flags) { this.flags = flags; } + + @Override + public void clear() { flags = 0; } + + @Override + public void copyTo(AttributeImpl target) { + FlagsAttribute t = (FlagsAttribute) target; + t.setFlags(flags); + } + + @Override + public String toString() { + return "foo=bar,moo=mae"; + } + + } + + // this class is excluded in external class check, so assertion on calling reflectWith should occur + @Deprecated + static class TestAttributeImpl2 extends TestAttributeImpl {} + + @Deprecated + public void testReflectionOfToString() throws Exception { + final AttributeSource src = new AttributeSource(); + final AttributeImpl att = new TestAttributeImpl(); + src.addAttributeImpl(att); + + assertSame("FlagsAttribute is not implemented by same instance of TestAttributeImpl", + att, src.addAttribute(FlagsAttribute.class)); + + final Map map = new HashMap(); + final AttributeReflector reflector = new AttributeReflector() { + public void reflect(Class attClass, String key, Object value) { + assertSame(FlagsAttribute.class, attClass); + map.put(key, value); + } + }; + att.reflectWith(reflector); + assertEquals(2, map.size()); + assertEquals("bar", map.get("foo")); + assertEquals("mae", map.get("moo")); + + map.clear(); + src.reflectWith(reflector); + assertEquals(2, map.size()); + assertEquals("bar", map.get("foo")); + assertEquals("mae", map.get("moo")); + + map.clear(); + try { + new TestAttributeImpl2().reflectWith(reflector); + fail("TestAttributeImpl2 should fail assertion on toString() parsing"); + } catch (AssertionError e) { + // pass + } + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestBitUtil.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestBitUtil.java new file mode 100644 index 0000000..67e1b15 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestBitUtil.java @@ -0,0 +1,133 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.util; + +public class TestBitUtil extends LuceneTestCase { + + private static int slowNlz(long x) { + if (x == 0L) return 64; + int nlz = 0; + while ( ((x << nlz) & (1L << 63)) == 0) { + nlz++; + } + return nlz; + } + + private void checkNlz(long x) { + assertEquals(slowNlz(x), BitUtil.nlz(x)); + assertEquals(Long.numberOfLeadingZeros(x), BitUtil.nlz(x)); + } + + public void testNlz() { + checkNlz(0L); + checkNlz(1L); + checkNlz(-1L); + for (int i = 1; i <= 63; i++) { + checkNlz(1L << i); + checkNlz((1L << i) + (1L << (i>>1))); + } + } + + public void testBitUtils() { + long num = 100000; + assertEquals( 5, BitUtil.ntz(num) ); + assertEquals( 5, BitUtil.ntz2(num) ); + assertEquals( 5, BitUtil.ntz3(num) ); + + num = 10; + assertEquals( 1, BitUtil.ntz(num) ); + assertEquals( 1, BitUtil.ntz2(num) ); + assertEquals( 1, BitUtil.ntz3(num) ); + + for (int i=0; i<64; i++) { + num = 1L << i; + assertEquals( i, BitUtil.ntz(num) ); + assertEquals( i, BitUtil.ntz2(num) ); + assertEquals( i, BitUtil.ntz3(num) ); + } + } + + + private long testArg(int shift) { + return (1L << shift) + (1L << (shift>>1)); + } + + private long nlzBitUtilBasicLoop(int iters) { + long sumRes = 0; + while (iters-- >= 0) { + for (int i = 1; i <= 63; i++) { + long a = testArg(i); + sumRes += BitUtil.nlz(a); + sumRes += BitUtil.nlz(a+1); + sumRes += BitUtil.nlz(a-1); + sumRes += BitUtil.nlz(a+10); + sumRes += BitUtil.nlz(a-10); + } + } + return sumRes; + } + + private long nlzLongBasicLoop(int iters) { + long sumRes = 0; + while (iters-- >= 0) { + for (int i = 1; i <= 63; i++) { + long a = testArg(i); + sumRes += Long.numberOfLeadingZeros(a); + sumRes += Long.numberOfLeadingZeros(a+1); + sumRes += Long.numberOfLeadingZeros(a-1); + sumRes += Long.numberOfLeadingZeros(a+10); + sumRes += Long.numberOfLeadingZeros(a-10); + } + } + return sumRes; + } + + public void tstPerfNlz() { // See LUCENE-3197, prefer to use Long.numberOfLeadingZeros() over BitUtil.nlz(). 
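+ // Benchmark sketch: each variant runs fixed-size inner loops until a
+ // two-second wall-clock window has elapsed, then an average cost per call
+ // is derived from the loop counts; the 'dummy' accumulator is printed
+ // afterwards so the JIT cannot eliminate the measured loops as dead code.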
+ final long measureMilliSecs = 2000; + final int basicIters = 100000; + long startTime; + long endTime; + long curTime; + long dummy = 0; // avoid optimizing away + + dummy = 0; + int bitUtilLoops = 0; + startTime = System.currentTimeMillis(); + endTime = startTime + measureMilliSecs; + do { + dummy += nlzBitUtilBasicLoop(basicIters); + bitUtilLoops++; + curTime = System.currentTimeMillis(); + } while (curTime < endTime); + int bitUtilPsTime = (int) (1000000000 * (curTime - startTime) / (basicIters * 5 * 63 * (float) bitUtilLoops)); + System.out.println("BitUtil nlz time: " + (bitUtilPsTime/1) + " picosec/call, dummy: " + dummy); + + + dummy = 0; + int longLoops = 0; + startTime = System.currentTimeMillis(); + endTime = startTime + measureMilliSecs; + do { + dummy += nlzLongBasicLoop(basicIters); + longLoops++; + curTime = System.currentTimeMillis(); + } while (curTime < endTime); + int longPsTime = (int) (1000000000 * (curTime - startTime) / (basicIters * 5 * 63 * (float) longLoops)); + System.out.println("Long nlz time: " + longPsTime + " picosec/call, dummy: " + dummy); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestBitVector.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestBitVector.java new file mode 100644 index 0000000..9b486e8 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestBitVector.java @@ -0,0 +1,215 @@ +package org.apache.lucene.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.store.MockDirectoryWrapper; +import org.apache.lucene.store.RAMDirectory; + +/** + * TestBitVector tests the BitVector, obviously. + */ +public class TestBitVector extends LuceneTestCase +{ + + /** + * Test the default constructor on BitVectors of various sizes. + * @throws Exception + */ + public void testConstructSize() throws Exception { + doTestConstructOfSize(8); + doTestConstructOfSize(20); + doTestConstructOfSize(100); + doTestConstructOfSize(1000); + } + + private void doTestConstructOfSize(int n) { + BitVector bv = new BitVector(n); + assertEquals(n,bv.size()); + } + + /** + * Test the get() and set() methods on BitVectors of various sizes. + * @throws Exception + */ + public void testGetSet() throws Exception { + doTestGetSetVectorOfSize(8); + doTestGetSetVectorOfSize(20); + doTestGetSetVectorOfSize(100); + doTestGetSetVectorOfSize(1000); + } + + private void doTestGetSetVectorOfSize(int n) { + BitVector bv = new BitVector(n); + for(int i=0;i=count1; i--) { + BitVector bv2 = new BitVector(d, "TESTBV"); + assertTrue(doCompare(bv,bv2)); + bv = bv2; + bv.clear(i); + assertEquals(i,bv.count()); + bv.write(d, "TESTBV"); + } + } + /** + * Compare two BitVectors. 
+ * This should really be an equals method on the BitVector itself. + * @param bv One bit vector + * @param compare The second to compare + */ + private boolean doCompare(BitVector bv, BitVector compare) { + boolean equal = true; + for(int i=0;i= 2"); + } catch (IllegalArgumentException e) { + } + } + + @Test + public void testFillNoHighSurrogate() throws IOException { + Version[] versions = new Version[] { Version.LUCENE_30, TEST_VERSION_CURRENT }; + for (Version version : versions) { + CharacterUtils instance = CharacterUtils.getInstance(version); + Reader reader = new StringReader("helloworld"); + CharacterBuffer buffer = CharacterUtils.newCharacterBuffer(6); + assertTrue(instance.fill(buffer,reader)); + assertEquals(0, buffer.getOffset()); + assertEquals(6, buffer.getLength()); + assertEquals("hellow", new String(buffer.getBuffer())); + assertTrue(instance.fill(buffer,reader)); + assertEquals(4, buffer.getLength()); + assertEquals(0, buffer.getOffset()); + + assertEquals("orld", new String(buffer.getBuffer(), buffer.getOffset(), + buffer.getLength())); + assertFalse(instance.fill(buffer,reader)); + } + } + + @Test + public void testFillJava15() throws IOException { + String input = "1234\ud801\udc1c789123\ud801\ud801\udc1c\ud801"; + CharacterUtils instance = CharacterUtils.getInstance(TEST_VERSION_CURRENT); + Reader reader = new StringReader(input); + CharacterBuffer buffer = CharacterUtils.newCharacterBuffer(5); + assertTrue(instance.fill(buffer, reader)); + assertEquals(4, buffer.getLength()); + assertEquals("1234", new String(buffer.getBuffer(), buffer.getOffset(), + buffer.getLength())); + assertTrue(instance.fill(buffer, reader)); + assertEquals(5, buffer.getLength()); + assertEquals("\ud801\udc1c789", new String(buffer.getBuffer())); + assertTrue(instance.fill(buffer, reader)); + assertEquals(4, buffer.getLength()); + assertEquals("123\ud801", new String(buffer.getBuffer(), + buffer.getOffset(), buffer.getLength())); + assertTrue(instance.fill(buffer, reader)); + assertEquals(2, buffer.getLength()); + assertEquals("\ud801\udc1c", new String(buffer.getBuffer(), buffer + .getOffset(), buffer.getLength())); + assertTrue(instance.fill(buffer, reader)); + assertEquals(1, buffer.getLength()); + assertEquals("\ud801", new String(buffer.getBuffer(), buffer + .getOffset(), buffer.getLength())); + assertFalse(instance.fill(buffer, reader)); + } + + @Test + public void testFillJava14() throws IOException { + String input = "1234\ud801\udc1c789123\ud801\ud801\udc1c\ud801"; + CharacterUtils instance = CharacterUtils.getInstance(Version.LUCENE_30); + Reader reader = new StringReader(input); + CharacterBuffer buffer = CharacterUtils.newCharacterBuffer(5); + assertTrue(instance.fill(buffer, reader)); + assertEquals(5, buffer.getLength()); + assertEquals("1234\ud801", new String(buffer.getBuffer(), buffer + .getOffset(), buffer.getLength())); + assertTrue(instance.fill(buffer, reader)); + assertEquals(5, buffer.getLength()); + assertEquals("\udc1c7891", new String(buffer.getBuffer())); + buffer = CharacterUtils.newCharacterBuffer(6); + assertTrue(instance.fill(buffer, reader)); + assertEquals(6, buffer.getLength()); + assertEquals("23\ud801\ud801\udc1c\ud801", new String(buffer.getBuffer(), buffer + .getOffset(), buffer.getLength())); + assertFalse(instance.fill(buffer, reader)); + + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestCloseableThreadLocal.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestCloseableThreadLocal.java 
new file mode 100644 index 0000000..9b70810 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestCloseableThreadLocal.java @@ -0,0 +1,50 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.util; + +public class TestCloseableThreadLocal extends LuceneTestCase { + public static final String TEST_VALUE = "initvaluetest"; + + public void testInitValue() { + InitValueThreadLocal tl = new InitValueThreadLocal(); + String str = (String)tl.get(); + assertEquals(TEST_VALUE, str); + } + + public void testNullValue() throws Exception { + // Tests that null can be set as a valid value (LUCENE-1805). This + // previously failed in get(). + CloseableThreadLocal ctl = new CloseableThreadLocal(); + ctl.set(null); + assertNull(ctl.get()); + } + + public void testDefaultValueWithoutSetting() throws Exception { + // LUCENE-1805: make sure default get returns null, + // twice in a row + CloseableThreadLocal ctl = new CloseableThreadLocal(); + assertNull(ctl.get()); + } + + public class InitValueThreadLocal extends CloseableThreadLocal { + @Override + protected Object initialValue() { + return TEST_VALUE; + } + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestCollectionUtil.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestCollectionUtil.java new file mode 100644 index 0000000..8392c9a --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestCollectionUtil.java @@ -0,0 +1,125 @@ +package org.apache.lucene.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.util.Arrays; +import java.util.ArrayList; +import java.util.Collections; +import java.util.LinkedList; +import java.util.List; + +public class TestCollectionUtil extends LuceneTestCase { + + private List createRandomList(int maxSize) { + final Integer[] a = new Integer[random.nextInt(maxSize) + 1]; + for (int i = 0; i < a.length; i++) { + a[i] = Integer.valueOf(random.nextInt(a.length)); + } + return Arrays.asList(a); + } + + public void testQuickSort() { + for (int i = 0, c = atLeast(500); i < c; i++) { + List list1 = createRandomList(1000), list2 = new ArrayList(list1); + CollectionUtil.quickSort(list1); + Collections.sort(list2); + assertEquals(list2, list1); + + list1 = createRandomList(1000); + list2 = new ArrayList(list1); + CollectionUtil.quickSort(list1, Collections.reverseOrder()); + Collections.sort(list2, Collections.reverseOrder()); + assertEquals(list2, list1); + // reverse back, so we can test that completely backwards sorted array (worst case) is working: + CollectionUtil.quickSort(list1); + Collections.sort(list2); + assertEquals(list2, list1); + } + } + + public void testMergeSort() { + for (int i = 0, c = atLeast(500); i < c; i++) { + List list1 = createRandomList(1000), list2 = new ArrayList(list1); + CollectionUtil.mergeSort(list1); + Collections.sort(list2); + assertEquals(list2, list1); + + list1 = createRandomList(1000); + list2 = new ArrayList(list1); + CollectionUtil.mergeSort(list1, Collections.reverseOrder()); + Collections.sort(list2, Collections.reverseOrder()); + assertEquals(list2, list1); + // reverse back, so we can test that completely backwards sorted array (worst case) is working: + CollectionUtil.mergeSort(list1); + Collections.sort(list2); + assertEquals(list2, list1); + } + } + + public void testInsertionSort() { + for (int i = 0, c = atLeast(500); i < c; i++) { + List list1 = createRandomList(30), list2 = new ArrayList(list1); + CollectionUtil.insertionSort(list1); + Collections.sort(list2); + assertEquals(list2, list1); + + list1 = createRandomList(30); + list2 = new ArrayList(list1); + CollectionUtil.insertionSort(list1, Collections.reverseOrder()); + Collections.sort(list2, Collections.reverseOrder()); + assertEquals(list2, list1); + // reverse back, so we can test that completely backwards sorted array (worst case) is working: + CollectionUtil.insertionSort(list1); + Collections.sort(list2); + assertEquals(list2, list1); + } + } + + public void testEmptyListSort() { + // should produce no exceptions + List list = Arrays.asList(new Integer[0]); + CollectionUtil.quickSort(list); + CollectionUtil.mergeSort(list); + CollectionUtil.insertionSort(list); + CollectionUtil.quickSort(list, Collections.reverseOrder()); + CollectionUtil.mergeSort(list, Collections.reverseOrder()); + CollectionUtil.insertionSort(list, Collections.reverseOrder()); + + // check that empty non-random access lists pass sorting without ex (as sorting is not needed) + list = new LinkedList(); + CollectionUtil.quickSort(list); + CollectionUtil.mergeSort(list); + CollectionUtil.insertionSort(list); + CollectionUtil.quickSort(list, Collections.reverseOrder()); + CollectionUtil.mergeSort(list, Collections.reverseOrder()); + CollectionUtil.insertionSort(list, Collections.reverseOrder()); + } + + public void testOneElementListSort() { + // check that one-element non-random access lists pass sorting without ex (as sorting is not needed) + List list = new LinkedList(); + list.add(1); + CollectionUtil.quickSort(list); + CollectionUtil.mergeSort(list); + 
CollectionUtil.insertionSort(list); + CollectionUtil.quickSort(list, Collections.reverseOrder()); + CollectionUtil.mergeSort(list, Collections.reverseOrder()); + CollectionUtil.insertionSort(list, Collections.reverseOrder()); + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestDoubleBarrelLRUCache.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestDoubleBarrelLRUCache.java new file mode 100644 index 0000000..952c218 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestDoubleBarrelLRUCache.java @@ -0,0 +1,188 @@ +package org.apache.lucene.util; + +/** +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +import org.apache.lucene.util.LuceneTestCase; + +public class TestDoubleBarrelLRUCache extends LuceneTestCase { + + private void testCache(DoubleBarrelLRUCache cache, int n) throws Exception { + Object dummy = new Object(); + + for (int i = 0; i < n; i++) { + cache.put(new CloneableInteger(i), dummy); + } + + // access every 2nd item in cache + for (int i = 0; i < n; i+=2) { + assertNotNull(cache.get(new CloneableInteger(i))); + } + + // add n/2 elements to cache, the ones that weren't + // touched in the previous loop should now be thrown away + for (int i = n; i < n + (n / 2); i++) { + cache.put(new CloneableInteger(i), dummy); + } + + // access every 4th item in cache + for (int i = 0; i < n; i+=4) { + assertNotNull(cache.get(new CloneableInteger(i))); + } + + // add 3/4n elements to cache, the ones that weren't + // touched in the previous loops should now be thrown away + for (int i = n; i < n + (n * 3 / 4); i++) { + cache.put(new CloneableInteger(i), dummy); + } + + // access every 4th item in cache + for (int i = 0; i < n; i+=4) { + assertNotNull(cache.get(new CloneableInteger(i))); + } + } + + public void testLRUCache() throws Exception { + final int n = 100; + testCache(new DoubleBarrelLRUCache(n), n); + } + + private class CacheThread extends Thread { + private final CloneableObject[] objs; + private final DoubleBarrelLRUCache c; + private final long endTime; + volatile boolean failed; + + public CacheThread(DoubleBarrelLRUCache c, + CloneableObject[] objs, long endTime) { + this.c = c; + this.objs = objs; + this.endTime = endTime; + } + + @Override + public void run() { + try { + long count = 0; + long miss = 0; + long hit = 0; + final int limit = objs.length; + + while(true) { + final CloneableObject obj = objs[(int) ((count/2) % limit)]; + Object v = c.get(obj); + if (v == null) { + c.put(new CloneableObject(obj), obj); + miss++; + } else { + assert obj == v; + hit++; + } + if ((++count % 10000) == 0) { + if (System.currentTimeMillis() >= endTime) { + break; + } + } + } + + addResults(miss, hit); + } catch (Throwable t) { + failed = true; + throw 
new RuntimeException(t); + } + } + } + + long totMiss, totHit; + void addResults(long miss, long hit) { + totMiss += miss; + totHit += hit; + } + + public void testThreadCorrectness() throws Exception { + final int NUM_THREADS = 4; + final int CACHE_SIZE = 512; + final int OBJ_COUNT = 3*CACHE_SIZE; + + DoubleBarrelLRUCache c = new DoubleBarrelLRUCache(1024); + + CloneableObject[] objs = new CloneableObject[OBJ_COUNT]; + for(int i=0;i 0 ? 1 : 0; + + IndexableBinaryStringTools.encode(originalBuf1, encodedBuf1); + IndexableBinaryStringTools.encode(originalBuf2, encodedBuf2); + + int encodedComparison = encodedBuf1.compareTo(encodedBuf2); + encodedComparison = encodedComparison < 0 ? -1 : encodedComparison > 0 ? 1 : 0; + + assertEquals("Test #" + (testNum + 1) + + ": Original bytes and encoded chars compare differently:" + + System.getProperty("line.separator") + + " binary 1: " + binaryDumpNIO(originalBuf1) + + System.getProperty("line.separator") + + " binary 2: " + binaryDumpNIO(originalBuf2) + + System.getProperty("line.separator") + + "encoded 1: " + charArrayDumpNIO(encodedBuf1) + + System.getProperty("line.separator") + + "encoded 2: " + charArrayDumpNIO(encodedBuf2) + + System.getProperty("line.separator"), + originalComparison, encodedComparison); + } + } + + public void testEncodedSortability() { + byte[] originalArray1 = new byte[MAX_RANDOM_BINARY_LENGTH]; + char[] originalString1 = new char[MAX_RANDOM_BINARY_LENGTH]; + char[] encoded1 = new char[MAX_RANDOM_BINARY_LENGTH * 10]; + byte[] original2 = new byte[MAX_RANDOM_BINARY_LENGTH]; + char[] originalString2 = new char[MAX_RANDOM_BINARY_LENGTH]; + char[] encoded2 = new char[MAX_RANDOM_BINARY_LENGTH * 10]; + + for (int testNum = 0; testNum < NUM_RANDOM_TESTS; ++testNum) { + int numBytes1 = random.nextInt(MAX_RANDOM_BINARY_LENGTH - 1) + 1; // Min == 1 + + for (int byteNum = 0; byteNum < numBytes1; ++byteNum) { + int randomInt = random.nextInt(0x100); + originalArray1[byteNum] = (byte) randomInt; + originalString1[byteNum] = (char) randomInt; + } + + int numBytes2 = random.nextInt(MAX_RANDOM_BINARY_LENGTH - 1) + 1; // Min == 1 + + for (int byteNum = 0; byteNum < numBytes2; ++byteNum) { + int randomInt = random.nextInt(0x100); + original2[byteNum] = (byte) randomInt; + originalString2[byteNum] = (char) randomInt; + } + int originalComparison = new String(originalString1, 0, numBytes1) + .compareTo(new String(originalString2, 0, numBytes2)); + originalComparison = originalComparison < 0 ? -1 + : originalComparison > 0 ? 1 : 0; + + int encodedLen1 = IndexableBinaryStringTools.getEncodedLength( + originalArray1, 0, numBytes1); + if (encodedLen1 > encoded1.length) + encoded1 = new char[ArrayUtil.oversize(encodedLen1, RamUsageEstimator.NUM_BYTES_CHAR)]; + IndexableBinaryStringTools.encode(originalArray1, 0, numBytes1, encoded1, + 0, encodedLen1); + + int encodedLen2 = IndexableBinaryStringTools.getEncodedLength(original2, + 0, numBytes2); + if (encodedLen2 > encoded2.length) + encoded2 = new char[ArrayUtil.oversize(encodedLen2, RamUsageEstimator.NUM_BYTES_CHAR)]; + IndexableBinaryStringTools.encode(original2, 0, numBytes2, encoded2, 0, + encodedLen2); + + int encodedComparison = new String(encoded1, 0, encodedLen1) + .compareTo(new String(encoded2, 0, encodedLen2)); + encodedComparison = encodedComparison < 0 ? -1 + : encodedComparison > 0 ? 
1 : 0; + + assertEquals("Test #" + (testNum + 1) + + ": Original bytes and encoded chars compare differently:" + + System.getProperty("line.separator") + " binary 1: " + + binaryDump(originalArray1, numBytes1) + + System.getProperty("line.separator") + " binary 2: " + + binaryDump(original2, numBytes2) + + System.getProperty("line.separator") + "encoded 1: " + + charArrayDump(encoded1, encodedLen1) + + System.getProperty("line.separator") + "encoded 2: " + + charArrayDump(encoded2, encodedLen2) + + System.getProperty("line.separator"), originalComparison, + encodedComparison); + } + } + + /** @deprecated remove this test for Lucene 4.0 */ + @Deprecated + public void testEmptyInputNIO() { + byte[] binary = new byte[0]; + CharBuffer encoded = IndexableBinaryStringTools.encode(ByteBuffer.wrap(binary)); + ByteBuffer decoded = IndexableBinaryStringTools.decode(encoded); + assertNotNull("decode() returned null", decoded); + assertEquals("decoded empty input was not empty", decoded.limit(), 0); + } + + public void testEmptyInput() { + byte[] binary = new byte[0]; + + int encodedLen = IndexableBinaryStringTools.getEncodedLength(binary, 0, + binary.length); + char[] encoded = new char[encodedLen]; + IndexableBinaryStringTools.encode(binary, 0, binary.length, encoded, 0, + encoded.length); + + int decodedLen = IndexableBinaryStringTools.getDecodedLength(encoded, 0, + encoded.length); + byte[] decoded = new byte[decodedLen]; + IndexableBinaryStringTools.decode(encoded, 0, encoded.length, decoded, 0, + decoded.length); + + assertEquals("decoded empty input was not empty", decoded.length, 0); + } + + /** @deprecated remove this test for Lucene 4.0 */ + @Deprecated + public void testAllNullInputNIO() { + byte[] binary = new byte[] { 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + ByteBuffer binaryBuf = ByteBuffer.wrap(binary); + CharBuffer encoded = IndexableBinaryStringTools.encode(binaryBuf); + assertNotNull("encode() returned null", encoded); + ByteBuffer decodedBuf = IndexableBinaryStringTools.decode(encoded); + assertNotNull("decode() returned null", decodedBuf); + assertEquals("Round trip encode/decode returned different results:" + + System.getProperty("line.separator") + + " original: " + binaryDumpNIO(binaryBuf) + + System.getProperty("line.separator") + + "decodedBuf: " + binaryDumpNIO(decodedBuf), + binaryBuf, decodedBuf); + } + + public void testAllNullInput() { + byte[] binary = new byte[] { 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + + int encodedLen = IndexableBinaryStringTools.getEncodedLength(binary, 0, + binary.length); + char encoded[] = new char[encodedLen]; + IndexableBinaryStringTools.encode(binary, 0, binary.length, encoded, 0, + encoded.length); + + int decodedLen = IndexableBinaryStringTools.getDecodedLength(encoded, 0, + encoded.length); + byte[] decoded = new byte[decodedLen]; + IndexableBinaryStringTools.decode(encoded, 0, encoded.length, decoded, 0, + decoded.length); + + assertEquals("Round trip encode/decode returned different results:" + + System.getProperty("line.separator") + " original: " + + binaryDump(binary, binary.length) + + System.getProperty("line.separator") + "decodedBuf: " + + binaryDump(decoded, decoded.length), + binaryDump(binary, binary.length), binaryDump(decoded, decoded.length)); + } + + /** @deprecated remove this test for Lucene 4.0 */ + @Deprecated + public void testRandomBinaryRoundTripNIO() { + byte[] binary = new byte[MAX_RANDOM_BINARY_LENGTH]; + ByteBuffer binaryBuf = ByteBuffer.wrap(binary); + char[] encoded = new 
char[IndexableBinaryStringTools.getEncodedLength(binaryBuf)]; + CharBuffer encodedBuf = CharBuffer.wrap(encoded); + byte[] decoded = new byte[MAX_RANDOM_BINARY_LENGTH]; + ByteBuffer decodedBuf = ByteBuffer.wrap(decoded); + for (int testNum = 0 ; testNum < NUM_RANDOM_TESTS ; ++testNum) { + int numBytes = random.nextInt(MAX_RANDOM_BINARY_LENGTH - 1) + 1 ; // Min == 1 + binaryBuf.limit(numBytes); + for (int byteNum = 0 ; byteNum < numBytes ; ++byteNum) { + binary[byteNum] = (byte)random.nextInt(0x100); + } + IndexableBinaryStringTools.encode(binaryBuf, encodedBuf); + IndexableBinaryStringTools.decode(encodedBuf, decodedBuf); + assertEquals("Test #" + (testNum + 1) + + ": Round trip encode/decode returned different results:" + + System.getProperty("line.separator") + + " original: " + binaryDumpNIO(binaryBuf) + + System.getProperty("line.separator") + + "encodedBuf: " + charArrayDumpNIO(encodedBuf) + + System.getProperty("line.separator") + + "decodedBuf: " + binaryDumpNIO(decodedBuf), + binaryBuf, decodedBuf); + } + } + + public void testRandomBinaryRoundTrip() { + byte[] binary = new byte[MAX_RANDOM_BINARY_LENGTH]; + char[] encoded = new char[MAX_RANDOM_BINARY_LENGTH * 10]; + byte[] decoded = new byte[MAX_RANDOM_BINARY_LENGTH]; + for (int testNum = 0; testNum < NUM_RANDOM_TESTS; ++testNum) { + int numBytes = random.nextInt(MAX_RANDOM_BINARY_LENGTH - 1) + 1; // Min == 1 + + for (int byteNum = 0; byteNum < numBytes; ++byteNum) { + binary[byteNum] = (byte) random.nextInt(0x100); + } + + int encodedLen = IndexableBinaryStringTools.getEncodedLength(binary, 0, + numBytes); + if (encoded.length < encodedLen) + encoded = new char[ArrayUtil.oversize(encodedLen, RamUsageEstimator.NUM_BYTES_CHAR)]; + IndexableBinaryStringTools.encode(binary, 0, numBytes, encoded, 0, + encodedLen); + + int decodedLen = IndexableBinaryStringTools.getDecodedLength(encoded, 0, + encodedLen); + IndexableBinaryStringTools.decode(encoded, 0, encodedLen, decoded, 0, + decodedLen); + + assertEquals("Test #" + (testNum + 1) + + ": Round trip encode/decode returned different results:" + + System.getProperty("line.separator") + " original: " + + binaryDump(binary, numBytes) + System.getProperty("line.separator") + + "encodedBuf: " + charArrayDump(encoded, encodedLen) + + System.getProperty("line.separator") + "decodedBuf: " + + binaryDump(decoded, decodedLen), binaryDump(binary, numBytes), + binaryDump(decoded, decodedLen)); + } + } + + /** @deprecated remove this method for Lucene 4.0 */ + @Deprecated + public String binaryDumpNIO(ByteBuffer binaryBuf) { + return binaryDump(binaryBuf.array(), + binaryBuf.limit() - binaryBuf.arrayOffset()); + } + + public String binaryDump(byte[] binary, int numBytes) { + StringBuilder buf = new StringBuilder(); + for (int byteNum = 0 ; byteNum < numBytes ; ++byteNum) { + String hex = Integer.toHexString(binary[byteNum] & 0xFF); + if (hex.length() == 1) { + buf.append('0'); + } + buf.append(hex.toUpperCase()); + if (byteNum < numBytes - 1) { + buf.append(' '); + } + } + return buf.toString(); + } + /** @deprecated remove this method for Lucene 4.0 */ + @Deprecated + public String charArrayDumpNIO(CharBuffer charBuf) { + return charArrayDump(charBuf.array(), + charBuf.limit() - charBuf.arrayOffset()); + } + + public String charArrayDump(char[] charArray, int numBytes) { + StringBuilder buf = new StringBuilder(); + for (int charNum = 0 ; charNum < numBytes ; ++charNum) { + String hex = Integer.toHexString(charArray[charNum]); + for (int digit = 0 ; digit < 4 - hex.length() ; ++digit) { + 
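+ // left-pad with zeros so every char is rendered as exactly four hex digits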
buf.append('0'); + } + buf.append(hex.toUpperCase()); + if (charNum < numBytes - 1) { + buf.append(' '); + } + } + return buf.toString(); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestNumericUtils.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestNumericUtils.java new file mode 100644 index 0000000..8271d9d --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestNumericUtils.java @@ -0,0 +1,521 @@ +package org.apache.lucene.util; + +/** +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +import java.util.Arrays; +import java.util.Collections; +import java.util.Iterator; +import java.util.Random; + +public class TestNumericUtils extends LuceneTestCase { + + public void testLongConversionAndOrdering() throws Exception { + // generate a series of encoded longs, each one numerically bigger than the one before + String last=null; + for (long l=-100000L; l<100000L; l++) { + String act=NumericUtils.longToPrefixCoded(l); + if (last!=null) { + // test if smaller + assertTrue("actual bigger than last", last.compareTo(act) < 0 ); + } + // test that back and forward conversion works + assertEquals("forward and back conversion should generate same long", l, NumericUtils.prefixCodedToLong(act)); + // next step + last=act; + } + } + + public void testIntConversionAndOrdering() throws Exception { + // generate a series of encoded ints, each one numerically bigger than the one before + String last=null; + for (int i=-100000; i<100000; i++) { + String act=NumericUtils.intToPrefixCoded(i); + if (last!=null) { + // test if smaller + assertTrue("actual bigger than last", last.compareTo(act) < 0 ); + } + // test that back and forward conversion works + assertEquals("forward and back conversion should generate same int", i, NumericUtils.prefixCodedToInt(act)); + // next step + last=act; + } + } + + public void testLongSpecialValues() throws Exception { + long[] vals=new long[]{ + Long.MIN_VALUE, Long.MIN_VALUE+1, Long.MIN_VALUE+2, -5003400000000L, + -4000L, -3000L, -2000L, -1000L, -1L, 0L, 1L, 10L, 300L, 50006789999999999L, Long.MAX_VALUE-2, Long.MAX_VALUE-1, Long.MAX_VALUE + }; + String[] prefixVals=new String[vals.length]; + + for (int i=0; i expectedBounds, final Iterable<Integer> expectedShifts + ) throws Exception { + final OpenBitSet bits=useBitSet ? new OpenBitSet(upper-lower+1) : null; + final Iterator<Long> neededBounds = (expectedBounds == null) ? null : expectedBounds.iterator(); + final Iterator<Integer> neededShifts = (expectedShifts == null) ? 
null : expectedShifts.iterator(); + + NumericUtils.splitLongRange(new NumericUtils.LongRangeBuilder() { + @Override + public void addRange(long min, long max, int shift) { + assertTrue("min, max should be inside bounds", min>=lower && min<=upper && max>=lower && max<=upper); + if (useBitSet) for (long l=min; l<=max; l++) { + assertFalse("ranges should not overlap", bits.getAndSet(l-lower) ); + // extra exit condition to prevent overflow on MAX_VALUE + if (l == max) break; + } + if (neededBounds == null || neededShifts == null) + return; + // make unsigned longs for easier display and understanding + min ^= 0x8000000000000000L; + max ^= 0x8000000000000000L; + //System.out.println("0x"+Long.toHexString(min>>>shift)+"L,0x"+Long.toHexString(max>>>shift)+"L)/*shift="+shift+"*/,"); + assertEquals( "shift", neededShifts.next().intValue(), shift); + assertEquals( "inner min bound", neededBounds.next().longValue(), min>>>shift); + assertEquals( "inner max bound", neededBounds.next().longValue(), max>>>shift); + } + }, precisionStep, lower, upper); + + if (useBitSet) { + // after flipping all bits in the range, the cardinality should be zero + bits.flip(0,upper-lower+1); + assertTrue("The sub-ranges concatenated should match the whole range", bits.isEmpty()); + } + } + + /** LUCENE-2541: NumericRangeQuery errors with endpoints near long min and max values */ + public void testLongExtremeValues() throws Exception { + // upper end extremes + assertLongRangeSplit(Long.MAX_VALUE, Long.MAX_VALUE, 1, true, Arrays.asList( + 0xffffffffffffffffL,0xffffffffffffffffL + ), Arrays.asList( + 0 + )); + assertLongRangeSplit(Long.MAX_VALUE, Long.MAX_VALUE, 2, true, Arrays.asList( + 0xffffffffffffffffL,0xffffffffffffffffL + ), Arrays.asList( + 0 + )); + assertLongRangeSplit(Long.MAX_VALUE, Long.MAX_VALUE, 4, true, Arrays.asList( + 0xffffffffffffffffL,0xffffffffffffffffL + ), Arrays.asList( + 0 + )); + assertLongRangeSplit(Long.MAX_VALUE, Long.MAX_VALUE, 6, true, Arrays.asList( + 0xffffffffffffffffL,0xffffffffffffffffL + ), Arrays.asList( + 0 + )); + assertLongRangeSplit(Long.MAX_VALUE, Long.MAX_VALUE, 8, true, Arrays.asList( + 0xffffffffffffffffL,0xffffffffffffffffL + ), Arrays.asList( + 0 + )); + assertLongRangeSplit(Long.MAX_VALUE, Long.MAX_VALUE, 64, true, Arrays.asList( + 0xffffffffffffffffL,0xffffffffffffffffL + ), Arrays.asList( + 0 + )); + + assertLongRangeSplit(Long.MAX_VALUE-0xfL, Long.MAX_VALUE, 4, true, Arrays.asList( + 0xfffffffffffffffL,0xfffffffffffffffL + ), Arrays.asList( + 4 + )); + assertLongRangeSplit(Long.MAX_VALUE-0x10L, Long.MAX_VALUE, 4, true, Arrays.asList( + 0xffffffffffffffefL,0xffffffffffffffefL, + 0xfffffffffffffffL,0xfffffffffffffffL + ), Arrays.asList( + 0, 4 + )); + + // lower end extremes + assertLongRangeSplit(Long.MIN_VALUE, Long.MIN_VALUE, 1, true, Arrays.asList( + 0x0000000000000000L,0x0000000000000000L + ), Arrays.asList( + 0 + )); + assertLongRangeSplit(Long.MIN_VALUE, Long.MIN_VALUE, 2, true, Arrays.asList( + 0x0000000000000000L,0x0000000000000000L + ), Arrays.asList( + 0 + )); + assertLongRangeSplit(Long.MIN_VALUE, Long.MIN_VALUE, 4, true, Arrays.asList( + 0x0000000000000000L,0x0000000000000000L + ), Arrays.asList( + 0 + )); + assertLongRangeSplit(Long.MIN_VALUE, Long.MIN_VALUE, 6, true, Arrays.asList( + 0x0000000000000000L,0x0000000000000000L + ), Arrays.asList( + 0 + )); + assertLongRangeSplit(Long.MIN_VALUE, Long.MIN_VALUE, 8, true, Arrays.asList( + 0x0000000000000000L,0x0000000000000000L + ), Arrays.asList( + 0 + )); + assertLongRangeSplit(Long.MIN_VALUE, Long.MIN_VALUE, 64, 
true, Arrays.asList( + 0x0000000000000000L,0x0000000000000000L + ), Arrays.asList( + 0 + )); + + assertLongRangeSplit(Long.MIN_VALUE, Long.MIN_VALUE+0xfL, 4, true, Arrays.asList( + 0x000000000000000L,0x000000000000000L + ), Arrays.asList( + 4 + )); + assertLongRangeSplit(Long.MIN_VALUE, Long.MIN_VALUE+0x10L, 4, true, Arrays.asList( + 0x0000000000000010L,0x0000000000000010L, + 0x000000000000000L,0x000000000000000L + ), Arrays.asList( + 0, 4 + )); + } + + public void testRandomSplit() throws Exception { + long num = (long) atLeast(10); + for (long i=0; i < num; i++) { + executeOneRandomSplit(random); + } + } + + private void executeOneRandomSplit(final Random random) throws Exception { + long lower = randomLong(random); + long len = random.nextInt(16384*1024); // not too large bitsets, else OOME! + while (lower + len < lower) { // overflow + lower >>= 1; + } + assertLongRangeSplit(lower, lower + len, random.nextInt(64) + 1, true, null, null); + } + + private long randomLong(final Random random) { + long val; + switch(random.nextInt(4)) { + case 0: + val = 1L << (random.nextInt(63)); // patterns like 0x000000100000 (-1 yields patterns like 0x0000fff) + break; + case 1: + val = -1L << (random.nextInt(63)); // patterns like 0xfffff00000 + break; + default: + val = random.nextLong(); + } + + val += random.nextInt(5)-2; + + if (random.nextBoolean()) { + if (random.nextBoolean()) val += random.nextInt(100)-50; + if (random.nextBoolean()) val = ~val; + if (random.nextBoolean()) val = val<<1; + if (random.nextBoolean()) val = val>>>1; + } + + return val; + } + + public void testSplitLongRange() throws Exception { + // a hard-coded "standard" range + assertLongRangeSplit(-5000L, 9500L, 4, true, Arrays.asList( + 0x7fffffffffffec78L,0x7fffffffffffec7fL, + 0x8000000000002510L,0x800000000000251cL, + 0x7fffffffffffec8L, 0x7fffffffffffecfL, + 0x800000000000250L, 0x800000000000250L, + 0x7fffffffffffedL, 0x7fffffffffffefL, + 0x80000000000020L, 0x80000000000024L, + 0x7ffffffffffffL, 0x8000000000001L + ), Arrays.asList( + 0, 0, + 4, 4, + 8, 8, + 12 + )); + + // the same with no range splitting + assertLongRangeSplit(-5000L, 9500L, 64, true, Arrays.asList( + 0x7fffffffffffec78L,0x800000000000251cL + ), Arrays.asList( + 0 + )); + + // this tests optimized range splitting: if one of the inner bounds + // is also the bound of the next lower precision, it should be used completely + assertLongRangeSplit(0L, 1024L+63L, 4, true, Arrays.asList( + 0x800000000000040L, 0x800000000000043L, + 0x80000000000000L, 0x80000000000003L + ), Arrays.asList( + 4, 8 + )); + + // the full long range should only consist of a lowest precision range; no bitset testing here, as too much memory needed :-) + assertLongRangeSplit(Long.MIN_VALUE, Long.MAX_VALUE, 8, false, Arrays.asList( + 0x00L,0xffL + ), Arrays.asList( + 56 + )); + + // the same with precisionStep=4 + assertLongRangeSplit(Long.MIN_VALUE, Long.MAX_VALUE, 4, false, Arrays.asList( + 0x0L,0xfL + ), Arrays.asList( + 60 + )); + + // the same with precisionStep=2 + assertLongRangeSplit(Long.MIN_VALUE, Long.MAX_VALUE, 2, false, Arrays.asList( + 0x0L,0x3L + ), Arrays.asList( + 62 + )); + + // the same with precisionStep=1 + assertLongRangeSplit(Long.MIN_VALUE, Long.MAX_VALUE, 1, false, Arrays.asList( + 0x0L,0x1L + ), Arrays.asList( + 63 + )); + + // an inverse range should produce no sub-ranges + assertLongRangeSplit(9500L, -5000L, 4, false, Collections.emptyList(), Collections.emptyList()); + + // a 0-length range should reproduce the range itself + assertLongRangeSplit(9500L, 
+    assertLongRangeSplit(9500L, 9500L, 4, false, Arrays.asList(
+      0x800000000000251cL,0x800000000000251cL
+    ), Arrays.asList(
+      0
+    ));
+  }
+
+  /** Note: The neededBounds Iterable must be unsigned (easier understanding what's happening) */
+  private void assertIntRangeSplit(final int lower, final int upper, int precisionStep,
+    final boolean useBitSet, final Iterable<Integer> expectedBounds, final Iterable<Integer> expectedShifts
+  ) throws Exception {
+    final OpenBitSet bits=useBitSet ? new OpenBitSet(upper-lower+1) : null;
+    final Iterator<Integer> neededBounds = (expectedBounds == null) ? null : expectedBounds.iterator();
+    final Iterator<Integer> neededShifts = (expectedShifts == null) ? null : expectedShifts.iterator();
+
+    NumericUtils.splitIntRange(new NumericUtils.IntRangeBuilder() {
+      @Override
+      public void addRange(int min, int max, int shift) {
+        assertTrue("min, max should be inside bounds", min>=lower && min<=upper && max>=lower && max<=upper);
+        if (useBitSet) for (int i=min; i<=max; i++) {
+          assertFalse("ranges should not overlap", bits.getAndSet(i-lower) );
+          // extra exit condition to prevent overflow on MAX_VALUE
+          if (i == max) break;
+        }
+        if (neededBounds == null)
+          return;
+        // make unsigned ints for easier display and understanding
+        min ^= 0x80000000;
+        max ^= 0x80000000;
+        //System.out.println("0x"+Integer.toHexString(min>>>shift)+",0x"+Integer.toHexString(max>>>shift)+")/*shift="+shift+"*/,");
+        assertEquals( "shift", neededShifts.next().intValue(), shift);
+        assertEquals( "inner min bound", neededBounds.next().intValue(), min>>>shift);
+        assertEquals( "inner max bound", neededBounds.next().intValue(), max>>>shift);
+      }
+    }, precisionStep, lower, upper);
+
+    if (useBitSet) {
+      // after flipping all bits in the range, the cardinality should be zero
+      bits.flip(0,upper-lower+1);
+      assertTrue("The concatenated sub-ranges should match the whole range", bits.isEmpty());
+    }
+  }
+
+  public void testSplitIntRange() throws Exception {
+    // a hard-coded "standard" range
+    assertIntRangeSplit(-5000, 9500, 4, true, Arrays.asList(
+      0x7fffec78,0x7fffec7f,
+      0x80002510,0x8000251c,
+      0x7fffec8, 0x7fffecf,
+      0x8000250, 0x8000250,
+      0x7fffed, 0x7fffef,
+      0x800020, 0x800024,
+      0x7ffff, 0x80001
+    ), Arrays.asList(
+      0, 0,
+      4, 4,
+      8, 8,
+      12
+    ));
+
+    // the same with no range splitting
+    assertIntRangeSplit(-5000, 9500, 32, true, Arrays.asList(
+      0x7fffec78,0x8000251c
+    ), Arrays.asList(
+      0
+    ));
+
+    // this tests optimized range splitting; if one of the inner bounds
+    // is also the bound of the next lower precision, it should be used completely
+    assertIntRangeSplit(0, 1024+63, 4, true, Arrays.asList(
+      0x8000040, 0x8000043,
+      0x800000, 0x800003
+    ), Arrays.asList(
+      4, 8
+    ));
+
+    // the full int range should only consist of a lowest precision range;
+    // no bitset testing here, as too much memory needed :-)
+    assertIntRangeSplit(Integer.MIN_VALUE, Integer.MAX_VALUE, 8, false, Arrays.asList(
+      0x00,0xff
+    ), Arrays.asList(
+      24
+    ));
+
+    // the same with precisionStep=4
+    assertIntRangeSplit(Integer.MIN_VALUE, Integer.MAX_VALUE, 4, false, Arrays.asList(
+      0x0,0xf
+    ), Arrays.asList(
+      28
+    ));
+
+    // the same with precisionStep=2
+    assertIntRangeSplit(Integer.MIN_VALUE, Integer.MAX_VALUE, 2, false, Arrays.asList(
+      0x0,0x3
+    ), Arrays.asList(
+      30
+    ));
+
+    // the same with precisionStep=1
+    assertIntRangeSplit(Integer.MIN_VALUE, Integer.MAX_VALUE, 1, false, Arrays.asList(
+      0x0,0x1
+    ), Arrays.asList(
+      31
+    ));
+
+    // an inverse range should produce no sub-ranges
+    assertIntRangeSplit(9500, -5000, 4, false, Collections.<Integer>emptyList(), Collections.<Integer>emptyList());
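+
+    // (Editor's sketch, hedged: splitIntRange peels off at most two sub-ranges
+    // per precision level -- the low and high edges not aligned to the current
+    // precisionStep -- and then recurses on the aligned middle with the shift
+    // increased by precisionStep; the hard-coded pairs above are exactly those
+    // per-level bounds, shown unsigned after the 0x80000000 flip.)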
+
+    // a 0-length range should reproduce the range itself
+    assertIntRangeSplit(9500, 9500, 4, false, Arrays.asList(
+      0x8000251c,0x8000251c
+    ), Arrays.asList(
+      0
+    ));
+  }
+
+}
diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestOpenBitSet.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestOpenBitSet.java
new file mode 100644
index 0000000..61322fe
--- /dev/null
+++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestOpenBitSet.java
@@ -0,0 +1,265 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.util;
+
+import java.util.BitSet;
+
+import org.apache.lucene.search.DocIdSetIterator;
+
+public class TestOpenBitSet extends LuceneTestCase {
+
+  void doGet(BitSet a, OpenBitSet b) {
+    int max = a.size();
+    for (int i=0; i<max; i++) {
+      if (a.get(i) != b.get(i)) {
+        fail("mismatch: BitSet=["+i+"]="+a.get(i));
+      }
+    }
+  }
+
+  void doNextSetBit(BitSet a, OpenBitSet b) {
+    int aa=-1,bb=-1;
+    do {
+      aa = a.nextSetBit(aa+1);
+      bb = b.nextSetBit(bb+1);
+      assertEquals(aa,bb);
+    } while (aa>=0);
+  }
+
+  void doPrevSetBit(BitSet a, OpenBitSet b) {
+    int aa = a.size() + random.nextInt(100);
+    int bb = aa;
+    do {
+      // aa = a.prevSetBit(aa-1);
+      aa--;
+      while ((aa >= 0) && (! a.get(aa))) {
+        aa--;
+      }
+      bb = b.prevSetBit(bb-1);
+      assertEquals(aa,bb);
+    } while (aa>=0);
+  }
+
+  // test interleaving different OpenBitSetIterator.next()/skipTo()
+  void doIterate(BitSet a, OpenBitSet b, int mode) {
+    if (mode==1) doIterate1(a, b);
+    if (mode==2) doIterate2(a, b);
+  }
+
+  void doIterate1(BitSet a, OpenBitSet b) {
+    int aa=-1,bb=-1;
+    OpenBitSetIterator iterator = new OpenBitSetIterator(b);
+    do {
+      aa = a.nextSetBit(aa+1);
+      bb = random.nextBoolean() ? iterator.nextDoc() : iterator.advance(bb + 1);
+      assertEquals(aa == -1 ? DocIdSetIterator.NO_MORE_DOCS : aa, bb);
+    } while (aa>=0);
+  }
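+
+  // Editor's illustrative sketch (hedged; not part of the original file): the
+  // random tests in this class mirror every mutation in a java.util.BitSet and
+  // compare afterwards. OpenBitSet.set(long) grows the backing long[] as
+  // needed, while fastSet() assumes sufficient capacity -- the helper name
+  // below is ad hoc.
+  void doSketchMirroredCardinality() {
+    BitSet expected = new BitSet();
+    OpenBitSet actual = new OpenBitSet(64);
+    for (int i = 0; i < 100; i++) {
+      int bit = random.nextInt(10000);
+      expected.set(bit);
+      actual.set(bit); // set(long) auto-expands past the initial 64 bits
+    }
+    assertEquals(expected.cardinality(), actual.cardinality());
+  }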
+
+  void doIterate2(BitSet a, OpenBitSet b) {
+    int aa=-1,bb=-1;
+    OpenBitSetIterator iterator = new OpenBitSetIterator(b);
+    do {
+      aa = a.nextSetBit(aa+1);
+      bb = random.nextBoolean() ? iterator.nextDoc() : iterator.advance(bb + 1);
+      assertEquals(aa == -1 ? DocIdSetIterator.NO_MORE_DOCS : aa, bb);
+    } while (aa>=0);
+  }
+
+  void doRandomSets(int maxSize, int iter, int mode) {
+    BitSet a0=null;
+    OpenBitSet b0=null;
+
+    for (int i=0; i<iter; i++) {
+      int sz = random.nextInt(maxSize);
+      BitSet a = new BitSet(sz);
+      OpenBitSet b = new OpenBitSet(sz);
+      // test the various ways of setting bits
+      if (sz>0) {
+        int nOper = random.nextInt(sz);
+        for (int j=0; j<nOper; j++) {
+          int idx = random.nextInt(sz);
+          a.set(idx);
+          b.fastSet(idx);
+          idx = random.nextInt(sz);
+          a.clear(idx);
+          b.fastClear(idx);
+          idx = random.nextInt(sz);
+          a.flip(idx);
+          b.fastFlip(idx);
+        }
+      }
+
+      // test that the various ways of accessing the bits are equivalent
+      doGet(a,b);
+
+      // test ranges, including possible extension
+      int fromIndex, toIndex;
+      fromIndex = random.nextInt(sz+80);
+      toIndex = fromIndex + random.nextInt((sz>>1)+1);
+      BitSet aa = (BitSet)a.clone(); aa.flip(fromIndex,toIndex);
+      OpenBitSet bb = (OpenBitSet)b.clone(); bb.flip(fromIndex,toIndex);
+
+      doIterate(aa,bb, mode);   // a problem here is from flip or doIterate
+
+      fromIndex = random.nextInt(sz+80);
+      toIndex = fromIndex + random.nextInt((sz>>1)+1);
+      aa = (BitSet)a.clone(); aa.clear(fromIndex,toIndex);
+      bb = (OpenBitSet)b.clone(); bb.clear(fromIndex,toIndex);
+
+      doNextSetBit(aa,bb); // a problem here is from clear() or nextSetBit
+      doPrevSetBit(aa,bb);
+
+      fromIndex = random.nextInt(sz+80);
+      toIndex = fromIndex + random.nextInt((sz>>1)+1);
+      aa = (BitSet)a.clone(); aa.set(fromIndex,toIndex);
+      bb = (OpenBitSet)b.clone(); bb.set(fromIndex,toIndex);
+
+      doNextSetBit(aa,bb); // a problem here is from set() or nextSetBit
+      doPrevSetBit(aa,bb);
+
+      if (a0 != null) {
+        assertEquals( a.equals(a0), b.equals(b0));
+
+        assertEquals(a.cardinality(), b.cardinality());
+
+        BitSet a_and = (BitSet)a.clone(); a_and.and(a0);
+        BitSet a_or = (BitSet)a.clone(); a_or.or(a0);
+        BitSet a_xor = (BitSet)a.clone(); a_xor.xor(a0);
+        BitSet a_andn = (BitSet)a.clone(); a_andn.andNot(a0);
+
+        OpenBitSet b_and = (OpenBitSet)b.clone(); assertEquals(b,b_and); b_and.and(b0);
+        OpenBitSet b_or = (OpenBitSet)b.clone(); b_or.or(b0);
+        OpenBitSet b_xor = (OpenBitSet)b.clone(); b_xor.xor(b0);
+        OpenBitSet b_andn = (OpenBitSet)b.clone(); b_andn.andNot(b0);
+
+        doIterate(a_and,b_and, mode);
+        doIterate(a_or,b_or, mode);
+        doIterate(a_xor,b_xor, mode);
+        doIterate(a_andn,b_andn, mode);
+
+        assertEquals(a_and.cardinality(), b_and.cardinality());
+        assertEquals(a_or.cardinality(), b_or.cardinality());
+        assertEquals(a_xor.cardinality(), b_xor.cardinality());
+        assertEquals(a_andn.cardinality(), b_andn.cardinality());
+
+        // test non-mutating popcounts
+        assertEquals(b_and.cardinality(), OpenBitSet.intersectionCount(b,b0));
+        assertEquals(b_or.cardinality(), OpenBitSet.unionCount(b,b0));
+        assertEquals(b_xor.cardinality(), OpenBitSet.xorCount(b,b0));
+        assertEquals(b_andn.cardinality(), OpenBitSet.andNotCount(b,b0));
+      }
+
+      a0=a;
+      b0=b;
+    }
+  }
+
+  // large enough to flush obvious bugs, small enough to run in <.5 sec as part of a
+  // larger testsuite.
+  public void testSmall() {
+    doRandomSets(atLeast(1200), atLeast(1000), 1);
+    doRandomSets(atLeast(1200), atLeast(1000), 2);
+  }
+
+  // uncomment to run a bigger test (~2 minutes).
+ /* + public void testBig() { + doRandomSets(2000,200000, 1); + doRandomSets(2000,200000, 2); + } + */ + + public void testEquals() { + OpenBitSet b1 = new OpenBitSet(1111); + OpenBitSet b2 = new OpenBitSet(2222); + assertTrue(b1.equals(b2)); + assertTrue(b2.equals(b1)); + b1.set(10); + assertFalse(b1.equals(b2)); + assertFalse(b2.equals(b1)); + b2.set(10); + assertTrue(b1.equals(b2)); + assertTrue(b2.equals(b1)); + b2.set(2221); + assertFalse(b1.equals(b2)); + assertFalse(b2.equals(b1)); + b1.set(2221); + assertTrue(b1.equals(b2)); + assertTrue(b2.equals(b1)); + + // try different type of object + assertFalse(b1.equals(new Object())); + } + + public void testHashCodeEquals() { + OpenBitSet bs1 = new OpenBitSet(200); + OpenBitSet bs2 = new OpenBitSet(64); + bs1.set(3); + bs2.set(3); + assertEquals(bs1, bs2); + assertEquals(bs1.hashCode(), bs2.hashCode()); + } + + + private OpenBitSet makeOpenBitSet(int[] a) { + OpenBitSet bs = new OpenBitSet(); + for (int e: a) { + bs.set(e); + } + return bs; + } + + private BitSet makeBitSet(int[] a) { + BitSet bs = new BitSet(); + for (int e: a) { + bs.set(e); + } + return bs; + } + + private void checkPrevSetBitArray(int [] a) { + OpenBitSet obs = makeOpenBitSet(a); + BitSet bs = makeBitSet(a); + doPrevSetBit(bs, obs); + } + + public void testPrevSetBit() { + checkPrevSetBitArray(new int[] {}); + checkPrevSetBitArray(new int[] {0}); + checkPrevSetBitArray(new int[] {0,2}); + } +} + + + diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestPriorityQueue.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestPriorityQueue.java new file mode 100644 index 0000000..93b6378 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestPriorityQueue.java @@ -0,0 +1,116 @@ +package org.apache.lucene.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+import java.util.Random;
+
+public class TestPriorityQueue extends LuceneTestCase {
+
+  private static class IntegerQueue extends PriorityQueue<Integer> {
+    public IntegerQueue(int count) {
+      super();
+      initialize(count);
+    }
+
+    @Override
+    protected boolean lessThan(Integer a, Integer b) {
+      return (a < b);
+    }
+  }
+
+  public void testPQ() throws Exception {
+    testPQ(atLeast(10000), random);
+  }
+
+  public static void testPQ(int count, Random gen) {
+    PriorityQueue<Integer> pq = new IntegerQueue(count);
+    int sum = 0, sum2 = 0;
+
+    for (int i = 0; i < count; i++)
+    {
+      int next = gen.nextInt();
+      sum += next;
+      pq.add(next);
+    }
+
+    // Date end = new Date();
+
+    // System.out.print(((float)(end.getTime()-start.getTime()) / count) * 1000);
+    // System.out.println(" microseconds/put");
+
+    // start = new Date();
+
+    int last = Integer.MIN_VALUE;
+    for (int i = 0; i < count; i++)
+    {
+      Integer next = pq.pop();
+      assertTrue(next.intValue() >= last);
+      last = next.intValue();
+      sum2 += last;
+    }
+
+    assertEquals(sum, sum2);
+    // end = new Date();
+
+    // System.out.print(((float)(end.getTime()-start.getTime()) / count) * 1000);
+    // System.out.println(" microseconds/pop");
+  }
+
+  public void testClear() {
+    PriorityQueue<Integer> pq = new IntegerQueue(3);
+    pq.add(2);
+    pq.add(3);
+    pq.add(1);
+    assertEquals(3, pq.size());
+    pq.clear();
+    assertEquals(0, pq.size());
+  }
+
+  public void testFixedSize() {
+    PriorityQueue<Integer> pq = new IntegerQueue(3);
+    pq.insertWithOverflow(2);
+    pq.insertWithOverflow(3);
+    pq.insertWithOverflow(1);
+    pq.insertWithOverflow(5);
+    pq.insertWithOverflow(7);
+    pq.insertWithOverflow(1);
+    assertEquals(3, pq.size());
+    assertEquals((Integer) 3, pq.top());
+  }
+
+  public void testInsertWithOverflow() {
+    int size = 4;
+    PriorityQueue<Integer> pq = new IntegerQueue(size);
+    Integer i1 = 2;
+    Integer i2 = 3;
+    Integer i3 = 1;
+    Integer i4 = 5;
+    Integer i5 = 7;
+    Integer i6 = 1;
+
+    assertNull(pq.insertWithOverflow(i1));
+    assertNull(pq.insertWithOverflow(i2));
+    assertNull(pq.insertWithOverflow(i3));
+    assertNull(pq.insertWithOverflow(i4));
+    assertTrue(pq.insertWithOverflow(i5) == i3); // i3 should have been dropped
+    assertTrue(pq.insertWithOverflow(i6) == i6); // i6 should not have been inserted
+    assertEquals(size, pq.size());
+    assertEquals((Integer) 2, pq.top());
+  }
+
+}
diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestRamUsageEstimator.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestRamUsageEstimator.java
new file mode 100644
index 0000000..a64d094
--- /dev/null
+++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestRamUsageEstimator.java
@@ -0,0 +1,51 @@
+package org.apache.lucene.util;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +import org.apache.lucene.util.LuceneTestCase; + +public class TestRamUsageEstimator extends LuceneTestCase { + + public void testBasic() { + RamUsageEstimator rue = new RamUsageEstimator(); + rue.estimateRamUsage("test str"); + + rue.estimateRamUsage("test strin"); + + Holder holder = new Holder(); + holder.holder = new Holder("string2", 5000L); + rue.estimateRamUsage(holder); + + String[] strings = new String[]{new String("test strin"), new String("hollow"), new String("catchmaster")}; + rue.estimateRamUsage(strings); + } + + private static final class Holder { + long field1 = 5000L; + String name = "name"; + Holder holder; + + Holder() { + } + + Holder(String name, long field1) { + this.name = name; + this.field1 = field1; + } + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestSetOnce.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestSetOnce.java new file mode 100644 index 0000000..fad8e19 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestSetOnce.java @@ -0,0 +1,99 @@ +package org.apache.lucene.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.Random; + +import org.apache.lucene.util.SetOnce.AlreadySetException; +import org.junit.Test; + +public class TestSetOnce extends LuceneTestCase { + + private static final class SetOnceThread extends Thread { + SetOnce set; + boolean success = false; + final Random RAND; + + public SetOnceThread(Random random) { + RAND = new Random(random.nextLong()); + } + + @Override + public void run() { + try { + sleep(RAND.nextInt(10)); // sleep for a short time + set.set(new Integer(Integer.parseInt(getName().substring(2)))); + success = true; + } catch (InterruptedException e) { + // ignore + } catch (RuntimeException e) { + // TODO: change exception type + // expected. 
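+        // (Editor's note, hedged: only one of the racing threads can win the
+        // set(); the losers are expected to land here via
+        // SetOnce.AlreadySetException, which extends RuntimeException --
+        // hence the broad catch above.)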
+ success = false; + } + } + } + + @Test + public void testEmptyCtor() throws Exception { + SetOnce set = new SetOnce(); + assertNull(set.get()); + } + + @Test(expected=AlreadySetException.class) + public void testSettingCtor() throws Exception { + SetOnce set = new SetOnce(new Integer(5)); + assertEquals(5, set.get().intValue()); + set.set(new Integer(7)); + } + + @Test(expected=AlreadySetException.class) + public void testSetOnce() throws Exception { + SetOnce set = new SetOnce(); + set.set(new Integer(5)); + assertEquals(5, set.get().intValue()); + set.set(new Integer(7)); + } + + @Test + public void testSetMultiThreaded() throws Exception { + final SetOnce set = new SetOnce(); + SetOnceThread[] threads = new SetOnceThread[10]; + for (int i = 0; i < threads.length; i++) { + threads[i] = new SetOnceThread(random); + threads[i].setName("t-" + (i+1)); + threads[i].set = set; + } + + for (Thread t : threads) { + t.start(); + } + + for (Thread t : threads) { + t.join(); + } + + for (SetOnceThread t : threads) { + if (t.success) { + int expectedVal = Integer.parseInt(t.getName().substring(2)); + assertEquals("thread " + t.getName(), expectedVal, t.set.get().intValue()); + } + } + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestSmallFloat.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestSmallFloat.java new file mode 100644 index 0000000..2ee03c6 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestSmallFloat.java @@ -0,0 +1,150 @@ +package org.apache.lucene.util; + +/** + * Copyright 2005 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +public class TestSmallFloat extends LuceneTestCase { + + // original lucene byteToFloat + static float orig_byteToFloat(byte b) { + if (b == 0) // zero is a special case + return 0.0f; + int mantissa = b & 7; + int exponent = (b >> 3) & 31; + int bits = ((exponent+(63-15)) << 24) | (mantissa << 21); + return Float.intBitsToFloat(bits); + } + + // original lucene floatToByte (since lucene 1.3) + static byte orig_floatToByte_v13(float f) { + if (f < 0.0f) // round negatives up to zero + f = 0.0f; + + if (f == 0.0f) // zero is a special case + return 0; + + int bits = Float.floatToIntBits(f); // parse float into parts + int mantissa = (bits & 0xffffff) >> 21; + int exponent = (((bits >> 24) & 0x7f) - 63) + 15; + + if (exponent > 31) { // overflow: use max value + exponent = 31; + mantissa = 7; + } + + if (exponent < 0) { // underflow: use min value + exponent = 0; + mantissa = 1; + } + + return (byte)((exponent << 3) | mantissa); // pack into a byte + } + + // This is the original lucene floatToBytes (from v1.3) + // except with the underflow detection bug fixed for values like 5.8123817E-10f + static byte orig_floatToByte(float f) { + if (f < 0.0f) // round negatives up to zero + f = 0.0f; + + if (f == 0.0f) // zero is a special case + return 0; + + int bits = Float.floatToIntBits(f); // parse float into parts + int mantissa = (bits & 0xffffff) >> 21; + int exponent = (((bits >> 24) & 0x7f) - 63) + 15; + + if (exponent > 31) { // overflow: use max value + exponent = 31; + mantissa = 7; + } + + if (exponent < 0 || exponent == 0 && mantissa == 0) { // underflow: use min value + exponent = 0; + mantissa = 1; + } + + return (byte)((exponent << 3) | mantissa); // pack into a byte + } + + + public void testByteToFloat() { + for (int i=0; i<256; i++) { + float f1 = orig_byteToFloat((byte)i); + float f2 = SmallFloat.byteToFloat((byte)i, 3,15); + float f3 = SmallFloat.byte315ToFloat((byte)i); + assertEquals(f1,f2,0.0); + assertEquals(f2,f3,0.0); + + float f4 = SmallFloat.byteToFloat((byte)i,5,2); + float f5 = SmallFloat.byte52ToFloat((byte)i); + assertEquals(f4,f5,0.0); + } + } + + public void testFloatToByte() { + assertEquals(0, orig_floatToByte_v13(5.8123817E-10f)); // verify the old bug (see LUCENE-2937) + assertEquals(1, orig_floatToByte(5.8123817E-10f)); // verify it's fixed in this test code + assertEquals(1, SmallFloat.floatToByte315(5.8123817E-10f)); // verify it's fixed + + // test some constants + assertEquals(0, SmallFloat.floatToByte315(0)); + assertEquals(1, SmallFloat.floatToByte315(Float.MIN_VALUE)); // underflow rounds up to smallest positive + assertEquals(255, SmallFloat.floatToByte315(Float.MAX_VALUE) & 0xff); // overflow rounds down to largest positive + assertEquals(255, SmallFloat.floatToByte315(Float.POSITIVE_INFINITY) & 0xff); + + // all negatives map to 0 + assertEquals(0, SmallFloat.floatToByte315(-Float.MIN_VALUE)); + assertEquals(0, SmallFloat.floatToByte315(-Float.MAX_VALUE)); + assertEquals(0, SmallFloat.floatToByte315(Float.NEGATIVE_INFINITY)); + + + // up iterations for more exhaustive test after changing something + int num = atLeast(100000); + for (int i = 0; i < num; i++) { + float f = Float.intBitsToFloat(random.nextInt()); + if (Float.isNaN(f)) continue; // skip NaN + byte b1 = orig_floatToByte(f); + byte b2 = SmallFloat.floatToByte(f,3,15); + byte b3 = SmallFloat.floatToByte315(f); + assertEquals(b1,b2); + assertEquals(b2,b3); + + byte b4 = SmallFloat.floatToByte(f,5,2); + byte b5 = SmallFloat.floatToByte52(f); + assertEquals(b4,b5); + } + } + + /*** + 
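+  // (Editor's worked example, hedged, for orientation before the exhaustive
+  // loop below: floatToByte315 keeps 5 exponent bits (bias 15) and 3 mantissa
+  // bits, with the low bit of the IEEE exponent riding along as the top kept
+  // mantissa bit. For f = 1.0f, floatToIntBits(f) is 0x3f800000, giving
+  // mantissa = (0x3f800000 & 0xffffff) >> 21 = 4 and exponent = 15, which
+  // packs to (15 << 3) | 4 = 124; byte315ToFloat((byte)124) reconstructs
+  // exactly 1.0f.)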
// Do an exhaustive test of all possible floating point values + // for the 315 float against the original norm encoding in Similarity. + // Takes 75 seconds on my Pentium4 3GHz, with Java5 -server + public void testAllFloats() { + for(int i = Integer.MIN_VALUE;;i++) { + float f = Float.intBitsToFloat(i); + if (f==f) { // skip non-numbers + byte b1 = orig_floatToByte(f); + byte b2 = SmallFloat.floatToByte315(f); + if (b1!=b2 || b2==0 && f>0) { + fail("Failed floatToByte315 for float " + f + " source bits="+Integer.toHexString(i) + " float raw bits=" + Integer.toHexString(Float.floatToRawIntBits(i))); + } + } + if (i==Integer.MAX_VALUE) break; + } + } + ***/ + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestSortedVIntList.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestSortedVIntList.java new file mode 100644 index 0000000..cef1e7c --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestSortedVIntList.java @@ -0,0 +1,201 @@ +package org.apache.lucene.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.BitSet; + +import junit.framework.TestSuite; +import junit.textui.TestRunner; + +import org.apache.lucene.search.DocIdSetIterator; + +public class TestSortedVIntList extends LuceneTestCase { + /** Main for running test case by itself. */ + public static void main(String args[]) { + TestRunner.run(new TestSuite(TestSortedVIntList.class)); + } + + void tstIterator ( + SortedVIntList vintList, + int[] ints) throws IOException { + for (int i = 0; i < ints.length; i++) { + if ((i > 0) && (ints[i-1] == ints[i])) { + return; // DocNrSkipper should not skip to same document. + } + } + DocIdSetIterator m = vintList.iterator(); + for (int i = 0; i < ints.length; i++) { + assertTrue("No end of Matcher at: " + i, m.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); + assertEquals(ints[i], m.docID()); + } + assertTrue("End of Matcher", m.nextDoc() == DocIdSetIterator.NO_MORE_DOCS); + } + + void tstVIntList( + SortedVIntList vintList, + int[] ints, + int expectedByteSize) throws IOException { + assertEquals("Size", ints.length, vintList.size()); + assertEquals("Byte size", expectedByteSize, vintList.getByteSize()); + tstIterator(vintList, ints); + } + + public void tstViaBitSet(int [] ints, int expectedByteSize) throws IOException { + final int MAX_INT_FOR_BITSET = 1024 * 1024; + BitSet bs = new BitSet(); + for (int i = 0; i < ints.length; i++) { + if (ints[i] > MAX_INT_FOR_BITSET) { + return; // BitSet takes too much memory + } + if ((i > 0) && (ints[i-1] == ints[i])) { + return; // BitSet cannot store duplicate. 
+ } + bs.set(ints[i]); + } + SortedVIntList svil = new SortedVIntList(bs); + tstVIntList(svil, ints, expectedByteSize); + tstVIntList(new SortedVIntList(svil.iterator()), ints, expectedByteSize); + } + + private static final int VB1 = 0x7F; + private static final int BIT_SHIFT = 7; + private static final int VB2 = (VB1 << BIT_SHIFT) | VB1; + private static final int VB3 = (VB2 << BIT_SHIFT) | VB1; + private static final int VB4 = (VB3 << BIT_SHIFT) | VB1; + + private int vIntByteSize(int i) { + assert i >= 0; + if (i <= VB1) return 1; + if (i <= VB2) return 2; + if (i <= VB3) return 3; + if (i <= VB4) return 4; + return 5; + } + + private int vIntListByteSize(int [] ints) { + int byteSize = 0; + int last = 0; + for (int i = 0; i < ints.length; i++) { + byteSize += vIntByteSize(ints[i] - last); + last = ints[i]; + } + return byteSize; + } + + public void tstInts(int [] ints) { + int expectedByteSize = vIntListByteSize(ints); + try { + tstVIntList(new SortedVIntList(ints), ints, expectedByteSize); + tstViaBitSet(ints, expectedByteSize); + } catch (IOException ioe) { + throw new Error(ioe); + } + } + + public void tstIllegalArgExc(int [] ints) { + try { + new SortedVIntList(ints); + } + catch (IllegalArgumentException e) { + return; + } + fail("Expected IllegalArgumentException"); + } + + private int[] fibArray(int a, int b, int size) { + final int[] fib = new int[size]; + fib[0] = a; + fib[1] = b; + for (int i = 2; i < size; i++) { + fib[i] = fib[i-1] + fib[i-2]; + } + return fib; + } + + private int[] reverseDiffs(int []ints) { // reverse the order of the successive differences + final int[] res = new int[ints.length]; + for (int i = 0; i < ints.length; i++) { + res[i] = ints[ints.length - 1] + (ints[0] - ints[ints.length - 1 - i]); + } + return res; + } + + public void test01() { + tstInts(new int[] {}); + } + public void test02() { + tstInts(new int[] {0}); + } + public void test04a() { + tstInts(new int[] {0, VB2 - 1}); + } + public void test04b() { + tstInts(new int[] {0, VB2}); + } + public void test04c() { + tstInts(new int[] {0, VB2 + 1}); + } + public void test05() { + tstInts(fibArray(0,1,7)); // includes duplicate value 1 + } + public void test05b() { + tstInts(reverseDiffs(fibArray(0,1,7))); + } + public void test06() { + tstInts(fibArray(1,2,45)); // no duplicates, size 46 exceeds max int. 
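+    // (Editor's note, hedged: vIntListByteSize() above charges one byte per
+    // 7 bits of *delta* between successive values, so the Fibonacci gaps in
+    // this test cost a single byte while they stay <= 127 (VB1) and more as
+    // they grow -- e.g. a gap of 144 takes two bytes.)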
+ } + public void test06b() { + tstInts(reverseDiffs(fibArray(1,2,45))); + } + public void test07a() { + tstInts(new int[] {0, VB3}); + } + public void test07b() { + tstInts(new int[] {1, VB3 + 2}); + } + public void test07c() { + tstInts(new int[] {2, VB3 + 4}); + } + public void test08a() { + tstInts(new int[] {0, VB4 + 1}); + } + public void test08b() { + tstInts(new int[] {1, VB4 + 1}); + } + public void test08c() { + tstInts(new int[] {2, VB4 + 1}); + } + + public void test10() { + tstIllegalArgExc(new int[] {-1}); + } + public void test11() { + tstIllegalArgExc(new int[] {1,0}); + } + public void test12() { + tstIllegalArgExc(new int[] {0,1,1,2,3,5,8,0}); + } + public void test13Allocation() throws Exception { + int [] a = new int[2000]; // SortedVIntList initial byte size is 128 + for (int i = 0; i < a.length; i++) { + a[i] = (107 + i) * i; + } + tstIterator(new SortedVIntList(a), a); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestStringIntern.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestStringIntern.java new file mode 100755 index 0000000..db2b6cd --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestStringIntern.java @@ -0,0 +1,89 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.lucene.util; +import java.util.Random; + +public class TestStringIntern extends LuceneTestCase { + String[] testStrings; + String[] internedStrings; + + private String randStr(int len) { + char[] arr = new char[len]; + for (int i=0; i prepareCommitData = null; + Map commitData = null; + + public TwoPhaseCommitImpl(boolean failOnPrepare, boolean failOnCommit, boolean failOnRollback) { + this.failOnPrepare = failOnPrepare; + this.failOnCommit = failOnCommit; + this.failOnRollback = failOnRollback; + } + + public void prepareCommit() throws IOException { + prepareCommit(null); + } + + public void prepareCommit(Map commitData) throws IOException { + this.prepareCommitData = commitData; + assertFalse("commit should not have been called before all prepareCommit were", commitCalled); + if (failOnPrepare) { + throw new IOException("failOnPrepare"); + } + } + + public void commit() throws IOException { + commit(null); + } + + public void commit(Map commitData) throws IOException { + this.commitData = commitData; + commitCalled = true; + if (failOnCommit) { + throw new RuntimeException("failOnCommit"); + } + } + + public void rollback() throws IOException { + rollbackCalled = true; + if (failOnRollback) { + throw new Error("failOnRollback"); + } + } + } + + @Override + public void setUp() throws Exception { + super.setUp(); + TwoPhaseCommitImpl.commitCalled = false; // reset count before every test + } + + public void testPrepareThenCommit() throws Exception { + // tests that prepareCommit() is called on all objects before commit() + TwoPhaseCommitImpl[] objects = new TwoPhaseCommitImpl[2]; + for (int i = 0; i < objects.length; i++) { + objects[i] = new TwoPhaseCommitImpl(false, false, false); + } + + // following call will fail if commit() is called before all prepare() were + TwoPhaseCommitTool.execute(objects); + } + + public void testRollback() throws Exception { + // tests that rollback is called if failure occurs at any stage + int numObjects = random.nextInt(8) + 3; // between [3, 10] + TwoPhaseCommitImpl[] objects = new TwoPhaseCommitImpl[numObjects]; + for (int i = 0; i < objects.length; i++) { + boolean failOnPrepare = random.nextBoolean(); + // we should not hit failures on commit usually + boolean failOnCommit = random.nextDouble() < 0.05; + boolean railOnRollback = random.nextBoolean(); + objects[i] = new TwoPhaseCommitImpl(failOnPrepare, failOnCommit, railOnRollback); + } + + boolean anyFailure = false; + try { + TwoPhaseCommitTool.execute(objects); + } catch (Throwable t) { + anyFailure = true; + } + + if (anyFailure) { + // if any failure happened, ensure that rollback was called on all. 
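+      // (Editor's sketch of the contract under test, hedged: TwoPhaseCommitTool
+      // .execute() first calls prepareCommit() on every resource, then commit()
+      // on every resource, roughly
+      //   for (TwoPhaseCommit tpc : objects) tpc.prepareCommit();
+      //   for (TwoPhaseCommit tpc : objects) tpc.commit();
+      // and on the first failure anywhere it attempts rollback() on *all*
+      // resources, which is what the loop below verifies.)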
+ for (TwoPhaseCommitImpl tpc : objects) { + assertTrue("rollback was not called while a failure occurred during the 2-phase commit", tpc.rollbackCalled); + } + } + } + + public void testWrapper() throws Exception { + // tests that TwoPhaseCommitWrapper delegates prepare/commit w/ commitData + TwoPhaseCommitImpl impl = new TwoPhaseCommitImpl(false, false, false); + HashMap commitData = new HashMap(); + TwoPhaseCommitWrapper wrapper = new TwoPhaseCommitWrapper(impl, commitData); + + wrapper.prepareCommit(); + assertSame(commitData, impl.prepareCommitData); + + // wrapper should ignore passed commitData + wrapper.prepareCommit(new HashMap()); + assertSame(commitData, impl.prepareCommitData); + + wrapper.commit(); + assertSame(commitData, impl.commitData); + + // wrapper should ignore passed commitData + wrapper.commit(new HashMap()); + assertSame(commitData, impl.commitData); + } + + public void testNullTPCs() throws Exception { + int numObjects = random.nextInt(4) + 3; // between [3, 6] + TwoPhaseCommit[] tpcs = new TwoPhaseCommit[numObjects]; + boolean setNull = false; + for (int i = 0; i < tpcs.length; i++) { + boolean isNull = random.nextDouble() < 0.3; + if (isNull) { + setNull = true; + tpcs[i] = null; + } else { + tpcs[i] = new TwoPhaseCommitImpl(false, false, false); + } + } + + if (!setNull) { + // none of the TPCs were picked to be null, pick one at random + int idx = random.nextInt(numObjects); + tpcs[idx] = null; + } + + // following call would fail if TPCTool won't handle null TPCs properly + TwoPhaseCommitTool.execute(tpcs); + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestVersion.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestVersion.java new file mode 100644 index 0000000..b2e8540 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestVersion.java @@ -0,0 +1,31 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.lucene.util; + +public class TestVersion extends LuceneTestCase { + + public void test() { + for (Version v : Version.values()) { + assertTrue("LUCENE_CURRENT must be always onOrAfter("+v+")", Version.LUCENE_CURRENT.onOrAfter(v)); + } + assertTrue(Version.LUCENE_30.onOrAfter(Version.LUCENE_29)); + assertTrue(Version.LUCENE_30.onOrAfter(Version.LUCENE_30)); + assertFalse(Version.LUCENE_29.onOrAfter(Version.LUCENE_30)); + } + +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestVersionComparator.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestVersionComparator.java new file mode 100644 index 0000000..d9646d2 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestVersionComparator.java @@ -0,0 +1,52 @@ +package org.apache.lucene.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.Comparator; + +/** + * Tests for StringHelper.getVersionComparator + */ +public class TestVersionComparator extends LuceneTestCase { + public void testVersions() { + Comparator comp = StringHelper.getVersionComparator(); + assertTrue(comp.compare("1", "2") < 0); + assertTrue(comp.compare("1", "1") == 0); + assertTrue(comp.compare("2", "1") > 0); + + assertTrue(comp.compare("1.1", "1") > 0); + assertTrue(comp.compare("1", "1.1") < 0); + assertTrue(comp.compare("1.1", "1.1") == 0); + + assertTrue(comp.compare("1.0", "1") == 0); + assertTrue(comp.compare("1", "1.0") == 0); + assertTrue(comp.compare("1.0.1", "1.0") > 0); + assertTrue(comp.compare("1.0", "1.0.1") < 0); + + assertTrue(comp.compare("1.02.003", "1.2.3.0") == 0); + assertTrue(comp.compare("1.2.3.0", "1.02.003") == 0); + + assertTrue(comp.compare("1.10", "1.9") > 0); + assertTrue(comp.compare("1.9", "1.10") < 0); + + assertTrue(comp.compare("0", "1.0") < 0); + assertTrue(comp.compare("00", "1.0") < 0); + assertTrue(comp.compare("-1.0", "1.0") < 0); + assertTrue(comp.compare("3.0", Integer.toString(Integer.MIN_VALUE)) > 0); + } +} diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestVirtualMethod.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestVirtualMethod.java new file mode 100644 index 0000000..2f41ad2 --- /dev/null +++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/TestVirtualMethod.java @@ -0,0 +1,105 @@ +package org.apache.lucene.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class TestVirtualMethod extends LuceneTestCase {
+
+  private static final VirtualMethod<TestVirtualMethod> publicTestMethod =
+    new VirtualMethod<TestVirtualMethod>(TestVirtualMethod.class, "publicTest", String.class);
+  private static final VirtualMethod<TestVirtualMethod> protectedTestMethod =
+    new VirtualMethod<TestVirtualMethod>(TestVirtualMethod.class, "protectedTest", int.class);
+
+  public void publicTest(String test) {}
+  protected void protectedTest(int test) {}
+
+  static class TestClass1 extends TestVirtualMethod {
+    @Override
+    public void publicTest(String test) {}
+    @Override
+    protected void protectedTest(int test) {}
+  }
+
+  static class TestClass2 extends TestClass1 {
+    @Override // make it public here
+    public void protectedTest(int test) {}
+  }
+
+  static class TestClass3 extends TestClass2 {
+    @Override
+    public void publicTest(String test) {}
+  }
+
+  static class TestClass4 extends TestVirtualMethod {
+  }
+
+  static class TestClass5 extends TestClass4 {
+  }
+
+  public void testGeneral() {
+    assertEquals(0, publicTestMethod.getImplementationDistance(this.getClass()));
+    assertEquals(1, publicTestMethod.getImplementationDistance(TestClass1.class));
+    assertEquals(1, publicTestMethod.getImplementationDistance(TestClass2.class));
+    assertEquals(3, publicTestMethod.getImplementationDistance(TestClass3.class));
+    assertFalse(publicTestMethod.isOverriddenAsOf(TestClass4.class));
+    assertFalse(publicTestMethod.isOverriddenAsOf(TestClass5.class));
+
+    assertEquals(0, protectedTestMethod.getImplementationDistance(this.getClass()));
+    assertEquals(1, protectedTestMethod.getImplementationDistance(TestClass1.class));
+    assertEquals(2, protectedTestMethod.getImplementationDistance(TestClass2.class));
+    assertEquals(2, protectedTestMethod.getImplementationDistance(TestClass3.class));
+    assertFalse(protectedTestMethod.isOverriddenAsOf(TestClass4.class));
+    assertFalse(protectedTestMethod.isOverriddenAsOf(TestClass5.class));
+
+    assertTrue(VirtualMethod.compareImplementationDistance(TestClass3.class, publicTestMethod, protectedTestMethod) > 0);
+    assertEquals(0, VirtualMethod.compareImplementationDistance(TestClass5.class, publicTestMethod, protectedTestMethod));
+  }
+
+  @SuppressWarnings("unchecked")
+  public void testExceptions() {
+    try {
+      // cast to Class to remove generics:
+      publicTestMethod.getImplementationDistance((Class) LuceneTestCase.class);
+      fail("LuceneTestCase is not a subclass and can never override publicTest(String)");
+    } catch (IllegalArgumentException arg) {
+      // pass
+    }
+
+    try {
+      new VirtualMethod<TestVirtualMethod>(TestVirtualMethod.class, "bogus");
+      fail("Method bogus() does not exist, so IAE should be thrown");
+    } catch (IllegalArgumentException arg) {
+      // pass
+    }
+
+    try {
+      new VirtualMethod<TestClass2>(TestClass2.class, "publicTest", String.class);
+      fail("Method publicTest(String) is not declared in TestClass2, so IAE should be thrown");
+    } catch (IllegalArgumentException arg) {
+      // pass
+    }
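+
+    // (Editor's note, hedged: VirtualMethod records each (baseClass, method)
+    // pair in a static registry and throws UnsupportedOperationException on a
+    // duplicate, so instances are meant to live in static final fields as at
+    // the top of this class -- the next block verifies exactly that.)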
+
+    try {
+      // try to create a second instance of the same baseClass / method combination
+      new VirtualMethod<TestVirtualMethod>(TestVirtualMethod.class, "publicTest", String.class);
+      fail("Violating singleton status succeeded");
+    } catch (UnsupportedOperationException arg) {
+      // pass
+    }
+  }
+
+}
diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/europarl.lines.txt.gz b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/europarl.lines.txt.gz
new file mode 100644
index 0000000..e0366f1
Binary files /dev/null and b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/europarl.lines.txt.gz differ
diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/makeEuroparlLineFile.py b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/makeEuroparlLineFile.py
new file mode 100644
index 0000000..2cfda33
--- /dev/null
+++ b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/util/makeEuroparlLineFile.py
@@ -0,0 +1,137 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import glob
+import datetime
+import tarfile
+import re
+
+try:
+  sys.argv.remove('-verbose')
+  VERBOSE = True
+except ValueError:
+  VERBOSE = False
+
+try:
+  sys.argv.remove('-docPerParagraph')
+  docPerParagraph = True
+except ValueError:
+  docPerParagraph = False
+
+reChapterOnly = re.compile('^<CHAPTER ID=\d+>$')
+reTagOnly = re.compile('^<.*?>$')
+reNumberOnly = re.compile(r'^\d+\.?$')
+
+docCount = 0
+didEnglish = False
+
+def write(date, title, pending, fOut):
+  global docCount
+  body = ' '.join(pending).replace('\t', ' ').strip()
+  if len(body) > 0:
+    line = '%s\t%s\t%s\n' % (title, date, body)
+    fOut.write(line)
+    docCount += 1
+  del pending[:]
+  if VERBOSE:
+    print len(body)
+
+def processTar(fileName, fOut):
+
+  global didEnglish
+
+  t = tarfile.open(fileName, 'r:gz')
+  for ti in t:
+    if ti.isfile() and (not didEnglish or ti.name.find('/en/') == -1):
+
+      tup = ti.name.split('/')
+      lang = tup[1]
+      year = int(tup[2][3:5])
+      if year < 20:
+        year += 2000
+      else:
+        year += 1900
+
+      month = int(tup[2][6:8])
+      day = int(tup[2][9:11])
+      date = datetime.date(year=year, month=month, day=day)
+
+      if VERBOSE:
+        print
+        print '%s: %s' % (ti.name, date)
+      nextIsTitle = False
+      title = None
+      pending = []
+      for line in t.extractfile(ti).readlines():
+        line = line.strip()
+        if reChapterOnly.match(line) is not None:
+          if title is not None:
+            write(date, title, pending, fOut)
+          nextIsTitle = True
+          continue
+        if nextIsTitle:
+          if not reNumberOnly.match(line) and not reTagOnly.match(line):
+            title = line
+            nextIsTitle = False
+            if VERBOSE:
+              print '  title %s' % line
+          continue
+        if line.lower() == '<p>':
+          if docPerParagraph:
+            write(date, title, pending, fOut)
+          else:
+            pending.append('PARSEP')
+        elif not reTagOnly.match(line):
+          pending.append(line)
+      if title is not None and len(pending) > 0:
+        write(date, title, pending, fOut)
+
+      didEnglish = True
+
+# '/x/lucene/data/europarl/all.lines.txt'
+dirIn = sys.argv[1]
+fileOut = sys.argv[2]
+
+fOut = open(fileOut, 'wb')
+
+for fileName in glob.glob('%s/??-??.tgz' % dirIn):
+  if fileName.endswith('.tgz'):
+    print 'process %s; %d docs so far...' % (fileName, docCount)
+    processTar(fileName, fOut)
+
+print 'TOTAL: %s' % docCount
+
+#run something like this:
+"""
+
+# Europarl V5 makes 76,917 docs, avg 38.6 KB per
+python -u europarl.py /x/lucene/data/europarl /x/lucene/data/europarl/tmp.lines.txt
+shuf /x/lucene/data/europarl/tmp.lines.txt > /x/lucene/data/europarl/full.lines.txt
+rm /x/lucene/data/europarl/tmp.lines.txt
+
+# Run again, this time each paragraph is a doc:
+# Europarl V5 makes 5,607,746 paragraphs (one paragraph per line), avg 620 bytes per:
+python -u europarl.py /x/lucene/data/europarl /x/lucene/data/europarl/tmp.lines.txt -docPerParagraph
+shuf /x/lucene/data/europarl/tmp.lines.txt > /x/lucene/data/europarl/para.lines.txt
+rm /x/lucene/data/europarl/tmp.lines.txt
+
+# ~5.5 MB gzip'd:
+head -200 /x/lucene/data/europarl/full.lines.txt > tmp.txt
+head -10000 /x/lucene/data/europarl/para.lines.txt >> tmp.txt
+shuf tmp.txt > europarl.subset.txt
+rm -f tmp.txt
+gzip --best europarl.subset.txt
+"""
diff --git a/lucene-java-3.4.0/lucene/build.xml b/lucene-java-3.4.0/lucene/build.xml
new file mode 100644
index 0000000..2d34b62
--- /dev/null
+++ b/lucene-java-3.4.0/lucene/build.xml
@@ -0,0 +1,663 @@
+<!-- The 663 lines of this Ant build file were XML markup that did not survive
+     the plain-text rendering of this diff; only two text fragments remain:
+     the backwards-compatibility warning "Warning: Ignoring your multiplier and
+     nightly settings for backwards tests. These tests are for API
+     compatibility only!" and the javadoc page title
+     "${Name} ${version} Javadoc Index". -->
