X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.4.0/lucene/contrib/analyzers/common/src/java/org/tartarus/snowball/SnowballProgram.java?ds=sidebyside diff --git a/lucene-java-3.4.0/lucene/contrib/analyzers/common/src/java/org/tartarus/snowball/SnowballProgram.java b/lucene-java-3.4.0/lucene/contrib/analyzers/common/src/java/org/tartarus/snowball/SnowballProgram.java deleted file mode 100644 index 0aaa1de..0000000 --- a/lucene-java-3.4.0/lucene/contrib/analyzers/common/src/java/org/tartarus/snowball/SnowballProgram.java +++ /dev/null @@ -1,567 +0,0 @@ -/* - -Copyright (c) 2001, Dr Martin Porter -Copyright (c) 2002, Richard Boulton -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * Neither the name of the copyright holders nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - */ - - -package org.tartarus.snowball; - -import java.lang.reflect.InvocationTargetException; - -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.RamUsageEstimator; - -/** - * This is the rev 502 of the Snowball SVN trunk, - * but modified: - * made abstract and introduced abstract method stem to avoid expensive reflection in filter class. - * refactored StringBuffers to StringBuilder - * uses char[] as buffer instead of StringBuffer/StringBuilder - * eq_s,eq_s_b,insert,replace_s take CharSequence like eq_v and eq_v_b - * reflection calls (Lovins, etc) use EMPTY_ARGS/EMPTY_PARAMS - */ -public abstract class SnowballProgram { - private static final Object[] EMPTY_ARGS = new Object[0]; - - protected SnowballProgram() - { - current = new char[8]; - setCurrent(""); - } - - public abstract boolean stem(); - - /** - * Set the current string. - */ - public void setCurrent(String value) - { - current = value.toCharArray(); - cursor = 0; - limit = value.length(); - limit_backward = 0; - bra = cursor; - ket = limit; - } - - /** - * Get the current string. - */ - public String getCurrent() - { - return new String(current, 0, limit); - } - - /** - * Set the current string. - * @param text character array containing input - * @param length valid length of text. - */ - public void setCurrent(char text[], int length) { - current = text; - cursor = 0; - limit = length; - limit_backward = 0; - bra = cursor; - ket = limit; - } - - /** - * Get the current buffer containing the stem. - *

- * NOTE: this may be a reference to a different character array than the - * one originally provided with setCurrent, in the exceptional case that - * stemming produced a longer intermediate or result string. - *

- *

- * It is necessary to use {@link #getCurrentBufferLength()} to determine - * the valid length of the returned buffer. For example, many words are - * stemmed simply by subtracting from the length to remove suffixes. - *

- * @see #getCurrentBufferLength() - */ - public char[] getCurrentBuffer() { - return current; - } - - /** - * Get the valid length of the character array in - * {@link #getCurrentBuffer()}. - * @return valid length of the array. - */ - public int getCurrentBufferLength() { - return limit; - } - - // current string - private char current[]; - - protected int cursor; - protected int limit; - protected int limit_backward; - protected int bra; - protected int ket; - - protected void copy_from(SnowballProgram other) - { - current = other.current; - cursor = other.cursor; - limit = other.limit; - limit_backward = other.limit_backward; - bra = other.bra; - ket = other.ket; - } - - protected boolean in_grouping(char [] s, int min, int max) - { - if (cursor >= limit) return false; - char ch = current[cursor]; - if (ch > max || ch < min) return false; - ch -= min; - if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return false; - cursor++; - return true; - } - - protected boolean in_grouping_b(char [] s, int min, int max) - { - if (cursor <= limit_backward) return false; - char ch = current[cursor - 1]; - if (ch > max || ch < min) return false; - ch -= min; - if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return false; - cursor--; - return true; - } - - protected boolean out_grouping(char [] s, int min, int max) - { - if (cursor >= limit) return false; - char ch = current[cursor]; - if (ch > max || ch < min) { - cursor++; - return true; - } - ch -= min; - if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) { - cursor ++; - return true; - } - return false; - } - - protected boolean out_grouping_b(char [] s, int min, int max) - { - if (cursor <= limit_backward) return false; - char ch = current[cursor - 1]; - if (ch > max || ch < min) { - cursor--; - return true; - } - ch -= min; - if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) { - cursor--; - return true; - } - return false; - } - - protected boolean in_range(int min, int max) - { - if (cursor >= limit) return false; - char ch = current[cursor]; - if (ch > max || ch < min) return false; - cursor++; - return true; - } - - protected boolean in_range_b(int min, int max) - { - if (cursor <= limit_backward) return false; - char ch = current[cursor - 1]; - if (ch > max || ch < min) return false; - cursor--; - return true; - } - - protected boolean out_range(int min, int max) - { - if (cursor >= limit) return false; - char ch = current[cursor]; - if (!(ch > max || ch < min)) return false; - cursor++; - return true; - } - - protected boolean out_range_b(int min, int max) - { - if (cursor <= limit_backward) return false; - char ch = current[cursor - 1]; - if(!(ch > max || ch < min)) return false; - cursor--; - return true; - } - - protected boolean eq_s(int s_size, CharSequence s) - { - if (limit - cursor < s_size) return false; - int i; - for (i = 0; i != s_size; i++) { - if (current[cursor + i] != s.charAt(i)) return false; - } - cursor += s_size; - return true; - } - - /** @deprecated for binary back compat. Will be removed in Lucene 4.0 */ - @Deprecated - protected boolean eq_s(int s_size, String s) - { - return eq_s(s_size, (CharSequence)s); - } - - protected boolean eq_s_b(int s_size, CharSequence s) - { - if (cursor - limit_backward < s_size) return false; - int i; - for (i = 0; i != s_size; i++) { - if (current[cursor - s_size + i] != s.charAt(i)) return false; - } - cursor -= s_size; - return true; - } - - /** @deprecated for binary back compat. Will be removed in Lucene 4.0 */ - @Deprecated - protected boolean eq_s_b(int s_size, String s) - { - return eq_s_b(s_size, (CharSequence)s); - } - - protected boolean eq_v(CharSequence s) - { - return eq_s(s.length(), s); - } - - /** @deprecated for binary back compat. Will be removed in Lucene 4.0 */ - @Deprecated - protected boolean eq_v(StringBuilder s) - { - return eq_s(s.length(), (CharSequence)s); - } - - protected boolean eq_v_b(CharSequence s) - { return eq_s_b(s.length(), s); - } - - /** @deprecated for binary back compat. Will be removed in Lucene 4.0 */ - @Deprecated - protected boolean eq_v_b(StringBuilder s) - { return eq_s_b(s.length(), (CharSequence)s); - } - - protected int find_among(Among v[], int v_size) - { - int i = 0; - int j = v_size; - - int c = cursor; - int l = limit; - - int common_i = 0; - int common_j = 0; - - boolean first_key_inspected = false; - - while(true) { - int k = i + ((j - i) >> 1); - int diff = 0; - int common = common_i < common_j ? common_i : common_j; // smaller - Among w = v[k]; - int i2; - for (i2 = common; i2 < w.s_size; i2++) { - if (c + common == l) { - diff = -1; - break; - } - diff = current[c + common] - w.s[i2]; - if (diff != 0) break; - common++; - } - if (diff < 0) { - j = k; - common_j = common; - } else { - i = k; - common_i = common; - } - if (j - i <= 1) { - if (i > 0) break; // v->s has been inspected - if (j == i) break; // only one item in v - - // - but now we need to go round once more to get - // v->s inspected. This looks messy, but is actually - // the optimal approach. - - if (first_key_inspected) break; - first_key_inspected = true; - } - } - while(true) { - Among w = v[i]; - if (common_i >= w.s_size) { - cursor = c + w.s_size; - if (w.method == null) return w.result; - boolean res; - try { - Object resobj = w.method.invoke(w.methodobject, EMPTY_ARGS); - res = resobj.toString().equals("true"); - } catch (InvocationTargetException e) { - res = false; - // FIXME - debug message - } catch (IllegalAccessException e) { - res = false; - // FIXME - debug message - } - cursor = c + w.s_size; - if (res) return w.result; - } - i = w.substring_i; - if (i < 0) return 0; - } - } - - // find_among_b is for backwards processing. Same comments apply - protected int find_among_b(Among v[], int v_size) - { - int i = 0; - int j = v_size; - - int c = cursor; - int lb = limit_backward; - - int common_i = 0; - int common_j = 0; - - boolean first_key_inspected = false; - - while(true) { - int k = i + ((j - i) >> 1); - int diff = 0; - int common = common_i < common_j ? common_i : common_j; - Among w = v[k]; - int i2; - for (i2 = w.s_size - 1 - common; i2 >= 0; i2--) { - if (c - common == lb) { - diff = -1; - break; - } - diff = current[c - 1 - common] - w.s[i2]; - if (diff != 0) break; - common++; - } - if (diff < 0) { - j = k; - common_j = common; - } else { - i = k; - common_i = common; - } - if (j - i <= 1) { - if (i > 0) break; - if (j == i) break; - if (first_key_inspected) break; - first_key_inspected = true; - } - } - while(true) { - Among w = v[i]; - if (common_i >= w.s_size) { - cursor = c - w.s_size; - if (w.method == null) return w.result; - - boolean res; - try { - Object resobj = w.method.invoke(w.methodobject, EMPTY_ARGS); - res = resobj.toString().equals("true"); - } catch (InvocationTargetException e) { - res = false; - // FIXME - debug message - } catch (IllegalAccessException e) { - res = false; - // FIXME - debug message - } - cursor = c - w.s_size; - if (res) return w.result; - } - i = w.substring_i; - if (i < 0) return 0; - } - } - - /* to replace chars between c_bra and c_ket in current by the - * chars in s. - */ - protected int replace_s(int c_bra, int c_ket, CharSequence s) - { - final int adjustment = s.length() - (c_ket - c_bra); - final int newLength = limit + adjustment; - //resize if necessary - if (newLength > current.length) { - char newBuffer[] = new char[ArrayUtil.oversize(newLength, RamUsageEstimator.NUM_BYTES_CHAR)]; - System.arraycopy(current, 0, newBuffer, 0, limit); - current = newBuffer; - } - // if the substring being replaced is longer or shorter than the - // replacement, need to shift things around - if (adjustment != 0 && c_ket < limit) { - System.arraycopy(current, c_ket, current, c_bra + s.length(), - limit - c_ket); - } - // insert the replacement text - // Note, faster is s.getChars(0, s.length(), current, c_bra); - // but would have to duplicate this method for both String and StringBuilder - for (int i = 0; i < s.length(); i++) - current[c_bra + i] = s.charAt(i); - - limit += adjustment; - if (cursor >= c_ket) cursor += adjustment; - else if (cursor > c_bra) cursor = c_bra; - return adjustment; - } - - /** @deprecated for binary back compat. Will be removed in Lucene 4.0 */ - @Deprecated - protected int replace_s(int c_bra, int c_ket, String s) { - return replace_s(c_bra, c_ket, (CharSequence)s); - } - - protected void slice_check() - { - if (bra < 0 || - bra > ket || - ket > limit) - { - System.err.println("faulty slice operation"); - // FIXME: report error somehow. - /* - fprintf(stderr, "faulty slice operation:\n"); - debug(z, -1, 0); - exit(1); - */ - } - } - - protected void slice_from(CharSequence s) - { - slice_check(); - replace_s(bra, ket, s); - } - - /** @deprecated for binary back compat. Will be removed in Lucene 4.0 */ - @Deprecated - protected void slice_from(String s) - { - slice_from((CharSequence)s); - } - - /** @deprecated for binary back compat. Will be removed in Lucene 4.0 */ - @Deprecated - protected void slice_from(StringBuilder s) - { - slice_from((CharSequence)s); - } - - protected void slice_del() - { - slice_from((CharSequence)""); - } - - protected void insert(int c_bra, int c_ket, CharSequence s) - { - int adjustment = replace_s(c_bra, c_ket, s); - if (c_bra <= bra) bra += adjustment; - if (c_bra <= ket) ket += adjustment; - } - - /** @deprecated for binary back compat. Will be removed in Lucene 4.0 */ - @Deprecated - protected void insert(int c_bra, int c_ket, String s) - { - insert(c_bra, c_ket, (CharSequence)s); - } - - /** @deprecated for binary back compat. Will be removed in Lucene 4.0 */ - @Deprecated - protected void insert(int c_bra, int c_ket, StringBuilder s) - { - insert(c_bra, c_ket, (CharSequence)s); - } - - /* Copy the slice into the supplied StringBuffer */ - protected StringBuilder slice_to(StringBuilder s) - { - slice_check(); - int len = ket - bra; - s.setLength(0); - s.append(current, bra, len); - return s; - } - - protected StringBuilder assign_to(StringBuilder s) - { - s.setLength(0); - s.append(current, 0, limit); - return s; - } - -/* -extern void debug(struct SN_env * z, int number, int line_count) -{ int i; - int limit = SIZE(z->p); - //if (number >= 0) printf("%3d (line %4d): '", number, line_count); - if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit); - for (i = 0; i <= limit; i++) - { if (z->lb == i) printf("{"); - if (z->bra == i) printf("["); - if (z->c == i) printf("|"); - if (z->ket == i) printf("]"); - if (z->l == i) printf("}"); - if (i < limit) - { int ch = z->p[i]; - if (ch == 0) ch = '#'; - printf("%c", ch); - } - } - printf("'\n"); -} -*/ - -}; -