lucene-java-3.4.0/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchStemmer.java

   1 package org.apache.lucene.analysis.fr;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  /**
   * A stemmer for French words.
   * <p>
   * The algorithm is based on the work of
   * Dr Martin Porter on his snowball project<br>
   * refer to http://snowball.sourceforge.net/french/stemmer.html<br>
   * (French stemming algorithm) for details
   * </p>
   * <p>
   * Instances keep the word being stemmed and its search regions in mutable
   * fields, so a single instance must not be used by several threads at once.
   * </p>
   * <p>
   * This source file is intentionally pure ASCII: accented letters are written
   * as Unicode escapes so the compiled suffix tables do not depend on the
   * encoding the compiler is run with.
   * </p>
   * @deprecated Use {@link org.tartarus.snowball.ext.FrenchStemmer} instead,
   * which has the same functionality. This filter will be removed in Lucene 5.0
   */
  @Deprecated
  public class FrenchStemmer {

      /**
       * Locale used to lower-case incoming terms. A fixed locale keeps the stems
       * independent of the JVM default locale (a Turkish default would otherwise
       * map 'I' to a dotless i, which is not recognised as a vowel).
       */
      private static final java.util.Locale LOWERCASE_LOCALE = java.util.Locale.FRENCH;

      /** e with acute accent (U+00E9). */
      private static final char E_ACUTE = '\u00e9';

      /** e with grave accent (U+00E8). */
      private static final char E_GRAVE = '\u00e8';

      /** c with cedilla (U+00E7). */
      private static final char C_CEDILLA = '\u00e7';

      /** Every character {@link #isVowel(char)} accepts: a e i o u y plus the accented French vowels. */
      private static final String VOWELS =
          "aeiouy\u00e2\u00e0\u00eb\u00e9\u00ea\u00e8\u00ef\u00ee\u00f4\u00fc\u00f9\u00fb";

      /** Letters that protect a final 's' from removal in step 4 (a, i, o, u, e-grave, s). */
      private static final String KEEP_FINAL_S_AFTER = "aiou\u00e8s";

      // ------------------------------------------------------------------
      // Suffix tables. Within each table the order matters: the first suffix
      // that matches wins, so longer suffixes are listed before their tails.
      // ------------------------------------------------------------------

      private static final String[] STEP1_DELETE = {
          "ances", "iqUes", "ismes", "ables", "istes", "ance", "iqUe", "isme", "able", "iste" };
      private static final String[] STEP1_LOGIE = { "logies", "logie" };
      private static final String[] STEP1_USION = { "usions", "utions", "usion", "ution" };
      private static final String[] STEP1_ENCE = { "ences", "ence" };
      private static final String[] STEP1_ATION = {
          "atrices", "ateurs", "ations", "atrice", "ateur", "ation" };
      private static final String[] STEP1_EMENT = { "ements", "ement" };
      private static final String[] STEP1_ISSEMENT = { "issements", "issement" };
      private static final String[] STEP1_ITE = { "it\u00e9s", "it\u00e9" };
      private static final String[] STEP1_IF = { "ifs", "ives", "if", "ive" };
      private static final String[] STEP1_EAUX = { "eaux" };
      private static final String[] STEP1_AUX = { "aux" };
      private static final String[] STEP1_EUSE = { "euses", "euse" };
      private static final String[] STEP1_EUX = { "eux" };
      private static final String[] STEP1_AMMENT = { "amment" };
      private static final String[] STEP1_EMMENT = { "emment" };
      private static final String[] STEP1_MENT = { "ments", "ment" };

      private static final String[] STEP2A_I_VERBS = {
          "\u00eemes", "\u00eetes", "iraIent", "irait", "irais", "irai", "iras", "ira",
          "irent", "iriez", "irez", "irions", "irons", "iront",
          "issaIent", "issais", "issantes", "issante", "issants", "issant",
          "issait", "issions", "issons", "issiez", "issez", "issent",
          "isses", "isse", "ir", "is", "\u00eet", "it", "ies", "ie", "i" };

      private static final String[] STEP2B_E_VERBS = {
          "eraIent", "erais", "erait", "erai", "eras", "erions", "eriez",
          "erons", "eront", "erez", "\u00e8rent", "era", "\u00e9es", "iez",
          "\u00e9e", "\u00e9s", "er", "ez", "\u00e9" };

      private static final String[] STEP2B_A_VERBS = {
          "assions", "assiez", "assent", "asses", "asse", "aIent",
          "antes", "Aient", "ante", "\u00e2mes", "\u00e2tes", "ants", "ant",
          "ait", "a\u00eet", "ais", "Ait", "A\u00eet", "Ais", "\u00e2t", "as", "ai", "Ai", "a" };

      private static final String[] STEP2B_IONS = { "ions" };

      private static final String[] STEP4_ION = { "ion" };
      private static final String[] STEP4_IER = { "I\u00e8re", "i\u00e8re", "Ier", "ier" };
      private static final String[] STEP4_E = { "e" };
      private static final String[] STEP4_E_DIAERESIS = { "\u00eb" };

      private static final String[] STEP5_DOUBLED = { "enn", "onn", "ett", "ell", "eill" };

      /**
       * Buffer holding the term while it is being stemmed.
       */
      private final StringBuilder sb = new StringBuilder();

      /**
       * Region R0 is equal to the whole buffer
       */
      private String R0;

      /**
       * Region RV
       * "If the word begins with two vowels, RV is the region after the third letter,
       * otherwise the region after the first vowel not at the beginning of the word,
       * or the end of the word if these positions cannot be found."
       */
      private String RV;

      /**
       * Region R1
       * "R1 is the region after the first non-vowel following a vowel
       * or is the null region at the end of the word if there is no such non-vowel"
       */
      private String R1;

      /**
       * Region R2
       * "R2 is the region after the first non-vowel in R1 following a vowel
       * or is the null region at the end of the word if there is no such non-vowel"
       */
      private String R2;

      /**
       * Set to true if we need to perform step 2
       */
      private boolean suite;

      /**
       * Set to true if the buffer was modified
       */
      private boolean modified;

      /**
       * Stems the given term to a unique <tt>discriminator</tt>.
       * <p>
       * Terms that are <code>null</code>, contain a non-letter, or contain an
       * upper-case letter anywhere but in first position are returned unchanged.
       * </p>
       *
       * @param term  java.lang.String The term that should be stemmed
       * @return java.lang.String  Discriminator for <tt>term</tt>
       */
      protected String stem( String term ) {
          if ( term == null || !isStemmable( term ) ) {
              return term;
          }

          // Lower-case with a fixed locale so the result does not vary with the JVM default.
          term = term.toLowerCase( LOWERCASE_LOCALE );

          // Load the working buffer and reset the per-word flags.
          sb.setLength( 0 );
          sb.append( term );
          modified = false;
          suite = false;

          treatVowels( sb );
          setStrings();

          step1();

          // Step 2 runs when step 1 left the word alone, or when it only removed
          // one of the amment / emment / ment(s) suffixes.
          if ( ( !modified || suite ) && RV != null ) {
              suite = step2a();
              if ( !suite ) {
                  step2b();
              }
          }

          if ( modified || suite ) {
              step3();
          } else {
              step4();
          }

          step5();
          step6();

          return sb.toString();
      }

      /**
       * Sets the search region Strings<br>
       * it needs to be done each time the buffer was modified
       */
      private void setStrings() {
          R0 = sb.toString();
          RV = retrieveRV( sb );
          R1 = retrieveR( sb );
          // R2 is obtained by applying the R rule a second time, to R1 itself.
          R2 = ( R1 == null ) ? null : retrieveR( R1 );
      }

      /**
       * Replaces the last <code>count</code> characters of the buffer with
       * <code>replacement</code> (use "" to delete) and recomputes the regions.
       * Does not touch the {@link #modified} flag; callers decide that.
       */
      private void replaceTail( int count, String replacement ) {
          int end = sb.length();
          sb.replace( end - count, end, replacement );
          setStrings();
      }

      /**
       * First step of the Porter Algorithm<br>
       * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
       * <p>
       * Each call below reads the region fields afresh, so it sees the regions as
       * left by the calls before it.
       * </p>
       */
      private void step1() {
          deleteFrom( R2, STEP1_DELETE );

          replaceFrom( R2, STEP1_LOGIE, "log" );
          replaceFrom( R2, STEP1_USION, "u" );
          replaceFrom( R2, STEP1_ENCE, "ent" );

          deleteButSuffixFromElseReplace( R2, STEP1_ATION, "ic", true, R0, "iqU" );

          deleteButSuffixFromElseReplace( R2, STEP1_EMENT, "eus", false, R0, "eux" );
          deleteButSuffixFrom( R2, STEP1_EMENT, "ativ", false );
          deleteButSuffixFrom( R2, STEP1_EMENT, "iv", false );
          deleteButSuffixFrom( R2, STEP1_EMENT, "abl", false );
          deleteButSuffixFrom( R2, STEP1_EMENT, "iqU", false );

          deleteFromIfTestVowelBeforeIn( R1, STEP1_ISSEMENT, false, R0 );
          deleteFrom( RV, STEP1_EMENT );

          deleteButSuffixFromElseReplace( R2, STEP1_ITE, "abil", false, R0, "abl" );
          deleteButSuffixFromElseReplace( R2, STEP1_ITE, "ic", false, R0, "iqU" );
          deleteButSuffixFrom( R2, STEP1_ITE, "iv", true );

          deleteButSuffixFromElseReplace( R2, STEP1_IF, "icat", false, R0, "iqU" );
          deleteButSuffixFromElseReplace( R2, STEP1_IF, "at", true, R2, "iqU" );

          replaceFrom( R0, STEP1_EAUX, "eau" );

          replaceFrom( R1, STEP1_AUX, "al" );

          deleteButSuffixFromElseReplace( R2, STEP1_EUSE, "", true, R1, "eux" );

          deleteFrom( R2, STEP1_EUX );

          // If one of the next three rules fires, step 2a still has to run.
          if ( replaceFrom( RV, STEP1_AMMENT, "ant" ) ) {
              suite = true;
          }
          if ( replaceFrom( RV, STEP1_EMMENT, "ent" ) ) {
              suite = true;
          }
          if ( deleteFromIfTestVowelBeforeIn( RV, STEP1_MENT, true, RV ) ) {
              suite = true;
          }
      }

      /**
       * Second step (A) of the Porter Algorithm<br>
       * Will be performed if nothing changed from the first step
       * or changed were done in the amment, emment, ments or ment suffixes<br>
       * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
       *
       * @return boolean - true if something changed in the StringBuilder
       */
      private boolean step2a() {
          return deleteFromIfTestVowelBeforeIn( RV, STEP2A_I_VERBS, false, RV );
      }

      /**
       * Second step (B) of the Porter Algorithm<br>
       * Will be performed if step 2 A was performed unsuccessfully<br>
       * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
       */
      private void step2b() {
          deleteFrom( RV, STEP2B_E_VERBS );
          deleteButSuffixFrom( RV, STEP2B_A_VERBS, "e", true );
          deleteFrom( R2, STEP2B_IONS );
      }

      /**
       * Third step of the Porter Algorithm<br>
       * Turns a final 'Y' into 'i' and a final c-cedilla into 'c'.<br>
       * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
       */
      private void step3() {
          int lastIndex = sb.length() - 1;
          if ( lastIndex < 0 ) {
              return;
          }
          char last = sb.charAt( lastIndex );
          if ( last == 'Y' ) {
              sb.setCharAt( lastIndex, 'i' );
              setStrings();
          } else if ( last == C_CEDILLA ) {
              sb.setCharAt( lastIndex, 'c' );
              setStrings();
          }
      }

      /**
       * Fourth step of the Porter Algorithm<br>
       * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
       */
      private void step4() {
          int len = sb.length();
          if ( len > 1 && sb.charAt( len - 1 ) == 's'
                  && KEEP_FINAL_S_AFTER.indexOf( sb.charAt( len - 2 ) ) < 0 ) {
              // Final 's' not preceded by a, i, o, u, e-grave or s: drop it.
              replaceTail( 1, "" );
          }

          // "ion" is removed when preceded by 's', otherwise when preceded by 't'.
          if ( !deleteFromIfPrecededIn( R2, STEP4_ION, RV, "s" ) ) {
              deleteFromIfPrecededIn( R2, STEP4_ION, RV, "t" );
          }

          replaceFrom( RV, STEP4_IER, "i" );
          deleteFrom( RV, STEP4_E );
          deleteFromIfPrecededIn( RV, STEP4_E_DIAERESIS, R0, "gu" );
      }

      /**
       * Fifth step of the Porter Algorithm<br>
       * Drops the last letter of a word ending in enn, onn, ett, ell or eill.<br>
       * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
       */
      private void step5() {
          if ( R0 == null ) {
              return;
          }
          for ( String ending : STEP5_DOUBLED ) {
              if ( R0.endsWith( ending ) ) {
                  replaceTail( 1, "" );
                  return;
              }
          }
      }

      /**
       * Sixth (and last!) step of the Porter Algorithm<br>
       * If the word ends in one or more non-vowels directly preceded by e-acute
       * or e-grave, that letter is replaced by a plain 'e'.<br>
       * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
       */
      private void step6() {
          if ( R0 == null ) {
              return;
          }
          int lastIndex = R0.length() - 1;

          // Walk backwards over the trailing run of non-vowels.
          int i = lastIndex;
          while ( i >= 0 && !isVowel( R0.charAt( i ) ) ) {
              i--;
          }

          boolean hasTrailingNonVowel = i < lastIndex;
          if ( i >= 0 && hasTrailingNonVowel ) {
              char ch = R0.charAt( i );
              if ( ch == E_ACUTE || ch == E_GRAVE ) {
                  sb.setCharAt( i, 'e' );
              }
          }
      }

      /**
       * Delete a suffix searched in zone "source" if zone "from" contains prefix + search string
       *
       * @param source java.lang.String - the primary source zone for search
       * @param search java.lang.String[] - the strings to search for suppression
       * @param from java.lang.String - the secondary source zone for search
       * @param prefix java.lang.String - the prefix to add to the search string to test
       * @return boolean - true if modified
       */
      private boolean deleteFromIfPrecededIn( String source, String[] search, String from, String prefix ) {
          if ( source == null ) {
              return false;
          }
          for ( String suffix : search ) {
              if ( source.endsWith( suffix ) && from != null && from.endsWith( prefix + suffix ) ) {
                  replaceTail( suffix.length(), "" );
                  return true;
              }
          }
          return false;
      }

      /**
       * Delete a suffix searched in zone "source" if the preceding letter is (or isn't) a vowel
       *
       * @param source java.lang.String - the primary source zone for search
       * @param search java.lang.String[] - the strings to search for suppression
       * @param vowel boolean - true if we need a vowel before the search string
       * @param from java.lang.String - the secondary source zone for search (where vowel could be)
       * @return boolean - true if modified
       */
      private boolean deleteFromIfTestVowelBeforeIn( String source, String[] search, boolean vowel, String from ) {
          if ( source == null || from == null ) {
              return false;
          }
          for ( String suffix : search ) {
              int needed = suffix.length() + 1;
              // "from" must be long enough to hold the letter in front of the suffix.
              if ( !source.endsWith( suffix ) || needed > from.length() ) {
                  continue;
              }
              char before = sb.charAt( sb.length() - needed );
              if ( isVowel( before ) == vowel ) {
                  modified = true;
                  replaceTail( suffix.length(), "" );
                  return true;
              }
          }
          return false;
      }

      /**
       * Delete a suffix searched in zone "source" if preceded by the prefix
       *
       * @param source java.lang.String - the primary source zone for search
       * @param search java.lang.String[] - the strings to search for suppression
       * @param prefix java.lang.String - the prefix to add to the search string to test
       * @param without boolean - true if it will be deleted even without prefix found
       */
      private void deleteButSuffixFrom( String source, String[] search, String prefix, boolean without ) {
          if ( source == null ) {
              return;
          }
          for ( String suffix : search ) {
              if ( source.endsWith( prefix + suffix ) ) {
                  // Prefix and suffix are both removed.
                  modified = true;
                  replaceTail( prefix.length() + suffix.length(), "" );
                  return;
              }
              if ( without && source.endsWith( suffix ) ) {
                  modified = true;
                  replaceTail( suffix.length(), "" );
                  return;
              }
          }
      }

      /**
       * Delete a suffix searched in zone "source" if preceded by prefix<br>
       * or replace it with the replace string if preceded by the prefix in the zone "from"<br>
       * or delete the suffix if specified
       *
       * @param source java.lang.String - the primary source zone for search
       * @param search java.lang.String[] - the strings to search for suppression
       * @param prefix java.lang.String - the prefix to add to the search string to test
       * @param without boolean - true if it will be deleted even without prefix found
       * @param from java.lang.String - the secondary zone in which prefix + suffix triggers the replacement
       * @param replace java.lang.String - the text that replaces prefix + suffix in that case
       */
      private void deleteButSuffixFromElseReplace( String source, String[] search, String prefix, boolean without, String from, String replace ) {
          if ( source == null ) {
              return;
          }
          for ( String suffix : search ) {
              String withPrefix = prefix + suffix;
              if ( source.endsWith( withPrefix ) ) {
                  modified = true;
                  replaceTail( withPrefix.length(), "" );
                  return;
              }
              if ( from != null && from.endsWith( withPrefix ) ) {
                  modified = true;
                  replaceTail( withPrefix.length(), replace );
                  return;
              }
              if ( without && source.endsWith( suffix ) ) {
                  modified = true;
                  replaceTail( suffix.length(), "" );
                  return;
              }
          }
      }

      /**
       * Replace a search string with another within the source zone
       *
       * @param source java.lang.String - the source zone for search
       * @param search java.lang.String[] - the strings to search for replacement
       * @param replace java.lang.String - the replacement string
       * @return boolean - true if a replacement was made
       */
      private boolean replaceFrom( String source, String[] search, String replace ) {
          if ( source == null ) {
              return false;
          }
          for ( String suffix : search ) {
              if ( source.endsWith( suffix ) ) {
                  modified = true;
                  replaceTail( suffix.length(), replace );
                  return true;
              }
          }
          return false;
      }

      /**
       * Delete a search string within the source zone
       *
       * @param source the source zone for search
       * @param suffix the strings to search for suppression
       */
      private void deleteFrom( String source, String[] suffix ) {
          if ( source == null ) {
              return;
          }
          for ( String candidate : suffix ) {
              if ( source.endsWith( candidate ) ) {
                  modified = true;
                  replaceTail( candidate.length(), "" );
                  return;
              }
          }
      }

      /**
       * Test if a char is a french vowel, including accentuated ones.
       * Upper-case letters (in particular the 'U', 'I' and 'Y' markers written by
       * {@link #treatVowels(StringBuilder)}) are not vowels.
       *
       * @param ch the char to test
       * @return boolean - true if the char is a vowel
       */
      private boolean isVowel( char ch ) {
          return VOWELS.indexOf( ch ) >= 0;
      }

      /**
       * Retrieve the "R zone" (1 or 2 depending on the buffer) and return the corresponding string<br>
       * "R is the region after the first non-vowel following a vowel
       * or is the null region at the end of the word if there is no such non-vowel"<br>
       * @param buffer java.lang.CharSequence - the in buffer
       * @return java.lang.String - the resulting string, or null when the region is empty
       */
      private String retrieveR( CharSequence buffer ) {
          int len = buffer.length();
          int i = 0;
          // Skip up to the first vowel ...
          while ( i < len && !isVowel( buffer.charAt( i ) ) ) {
              i++;
          }
          // ... then up to the first non-vowel at or after it.
          while ( i < len && isVowel( buffer.charAt( i ) ) ) {
              i++;
          }
          // i is that non-vowel's index, or len when either search failed.
          if ( i + 1 < len ) {
              return buffer.subSequence( i + 1, len ).toString();
          }
          return null;
      }

      /**
       * Retrieve the "RV zone" from a buffer an return the corresponding string<br>
       * "If the word begins with two vowels, RV is the region after the third letter,
       * otherwise the region after the first vowel not at the beginning of the word,
       * or the end of the word if these positions cannot be found."<br>
       * Words of three letters or fewer have no RV zone. When no vowel is found
       * past the first letter, the zone starts at the second letter.
       *
       * @param buffer java.lang.StringBuilder - the in buffer
       * @return java.lang.String - the resulting string, or null when there is no zone
       */
      private String retrieveRV( StringBuilder buffer ) {
          int len = buffer.length();
          if ( len <= 3 ) {
              return null;
          }
          if ( isVowel( buffer.charAt( 0 ) ) && isVowel( buffer.charAt( 1 ) ) ) {
              return buffer.substring( 3, len );
          }

          int firstInnerVowel = 0;
          for ( int c = 1; c < len; c++ ) {
              if ( isVowel( buffer.charAt( c ) ) ) {
                  firstInnerVowel = c;
                  break;
              }
          }
          return ( firstInnerVowel + 1 < len ) ? buffer.substring( firstInnerVowel + 1, len ) : null;
      }

      /**
       * Turns u and i preceded AND followed by a vowel to UpperCase<br>
       * Turns y preceded OR followed by a vowel to UpperCase<br>
       * Turns u preceded by q to UpperCase<br>
       * The buffer is scanned left to right and changed in place, so a letter
       * already turned to upper case no longer counts as a vowel for its neighbour.
       *
       * @param buffer java.lang.StringBuilder - the buffer to treat
       * @return java.lang.StringBuilder - the treated buffer (the same instance)
       */
      private StringBuilder treatVowels( StringBuilder buffer ) {
          int lastIndex = buffer.length() - 1;
          for ( int c = 0; c <= lastIndex; c++ ) {
              char ch = buffer.charAt( c );
              if ( ch != 'u' && ch != 'i' && ch != 'y' ) {
                  continue;
              }

              boolean hasPrevious = c > 0;
              boolean vowelBefore = hasPrevious && isVowel( buffer.charAt( c - 1 ) );
              boolean vowelAfter = c < lastIndex && isVowel( buffer.charAt( c + 1 ) );

              if ( ch == 'u' ) {
                  boolean afterQ = hasPrevious && buffer.charAt( c - 1 ) == 'q';
                  if ( afterQ || ( vowelBefore && vowelAfter ) ) {
                      buffer.setCharAt( c, 'U' );
                  }
              } else if ( ch == 'i' ) {
                  if ( vowelBefore && vowelAfter ) {
                      buffer.setCharAt( c, 'I' );
                  }
              } else if ( vowelBefore || vowelAfter ) { // ch == 'y'
                  buffer.setCharAt( c, 'Y' );
              }
          }
          return buffer;
      }

      /**
       * Checks a term if it can be processed correctly.
       *
       * @return boolean - true if, and only if, the given term consists only of
       *         letters and has no upper-case letter after its first character.
       */
      private boolean isStemmable( String term ) {
          for ( int c = 0; c < term.length(); c++ ) {
              char ch = term.charAt( c );
              // Any non-letter disqualifies the term.
              if ( !Character.isLetter( ch ) ) {
                  return false;
              }
              // An upper-case letter is only tolerated as the very first character.
              if ( c > 0 && Character.isUpperCase( ch ) ) {
                  return false;
              }
          }
          return true;
      }
  }