1 package org.apache.lucene.analysis.synonym;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.IOException;
21 import java.io.LineNumberReader;
22 import java.io.Reader;
23 import java.text.ParseException;
25 import org.apache.lucene.analysis.Analyzer;
26 import org.apache.lucene.util.CharsRef;
29 * Parser for the WordNet Prolog format.
31 * See http://wordnet.princeton.edu/man/prologdb.5WN.html for a description of the format.
32 * @lucene.experimental
34 // TODO: allow callers to restrict parsing to specific syntactic categories (e.g. nouns only)
35 public class WordnetSynonymParser extends SynonymMap.Builder {
36 private final boolean expand;
37 private final Analyzer analyzer;
39 public WordnetSynonymParser(boolean dedup, boolean expand, Analyzer analyzer) {
42 this.analyzer = analyzer;
45 public void add(Reader in) throws IOException, ParseException {
46 LineNumberReader br = new LineNumberReader(in);
49 String lastSynSetID = "";
50 CharsRef synset[] = new CharsRef[8];
53 while ((line = br.readLine()) != null) {
54 String synSetID = line.substring(2, 11);
56 if (!synSetID.equals(lastSynSetID)) {
57 addInternal(synset, synsetSize);
61 if (synset.length <= synsetSize+1) {
62 CharsRef larger[] = new CharsRef[synset.length * 2];
63 System.arraycopy(synset, 0, larger, 0, synsetSize);
67 synset[synsetSize] = parseSynonym(line, synset[synsetSize]);
69 lastSynSetID = synSetID;
72 // final synset in the file
73 addInternal(synset, synsetSize);
74 } catch (IllegalArgumentException e) {
75 ParseException ex = new ParseException("Invalid synonym rule at line " + br.getLineNumber(), 0);
83 private CharsRef parseSynonym(String line, CharsRef reuse) throws IOException {
85 reuse = new CharsRef(8);
88 int start = line.indexOf('\'')+1;
89 int end = line.lastIndexOf('\'');
91 String text = line.substring(start, end).replace("''", "'");
92 return analyze(analyzer, text, reuse);
95 private void addInternal(CharsRef synset[], int size) throws IOException {
97 return; // nothing to do
101 for (int i = 0; i < size; i++) {
102 for (int j = 0; j < size; j++) {
103 add(synset[i], synset[j], false);
107 for (int i = 0; i < size; i++) {
108 add(synset[i], synset[0], false);