X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.4.0/lucene/contrib/icu/src/data/uax29/Lao.rbbi

diff --git a/lucene-java-3.4.0/lucene/contrib/icu/src/data/uax29/Lao.rbbi b/lucene-java-3.4.0/lucene/contrib/icu/src/data/uax29/Lao.rbbi
deleted file mode 100644
index 27dcaca..0000000
--- a/lucene-java-3.4.0/lucene/contrib/icu/src/data/uax29/Lao.rbbi
+++ /dev/null
@@ -1,192 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# Parses Lao text, with syllable as token.
-#
-# The definition of Lao syllable is based from:
-#
-#   Syllabification of Lao Script for Line Breaking
-#   Phonpasit Phissamay, Valaxay Dalolay, Chitaphone Chanhsililath, Oulaiphone Silimasak, 
-#     Sarmad Hussain, Nadir Durrani, Science Technology and Environment Agency, CRULP
-#   http://www.panl10n.net/english/final%20reports/pdf%20files/Laos/LAO06.pdf
-#	http://www.panl10n.net/Presentations/Cambodia/Phonpassit/LineBreakingAlgo.pdf
-#
-# NOTE:
-# There are some ambiguities in Lao syllabification without additional processing, as mentioned in the paper.
-# For this reason, this RBBI grammar really only works with LaoBreakIterator, as it does this additional work.
-#
-# Syllable structure, where X is the nuclear consonant:
-#
-#           +----+
-#           | X5 |
-#           +----+
-#           | X4 |
-# +----+----+----+----+----+----+----+-----+
-# | X0 | X1 | X  | X6 | X7 | X8 | X9 | X10 |
-# +----+----+----+----+----+----+----+-----+
-#           | X2 |
-#           +----+
-#           | X3 |
-#           +----+
-#
-# X0 represents a vowel which occurs before the nuclear consonant. 
-# It can always define the beginning of syllable.
-$X0 = [\u0EC0-\u0EC4];
-# X1 is a combination consonant which comes before the nuclear consonant, 
-# but only if nuclear consonant is one of {ງ ຍ ລ ວ ຼ ມ ນ ຣ}
-$X1 = [\u0EAB];
-# X represents the nuclear consonant.
-$X = [\u0E81-\u0EAE\u0EDC\u0EDD];
-# X2 is a combination consonant which comes after the nuclear consonant, 
-# which is placed under or next to the nuclear consonant.
-$X2 = [\u0EBC\u0EA3\u0EA7\u0EA5];
-# X3 represents a vowel which occurs under the nuclear consonant.
-$X3 = [\u0EB8\u0EB9];
-# X4 represents a vowel which occurs above the nuclear consonant. 
-$X4 = [\u0EB4-\u0EB7\u0ECD\u0EBB\u0EB1];
-# X5 represents a tone mark which occurs above the nuclear consonant or upper vowel.
-$X5 = [\u0EC8-\u0ECB];
-# X6 represents a consonant vowel, which occurs after the nuclear consonant. 
-# It functions when the syllable doesn’t have any vowels. And it always exists with X8.
-$X6 = [\u0EA7\u0EAD\u0EBD];
-# X7 represents a final vowel. 
-# However X7_1 always represents the end of syllable and it never exists with tone mark.
-$X7 = [\u0EB0\u0EB2\u0EB3];
-# X8 represents an alternate consonant.
-$X8 = [\u0E81\u0E87\u0E8D\u0E94\u0E99\u0EA1\u0E9A\u0EA7];
-# X9 represents alternate consonants to pronounce foreign terms, it always exist with X10_3.
-$X9 = [\u0E88\u0EAA\u0E8A\u0E9E\u0E9F\u0EA5];
-# X10 represents a sign mark. 
-# It always occurs at the end of a syllable, but mostly people keep it separate from syllable.
-$X10 = [\u0EAF\u0EC6\u0ECC];
-
-# Section 1
-$X0_1 = [\u0EC0];
-$X4_1_2 = [\u0EB4\u0EB5];
-$X4_3_4 = [\u0EB6\u0EB7];
-$X4_6 = [\u0EBB];
-$X4_7 = [\u0EB1];
-$X6_2 = [\u0EAD];
-$X6_3 = [\u0EBD];
-$X7_1 = [\u0EB0];
-$X7_2 = [\u0EB2];
-$X10_1 = [\u0EAF];
-$X10_2 = [\u0EC6];
-$X10_3 = [\u0ECC];
-
-$Rule1_1 = $X0_1 ($X1)? $X ($X2)? ($X5)? ($X8)? ($X9 $X10_3)? ($X10_2)? ($X10_1)?;
-$Rule1_2 = $X0_1 ($X1)? $X ($X2)? $X4_1_2 ($X5)? ($X8)? ($X9 $X10_3)? ($X10_2)? ($X10_1)?;
-$Rule1_3 = $X0_1 ($X1)? $X ($X2)? $X4_3_4 ($X5)? $X6_2 ($X8)? ($X9 $X10_3)? ($X10_2)? ($X10_1)?;
-$Rule1_4 = $X0_1 ($X1)? $X ($X2)? ($X7_2)? $X7_1;
-$Rule1_5 = $X0_1 ($X1)? $X ($X2)? $X4_6 ($X5)? $X7_2 ($X9 $X10_3)? ($X10_2)? ($X10_1)?;
-$Rule1_6 = $X0_1 ($X1)? $X ($X2)? $X4_7 ($X5)? $X8 ($X9 $X10_3)? ($X10_2)? ($X10_1)?;
-$Rule1_7 = $X0_1 ($X1)? $X ($X2)? ($X4_7)? ($X5)? $X6_3 ($X9 $X10_3)? ($X10_2)? ($X10_1)?;
-
-$Rule1 = ($Rule1_1 | $Rule1_2 | $Rule1_3 | $Rule1_4 | $Rule1_5 | $Rule1_6 | $Rule1_7);
-
-# Section 2
-$X0_2 = [\u0EC1];
-
-$Rule2_1 = $X0_2 ($X1)? $X ($X2)? ($X5)? ($X6)? ($X8)? ($X9 $X10_3)? ($X10_2)? ($X10_1)?;
-$Rule2_2 = $X0_2 ($X1)? $X ($X2)? $X7_1;
-$Rule2_3 = $X0_2 ($X1)? $X ($X2)? $X4_7 ($X5)? $X8 ($X9 $X10_3)? ($X10_2)? ($X10_1)?; 
-
-$Rule2 = ($Rule2_1 | $Rule2_2 | $Rule2_3);
-
-# Section 3
-$X0_3 = [\u0EC2];
-$X8_3 = [\u0E8D];
-$X8_8 = [\u0EA7];
-
-$Rule3_1 = $X0_3 ($X1)? $X ($X2)? ($X5)? ($X8)? ($X9 $X10_3)? ($X10_2)? ($X10_1)?;
-$Rule3_2 = $X0_3 ($X1)? $X ($X2)? $X7_1;
-$Rule3_3 = $X0_3 ($X1)? $X ($X2)? $X4_7 ($X5)? ($X8_3 | $X8_8);
-
-$Rule3 = ($Rule3_1 | $Rule3_2 | $Rule3_3);
-
-# Section 4
-$X0_4 = [\u0EC4];
-$X6_1 = [\u0EA7];
-
-$Rule4 = $X0_4 ($X1)? $X ($X2)? ($X5)? ($X6_1)? ($X9 $X10_3)? ($X10_2)? ($X10_1)?;
-
-# Section 5
-$X0_5 = [\u0EC3];
-
-$Rule5 = $X0_5 ($X1)? $X ($X2)? ($X5)? ($X6_1)? ($X9 $X10_3)? ($X10_2)? ($X10_1)?;
-
-# Section 6
-$Rule6 = ($X1)? $X ($X2)? $X3 ($X5)? ($X8)? ($X9 $X10_3)? ($X10_2)? ($X10_1)?;
-
-# Section 7
-$X4_1_4 = [\u0EB4-\u0EB7];
-
-$Rule7 = ($X1)? $X ($X2)? $X4_1_4 ($X5)? ($X8)? ($X9 $X10_3)? ($X10_2)? ($X10_1)?;
-
-# Section 8
-$X4_5 = [\u0ECD];
-
-$Rule8 = ($X1)? $X ($X2)? $X4_5 ($X5)? ($X7_2)? ($X9 $X10_3)? ($X10_2)? ($X10_1)?;
-
-# Section 9
-
-$Rule9_1 = ($X1)? $X ($X2)? $X4_6 ($X5)? $X8 ($X9 $X10_3)? ($X10_2)? ($X10_1)?;
-$Rule9_2 = ($X1)? $X ($X2)? $X4_6 ($X5)? $X6_1 $X7_1;
-
-$Rule9 = ($Rule9_1 | $Rule9_2);
-
-# Section 10
-$Rule10 = ($X1)? $X ($X2)? $X4_7 ($X5)? ($X6_1)? $X8 ($X9 $X10_3)? ($X10_2)? ($X10_1)?;
-
-# Section 11
-$Rule11 = ($X1)? $X ($X2)? ($X5)? $X6 $X8 ($X9 $X10_3)? ($X10_2)? ($X10_1)?;
-
-# Section 12
-$Rule12 = ($X1)? $X ($X2)? ($X5)? $X7_1;
-
-# Section 13
-$Rule13 = ($X1)? $X ($X2)? ($X5)? $X7_2 ($X8)? ($X9 $X10_3)? ($X10_2)? ($X10_1)?;
-
-# Section 14
-$X7_3 = [\u0EB3];
-
-$Rule14 = ($X1)? $X ($X2)? ($X5)? $X7_3 ($X9 $X10_3)? ($X10_2)? ($X10_1)?;
-
-$LaoSyllableEx = ($Rule1 | $Rule2 | $Rule3 | $Rule4 | $Rule5 | $Rule6 | $Rule7 | $Rule8 | $Rule9 | $Rule10 | $Rule11 | $Rule12 | $Rule13 | $Rule14);
-
-$WordJoin = [:Line_Break=Word_Joiner:];
-
-$LaoJoinedSyllableEx = $LaoSyllableEx ($WordJoin $LaoSyllableEx)*;
-
-#
-# default numerical definitions
-#
-$Extend       = [\p{Word_Break = Extend}];
-$Format       = [\p{Word_Break = Format}];
-$MidNumLet    = [\p{Word_Break = MidNumLet}];
-$MidNum       = [\p{Word_Break = MidNum}];
-$Numeric      = [\p{Word_Break = Numeric}];
-$ExtendNumLet = [\p{Word_Break = ExtendNumLet}];                                                          
-$MidNumLetEx    = $MidNumLet    ($Extend |  $Format)*;
-$MidNumEx       = $MidNum       ($Extend |  $Format)*;
-$NumericEx      = $Numeric      ($Extend |  $Format)*;
-$ExtendNumLetEx = $ExtendNumLet ($Extend |  $Format)*;
-
-!!forward;
-
-$LaoJoinedSyllableEx {200};
-# default numeric rules
-$NumericEx $ExtendNumLetEx? (($MidNumEx | $MidNumLetEx)? $NumericEx $ExtendNumLetEx?)*  {100};