#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

#
# Parses Myanmar text, emitting each syllable as a token.
#

#
# Myanmar character classes
#
# Base consonants: General_Category = Other_Letter restricted to the Myanmar script.
$Cons   = [[:Other_Letter:]&[:Myanmar:]];
# U+1039 MYANMAR SIGN VIRAMA.
$Virama = [\u1039];
# U+103A MYANMAR SIGN ASAT.
$Asat   = [\u103A];

# Characters whose Line_Break property is Word_Joiner; used below to glue
# neighbouring syllables into a single match.
$WordJoin = [:Line_Break=Word_Joiner:];

#
# default numerical definitions
#
$Extend       = [\p{Word_Break = Extend}];
$Format       = [\p{Word_Break = Format}];
$MidNumLet    = [\p{Word_Break = MidNumLet}];
$MidNum       = [\p{Word_Break = MidNum}];
$Numeric      = [\p{Word_Break = Numeric}];
$ExtendNumLet = [\p{Word_Break = ExtendNumLet}];

# "Ex" variants: the base class followed by any run of Extend/Format
# characters, so that those characters never separate a match.
$MidNumLetEx    = $MidNumLet    ($Extend | $Format)*;
$MidNumEx       = $MidNum       ($Extend | $Format)*;
$NumericEx      = $Numeric      ($Extend | $Format)*;
$ExtendNumLetEx = $ExtendNumLet ($Extend | $Format)*;

#
# Myanmar syllable structure
#
# A consonant plus trailing Extend/Format characters.
$ConsEx = $Cons ($Extend | $Format)*;
# A consonant immediately followed by Asat, optionally followed by
# Virama + consonant, then trailing Extend/Format characters.
$AsatEx = $Cons $Asat ($Virama $ConsEx)? ($Extend | $Format)*;
# One syllable: a consonant, an optional Virama + consonant, and any
# number of Asat-marked consonant groups.
$MyanmarSyllableEx = $ConsEx ($Virama $ConsEx)? ($AsatEx)*;
# Syllables connected by Word_Joiner characters are matched as one unit.
$MyanmarJoinedSyllableEx = $MyanmarSyllableEx ($WordJoin $MyanmarSyllableEx)*;

# Myanmar syllable rule; matches report rule status 200.
$MyanmarJoinedSyllableEx {200};

# default numeric rules; matches report rule status 100.
$NumericEx $ExtendNumLetEx? (($MidNumEx | $MidNumLetEx)? $NumericEx $ExtendNumLetEx?)* {100};