#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

#
# Break rules for Myanmar text: each syllable is emitted as one token.
# Runs of digits are tokenized by a separate numeric rule at the end.
#

# ---------------------------------------------------------------------------
# Myanmar character classes
# ---------------------------------------------------------------------------

# Base letters: Other_Letter characters that belong to the Myanmar script.
$Cons     = [[:Other_Letter:]&[:Myanmar:]];
# U+1039 MYANMAR SIGN VIRAMA.
$Virama   = [\u1039];
# U+103A MYANMAR SIGN ASAT.
$Asat     = [\u103A];

# Characters whose Line_Break property is Word_Joiner; used to glue
# adjacent syllables into a single token.
$WordJoin = [:Line_Break=Word_Joiner:];

# ---------------------------------------------------------------------------
# Default numeric character classes (Word_Break property values)
# ---------------------------------------------------------------------------
$Extend       = [\p{Word_Break = Extend}];
$Format       = [\p{Word_Break = Format}];
$MidNumLet    = [\p{Word_Break = MidNumLet}];
$MidNum       = [\p{Word_Break = MidNum}];
$Numeric      = [\p{Word_Break = Numeric}];
$ExtendNumLet = [\p{Word_Break = ExtendNumLet}];

# "Ex" variants: the base class followed by any run of Extend / Format
# characters, so that those trailing characters stay attached to it.
$MidNumLetEx    = $MidNumLet    ($Extend | $Format)*;
$MidNumEx       = $MidNum       ($Extend | $Format)*;
$NumericEx      = $Numeric      ($Extend | $Format)*;
$ExtendNumLetEx = $ExtendNumLet ($Extend | $Format)*;

# ---------------------------------------------------------------------------
# Syllable structure
# ---------------------------------------------------------------------------

# A base letter plus its trailing Extend / Format characters.
$ConsEx = $Cons ($Extend | $Format)*;

# A base letter immediately followed by asat, optionally followed by a
# virama-joined letter, then any trailing Extend / Format characters.
$AsatEx = $Cons $Asat ($Virama $ConsEx)? ($Extend | $Format)*;

# One syllable: a base letter, an optional virama-joined second letter,
# and any number of asat-marked letters after it.
$MyanmarSyllableEx = $ConsEx ($Virama $ConsEx)? ($AsatEx)*;

# One or more syllables connected by word-joiner characters.
$MyanmarJoinedSyllableEx = $MyanmarSyllableEx ($WordJoin $MyanmarSyllableEx)*;

# ---------------------------------------------------------------------------
# Forward rules
# ---------------------------------------------------------------------------
!!forward;

# Myanmar syllables (possibly word-joined); rule status tag 200.
$MyanmarJoinedSyllableEx {200};

# Default numeric rule; rule status tag 100.
# A numeric run with an optional ExtendNumLet suffix, followed by any number
# of further numeric runs, each optionally preceded by a single MidNum or
# MidNumLet separator.
$NumericEx $ExtendNumLetEx? (($MidNumEx | $MidNumLetEx)? $NumericEx $ExtendNumLetEx?)* {100};