2 # Licensed to the Apache Software Foundation (ASF) under one or more
\r
3 # contributor license agreements. See the NOTICE file distributed with
\r
4 # this work for additional information regarding copyright ownership.
\r
5 # The ASF licenses this file to You under the Apache License, Version 2.0
\r
6 # (the "License"); you may not use this file except in compliance with
\r
7 # the License. You may obtain a copy of the License at
\r
9 # http://www.apache.org/licenses/LICENSE-2.0
\r
11 # Unless required by applicable law or agreed to in writing, software
\r
12 # distributed under the License is distributed on an "AS IS" BASIS,
\r
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
\r
14 # See the License for the specific language governing permissions and
\r
15 # limitations under the License.
\r
18 # This is an example of rule tailoring for Hebrew.
\r
19 # In this example, the single-quote is added to the Extend category.
\r
20 # The double-quote is added to the MidLetter category.
\r
# Character classes used by the rules in this file. Each variable is a
# Unicode set selected by a property value; most use the Word_Break property.
23 $CR = [\p{Word_Break = CR}];
\r
24 $LF = [\p{Word_Break = LF}];
\r
25 $Newline = [\p{Word_Break = Newline}];
\r
# Hebrew tailoring: the single-quote (U+0027) is added to the Extend set.
26 $Extend = [\p{Word_Break = Extend}\u0027];
\r
27 $Format = [\p{Word_Break = Format}];
\r
28 $ALetter = [\p{Word_Break = ALetter}];
\r
29 $MidNumLet = [\p{Word_Break = MidNumLet}];
\r
# Hebrew tailoring: the double-quote (U+0022) is added to the MidLetter set.
30 $MidLetter = [\p{Word_Break = MidLetter}\u0022];
\r
31 $MidNum = [\p{Word_Break = MidNum}];
\r
32 $Numeric = [\p{Word_Break = Numeric}];
\r
33 $ExtendNumLet = [\p{Word_Break = ExtendNumLet}];
\r
# Characters whose LineBreak property is Complex_Context.
# NOTE(review): the variable name $dictionary presumably has special meaning
# to the ICU rule compiler (dictionary-based breaking) - confirm before renaming.
34 $dictionary = [:LineBreak = Complex_Context:];
\r
# This set is selected by Grapheme_Cluster_Break, not Word_Break.
35 $Control = [\p{Grapheme_Cluster_Break = Control}];
\r
# $ALetter together with the $dictionary characters that are in neither
# $Extend nor $Control.
36 $ALetterPlus = [$ALetter [$dictionary-$Extend-$Control]];
\r
# "Ex" variants: each base class followed by zero or more $Extend or $Format
# characters, so that trailing Extend/Format characters are consumed together
# with the base character wherever these variables appear in a rule.
# Note that $ALetterEx is built on $ALetterPlus rather than on $ALetter.
38 $ALetterEx = $ALetterPlus ($Extend | $Format)*;
\r
39 $MidNumLetEx = $MidNumLet ($Extend | $Format)*;
\r
40 $MidLetterEx = $MidLetter ($Extend | $Format)*;
\r
41 $MidNumEx = $MidNum ($Extend | $Format)*;
\r
42 $NumericEx = $Numeric ($Extend | $Format)*;
\r
43 $ExtendNumLetEx = $ExtendNumLet ($Extend | $Format)*;
\r
# Break rules. Each rule lists a sequence that is kept together (no boundary
# inside the match). A trailing {N} is the status tag attached to the rule.
# NOTE(review): the tags look like ICU word-status values (100 = number,
# 200 = letter) - confirm against the consumer of this rule set.
# NOTE(review): the rules below are written as pairwise transitions and
# presumably rely on rule chaining being enabled elsewhere in the file; no
# such directive is visible in this section - confirm.

# An optional character other than CR, LF or Newline, followed by one or more
# Extend/Format characters.
48 [^$CR $LF $Newline]? ($Extend | $Format)+;
\r
# Letter followed by letter.
51 $ALetterEx $ALetterEx {200};
\r
# Letters separated by a single MidLetter or MidNumLet character. With the
# tailoring above, MidLetter includes the double-quote (U+0022).
52 $ALetterEx ($MidLetterEx | $MidNumLetEx) $ALetterEx {200};
\r
# Digit followed by digit.
53 $NumericEx $NumericEx {100};
\r
# Letter/digit adjacency in either order.
54 $ALetterEx $NumericEx {200};
\r
55 $NumericEx $ALetterEx {200};
\r
# Digits separated by a single MidNum or MidNumLet character.
56 $NumericEx ($MidNumEx | $MidNumLetEx) $NumericEx {100};
\r
# ExtendNumLet characters attach to a neighbouring letter, digit or other
# ExtendNumLet character, on either side.
57 $ALetterEx $ExtendNumLetEx {200};
\r
58 $NumericEx $ExtendNumLetEx {100};
\r
59 $ExtendNumLetEx $ExtendNumLetEx {200};
\r
60 $ExtendNumLetEx $ALetterEx {200};
\r
61 $ExtendNumLetEx $NumericEx {100};
\r