X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.5.0/lucene/contrib/icu/src/data/utr30/DiacriticFolding.txt diff --git a/lucene-java-3.5.0/lucene/contrib/icu/src/data/utr30/DiacriticFolding.txt b/lucene-java-3.5.0/lucene/contrib/icu/src/data/utr30/DiacriticFolding.txt new file mode 100644 index 0000000..5e5f2de --- /dev/null +++ b/lucene-java-3.5.0/lucene/contrib/icu/src/data/utr30/DiacriticFolding.txt @@ -0,0 +1,563 @@ +# Copyright 2001-2010 Unicode, Inc. +# +# Disclaimer +# +# This source code is provided as is by Unicode, Inc. No claims are +# made as to fitness for any particular purpose. No warranties of any +# kind are expressed or implied. The recipient agrees to determine +# applicability of information provided. If this file has been +# purchased on magnetic or optical media from Unicode, Inc., the +# sole remedy for any claim will be exchange of defective media +# within 90 days of receipt. +# +# Limitations on Rights to Redistribute This Code +# +# Unicode, Inc. hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form +# for internal or external distribution as long as this notice +# remains attached. + +### Custom Normalization mappings for UTR#30 +### (http://www.unicode.org/reports/tr30/tr30-4.html) +### +### Created from Unicode 5.2 UCD +### + +# Removes diacritics, as defined by [:Diacritic:] +# These may or may not be combining marks +005E> +0060> +00B7> +02B9..02D7> +02DE> +02DF> +02E5..033F> +0342> +0346..034E> +0350..0357> +035D..0362> +0375> +0483..0487> +0559> +0591..05A1> +05A3..05BD> +05BF> +05C1> +05C2> +05C4> +064B..0652> +0657> +0658> +06DF> +06E0> +06E5> +06E6> +06EA..06EC> +0730..074A> +07A6..07B0> +07EB..07F5> +0818> +0819> +093C> +094D> +0951..0954> +0971> +09BC> +09CD> +0A3C> +0A4D> +0ABC> +0ACD> +0B3C> +0B4D> +0BCD> +0C4D> +0CBC> +0CCD> +0D4D> +0DCA> +0E47..0E4C> +0E4E> +0EC8..0ECC> +0F18> +0F19> +0F35> +0F37> +0F39> +0F3E> +0F3F> +0F82..0F84> +0F86> +0F87> +0FC6> +1037> +1039> +103A> +1087..108D> +108F> +109A> +109B> +17C9..17D3> +17DD> +1939..193B> +1A75..1A7C> +1A7F> +1B34> +1B44> +1B6B..1B73> +1BAA> +1C36> +1C37> +1C78..1C7D> +1CD0..1CE8> +1CED> +1D2F> +1D3B> +1D4E> +1DC4..1DCF> +1DFD..1DFF> +2CEF..2CF1> +2E2F> +302A..302F> +3099> +309A> +30FC> +A66F> +A67C> +A67D> +A67F> +A6F0> +A6F1> +A717..A721> +A788> +A8C4> +A8E0..A8F1> +A92B..A92E> +A953> +A9B3> +A9C0> +AA7B> +AABF..AAC2> +ABEC> +ABED> +FB1E> +FE20..FE26> +110B9> +110BA> +1D167..1D169> +1D16D..1D172> +1D17B..1D182> +1D185..1D18B> +1D1AA..1D1AD> + +# Latin script "composed" that do not further decompose, so decompose here +# These are from AsciiFoldingFilter +00E6>0061 0065 +00F0>0064 +00F8>006F +00FE>0074 0068 +0111>0064 +0127>0068 +0131>0069 +0138>0071 +0142>006C +014B>006E +0153>006F 0065 +0167>0074 +0180>0062 +0183>0062 +0185>0062 +0188>0063 +018C>0064 +018D>0064 +0192>0066 +0195>0068 0076 +0199>006B +019A>006C +#019B> +019E>006E +#01A3> +01A5>0070 +#01A8> +#01AA> +01AB>0074 +01AD>0074 +01B4>0079 +01B6>007A +#01B9> +#01BA> +01BB>0032 +01BD>0035 +#01BE> +01BF>0077 +01C0>007C +01C1>007C 007C +#01C2> +01C3>0021 +01DD>0065 +01E5>0047 +021D>007A +0221>0064 +0223>006F 0075 +0225>007A +0234>006C +0235>006E +0236>0074 +0237>006A +0238>0064 0062 +0239>0071 0070 +023C>0063 +023F>0073 +0240>007A +#0242> +0247>0065 +0249>006A +024B>0071 +024D>0072 +024F>0079 +0250>0061 +0251>0061 +0252>0061 +0253>0062 +0254>006F +0255>0063 +0256>0064 +0257>0064 +0258>0065 +0259>0061 +025A>0061 +025B>0065 +025C>0065 +025D>0065 +025E>0065 +025F>006A +0260>0067 +0261>0067 +0262>0047 +#0263> +#0264> +0265>0068 +0266>0068 +#0267> +0268>0069 +0269>0069 +026A>0049 +026B>006C +026C>006C +026D>006C +#026E> +026F>006D +0270>006D +0271>006D +0272>006E +0273>006E +0274>004E +0275>006F +0276>004F 0045 +#0277> +#0278> +#0279> +#027A> +#027B> +027C>0072 +027D>0072 +027E>0072 +027F>0072 +0280>0052 +0281>0052 +0282>0073 +#0283> +0284>006A +#0285> +#0286> +0287>0074 +0288>0074 +0289>0075 +#028A> +028B>0076 +028C>0076 +028D>0077 +028E>0079 +028F>0059 +0290>007A +0291>007A +#0292> +#0293> +#0294> +#0295> +#0296> +0297>0043 +0298>006F +0299>0042 +029A>0065 +029B>0047 +029C>0048 +029D>006A +029E>006B +029F>004C +02A0>0071 +#02A1> +#02A2> +02A3>0064 007A +#02A4> +02A5>0064 007A +02A6>0074 0073 +#02A7> +02A8>0074 0063 +02A9>0066 006E +02AA>006C 0073 +02AB>006C 007A +02AC>0077 0077 +#02AD> +02AE>0068 +02AF>0068 +1D00>0041 +1D01>0041 0045 +1D02>0061 0065 +1D03>0042 +1D04>0043 +1D05>0044 +1D06>0044 +1D07>0045 +1D08>0065 +1D09>0069 +1D0A>004A +1D0B>004B +1D0C>004C +1D0D>004D +1D0E>004E +1D0F>004F +1D10>004F +1D11>006F +#1D12> +1D13>006F +1D14>006F 0065 +1D15>004F 0055 +1D16>006F +1D17>006F +1D18>0050 +1D19>0052 +1D1A>0052 +1D1B>0054 +1D1C>0055 +1D1D>0075 +1D1E>0075 +1D1F>006D +1D20>0056 +1D21>0057 +1D22>005A +#1D23> +#1D24> +#1D25> +1D6B>0075 0065 +1D6C>0062 +1D6D>0064 +1D6E>0066 +1D6F>006D +1D70>006E +1D71>0070 +1D72>0072 +1D73>0072 +1D74>0073 +1D75>0074 +1D76>007A +1D77>0067 +1D79>0067 +1D7A>0074 0068 +1D7B>0049 +1D7C>0069 +1D7D>0070 +1D7E>0055 +#1D7F> +1D80>0062 +1D81>0064 +1D82>0066 +1D83>0067 +1D84>006B +1D85>006C +1D86>006D +1D87>006E +1D88>0070 +1D89>0072 +1D8A>0073 +#1D8B> +1D8C>0076 +1D8D>0078 +1D8E>007A +1D8F>0061 +1D90>0061 +1D91>0064 +1D92>0065 +1D93>0065 +1D94>0065 +1D95>0061 +1D96>0069 +1D97>006F +#1D98> +1D99>0075 +#1D9A> +1E9C>0073 +1E9D>0073 +1E9F>0064 +1EFB>006C 006C +1EFD>0076 +1EFF>0079 +214E>0066 +#2180> +#2181> +#2182> +2184>0063 +#2185> +#2186> +#2187> +#2188> +2C61>006C +2C65>0061 +2C66>0074 +2C68>0068 +2C6A>006B +2C6C>007A +2C71>0076 +2C73>0077 +2C74>0076 +2C76>0068 +#2C77> +2C78>0065 +#2C79> +2C7A>006F +2C7B>0045 +#A723> +#A725> +#A727> +A729>0074 007A +#A72B> +#A72D> +#A72F> +A730>0046 +A731>0053 +A733>0061 0061 +A735>0061 006F +A737>0061 0075 +A739>0061 0076 +A73B>0061 0076 +A73D>0061 0079 +A73F>0063 +A741>006B +A743>006B +A745>006B +A747>006C +A749>006C +A74B>006F +A74D>006F +A74F>006F 006F +A751>0070 +A753>0070 +A755>0070 +A757>0071 +A759>0071 +A75B>0072 +#A75D> +A75F>0076 +A761>0076 0079 +A763>007A +A765>0074 0068 +A767>0074 0068 +A769>0076 +#A76B> +#A76D> +#A76F> +#A771> +#A772> +#A773> +#A774> +#A775> +#A776> +#A777> +#A778> +A77A>0064 +A77C>0066 +A77F>0067 +A781>006C +A783>0072 +A785>0053 +A787>0074 +A78C>0027 +A7FB>0046 +A7FC>0070 +A7FD>004D +A7FE>0049 +A7FF>004D + +# Cyrillic script "composed" that do not further decompose, so decompose here +# These are from UTR#30 DiacriticFolding.txt + +047D>0461 +048B>0439 +048F>0440 +0491>0433 +0493>0433 +0495>0433 +0497>0436 +0499>0437 +049B>043A +049D>043A +049F>043A +04A3>043D +04A7>043F +04AB>0441 +04AD>0442 +04B1>04AF +04B3>0425 +04B7>04BC +04B9>0447 +04BF>04BC +04C4>043A +04C6>043B +04C8>043D +04CA>043D +04CC>04BC +04CE>043C + +# Additional signs and diacritic, from examination of [:Mark:]&[:Lm:] +0358..035C> +05A2> +05C5> +05C7> +0610..061A> +0640> +06D6..06DE> +06E1..06E4> +06E7..06E9> +06ED> +0653..0656> +0659..065F> +0670> +0711> +07FA> +0816..0817> +081B..0823> +0825..0827> +0829> +082A..082D> +0900>0901 +1714> +1734> +1DC0..1DC3> +1DD0..1DE6> +20D0..20F0> +2DE0..2DFF> +A670..A672> +A802> +10A3F> +11046> +1D165..1D166> +1D242..1D244> + +# Additional Arabic/Hebrew decompositions +05F3>0027 +05F4>0022 +0629>0647 +0649>064A +06A9>0643 +06CC>064A