1 package org.apache.lucene.analysis;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.IOException;
22 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
23 import org.apache.lucene.util.ArrayUtil;
24 import org.apache.lucene.util.RamUsageEstimator;
/**
 * This class converts alphabetic, numeric, and symbolic Unicode characters
 * which are not in the first 127 ASCII characters (the "Basic Latin" Unicode
 * block) into their ASCII equivalents, if one exists.
 *
 * <p>Characters from the following Unicode blocks are converted; however, only
 * those characters with reasonable ASCII alternatives are converted:
 *
 * <ul>
 * <li>C1 Controls and Latin-1 Supplement: <a href="http://www.unicode.org/charts/PDF/U0080.pdf">http://www.unicode.org/charts/PDF/U0080.pdf</a>
 * <li>Latin Extended-A: <a href="http://www.unicode.org/charts/PDF/U0100.pdf">http://www.unicode.org/charts/PDF/U0100.pdf</a>
 * <li>Latin Extended-B: <a href="http://www.unicode.org/charts/PDF/U0180.pdf">http://www.unicode.org/charts/PDF/U0180.pdf</a>
 * <li>Latin Extended Additional: <a href="http://www.unicode.org/charts/PDF/U1E00.pdf">http://www.unicode.org/charts/PDF/U1E00.pdf</a>
 * <li>Latin Extended-C: <a href="http://www.unicode.org/charts/PDF/U2C60.pdf">http://www.unicode.org/charts/PDF/U2C60.pdf</a>
 * <li>Latin Extended-D: <a href="http://www.unicode.org/charts/PDF/UA720.pdf">http://www.unicode.org/charts/PDF/UA720.pdf</a>
 * <li>IPA Extensions: <a href="http://www.unicode.org/charts/PDF/U0250.pdf">http://www.unicode.org/charts/PDF/U0250.pdf</a>
 * <li>Phonetic Extensions: <a href="http://www.unicode.org/charts/PDF/U1D00.pdf">http://www.unicode.org/charts/PDF/U1D00.pdf</a>
 * <li>Phonetic Extensions Supplement: <a href="http://www.unicode.org/charts/PDF/U1D80.pdf">http://www.unicode.org/charts/PDF/U1D80.pdf</a>
 * <li>General Punctuation: <a href="http://www.unicode.org/charts/PDF/U2000.pdf">http://www.unicode.org/charts/PDF/U2000.pdf</a>
 * <li>Superscripts and Subscripts: <a href="http://www.unicode.org/charts/PDF/U2070.pdf">http://www.unicode.org/charts/PDF/U2070.pdf</a>
 * <li>Enclosed Alphanumerics: <a href="http://www.unicode.org/charts/PDF/U2460.pdf">http://www.unicode.org/charts/PDF/U2460.pdf</a>
 * <li>Dingbats: <a href="http://www.unicode.org/charts/PDF/U2700.pdf">http://www.unicode.org/charts/PDF/U2700.pdf</a>
 * <li>Supplemental Punctuation: <a href="http://www.unicode.org/charts/PDF/U2E00.pdf">http://www.unicode.org/charts/PDF/U2E00.pdf</a>
 * <li>Alphabetic Presentation Forms: <a href="http://www.unicode.org/charts/PDF/UFB00.pdf">http://www.unicode.org/charts/PDF/UFB00.pdf</a>
 * <li>Halfwidth and Fullwidth Forms: <a href="http://www.unicode.org/charts/PDF/UFF00.pdf">http://www.unicode.org/charts/PDF/UFF00.pdf</a>
 * </ul>
 *
 * <p>See: <a href="http://en.wikipedia.org/wiki/Latin_characters_in_Unicode">http://en.wikipedia.org/wiki/Latin_characters_in_Unicode</a>
 *
 * <p>The set of character conversions supported by this class is a superset of
 * those supported by Lucene's {@link ISOLatin1AccentFilter} which strips
 * accents from Latin1 characters. For example, 'à' will be replaced by
 * 'a'.
 */
60 public final class ASCIIFoldingFilter extends TokenFilter {
/**
 * Creates a folding filter on top of the given token stream.
 *
 * @param input the token stream whose terms are to be folded
 */
public ASCIIFoldingFilter(TokenStream input) {
  // The wrapped stream is handed to the superclass, which exposes it as "input".
  super(input);
}
66 private char[] output = new char[512];
67 private int outputPos;
68 private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
71 public boolean incrementToken() throws IOException {
72 if (input.incrementToken()) {
73 final char[] buffer = termAtt.buffer();
74 final int length = termAtt.length();
76 // If no characters actually require rewriting then we
77 // just return token as-is:
78 for(int i = 0 ; i < length ; ++i) {
79 final char c = buffer[i];
82 foldToASCII(buffer, length);
83 termAtt.copyBuffer(output, 0, outputPos);
94 * Converts characters above ASCII to their ASCII equivalents. For example,
95 * accents are removed from accented characters.
96 * @param input The string to fold
97 * @param length The number of characters in the input string
99 public void foldToASCII(char[] input, int length)
101 // Worst-case length required:
102 final int maxSizeNeeded = 4 * length;
103 if (output.length < maxSizeNeeded) {
104 output = new char[ArrayUtil.oversize(maxSizeNeeded, RamUsageEstimator.NUM_BYTES_CHAR)];
107 outputPos = foldToASCII(input, 0, output, 0, length);
111 * Converts characters above ASCII to their ASCII equivalents. For example,
112 * accents are removed from accented characters.
113 * @param input The characters to fold
114 * @param inputPos Index of the first character to fold
115 * @param output The result of the folding. Should be of size >= {@code length * 4}.
116 * @param outputPos Index of output where to put the result of the folding
117 * @param length The number of characters to fold
118 * @return length of output
121 public static final int foldToASCII(char input[], int inputPos, char output[], int outputPos, int length)
123 final int end = inputPos + length;
124 for (int pos = inputPos; pos < end ; ++pos) {
125 final char c = input[pos];
127 // Quick test: if it's not in range then just keep current character
129 output[outputPos++] = c;
132 case '\u00C0': // À [LATIN CAPITAL LETTER A WITH GRAVE]
133 case '\u00C1': // Á [LATIN CAPITAL LETTER A WITH ACUTE]
134 case '\u00C2': // Â [LATIN CAPITAL LETTER A WITH CIRCUMFLEX]
135 case '\u00C3': // Ã [LATIN CAPITAL LETTER A WITH TILDE]
136 case '\u00C4': // Ä [LATIN CAPITAL LETTER A WITH DIAERESIS]
137 case '\u00C5': // Å [LATIN CAPITAL LETTER A WITH RING ABOVE]
138 case '\u0100': // Ā [LATIN CAPITAL LETTER A WITH MACRON]
139 case '\u0102': // Ă [LATIN CAPITAL LETTER A WITH BREVE]
140 case '\u0104': // Ą [LATIN CAPITAL LETTER A WITH OGONEK]
141 case '\u018F': // Ə http://en.wikipedia.org/wiki/Schwa [LATIN CAPITAL LETTER SCHWA]
142 case '\u01CD': // Ǎ [LATIN CAPITAL LETTER A WITH CARON]
143 case '\u01DE': // Ǟ [LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON]
144 case '\u01E0': // Ǡ [LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON]
145 case '\u01FA': // Ǻ [LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE]
146 case '\u0200': // Ȁ [LATIN CAPITAL LETTER A WITH DOUBLE GRAVE]
147 case '\u0202': // Ȃ [LATIN CAPITAL LETTER A WITH INVERTED BREVE]
148 case '\u0226': // Ȧ [LATIN CAPITAL LETTER A WITH DOT ABOVE]
149 case '\u023A': // Ⱥ [LATIN CAPITAL LETTER A WITH STROKE]
150 case '\u1D00': // ᴀ [LATIN LETTER SMALL CAPITAL A]
151 case '\u1E00': // Ḁ [LATIN CAPITAL LETTER A WITH RING BELOW]
152 case '\u1EA0': // Ạ [LATIN CAPITAL LETTER A WITH DOT BELOW]
153 case '\u1EA2': // Ả [LATIN CAPITAL LETTER A WITH HOOK ABOVE]
154 case '\u1EA4': // Ấ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE]
155 case '\u1EA6': // Ầ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE]
156 case '\u1EA8': // Ẩ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE]
157 case '\u1EAA': // Ẫ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE]
158 case '\u1EAC': // Ậ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW]
159 case '\u1EAE': // Ắ [LATIN CAPITAL LETTER A WITH BREVE AND ACUTE]
160 case '\u1EB0': // Ằ [LATIN CAPITAL LETTER A WITH BREVE AND GRAVE]
161 case '\u1EB2': // Ẳ [LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE]
162 case '\u1EB4': // Ẵ [LATIN CAPITAL LETTER A WITH BREVE AND TILDE]
163 case '\u1EB6': // Ặ [LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW]
164 case '\u24B6': // Ⓐ [CIRCLED LATIN CAPITAL LETTER A]
165 case '\uFF21': // A [FULLWIDTH LATIN CAPITAL LETTER A]
166 output[outputPos++] = 'A';
168 case '\u00E0': // à [LATIN SMALL LETTER A WITH GRAVE]
169 case '\u00E1': // á [LATIN SMALL LETTER A WITH ACUTE]
170 case '\u00E2': // â [LATIN SMALL LETTER A WITH CIRCUMFLEX]
171 case '\u00E3': // ã [LATIN SMALL LETTER A WITH TILDE]
172 case '\u00E4': // ä [LATIN SMALL LETTER A WITH DIAERESIS]
173 case '\u00E5': // å [LATIN SMALL LETTER A WITH RING ABOVE]
174 case '\u0101': // ā [LATIN SMALL LETTER A WITH MACRON]
175 case '\u0103': // ă [LATIN SMALL LETTER A WITH BREVE]
176 case '\u0105': // ą [LATIN SMALL LETTER A WITH OGONEK]
177 case '\u01CE': // ǎ [LATIN SMALL LETTER A WITH CARON]
178 case '\u01DF': // ǟ [LATIN SMALL LETTER A WITH DIAERESIS AND MACRON]
179 case '\u01E1': // ǡ [LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON]
180 case '\u01FB': // ǻ [LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE]
181 case '\u0201': // ȁ [LATIN SMALL LETTER A WITH DOUBLE GRAVE]
182 case '\u0203': // ȃ [LATIN SMALL LETTER A WITH INVERTED BREVE]
183 case '\u0227': // ȧ [LATIN SMALL LETTER A WITH DOT ABOVE]
184 case '\u0250': // ɐ [LATIN SMALL LETTER TURNED A]
185 case '\u0259': // ə [LATIN SMALL LETTER SCHWA]
186 case '\u025A': // ɚ [LATIN SMALL LETTER SCHWA WITH HOOK]
187 case '\u1D8F': // ᶏ [LATIN SMALL LETTER A WITH RETROFLEX HOOK]
188 case '\u1D95': // ᶕ [LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK]
189 case '\u1E01': // ạ [LATIN SMALL LETTER A WITH RING BELOW]
190 case '\u1E9A': // ả [LATIN SMALL LETTER A WITH RIGHT HALF RING]
191 case '\u1EA1': // ạ [LATIN SMALL LETTER A WITH DOT BELOW]
192 case '\u1EA3': // ả [LATIN SMALL LETTER A WITH HOOK ABOVE]
193 case '\u1EA5': // ấ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE]
194 case '\u1EA7': // ầ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE]
195 case '\u1EA9': // ẩ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE]
196 case '\u1EAB': // ẫ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE]
197 case '\u1EAD': // ậ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW]
198 case '\u1EAF': // ắ [LATIN SMALL LETTER A WITH BREVE AND ACUTE]
199 case '\u1EB1': // ằ [LATIN SMALL LETTER A WITH BREVE AND GRAVE]
200 case '\u1EB3': // ẳ [LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE]
201 case '\u1EB5': // ẵ [LATIN SMALL LETTER A WITH BREVE AND TILDE]
202 case '\u1EB7': // ặ [LATIN SMALL LETTER A WITH BREVE AND DOT BELOW]
203 case '\u2090': // ₐ [LATIN SUBSCRIPT SMALL LETTER A]
204 case '\u2094': // ₔ [LATIN SUBSCRIPT SMALL LETTER SCHWA]
205 case '\u24D0': // ⓐ [CIRCLED LATIN SMALL LETTER A]
206 case '\u2C65': // ⱥ [LATIN SMALL LETTER A WITH STROKE]
207 case '\u2C6F': // Ɐ [LATIN CAPITAL LETTER TURNED A]
208 case '\uFF41': // a [FULLWIDTH LATIN SMALL LETTER A]
209 output[outputPos++] = 'a';
211 case '\uA732': // Ꜳ [LATIN CAPITAL LETTER AA]
212 output[outputPos++] = 'A';
213 output[outputPos++] = 'A';
215 case '\u00C6': // Æ [LATIN CAPITAL LETTER AE]
216 case '\u01E2': // Ǣ [LATIN CAPITAL LETTER AE WITH MACRON]
217 case '\u01FC': // Ǽ [LATIN CAPITAL LETTER AE WITH ACUTE]
218 case '\u1D01': // ᴁ [LATIN LETTER SMALL CAPITAL AE]
219 output[outputPos++] = 'A';
220 output[outputPos++] = 'E';
222 case '\uA734': // Ꜵ [LATIN CAPITAL LETTER AO]
223 output[outputPos++] = 'A';
224 output[outputPos++] = 'O';
226 case '\uA736': // Ꜷ [LATIN CAPITAL LETTER AU]
227 output[outputPos++] = 'A';
228 output[outputPos++] = 'U';
230 case '\uA738': // Ꜹ [LATIN CAPITAL LETTER AV]
231 case '\uA73A': // Ꜻ [LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR]
232 output[outputPos++] = 'A';
233 output[outputPos++] = 'V';
235 case '\uA73C': // Ꜽ [LATIN CAPITAL LETTER AY]
236 output[outputPos++] = 'A';
237 output[outputPos++] = 'Y';
239 case '\u249C': // ⒜ [PARENTHESIZED LATIN SMALL LETTER A]
240 output[outputPos++] = '(';
241 output[outputPos++] = 'a';
242 output[outputPos++] = ')';
244 case '\uA733': // ꜳ [LATIN SMALL LETTER AA]
245 output[outputPos++] = 'a';
246 output[outputPos++] = 'a';
248 case '\u00E6': // æ [LATIN SMALL LETTER AE]
249 case '\u01E3': // ǣ [LATIN SMALL LETTER AE WITH MACRON]
250 case '\u01FD': // ǽ [LATIN SMALL LETTER AE WITH ACUTE]
251 case '\u1D02': // ᴂ [LATIN SMALL LETTER TURNED AE]
252 output[outputPos++] = 'a';
253 output[outputPos++] = 'e';
255 case '\uA735': // ꜵ [LATIN SMALL LETTER AO]
256 output[outputPos++] = 'a';
257 output[outputPos++] = 'o';
259 case '\uA737': // ꜷ [LATIN SMALL LETTER AU]
260 output[outputPos++] = 'a';
261 output[outputPos++] = 'u';
263 case '\uA739': // ꜹ [LATIN SMALL LETTER AV]
264 case '\uA73B': // ꜻ [LATIN SMALL LETTER AV WITH HORIZONTAL BAR]
265 output[outputPos++] = 'a';
266 output[outputPos++] = 'v';
268 case '\uA73D': // ꜽ [LATIN SMALL LETTER AY]
269 output[outputPos++] = 'a';
270 output[outputPos++] = 'y';
272 case '\u0181': // Ɓ [LATIN CAPITAL LETTER B WITH HOOK]
273 case '\u0182': // Ƃ [LATIN CAPITAL LETTER B WITH TOPBAR]
274 case '\u0243': // Ƀ [LATIN CAPITAL LETTER B WITH STROKE]
275 case '\u0299': // ʙ [LATIN LETTER SMALL CAPITAL B]
276 case '\u1D03': // ᴃ [LATIN LETTER SMALL CAPITAL BARRED B]
277 case '\u1E02': // Ḃ [LATIN CAPITAL LETTER B WITH DOT ABOVE]
278 case '\u1E04': // Ḅ [LATIN CAPITAL LETTER B WITH DOT BELOW]
279 case '\u1E06': // Ḇ [LATIN CAPITAL LETTER B WITH LINE BELOW]
280 case '\u24B7': // Ⓑ [CIRCLED LATIN CAPITAL LETTER B]
281 case '\uFF22': // B [FULLWIDTH LATIN CAPITAL LETTER B]
282 output[outputPos++] = 'B';
284 case '\u0180': // ƀ [LATIN SMALL LETTER B WITH STROKE]
285 case '\u0183': // ƃ [LATIN SMALL LETTER B WITH TOPBAR]
286 case '\u0253': // ɓ [LATIN SMALL LETTER B WITH HOOK]
287 case '\u1D6C': // ᵬ [LATIN SMALL LETTER B WITH MIDDLE TILDE]
288 case '\u1D80': // ᶀ [LATIN SMALL LETTER B WITH PALATAL HOOK]
289 case '\u1E03': // ḃ [LATIN SMALL LETTER B WITH DOT ABOVE]
290 case '\u1E05': // ḅ [LATIN SMALL LETTER B WITH DOT BELOW]
291 case '\u1E07': // ḇ [LATIN SMALL LETTER B WITH LINE BELOW]
292 case '\u24D1': // ⓑ [CIRCLED LATIN SMALL LETTER B]
293 case '\uFF42': // b [FULLWIDTH LATIN SMALL LETTER B]
294 output[outputPos++] = 'b';
296 case '\u249D': // ⒝ [PARENTHESIZED LATIN SMALL LETTER B]
297 output[outputPos++] = '(';
298 output[outputPos++] = 'b';
299 output[outputPos++] = ')';
301 case '\u00C7': // Ç [LATIN CAPITAL LETTER C WITH CEDILLA]
302 case '\u0106': // Ć [LATIN CAPITAL LETTER C WITH ACUTE]
303 case '\u0108': // Ĉ [LATIN CAPITAL LETTER C WITH CIRCUMFLEX]
304 case '\u010A': // Ċ [LATIN CAPITAL LETTER C WITH DOT ABOVE]
305 case '\u010C': // Č [LATIN CAPITAL LETTER C WITH CARON]
306 case '\u0187': // Ƈ [LATIN CAPITAL LETTER C WITH HOOK]
307 case '\u023B': // Ȼ [LATIN CAPITAL LETTER C WITH STROKE]
308 case '\u0297': // ʗ [LATIN LETTER STRETCHED C]
309 case '\u1D04': // ᴄ [LATIN LETTER SMALL CAPITAL C]
310 case '\u1E08': // Ḉ [LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE]
311 case '\u24B8': // Ⓒ [CIRCLED LATIN CAPITAL LETTER C]
312 case '\uFF23': // C [FULLWIDTH LATIN CAPITAL LETTER C]
313 output[outputPos++] = 'C';
315 case '\u00E7': // ç [LATIN SMALL LETTER C WITH CEDILLA]
316 case '\u0107': // ć [LATIN SMALL LETTER C WITH ACUTE]
317 case '\u0109': // ĉ [LATIN SMALL LETTER C WITH CIRCUMFLEX]
318 case '\u010B': // ċ [LATIN SMALL LETTER C WITH DOT ABOVE]
319 case '\u010D': // č [LATIN SMALL LETTER C WITH CARON]
320 case '\u0188': // ƈ [LATIN SMALL LETTER C WITH HOOK]
321 case '\u023C': // ȼ [LATIN SMALL LETTER C WITH STROKE]
322 case '\u0255': // ɕ [LATIN SMALL LETTER C WITH CURL]
323 case '\u1E09': // ḉ [LATIN SMALL LETTER C WITH CEDILLA AND ACUTE]
324 case '\u2184': // ↄ [LATIN SMALL LETTER REVERSED C]
325 case '\u24D2': // ⓒ [CIRCLED LATIN SMALL LETTER C]
326 case '\uA73E': // Ꜿ [LATIN CAPITAL LETTER REVERSED C WITH DOT]
327 case '\uA73F': // ꜿ [LATIN SMALL LETTER REVERSED C WITH DOT]
328 case '\uFF43': // c [FULLWIDTH LATIN SMALL LETTER C]
329 output[outputPos++] = 'c';
331 case '\u249E': // ⒞ [PARENTHESIZED LATIN SMALL LETTER C]
332 output[outputPos++] = '(';
333 output[outputPos++] = 'c';
334 output[outputPos++] = ')';
336 case '\u00D0': // Ð [LATIN CAPITAL LETTER ETH]
337 case '\u010E': // Ď [LATIN CAPITAL LETTER D WITH CARON]
338 case '\u0110': // Đ [LATIN CAPITAL LETTER D WITH STROKE]
339 case '\u0189': // Ɖ [LATIN CAPITAL LETTER AFRICAN D]
340 case '\u018A': // Ɗ [LATIN CAPITAL LETTER D WITH HOOK]
341 case '\u018B': // Ƌ [LATIN CAPITAL LETTER D WITH TOPBAR]
342 case '\u1D05': // ᴅ [LATIN LETTER SMALL CAPITAL D]
343 case '\u1D06': // ᴆ [LATIN LETTER SMALL CAPITAL ETH]
344 case '\u1E0A': // Ḋ [LATIN CAPITAL LETTER D WITH DOT ABOVE]
345 case '\u1E0C': // Ḍ [LATIN CAPITAL LETTER D WITH DOT BELOW]
346 case '\u1E0E': // Ḏ [LATIN CAPITAL LETTER D WITH LINE BELOW]
347 case '\u1E10': // Ḑ [LATIN CAPITAL LETTER D WITH CEDILLA]
348 case '\u1E12': // Ḓ [LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW]
349 case '\u24B9': // Ⓓ [CIRCLED LATIN CAPITAL LETTER D]
350 case '\uA779': // Ꝺ [LATIN CAPITAL LETTER INSULAR D]
351 case '\uFF24': // D [FULLWIDTH LATIN CAPITAL LETTER D]
352 output[outputPos++] = 'D';
354 case '\u00F0': // ð [LATIN SMALL LETTER ETH]
355 case '\u010F': // ď [LATIN SMALL LETTER D WITH CARON]
356 case '\u0111': // đ [LATIN SMALL LETTER D WITH STROKE]
357 case '\u018C': // ƌ [LATIN SMALL LETTER D WITH TOPBAR]
358 case '\u0221': // ȡ [LATIN SMALL LETTER D WITH CURL]
359 case '\u0256': // ɖ [LATIN SMALL LETTER D WITH TAIL]
360 case '\u0257': // ɗ [LATIN SMALL LETTER D WITH HOOK]
361 case '\u1D6D': // ᵭ [LATIN SMALL LETTER D WITH MIDDLE TILDE]
362 case '\u1D81': // ᶁ [LATIN SMALL LETTER D WITH PALATAL HOOK]
363 case '\u1D91': // ᶑ [LATIN SMALL LETTER D WITH HOOK AND TAIL]
364 case '\u1E0B': // ḋ [LATIN SMALL LETTER D WITH DOT ABOVE]
365 case '\u1E0D': // ḍ [LATIN SMALL LETTER D WITH DOT BELOW]
366 case '\u1E0F': // ḏ [LATIN SMALL LETTER D WITH LINE BELOW]
367 case '\u1E11': // ḑ [LATIN SMALL LETTER D WITH CEDILLA]
368 case '\u1E13': // ḓ [LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW]
369 case '\u24D3': // ⓓ [CIRCLED LATIN SMALL LETTER D]
370 case '\uA77A': // ꝺ [LATIN SMALL LETTER INSULAR D]
371 case '\uFF44': // d [FULLWIDTH LATIN SMALL LETTER D]
372 output[outputPos++] = 'd';
374 case '\u01C4': // DŽ [LATIN CAPITAL LETTER DZ WITH CARON]
375 case '\u01F1': // DZ [LATIN CAPITAL LETTER DZ]
376 output[outputPos++] = 'D';
377 output[outputPos++] = 'Z';
379 case '\u01C5': // Dž [LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON]
380 case '\u01F2': // Dz [LATIN CAPITAL LETTER D WITH SMALL LETTER Z]
381 output[outputPos++] = 'D';
382 output[outputPos++] = 'z';
384 case '\u249F': // ⒟ [PARENTHESIZED LATIN SMALL LETTER D]
385 output[outputPos++] = '(';
386 output[outputPos++] = 'd';
387 output[outputPos++] = ')';
389 case '\u0238': // ȸ [LATIN SMALL LETTER DB DIGRAPH]
390 output[outputPos++] = 'd';
391 output[outputPos++] = 'b';
393 case '\u01C6': // dž [LATIN SMALL LETTER DZ WITH CARON]
394 case '\u01F3': // dz [LATIN SMALL LETTER DZ]
395 case '\u02A3': // ʣ [LATIN SMALL LETTER DZ DIGRAPH]
396 case '\u02A5': // ʥ [LATIN SMALL LETTER DZ DIGRAPH WITH CURL]
397 output[outputPos++] = 'd';
398 output[outputPos++] = 'z';
400 case '\u00C8': // È [LATIN CAPITAL LETTER E WITH GRAVE]
401 case '\u00C9': // É [LATIN CAPITAL LETTER E WITH ACUTE]
402 case '\u00CA': // Ê [LATIN CAPITAL LETTER E WITH CIRCUMFLEX]
403 case '\u00CB': // Ë [LATIN CAPITAL LETTER E WITH DIAERESIS]
404 case '\u0112': // Ē [LATIN CAPITAL LETTER E WITH MACRON]
405 case '\u0114': // Ĕ [LATIN CAPITAL LETTER E WITH BREVE]
406 case '\u0116': // Ė [LATIN CAPITAL LETTER E WITH DOT ABOVE]
407 case '\u0118': // Ę [LATIN CAPITAL LETTER E WITH OGONEK]
408 case '\u011A': // Ě [LATIN CAPITAL LETTER E WITH CARON]
409 case '\u018E': // Ǝ [LATIN CAPITAL LETTER REVERSED E]
410 case '\u0190': // Ɛ [LATIN CAPITAL LETTER OPEN E]
411 case '\u0204': // Ȅ [LATIN CAPITAL LETTER E WITH DOUBLE GRAVE]
412 case '\u0206': // Ȇ [LATIN CAPITAL LETTER E WITH INVERTED BREVE]
413 case '\u0228': // Ȩ [LATIN CAPITAL LETTER E WITH CEDILLA]
414 case '\u0246': // Ɇ [LATIN CAPITAL LETTER E WITH STROKE]
415 case '\u1D07': // ᴇ [LATIN LETTER SMALL CAPITAL E]
416 case '\u1E14': // Ḕ [LATIN CAPITAL LETTER E WITH MACRON AND GRAVE]
417 case '\u1E16': // Ḗ [LATIN CAPITAL LETTER E WITH MACRON AND ACUTE]
418 case '\u1E18': // Ḙ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW]
419 case '\u1E1A': // Ḛ [LATIN CAPITAL LETTER E WITH TILDE BELOW]
420 case '\u1E1C': // Ḝ [LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE]
421 case '\u1EB8': // Ẹ [LATIN CAPITAL LETTER E WITH DOT BELOW]
422 case '\u1EBA': // Ẻ [LATIN CAPITAL LETTER E WITH HOOK ABOVE]
423 case '\u1EBC': // Ẽ [LATIN CAPITAL LETTER E WITH TILDE]
424 case '\u1EBE': // Ế [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE]
425 case '\u1EC0': // Ề [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE]
426 case '\u1EC2': // Ể [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE]
427 case '\u1EC4': // Ễ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE]
428 case '\u1EC6': // Ệ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW]
429 case '\u24BA': // Ⓔ [CIRCLED LATIN CAPITAL LETTER E]
430 case '\u2C7B': // ⱻ [LATIN LETTER SMALL CAPITAL TURNED E]
431 case '\uFF25': // E [FULLWIDTH LATIN CAPITAL LETTER E]
432 output[outputPos++] = 'E';
434 case '\u00E8': // è [LATIN SMALL LETTER E WITH GRAVE]
435 case '\u00E9': // é [LATIN SMALL LETTER E WITH ACUTE]
436 case '\u00EA': // ê [LATIN SMALL LETTER E WITH CIRCUMFLEX]
437 case '\u00EB': // ë [LATIN SMALL LETTER E WITH DIAERESIS]
438 case '\u0113': // ē [LATIN SMALL LETTER E WITH MACRON]
439 case '\u0115': // ĕ [LATIN SMALL LETTER E WITH BREVE]
440 case '\u0117': // ė [LATIN SMALL LETTER E WITH DOT ABOVE]
441 case '\u0119': // ę [LATIN SMALL LETTER E WITH OGONEK]
442 case '\u011B': // ě [LATIN SMALL LETTER E WITH CARON]
443 case '\u01DD': // ǝ [LATIN SMALL LETTER TURNED E]
444 case '\u0205': // ȅ [LATIN SMALL LETTER E WITH DOUBLE GRAVE]
445 case '\u0207': // ȇ [LATIN SMALL LETTER E WITH INVERTED BREVE]
446 case '\u0229': // ȩ [LATIN SMALL LETTER E WITH CEDILLA]
447 case '\u0247': // ɇ [LATIN SMALL LETTER E WITH STROKE]
448 case '\u0258': // ɘ [LATIN SMALL LETTER REVERSED E]
449 case '\u025B': // ɛ [LATIN SMALL LETTER OPEN E]
450 case '\u025C': // ɜ [LATIN SMALL LETTER REVERSED OPEN E]
451 case '\u025D': // ɝ [LATIN SMALL LETTER REVERSED OPEN E WITH HOOK]
452 case '\u025E': // ɞ [LATIN SMALL LETTER CLOSED REVERSED OPEN E]
453 case '\u029A': // ʚ [LATIN SMALL LETTER CLOSED OPEN E]
454 case '\u1D08': // ᴈ [LATIN SMALL LETTER TURNED OPEN E]
455 case '\u1D92': // ᶒ [LATIN SMALL LETTER E WITH RETROFLEX HOOK]
456 case '\u1D93': // ᶓ [LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK]
457 case '\u1D94': // ᶔ [LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK]
458 case '\u1E15': // ḕ [LATIN SMALL LETTER E WITH MACRON AND GRAVE]
459 case '\u1E17': // ḗ [LATIN SMALL LETTER E WITH MACRON AND ACUTE]
460 case '\u1E19': // ḙ [LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW]
461 case '\u1E1B': // ḛ [LATIN SMALL LETTER E WITH TILDE BELOW]
462 case '\u1E1D': // ḝ [LATIN SMALL LETTER E WITH CEDILLA AND BREVE]
463 case '\u1EB9': // ẹ [LATIN SMALL LETTER E WITH DOT BELOW]
464 case '\u1EBB': // ẻ [LATIN SMALL LETTER E WITH HOOK ABOVE]
465 case '\u1EBD': // ẽ [LATIN SMALL LETTER E WITH TILDE]
466 case '\u1EBF': // ế [LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE]
467 case '\u1EC1': // ề [LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE]
468 case '\u1EC3': // ể [LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE]
469 case '\u1EC5': // ễ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE]
470 case '\u1EC7': // ệ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW]
471 case '\u2091': // ₑ [LATIN SUBSCRIPT SMALL LETTER E]
472 case '\u24D4': // ⓔ [CIRCLED LATIN SMALL LETTER E]
473 case '\u2C78': // ⱸ [LATIN SMALL LETTER E WITH NOTCH]
474 case '\uFF45': // e [FULLWIDTH LATIN SMALL LETTER E]
475 output[outputPos++] = 'e';
477 case '\u24A0': // ⒠ [PARENTHESIZED LATIN SMALL LETTER E]
478 output[outputPos++] = '(';
479 output[outputPos++] = 'e';
480 output[outputPos++] = ')';
482 case '\u0191': // Ƒ [LATIN CAPITAL LETTER F WITH HOOK]
483 case '\u1E1E': // Ḟ [LATIN CAPITAL LETTER F WITH DOT ABOVE]
484 case '\u24BB': // Ⓕ [CIRCLED LATIN CAPITAL LETTER F]
485 case '\uA730': // ꜰ [LATIN LETTER SMALL CAPITAL F]
486 case '\uA77B': // Ꝼ [LATIN CAPITAL LETTER INSULAR F]
487 case '\uA7FB': // ꟻ [LATIN EPIGRAPHIC LETTER REVERSED F]
488 case '\uFF26': // F [FULLWIDTH LATIN CAPITAL LETTER F]
489 output[outputPos++] = 'F';
491 case '\u0192': // ƒ [LATIN SMALL LETTER F WITH HOOK]
492 case '\u1D6E': // ᵮ [LATIN SMALL LETTER F WITH MIDDLE TILDE]
493 case '\u1D82': // ᶂ [LATIN SMALL LETTER F WITH PALATAL HOOK]
494 case '\u1E1F': // ḟ [LATIN SMALL LETTER F WITH DOT ABOVE]
495 case '\u1E9B': // ẛ [LATIN SMALL LETTER LONG S WITH DOT ABOVE]
496 case '\u24D5': // ⓕ [CIRCLED LATIN SMALL LETTER F]
497 case '\uA77C': // ꝼ [LATIN SMALL LETTER INSULAR F]
498 case '\uFF46': // f [FULLWIDTH LATIN SMALL LETTER F]
499 output[outputPos++] = 'f';
501 case '\u24A1': // ⒡ [PARENTHESIZED LATIN SMALL LETTER F]
502 output[outputPos++] = '(';
503 output[outputPos++] = 'f';
504 output[outputPos++] = ')';
506 case '\uFB00': // ff [LATIN SMALL LIGATURE FF]
507 output[outputPos++] = 'f';
508 output[outputPos++] = 'f';
510 case '\uFB03': // ffi [LATIN SMALL LIGATURE FFI]
511 output[outputPos++] = 'f';
512 output[outputPos++] = 'f';
513 output[outputPos++] = 'i';
515 case '\uFB04': // ffl [LATIN SMALL LIGATURE FFL]
516 output[outputPos++] = 'f';
517 output[outputPos++] = 'f';
518 output[outputPos++] = 'l';
520 case '\uFB01': // fi [LATIN SMALL LIGATURE FI]
521 output[outputPos++] = 'f';
522 output[outputPos++] = 'i';
524 case '\uFB02': // fl [LATIN SMALL LIGATURE FL]
525 output[outputPos++] = 'f';
526 output[outputPos++] = 'l';
528 case '\u011C': // Ĝ [LATIN CAPITAL LETTER G WITH CIRCUMFLEX]
529 case '\u011E': // Ğ [LATIN CAPITAL LETTER G WITH BREVE]
530 case '\u0120': // Ġ [LATIN CAPITAL LETTER G WITH DOT ABOVE]
531 case '\u0122': // Ģ [LATIN CAPITAL LETTER G WITH CEDILLA]
532 case '\u0193': // Ɠ [LATIN CAPITAL LETTER G WITH HOOK]
533 case '\u01E4': // Ǥ [LATIN CAPITAL LETTER G WITH STROKE]
534 case '\u01E5': // ǥ [LATIN SMALL LETTER G WITH STROKE]
535 case '\u01E6': // Ǧ [LATIN CAPITAL LETTER G WITH CARON]
536 case '\u01E7': // ǧ [LATIN SMALL LETTER G WITH CARON]
537 case '\u01F4': // Ǵ [LATIN CAPITAL LETTER G WITH ACUTE]
538 case '\u0262': // ɢ [LATIN LETTER SMALL CAPITAL G]
539 case '\u029B': // ʛ [LATIN LETTER SMALL CAPITAL G WITH HOOK]
540 case '\u1E20': // Ḡ [LATIN CAPITAL LETTER G WITH MACRON]
541 case '\u24BC': // Ⓖ [CIRCLED LATIN CAPITAL LETTER G]
542 case '\uA77D': // Ᵹ [LATIN CAPITAL LETTER INSULAR G]
543 case '\uA77E': // Ꝿ [LATIN CAPITAL LETTER TURNED INSULAR G]
544 case '\uFF27': // G [FULLWIDTH LATIN CAPITAL LETTER G]
545 output[outputPos++] = 'G';
547 case '\u011D': // ĝ [LATIN SMALL LETTER G WITH CIRCUMFLEX]
548 case '\u011F': // ğ [LATIN SMALL LETTER G WITH BREVE]
549 case '\u0121': // ġ [LATIN SMALL LETTER G WITH DOT ABOVE]
550 case '\u0123': // ģ [LATIN SMALL LETTER G WITH CEDILLA]
551 case '\u01F5': // ǵ [LATIN SMALL LETTER G WITH ACUTE]
552 case '\u0260': // ɠ [LATIN SMALL LETTER G WITH HOOK]
553 case '\u0261': // ɡ [LATIN SMALL LETTER SCRIPT G]
554 case '\u1D77': // ᵷ [LATIN SMALL LETTER TURNED G]
555 case '\u1D79': // ᵹ [LATIN SMALL LETTER INSULAR G]
556 case '\u1D83': // ᶃ [LATIN SMALL LETTER G WITH PALATAL HOOK]
557 case '\u1E21': // ḡ [LATIN SMALL LETTER G WITH MACRON]
558 case '\u24D6': // ⓖ [CIRCLED LATIN SMALL LETTER G]
559 case '\uA77F': // ꝿ [LATIN SMALL LETTER TURNED INSULAR G]
560 case '\uFF47': // g [FULLWIDTH LATIN SMALL LETTER G]
561 output[outputPos++] = 'g';
563 case '\u24A2': // ⒢ [PARENTHESIZED LATIN SMALL LETTER G]
564 output[outputPos++] = '(';
565 output[outputPos++] = 'g';
566 output[outputPos++] = ')';
568 case '\u0124': // Ĥ [LATIN CAPITAL LETTER H WITH CIRCUMFLEX]
569 case '\u0126': // Ħ [LATIN CAPITAL LETTER H WITH STROKE]
570 case '\u021E': // Ȟ [LATIN CAPITAL LETTER H WITH CARON]
571 case '\u029C': // ʜ [LATIN LETTER SMALL CAPITAL H]
572 case '\u1E22': // Ḣ [LATIN CAPITAL LETTER H WITH DOT ABOVE]
573 case '\u1E24': // Ḥ [LATIN CAPITAL LETTER H WITH DOT BELOW]
574 case '\u1E26': // Ḧ [LATIN CAPITAL LETTER H WITH DIAERESIS]
575 case '\u1E28': // Ḩ [LATIN CAPITAL LETTER H WITH CEDILLA]
576 case '\u1E2A': // Ḫ [LATIN CAPITAL LETTER H WITH BREVE BELOW]
577 case '\u24BD': // Ⓗ [CIRCLED LATIN CAPITAL LETTER H]
578 case '\u2C67': // Ⱨ [LATIN CAPITAL LETTER H WITH DESCENDER]
579 case '\u2C75': // Ⱶ [LATIN CAPITAL LETTER HALF H]
580 case '\uFF28': // H [FULLWIDTH LATIN CAPITAL LETTER H]
581 output[outputPos++] = 'H';
583 case '\u0125': // ĥ [LATIN SMALL LETTER H WITH CIRCUMFLEX]
584 case '\u0127': // ħ [LATIN SMALL LETTER H WITH STROKE]
585 case '\u021F': // ȟ [LATIN SMALL LETTER H WITH CARON]
586 case '\u0265': // ɥ [LATIN SMALL LETTER TURNED H]
587 case '\u0266': // ɦ [LATIN SMALL LETTER H WITH HOOK]
588 case '\u02AE': // ʮ [LATIN SMALL LETTER TURNED H WITH FISHHOOK]
589 case '\u02AF': // ʯ [LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL]
590 case '\u1E23': // ḣ [LATIN SMALL LETTER H WITH DOT ABOVE]
591 case '\u1E25': // ḥ [LATIN SMALL LETTER H WITH DOT BELOW]
592 case '\u1E27': // ḧ [LATIN SMALL LETTER H WITH DIAERESIS]
593 case '\u1E29': // ḩ [LATIN SMALL LETTER H WITH CEDILLA]
594 case '\u1E2B': // ḫ [LATIN SMALL LETTER H WITH BREVE BELOW]
595 case '\u1E96': // ẖ [LATIN SMALL LETTER H WITH LINE BELOW]
596 case '\u24D7': // ⓗ [CIRCLED LATIN SMALL LETTER H]
597 case '\u2C68': // ⱨ [LATIN SMALL LETTER H WITH DESCENDER]
598 case '\u2C76': // ⱶ [LATIN SMALL LETTER HALF H]
599 case '\uFF48': // h [FULLWIDTH LATIN SMALL LETTER H]
600 output[outputPos++] = 'h';
602 case '\u01F6': // Ƕ http://en.wikipedia.org/wiki/Hwair [LATIN CAPITAL LETTER HWAIR]
603 output[outputPos++] = 'H';
604 output[outputPos++] = 'V';
606 case '\u24A3': // ⒣ [PARENTHESIZED LATIN SMALL LETTER H]
607 output[outputPos++] = '(';
608 output[outputPos++] = 'h';
609 output[outputPos++] = ')';
611 case '\u0195': // ƕ [LATIN SMALL LETTER HV]
612 output[outputPos++] = 'h';
613 output[outputPos++] = 'v';
615 case '\u00CC': // Ì [LATIN CAPITAL LETTER I WITH GRAVE]
616 case '\u00CD': // Í [LATIN CAPITAL LETTER I WITH ACUTE]
617 case '\u00CE': // Î [LATIN CAPITAL LETTER I WITH CIRCUMFLEX]
618 case '\u00CF': // Ï [LATIN CAPITAL LETTER I WITH DIAERESIS]
619 case '\u0128': // Ĩ [LATIN CAPITAL LETTER I WITH TILDE]
620 case '\u012A': // Ī [LATIN CAPITAL LETTER I WITH MACRON]
621 case '\u012C': // Ĭ [LATIN CAPITAL LETTER I WITH BREVE]
622 case '\u012E': // Į [LATIN CAPITAL LETTER I WITH OGONEK]
623 case '\u0130': // İ [LATIN CAPITAL LETTER I WITH DOT ABOVE]
624 case '\u0196': // Ɩ [LATIN CAPITAL LETTER IOTA]
625 case '\u0197': // Ɨ [LATIN CAPITAL LETTER I WITH STROKE]
626 case '\u01CF': // Ǐ [LATIN CAPITAL LETTER I WITH CARON]
627 case '\u0208': // Ȉ [LATIN CAPITAL LETTER I WITH DOUBLE GRAVE]
628 case '\u020A': // Ȋ [LATIN CAPITAL LETTER I WITH INVERTED BREVE]
629 case '\u026A': // ɪ [LATIN LETTER SMALL CAPITAL I]
630 case '\u1D7B': // ᵻ [LATIN SMALL CAPITAL LETTER I WITH STROKE]
631 case '\u1E2C': // Ḭ [LATIN CAPITAL LETTER I WITH TILDE BELOW]
632 case '\u1E2E': // Ḯ [LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE]
633 case '\u1EC8': // Ỉ [LATIN CAPITAL LETTER I WITH HOOK ABOVE]
634 case '\u1ECA': // Ị [LATIN CAPITAL LETTER I WITH DOT BELOW]
635 case '\u24BE': // Ⓘ [CIRCLED LATIN CAPITAL LETTER I]
636 case '\uA7FE': // ꟾ [LATIN EPIGRAPHIC LETTER I LONGA]
637 case '\uFF29': // I [FULLWIDTH LATIN CAPITAL LETTER I]
638 output[outputPos++] = 'I';
640 case '\u00EC': // ì [LATIN SMALL LETTER I WITH GRAVE]
641 case '\u00ED': // í [LATIN SMALL LETTER I WITH ACUTE]
642 case '\u00EE': // î [LATIN SMALL LETTER I WITH CIRCUMFLEX]
643 case '\u00EF': // ï [LATIN SMALL LETTER I WITH DIAERESIS]
644 case '\u0129': // ĩ [LATIN SMALL LETTER I WITH TILDE]
645 case '\u012B': // ī [LATIN SMALL LETTER I WITH MACRON]
646 case '\u012D': // ĭ [LATIN SMALL LETTER I WITH BREVE]
647 case '\u012F': // į [LATIN SMALL LETTER I WITH OGONEK]
648 case '\u0131': // ı [LATIN SMALL LETTER DOTLESS I]
649 case '\u01D0': // ǐ [LATIN SMALL LETTER I WITH CARON]
650 case '\u0209': // ȉ [LATIN SMALL LETTER I WITH DOUBLE GRAVE]
651 case '\u020B': // ȋ [LATIN SMALL LETTER I WITH INVERTED BREVE]
652 case '\u0268': // ɨ [LATIN SMALL LETTER I WITH STROKE]
653 case '\u1D09': // ᴉ [LATIN SMALL LETTER TURNED I]
654 case '\u1D62': // ᵢ [LATIN SUBSCRIPT SMALL LETTER I]
655 case '\u1D7C': // ᵼ [LATIN SMALL LETTER IOTA WITH STROKE]
656 case '\u1D96': // ᶖ [LATIN SMALL LETTER I WITH RETROFLEX HOOK]
657 case '\u1E2D': // ḭ [LATIN SMALL LETTER I WITH TILDE BELOW]
658 case '\u1E2F': // ḯ [LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE]
659 case '\u1EC9': // ỉ [LATIN SMALL LETTER I WITH HOOK ABOVE]
660 case '\u1ECB': // ị [LATIN SMALL LETTER I WITH DOT BELOW]
661 case '\u2071': // ⁱ [SUPERSCRIPT LATIN SMALL LETTER I]
662 case '\u24D8': // ⓘ [CIRCLED LATIN SMALL LETTER I]
663 case '\uFF49': // i [FULLWIDTH LATIN SMALL LETTER I]
664 output[outputPos++] = 'i';
666 case '\u0132': // IJ [LATIN CAPITAL LIGATURE IJ]
667 output[outputPos++] = 'I';
668 output[outputPos++] = 'J';
670 case '\u24A4': // ⒤ [PARENTHESIZED LATIN SMALL LETTER I]
671 output[outputPos++] = '(';
672 output[outputPos++] = 'i';
673 output[outputPos++] = ')';
675 case '\u0133': // ij [LATIN SMALL LIGATURE IJ]
676 output[outputPos++] = 'i';
677 output[outputPos++] = 'j';
679 case '\u0134': // Ĵ [LATIN CAPITAL LETTER J WITH CIRCUMFLEX]
680 case '\u0248': // Ɉ [LATIN CAPITAL LETTER J WITH STROKE]
681 case '\u1D0A': // ᴊ [LATIN LETTER SMALL CAPITAL J]
682 case '\u24BF': // Ⓙ [CIRCLED LATIN CAPITAL LETTER J]
683 case '\uFF2A': // J [FULLWIDTH LATIN CAPITAL LETTER J]
684 output[outputPos++] = 'J';
686 case '\u0135': // ĵ [LATIN SMALL LETTER J WITH CIRCUMFLEX]
687 case '\u01F0': // ǰ [LATIN SMALL LETTER J WITH CARON]
688 case '\u0237': // ȷ [LATIN SMALL LETTER DOTLESS J]
689 case '\u0249': // ɉ [LATIN SMALL LETTER J WITH STROKE]
690 case '\u025F': // ɟ [LATIN SMALL LETTER DOTLESS J WITH STROKE]
691 case '\u0284': // ʄ [LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK]
692 case '\u029D': // ʝ [LATIN SMALL LETTER J WITH CROSSED-TAIL]
693 case '\u24D9': // ⓙ [CIRCLED LATIN SMALL LETTER J]
694 case '\u2C7C': // ⱼ [LATIN SUBSCRIPT SMALL LETTER J]
695 case '\uFF4A': // j [FULLWIDTH LATIN SMALL LETTER J]
696 output[outputPos++] = 'j';
698 case '\u24A5': // ⒥ [PARENTHESIZED LATIN SMALL LETTER J]
699 output[outputPos++] = '(';
700 output[outputPos++] = 'j';
701 output[outputPos++] = ')';
703 case '\u0136': // Ķ [LATIN CAPITAL LETTER K WITH CEDILLA]
704 case '\u0198': // Ƙ [LATIN CAPITAL LETTER K WITH HOOK]
705 case '\u01E8': // Ǩ [LATIN CAPITAL LETTER K WITH CARON]
706 case '\u1D0B': // ᴋ [LATIN LETTER SMALL CAPITAL K]
707 case '\u1E30': // Ḱ [LATIN CAPITAL LETTER K WITH ACUTE]
708 case '\u1E32': // Ḳ [LATIN CAPITAL LETTER K WITH DOT BELOW]
709 case '\u1E34': // Ḵ [LATIN CAPITAL LETTER K WITH LINE BELOW]
710 case '\u24C0': // Ⓚ [CIRCLED LATIN CAPITAL LETTER K]
711 case '\u2C69': // Ⱪ [LATIN CAPITAL LETTER K WITH DESCENDER]
712 case '\uA740': // Ꝁ [LATIN CAPITAL LETTER K WITH STROKE]
713 case '\uA742': // Ꝃ [LATIN CAPITAL LETTER K WITH DIAGONAL STROKE]
714 case '\uA744': // Ꝅ [LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE]
715 case '\uFF2B': // K [FULLWIDTH LATIN CAPITAL LETTER K]
716 output[outputPos++] = 'K';
718 case '\u0137': // ķ [LATIN SMALL LETTER K WITH CEDILLA]
719 case '\u0199': // ƙ [LATIN SMALL LETTER K WITH HOOK]
720 case '\u01E9': // ǩ [LATIN SMALL LETTER K WITH CARON]
721 case '\u029E': // ʞ [LATIN SMALL LETTER TURNED K]
722 case '\u1D84': // ᶄ [LATIN SMALL LETTER K WITH PALATAL HOOK]
723 case '\u1E31': // ḱ [LATIN SMALL LETTER K WITH ACUTE]
724 case '\u1E33': // ḳ [LATIN SMALL LETTER K WITH DOT BELOW]
725 case '\u1E35': // ḵ [LATIN SMALL LETTER K WITH LINE BELOW]
726 case '\u24DA': // ⓚ [CIRCLED LATIN SMALL LETTER K]
727 case '\u2C6A': // ⱪ [LATIN SMALL LETTER K WITH DESCENDER]
728 case '\uA741': // ꝁ [LATIN SMALL LETTER K WITH STROKE]
729 case '\uA743': // ꝃ [LATIN SMALL LETTER K WITH DIAGONAL STROKE]
730 case '\uA745': // ꝅ [LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE]
731 case '\uFF4B': // k [FULLWIDTH LATIN SMALL LETTER K]
732 output[outputPos++] = 'k';
734 case '\u24A6': // ⒦ [PARENTHESIZED LATIN SMALL LETTER K]
735 output[outputPos++] = '(';
736 output[outputPos++] = 'k';
737 output[outputPos++] = ')';
739 case '\u0139': // Ĺ [LATIN CAPITAL LETTER L WITH ACUTE]
740 case '\u013B': // Ļ [LATIN CAPITAL LETTER L WITH CEDILLA]
741 case '\u013D': // Ľ [LATIN CAPITAL LETTER L WITH CARON]
742 case '\u013F': // Ŀ [LATIN CAPITAL LETTER L WITH MIDDLE DOT]
743 case '\u0141': // Ł [LATIN CAPITAL LETTER L WITH STROKE]
744 case '\u023D': // Ƚ [LATIN CAPITAL LETTER L WITH BAR]
745 case '\u029F': // ʟ [LATIN LETTER SMALL CAPITAL L]
746 case '\u1D0C': // ᴌ [LATIN LETTER SMALL CAPITAL L WITH STROKE]
747 case '\u1E36': // Ḷ [LATIN CAPITAL LETTER L WITH DOT BELOW]
748 case '\u1E38': // Ḹ [LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON]
749 case '\u1E3A': // Ḻ [LATIN CAPITAL LETTER L WITH LINE BELOW]
750 case '\u1E3C': // Ḽ [LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW]
751 case '\u24C1': // Ⓛ [CIRCLED LATIN CAPITAL LETTER L]
752 case '\u2C60': // Ⱡ [LATIN CAPITAL LETTER L WITH DOUBLE BAR]
753 case '\u2C62': // Ɫ [LATIN CAPITAL LETTER L WITH MIDDLE TILDE]
754 case '\uA746': // Ꝇ [LATIN CAPITAL LETTER BROKEN L]
755 case '\uA748': // Ꝉ [LATIN CAPITAL LETTER L WITH HIGH STROKE]
756 case '\uA780': // Ꞁ [LATIN CAPITAL LETTER TURNED L]
757 case '\uFF2C': // L [FULLWIDTH LATIN CAPITAL LETTER L]
758 output[outputPos++] = 'L';
760 case '\u013A': // ĺ [LATIN SMALL LETTER L WITH ACUTE]
761 case '\u013C': // ļ [LATIN SMALL LETTER L WITH CEDILLA]
762 case '\u013E': // ľ [LATIN SMALL LETTER L WITH CARON]
763 case '\u0140': // ŀ [LATIN SMALL LETTER L WITH MIDDLE DOT]
764 case '\u0142': // ł [LATIN SMALL LETTER L WITH STROKE]
765 case '\u019A': // ƚ [LATIN SMALL LETTER L WITH BAR]
766 case '\u0234': // ȴ [LATIN SMALL LETTER L WITH CURL]
767 case '\u026B': // ɫ [LATIN SMALL LETTER L WITH MIDDLE TILDE]
768 case '\u026C': // ɬ [LATIN SMALL LETTER L WITH BELT]
769 case '\u026D': // ɭ [LATIN SMALL LETTER L WITH RETROFLEX HOOK]
770 case '\u1D85': // ᶅ [LATIN SMALL LETTER L WITH PALATAL HOOK]
771 case '\u1E37': // ḷ [LATIN SMALL LETTER L WITH DOT BELOW]
772 case '\u1E39': // ḹ [LATIN SMALL LETTER L WITH DOT BELOW AND MACRON]
773 case '\u1E3B': // ḻ [LATIN SMALL LETTER L WITH LINE BELOW]
774 case '\u1E3D': // ḽ [LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW]
775 case '\u24DB': // ⓛ [CIRCLED LATIN SMALL LETTER L]
776 case '\u2C61': // ⱡ [LATIN SMALL LETTER L WITH DOUBLE BAR]
777 case '\uA747': // ꝇ [LATIN SMALL LETTER BROKEN L]
778 case '\uA749': // ꝉ [LATIN SMALL LETTER L WITH HIGH STROKE]
779 case '\uA781': // ꞁ [LATIN SMALL LETTER TURNED L]
780 case '\uFF4C': // l [FULLWIDTH LATIN SMALL LETTER L]
781 output[outputPos++] = 'l';
783 case '\u01C7': // LJ [LATIN CAPITAL LETTER LJ]
784 output[outputPos++] = 'L';
785 output[outputPos++] = 'J';
787 case '\u1EFA': // Ỻ [LATIN CAPITAL LETTER MIDDLE-WELSH LL]
788 output[outputPos++] = 'L';
789 output[outputPos++] = 'L';
791 case '\u01C8': // Lj [LATIN CAPITAL LETTER L WITH SMALL LETTER J]
792 output[outputPos++] = 'L';
793 output[outputPos++] = 'j';
795 case '\u24A7': // ⒧ [PARENTHESIZED LATIN SMALL LETTER L]
796 output[outputPos++] = '(';
797 output[outputPos++] = 'l';
798 output[outputPos++] = ')';
800 case '\u01C9': // lj [LATIN SMALL LETTER LJ]
801 output[outputPos++] = 'l';
802 output[outputPos++] = 'j';
804 case '\u1EFB': // ỻ [LATIN SMALL LETTER MIDDLE-WELSH LL]
805 output[outputPos++] = 'l';
806 output[outputPos++] = 'l';
808 case '\u02AA': // ʪ [LATIN SMALL LETTER LS DIGRAPH]
809 output[outputPos++] = 'l';
810 output[outputPos++] = 's';
812 case '\u02AB': // ʫ [LATIN SMALL LETTER LZ DIGRAPH]
813 output[outputPos++] = 'l';
814 output[outputPos++] = 'z';
816 case '\u019C': // Ɯ [LATIN CAPITAL LETTER TURNED M]
817 case '\u1D0D': // ᴍ [LATIN LETTER SMALL CAPITAL M]
818 case '\u1E3E': // Ḿ [LATIN CAPITAL LETTER M WITH ACUTE]
819 case '\u1E40': // Ṁ [LATIN CAPITAL LETTER M WITH DOT ABOVE]
820 case '\u1E42': // Ṃ [LATIN CAPITAL LETTER M WITH DOT BELOW]
821 case '\u24C2': // Ⓜ [CIRCLED LATIN CAPITAL LETTER M]
822 case '\u2C6E': // Ɱ [LATIN CAPITAL LETTER M WITH HOOK]
823 case '\uA7FD': // ꟽ [LATIN EPIGRAPHIC LETTER INVERTED M]
824 case '\uA7FF': // ꟿ [LATIN EPIGRAPHIC LETTER ARCHAIC M]
825 case '\uFF2D': // M [FULLWIDTH LATIN CAPITAL LETTER M]
826 output[outputPos++] = 'M';
828 case '\u026F': // ɯ [LATIN SMALL LETTER TURNED M]
829 case '\u0270': // ɰ [LATIN SMALL LETTER TURNED M WITH LONG LEG]
830 case '\u0271': // ɱ [LATIN SMALL LETTER M WITH HOOK]
831 case '\u1D6F': // ᵯ [LATIN SMALL LETTER M WITH MIDDLE TILDE]
832 case '\u1D86': // ᶆ [LATIN SMALL LETTER M WITH PALATAL HOOK]
833 case '\u1E3F': // ḿ [LATIN SMALL LETTER M WITH ACUTE]
834 case '\u1E41': // ṁ [LATIN SMALL LETTER M WITH DOT ABOVE]
835 case '\u1E43': // ṃ [LATIN SMALL LETTER M WITH DOT BELOW]
836 case '\u24DC': // ⓜ [CIRCLED LATIN SMALL LETTER M]
837 case '\uFF4D': // m [FULLWIDTH LATIN SMALL LETTER M]
838 output[outputPos++] = 'm';
840 case '\u24A8': // ⒨ [PARENTHESIZED LATIN SMALL LETTER M]
841 output[outputPos++] = '(';
842 output[outputPos++] = 'm';
843 output[outputPos++] = ')';
845 case '\u00D1': // Ñ [LATIN CAPITAL LETTER N WITH TILDE]
846 case '\u0143': // Ń [LATIN CAPITAL LETTER N WITH ACUTE]
847 case '\u0145': // Ņ [LATIN CAPITAL LETTER N WITH CEDILLA]
848 case '\u0147': // Ň [LATIN CAPITAL LETTER N WITH CARON]
849 case '\u014A': // Ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN CAPITAL LETTER ENG]
850 case '\u019D': // Ɲ [LATIN CAPITAL LETTER N WITH LEFT HOOK]
851 case '\u01F8': // Ǹ [LATIN CAPITAL LETTER N WITH GRAVE]
852 case '\u0220': // Ƞ [LATIN CAPITAL LETTER N WITH LONG RIGHT LEG]
853 case '\u0274': // ɴ [LATIN LETTER SMALL CAPITAL N]
854 case '\u1D0E': // ᴎ [LATIN LETTER SMALL CAPITAL REVERSED N]
855 case '\u1E44': // Ṅ [LATIN CAPITAL LETTER N WITH DOT ABOVE]
856 case '\u1E46': // Ṇ [LATIN CAPITAL LETTER N WITH DOT BELOW]
857 case '\u1E48': // Ṉ [LATIN CAPITAL LETTER N WITH LINE BELOW]
858 case '\u1E4A': // Ṋ [LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW]
859 case '\u24C3': // Ⓝ [CIRCLED LATIN CAPITAL LETTER N]
860 case '\uFF2E': // N [FULLWIDTH LATIN CAPITAL LETTER N]
861 output[outputPos++] = 'N';
863 case '\u00F1': // ñ [LATIN SMALL LETTER N WITH TILDE]
864 case '\u0144': // ń [LATIN SMALL LETTER N WITH ACUTE]
865 case '\u0146': // ņ [LATIN SMALL LETTER N WITH CEDILLA]
866 case '\u0148': // ň [LATIN SMALL LETTER N WITH CARON]
867 case '\u0149': // ʼn [LATIN SMALL LETTER N PRECEDED BY APOSTROPHE]
868 case '\u014B': // ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN SMALL LETTER ENG]
869 case '\u019E': // ƞ [LATIN SMALL LETTER N WITH LONG RIGHT LEG]
870 case '\u01F9': // ǹ [LATIN SMALL LETTER N WITH GRAVE]
871 case '\u0235': // ȵ [LATIN SMALL LETTER N WITH CURL]
872 case '\u0272': // ɲ [LATIN SMALL LETTER N WITH LEFT HOOK]
873 case '\u0273': // ɳ [LATIN SMALL LETTER N WITH RETROFLEX HOOK]
874 case '\u1D70': // ᵰ [LATIN SMALL LETTER N WITH MIDDLE TILDE]
875 case '\u1D87': // ᶇ [LATIN SMALL LETTER N WITH PALATAL HOOK]
876 case '\u1E45': // ṅ [LATIN SMALL LETTER N WITH DOT ABOVE]
877 case '\u1E47': // ṇ [LATIN SMALL LETTER N WITH DOT BELOW]
878 case '\u1E49': // ṉ [LATIN SMALL LETTER N WITH LINE BELOW]
879 case '\u1E4B': // ṋ [LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW]
880 case '\u207F': // ⁿ [SUPERSCRIPT LATIN SMALL LETTER N]
881 case '\u24DD': // ⓝ [CIRCLED LATIN SMALL LETTER N]
882 case '\uFF4E': // n [FULLWIDTH LATIN SMALL LETTER N]
883 output[outputPos++] = 'n';
885 case '\u01CA': // NJ [LATIN CAPITAL LETTER NJ]
886 output[outputPos++] = 'N';
887 output[outputPos++] = 'J';
889 case '\u01CB': // Nj [LATIN CAPITAL LETTER N WITH SMALL LETTER J]
890 output[outputPos++] = 'N';
891 output[outputPos++] = 'j';
893 case '\u24A9': // ⒩ [PARENTHESIZED LATIN SMALL LETTER N]
894 output[outputPos++] = '(';
895 output[outputPos++] = 'n';
896 output[outputPos++] = ')';
898 case '\u01CC': // nj [LATIN SMALL LETTER NJ]
899 output[outputPos++] = 'n';
900 output[outputPos++] = 'j';
902 case '\u00D2': // Ò [LATIN CAPITAL LETTER O WITH GRAVE]
903 case '\u00D3': // Ó [LATIN CAPITAL LETTER O WITH ACUTE]
904 case '\u00D4': // Ô [LATIN CAPITAL LETTER O WITH CIRCUMFLEX]
905 case '\u00D5': // Õ [LATIN CAPITAL LETTER O WITH TILDE]
906 case '\u00D6': // Ö [LATIN CAPITAL LETTER O WITH DIAERESIS]
907 case '\u00D8': // Ø [LATIN CAPITAL LETTER O WITH STROKE]
908 case '\u014C': // Ō [LATIN CAPITAL LETTER O WITH MACRON]
909 case '\u014E': // Ŏ [LATIN CAPITAL LETTER O WITH BREVE]
910 case '\u0150': // Ő [LATIN CAPITAL LETTER O WITH DOUBLE ACUTE]
911 case '\u0186': // Ɔ [LATIN CAPITAL LETTER OPEN O]
912 case '\u019F': // Ɵ [LATIN CAPITAL LETTER O WITH MIDDLE TILDE]
913 case '\u01A0': // Ơ [LATIN CAPITAL LETTER O WITH HORN]
914 case '\u01D1': // Ǒ [LATIN CAPITAL LETTER O WITH CARON]
915 case '\u01EA': // Ǫ [LATIN CAPITAL LETTER O WITH OGONEK]
916 case '\u01EC': // Ǭ [LATIN CAPITAL LETTER O WITH OGONEK AND MACRON]
917 case '\u01FE': // Ǿ [LATIN CAPITAL LETTER O WITH STROKE AND ACUTE]
918 case '\u020C': // Ȍ [LATIN CAPITAL LETTER O WITH DOUBLE GRAVE]
919 case '\u020E': // Ȏ [LATIN CAPITAL LETTER O WITH INVERTED BREVE]
920 case '\u022A': // Ȫ [LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON]
921 case '\u022C': // Ȭ [LATIN CAPITAL LETTER O WITH TILDE AND MACRON]
922 case '\u022E': // Ȯ [LATIN CAPITAL LETTER O WITH DOT ABOVE]
923 case '\u0230': // Ȱ [LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON]
924 case '\u1D0F': // ᴏ [LATIN LETTER SMALL CAPITAL O]
925 case '\u1D10': // ᴐ [LATIN LETTER SMALL CAPITAL OPEN O]
926 case '\u1E4C': // Ṍ [LATIN CAPITAL LETTER O WITH TILDE AND ACUTE]
927 case '\u1E4E': // Ṏ [LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS]
928 case '\u1E50': // Ṑ [LATIN CAPITAL LETTER O WITH MACRON AND GRAVE]
929 case '\u1E52': // Ṓ [LATIN CAPITAL LETTER O WITH MACRON AND ACUTE]
930 case '\u1ECC': // Ọ [LATIN CAPITAL LETTER O WITH DOT BELOW]
931 case '\u1ECE': // Ỏ [LATIN CAPITAL LETTER O WITH HOOK ABOVE]
932 case '\u1ED0': // Ố [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE]
933 case '\u1ED2': // Ồ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE]
934 case '\u1ED4': // Ổ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE]
935 case '\u1ED6': // Ỗ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE]
936 case '\u1ED8': // Ộ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW]
937 case '\u1EDA': // Ớ [LATIN CAPITAL LETTER O WITH HORN AND ACUTE]
938 case '\u1EDC': // Ờ [LATIN CAPITAL LETTER O WITH HORN AND GRAVE]
939 case '\u1EDE': // Ở [LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE]
940 case '\u1EE0': // Ỡ [LATIN CAPITAL LETTER O WITH HORN AND TILDE]
941 case '\u1EE2': // Ợ [LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW]
942 case '\u24C4': // Ⓞ [CIRCLED LATIN CAPITAL LETTER O]
943 case '\uA74A': // Ꝋ [LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY]
944 case '\uA74C': // Ꝍ [LATIN CAPITAL LETTER O WITH LOOP]
945 case '\uFF2F': // O [FULLWIDTH LATIN CAPITAL LETTER O]
946 output[outputPos++] = 'O';
948 case '\u00F2': // ò [LATIN SMALL LETTER O WITH GRAVE]
949 case '\u00F3': // ó [LATIN SMALL LETTER O WITH ACUTE]
950 case '\u00F4': // ô [LATIN SMALL LETTER O WITH CIRCUMFLEX]
951 case '\u00F5': // õ [LATIN SMALL LETTER O WITH TILDE]
952 case '\u00F6': // ö [LATIN SMALL LETTER O WITH DIAERESIS]
953 case '\u00F8': // ø [LATIN SMALL LETTER O WITH STROKE]
954 case '\u014D': // ō [LATIN SMALL LETTER O WITH MACRON]
955 case '\u014F': // ŏ [LATIN SMALL LETTER O WITH BREVE]
956 case '\u0151': // ő [LATIN SMALL LETTER O WITH DOUBLE ACUTE]
957 case '\u01A1': // ơ [LATIN SMALL LETTER O WITH HORN]
958 case '\u01D2': // ǒ [LATIN SMALL LETTER O WITH CARON]
959 case '\u01EB': // ǫ [LATIN SMALL LETTER O WITH OGONEK]
960 case '\u01ED': // ǭ [LATIN SMALL LETTER O WITH OGONEK AND MACRON]
961 case '\u01FF': // ǿ [LATIN SMALL LETTER O WITH STROKE AND ACUTE]
962 case '\u020D': // ȍ [LATIN SMALL LETTER O WITH DOUBLE GRAVE]
963 case '\u020F': // ȏ [LATIN SMALL LETTER O WITH INVERTED BREVE]
964 case '\u022B': // ȫ [LATIN SMALL LETTER O WITH DIAERESIS AND MACRON]
965 case '\u022D': // ȭ [LATIN SMALL LETTER O WITH TILDE AND MACRON]
966 case '\u022F': // ȯ [LATIN SMALL LETTER O WITH DOT ABOVE]
967 case '\u0231': // ȱ [LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON]
968 case '\u0254': // ɔ [LATIN SMALL LETTER OPEN O]
969 case '\u0275': // ɵ [LATIN SMALL LETTER BARRED O]
970 case '\u1D16': // ᴖ [LATIN SMALL LETTER TOP HALF O]
971 case '\u1D17': // ᴗ [LATIN SMALL LETTER BOTTOM HALF O]
972 case '\u1D97': // ᶗ [LATIN SMALL LETTER OPEN O WITH RETROFLEX HOOK]
973 case '\u1E4D': // ṍ [LATIN SMALL LETTER O WITH TILDE AND ACUTE]
974 case '\u1E4F': // ṏ [LATIN SMALL LETTER O WITH TILDE AND DIAERESIS]
975 case '\u1E51': // ṑ [LATIN SMALL LETTER O WITH MACRON AND GRAVE]
976 case '\u1E53': // ṓ [LATIN SMALL LETTER O WITH MACRON AND ACUTE]
977 case '\u1ECD': // ọ [LATIN SMALL LETTER O WITH DOT BELOW]
978 case '\u1ECF': // ỏ [LATIN SMALL LETTER O WITH HOOK ABOVE]
979 case '\u1ED1': // ố [LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE]
980 case '\u1ED3': // ồ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE]
981 case '\u1ED5': // ổ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE]
982 case '\u1ED7': // ỗ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE]
983 case '\u1ED9': // ộ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW]
984 case '\u1EDB': // ớ [LATIN SMALL LETTER O WITH HORN AND ACUTE]
985 case '\u1EDD': // ờ [LATIN SMALL LETTER O WITH HORN AND GRAVE]
986 case '\u1EDF': // ở [LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE]
987 case '\u1EE1': // ỡ [LATIN SMALL LETTER O WITH HORN AND TILDE]
988 case '\u1EE3': // ợ [LATIN SMALL LETTER O WITH HORN AND DOT BELOW]
989 case '\u2092': // ₒ [LATIN SUBSCRIPT SMALL LETTER O]
990 case '\u24DE': // ⓞ [CIRCLED LATIN SMALL LETTER O]
991 case '\u2C7A': // ⱺ [LATIN SMALL LETTER O WITH LOW RING INSIDE]
992 case '\uA74B': // ꝋ [LATIN SMALL LETTER O WITH LONG STROKE OVERLAY]
993 case '\uA74D': // ꝍ [LATIN SMALL LETTER O WITH LOOP]
994 case '\uFF4F': // o [FULLWIDTH LATIN SMALL LETTER O]
995 output[outputPos++] = 'o';
997 case '\u0152': // Œ [LATIN CAPITAL LIGATURE OE]
998 case '\u0276': // ɶ [LATIN LETTER SMALL CAPITAL OE]
999 output[outputPos++] = 'O';
1000 output[outputPos++] = 'E';
1002 case '\uA74E': // Ꝏ [LATIN CAPITAL LETTER OO]
1003 output[outputPos++] = 'O';
1004 output[outputPos++] = 'O';
1006 case '\u0222': // Ȣ http://en.wikipedia.org/wiki/OU [LATIN CAPITAL LETTER OU]
1007 case '\u1D15': // ᴕ [LATIN LETTER SMALL CAPITAL OU]
1008 output[outputPos++] = 'O';
1009 output[outputPos++] = 'U';
1011 case '\u24AA': // ⒪ [PARENTHESIZED LATIN SMALL LETTER O]
1012 output[outputPos++] = '(';
1013 output[outputPos++] = 'o';
1014 output[outputPos++] = ')';
1016 case '\u0153': // œ [LATIN SMALL LIGATURE OE]
1017 case '\u1D14': // ᴔ [LATIN SMALL LETTER TURNED OE]
1018 output[outputPos++] = 'o';
1019 output[outputPos++] = 'e';
1021 case '\uA74F': // ꝏ [LATIN SMALL LETTER OO]
1022 output[outputPos++] = 'o';
1023 output[outputPos++] = 'o';
1025 case '\u0223': // ȣ http://en.wikipedia.org/wiki/OU [LATIN SMALL LETTER OU]
1026 output[outputPos++] = 'o';
1027 output[outputPos++] = 'u';
1029 case '\u01A4': // Ƥ [LATIN CAPITAL LETTER P WITH HOOK]
1030 case '\u1D18': // ᴘ [LATIN LETTER SMALL CAPITAL P]
1031 case '\u1E54': // Ṕ [LATIN CAPITAL LETTER P WITH ACUTE]
1032 case '\u1E56': // Ṗ [LATIN CAPITAL LETTER P WITH DOT ABOVE]
1033 case '\u24C5': // Ⓟ [CIRCLED LATIN CAPITAL LETTER P]
1034 case '\u2C63': // Ᵽ [LATIN CAPITAL LETTER P WITH STROKE]
1035 case '\uA750': // Ꝑ [LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER]
1036 case '\uA752': // Ꝓ [LATIN CAPITAL LETTER P WITH FLOURISH]
1037 case '\uA754': // Ꝕ [LATIN CAPITAL LETTER P WITH SQUIRREL TAIL]
1038 case '\uFF30': // P [FULLWIDTH LATIN CAPITAL LETTER P]
1039 output[outputPos++] = 'P';
1041 case '\u01A5': // ƥ [LATIN SMALL LETTER P WITH HOOK]
1042 case '\u1D71': // ᵱ [LATIN SMALL LETTER P WITH MIDDLE TILDE]
1043 case '\u1D7D': // ᵽ [LATIN SMALL LETTER P WITH STROKE]
1044 case '\u1D88': // ᶈ [LATIN SMALL LETTER P WITH PALATAL HOOK]
1045 case '\u1E55': // ṕ [LATIN SMALL LETTER P WITH ACUTE]
1046 case '\u1E57': // ṗ [LATIN SMALL LETTER P WITH DOT ABOVE]
1047 case '\u24DF': // ⓟ [CIRCLED LATIN SMALL LETTER P]
1048 case '\uA751': // ꝑ [LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER]
1049 case '\uA753': // ꝓ [LATIN SMALL LETTER P WITH FLOURISH]
1050 case '\uA755': // ꝕ [LATIN SMALL LETTER P WITH SQUIRREL TAIL]
1051 case '\uA7FC': // ꟼ [LATIN EPIGRAPHIC LETTER REVERSED P]
1052 case '\uFF50': // p [FULLWIDTH LATIN SMALL LETTER P]
1053 output[outputPos++] = 'p';
1055 case '\u24AB': // ⒫ [PARENTHESIZED LATIN SMALL LETTER P]
1056 output[outputPos++] = '(';
1057 output[outputPos++] = 'p';
1058 output[outputPos++] = ')';
1060 case '\u024A': // Ɋ [LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL]
1061 case '\u24C6': // Ⓠ [CIRCLED LATIN CAPITAL LETTER Q]
1062 case '\uA756': // Ꝗ [LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER]
1063 case '\uA758': // Ꝙ [LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE]
1064 case '\uFF31': // Q [FULLWIDTH LATIN CAPITAL LETTER Q]
1065 output[outputPos++] = 'Q';
1067 case '\u0138': // ĸ http://en.wikipedia.org/wiki/Kra_(letter) [LATIN SMALL LETTER KRA]
1068 case '\u024B': // ɋ [LATIN SMALL LETTER Q WITH HOOK TAIL]
1069 case '\u02A0': // ʠ [LATIN SMALL LETTER Q WITH HOOK]
1070 case '\u24E0': // ⓠ [CIRCLED LATIN SMALL LETTER Q]
1071 case '\uA757': // ꝗ [LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER]
1072 case '\uA759': // ꝙ [LATIN SMALL LETTER Q WITH DIAGONAL STROKE]
1073 case '\uFF51': // q [FULLWIDTH LATIN SMALL LETTER Q]
1074 output[outputPos++] = 'q';
1076 case '\u24AC': // ⒬ [PARENTHESIZED LATIN SMALL LETTER Q]
1077 output[outputPos++] = '(';
1078 output[outputPos++] = 'q';
1079 output[outputPos++] = ')';
1081 case '\u0239': // ȹ [LATIN SMALL LETTER QP DIGRAPH]
1082 output[outputPos++] = 'q';
1083 output[outputPos++] = 'p';
1085 case '\u0154': // Ŕ [LATIN CAPITAL LETTER R WITH ACUTE]
1086 case '\u0156': // Ŗ [LATIN CAPITAL LETTER R WITH CEDILLA]
1087 case '\u0158': // Ř [LATIN CAPITAL LETTER R WITH CARON]
1088 case '\u0210': // Ȑ [LATIN CAPITAL LETTER R WITH DOUBLE GRAVE]
1089 case '\u0212': // Ȓ [LATIN CAPITAL LETTER R WITH INVERTED BREVE]
1090 case '\u024C': // Ɍ [LATIN CAPITAL LETTER R WITH STROKE]
1091 case '\u0280': // ʀ [LATIN LETTER SMALL CAPITAL R]
1092 case '\u0281': // ʁ [LATIN LETTER SMALL CAPITAL INVERTED R]
1093 case '\u1D19': // ᴙ [LATIN LETTER SMALL CAPITAL REVERSED R]
1094 case '\u1D1A': // ᴚ [LATIN LETTER SMALL CAPITAL TURNED R]
1095 case '\u1E58': // Ṙ [LATIN CAPITAL LETTER R WITH DOT ABOVE]
1096 case '\u1E5A': // Ṛ [LATIN CAPITAL LETTER R WITH DOT BELOW]
1097 case '\u1E5C': // Ṝ [LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON]
1098 case '\u1E5E': // Ṟ [LATIN CAPITAL LETTER R WITH LINE BELOW]
1099 case '\u24C7': // Ⓡ [CIRCLED LATIN CAPITAL LETTER R]
1100 case '\u2C64': // Ɽ [LATIN CAPITAL LETTER R WITH TAIL]
1101 case '\uA75A': // Ꝛ [LATIN CAPITAL LETTER R ROTUNDA]
1102 case '\uA782': // Ꞃ [LATIN CAPITAL LETTER INSULAR R]
1103 case '\uFF32': // R [FULLWIDTH LATIN CAPITAL LETTER R]
1104 output[outputPos++] = 'R';
1106 case '\u0155': // ŕ [LATIN SMALL LETTER R WITH ACUTE]
1107 case '\u0157': // ŗ [LATIN SMALL LETTER R WITH CEDILLA]
1108 case '\u0159': // ř [LATIN SMALL LETTER R WITH CARON]
1109 case '\u0211': // ȑ [LATIN SMALL LETTER R WITH DOUBLE GRAVE]
1110 case '\u0213': // ȓ [LATIN SMALL LETTER R WITH INVERTED BREVE]
1111 case '\u024D': // ɍ [LATIN SMALL LETTER R WITH STROKE]
1112 case '\u027C': // ɼ [LATIN SMALL LETTER R WITH LONG LEG]
1113 case '\u027D': // ɽ [LATIN SMALL LETTER R WITH TAIL]
1114 case '\u027E': // ɾ [LATIN SMALL LETTER R WITH FISHHOOK]
1115 case '\u027F': // ɿ [LATIN SMALL LETTER REVERSED R WITH FISHHOOK]
1116 case '\u1D63': // ᵣ [LATIN SUBSCRIPT SMALL LETTER R]
1117 case '\u1D72': // ᵲ [LATIN SMALL LETTER R WITH MIDDLE TILDE]
1118 case '\u1D73': // ᵳ [LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE]
1119 case '\u1D89': // ᶉ [LATIN SMALL LETTER R WITH PALATAL HOOK]
1120 case '\u1E59': // ṙ [LATIN SMALL LETTER R WITH DOT ABOVE]
1121 case '\u1E5B': // ṛ [LATIN SMALL LETTER R WITH DOT BELOW]
1122 case '\u1E5D': // ṝ [LATIN SMALL LETTER R WITH DOT BELOW AND MACRON]
1123 case '\u1E5F': // ṟ [LATIN SMALL LETTER R WITH LINE BELOW]
1124 case '\u24E1': // ⓡ [CIRCLED LATIN SMALL LETTER R]
1125 case '\uA75B': // ꝛ [LATIN SMALL LETTER R ROTUNDA]
1126 case '\uA783': // ꞃ [LATIN SMALL LETTER INSULAR R]
1127 case '\uFF52': // r [FULLWIDTH LATIN SMALL LETTER R]
1128 output[outputPos++] = 'r';
1130 case '\u24AD': // ⒭ [PARENTHESIZED LATIN SMALL LETTER R]
1131 output[outputPos++] = '(';
1132 output[outputPos++] = 'r';
1133 output[outputPos++] = ')';
1135 case '\u015A': // Ś [LATIN CAPITAL LETTER S WITH ACUTE]
1136 case '\u015C': // Ŝ [LATIN CAPITAL LETTER S WITH CIRCUMFLEX]
1137 case '\u015E': // Ş [LATIN CAPITAL LETTER S WITH CEDILLA]
1138 case '\u0160': // Š [LATIN CAPITAL LETTER S WITH CARON]
1139 case '\u0218': // Ș [LATIN CAPITAL LETTER S WITH COMMA BELOW]
1140 case '\u1E60': // Ṡ [LATIN CAPITAL LETTER S WITH DOT ABOVE]
1141 case '\u1E62': // Ṣ [LATIN CAPITAL LETTER S WITH DOT BELOW]
1142 case '\u1E64': // Ṥ [LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE]
1143 case '\u1E66': // Ṧ [LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE]
1144 case '\u1E68': // Ṩ [LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE]
1145 case '\u24C8': // Ⓢ [CIRCLED LATIN CAPITAL LETTER S]
1146 case '\uA731': // ꜱ [LATIN LETTER SMALL CAPITAL S]
1147 case '\uA785': // ꞅ [LATIN SMALL LETTER INSULAR S]
1148 case '\uFF33': // S [FULLWIDTH LATIN CAPITAL LETTER S]
1149 output[outputPos++] = 'S';
1151 case '\u015B': // ś [LATIN SMALL LETTER S WITH ACUTE]
1152 case '\u015D': // ŝ [LATIN SMALL LETTER S WITH CIRCUMFLEX]
1153 case '\u015F': // ş [LATIN SMALL LETTER S WITH CEDILLA]
1154 case '\u0161': // š [LATIN SMALL LETTER S WITH CARON]
1155 case '\u017F': // ſ http://en.wikipedia.org/wiki/Long_S [LATIN SMALL LETTER LONG S]
1156 case '\u0219': // ș [LATIN SMALL LETTER S WITH COMMA BELOW]
1157 case '\u023F': // ȿ [LATIN SMALL LETTER S WITH SWASH TAIL]
1158 case '\u0282': // ʂ [LATIN SMALL LETTER S WITH HOOK]
1159 case '\u1D74': // ᵴ [LATIN SMALL LETTER S WITH MIDDLE TILDE]
1160 case '\u1D8A': // ᶊ [LATIN SMALL LETTER S WITH PALATAL HOOK]
1161 case '\u1E61': // ṡ [LATIN SMALL LETTER S WITH DOT ABOVE]
1162 case '\u1E63': // ṣ [LATIN SMALL LETTER S WITH DOT BELOW]
1163 case '\u1E65': // ṥ [LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE]
1164 case '\u1E67': // ṧ [LATIN SMALL LETTER S WITH CARON AND DOT ABOVE]
1165 case '\u1E69': // ṩ [LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE]
1166 case '\u1E9C': // ẜ [LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE]
1167 case '\u1E9D': // ẝ [LATIN SMALL LETTER LONG S WITH HIGH STROKE]
1168 case '\u24E2': // ⓢ [CIRCLED LATIN SMALL LETTER S]
1169 case '\uA784': // Ꞅ [LATIN CAPITAL LETTER INSULAR S]
1170 case '\uFF53': // s [FULLWIDTH LATIN SMALL LETTER S]
1171 output[outputPos++] = 's';
1173 case '\u1E9E': // ẞ [LATIN CAPITAL LETTER SHARP S]
1174 output[outputPos++] = 'S';
1175 output[outputPos++] = 'S';
1177 case '\u24AE': // ⒮ [PARENTHESIZED LATIN SMALL LETTER S]
1178 output[outputPos++] = '(';
1179 output[outputPos++] = 's';
1180 output[outputPos++] = ')';
1182 case '\u00DF': // ß [LATIN SMALL LETTER SHARP S]
1183 output[outputPos++] = 's';
1184 output[outputPos++] = 's';
1186 case '\uFB06': // st [LATIN SMALL LIGATURE ST]
1187 output[outputPos++] = 's';
1188 output[outputPos++] = 't';
1190 case '\u0162': // Ţ [LATIN CAPITAL LETTER T WITH CEDILLA]
1191 case '\u0164': // Ť [LATIN CAPITAL LETTER T WITH CARON]
1192 case '\u0166': // Ŧ [LATIN CAPITAL LETTER T WITH STROKE]
1193 case '\u01AC': // Ƭ [LATIN CAPITAL LETTER T WITH HOOK]
1194 case '\u01AE': // Ʈ [LATIN CAPITAL LETTER T WITH RETROFLEX HOOK]
1195 case '\u021A': // Ț [LATIN CAPITAL LETTER T WITH COMMA BELOW]
1196 case '\u023E': // Ⱦ [LATIN CAPITAL LETTER T WITH DIAGONAL STROKE]
1197 case '\u1D1B': // ᴛ [LATIN LETTER SMALL CAPITAL T]
1198 case '\u1E6A': // Ṫ [LATIN CAPITAL LETTER T WITH DOT ABOVE]
1199 case '\u1E6C': // Ṭ [LATIN CAPITAL LETTER T WITH DOT BELOW]
1200 case '\u1E6E': // Ṯ [LATIN CAPITAL LETTER T WITH LINE BELOW]
1201 case '\u1E70': // Ṱ [LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW]
1202 case '\u24C9': // Ⓣ [CIRCLED LATIN CAPITAL LETTER T]
1203 case '\uA786': // Ꞇ [LATIN CAPITAL LETTER INSULAR T]
1204 case '\uFF34': // T [FULLWIDTH LATIN CAPITAL LETTER T]
1205 output[outputPos++] = 'T';
1207 case '\u0163': // ţ [LATIN SMALL LETTER T WITH CEDILLA]
1208 case '\u0165': // ť [LATIN SMALL LETTER T WITH CARON]
1209 case '\u0167': // ŧ [LATIN SMALL LETTER T WITH STROKE]
1210 case '\u01AB': // ƫ [LATIN SMALL LETTER T WITH PALATAL HOOK]
1211 case '\u01AD': // ƭ [LATIN SMALL LETTER T WITH HOOK]
1212 case '\u021B': // ț [LATIN SMALL LETTER T WITH COMMA BELOW]
1213 case '\u0236': // ȶ [LATIN SMALL LETTER T WITH CURL]
1214 case '\u0287': // ʇ [LATIN SMALL LETTER TURNED T]
1215 case '\u0288': // ʈ [LATIN SMALL LETTER T WITH RETROFLEX HOOK]
1216 case '\u1D75': // ᵵ [LATIN SMALL LETTER T WITH MIDDLE TILDE]
1217 case '\u1E6B': // ṫ [LATIN SMALL LETTER T WITH DOT ABOVE]
1218 case '\u1E6D': // ṭ [LATIN SMALL LETTER T WITH DOT BELOW]
1219 case '\u1E6F': // ṯ [LATIN SMALL LETTER T WITH LINE BELOW]
1220 case '\u1E71': // ṱ [LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW]
1221 case '\u1E97': // ẗ [LATIN SMALL LETTER T WITH DIAERESIS]
1222 case '\u24E3': // ⓣ [CIRCLED LATIN SMALL LETTER T]
1223 case '\u2C66': // ⱦ [LATIN SMALL LETTER T WITH DIAGONAL STROKE]
1224 case '\uFF54': // t [FULLWIDTH LATIN SMALL LETTER T]
1225 output[outputPos++] = 't';
1227 case '\u00DE': // Þ [LATIN CAPITAL LETTER THORN]
1228 case '\uA766': // Ꝧ [LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER]
1229 output[outputPos++] = 'T';
1230 output[outputPos++] = 'H';
1232 case '\uA728': // Ꜩ [LATIN CAPITAL LETTER TZ]
1233 output[outputPos++] = 'T';
1234 output[outputPos++] = 'Z';
1236 case '\u24AF': // ⒯ [PARENTHESIZED LATIN SMALL LETTER T]
1237 output[outputPos++] = '(';
1238 output[outputPos++] = 't';
1239 output[outputPos++] = ')';
1241 case '\u02A8': // ʨ [LATIN SMALL LETTER TC DIGRAPH WITH CURL]
1242 output[outputPos++] = 't';
1243 output[outputPos++] = 'c';
1245 case '\u00FE': // þ [LATIN SMALL LETTER THORN]
1246 case '\u1D7A': // ᵺ [LATIN SMALL LETTER TH WITH STRIKETHROUGH]
1247 case '\uA767': // ꝧ [LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER]
1248 output[outputPos++] = 't';
1249 output[outputPos++] = 'h';
1251 case '\u02A6': // ʦ [LATIN SMALL LETTER TS DIGRAPH]
1252 output[outputPos++] = 't';
1253 output[outputPos++] = 's';
1255 case '\uA729': // ꜩ [LATIN SMALL LETTER TZ]
1256 output[outputPos++] = 't';
1257 output[outputPos++] = 'z';
1259 case '\u00D9': // Ù [LATIN CAPITAL LETTER U WITH GRAVE]
1260 case '\u00DA': // Ú [LATIN CAPITAL LETTER U WITH ACUTE]
1261 case '\u00DB': // Û [LATIN CAPITAL LETTER U WITH CIRCUMFLEX]
1262 case '\u00DC': // Ü [LATIN CAPITAL LETTER U WITH DIAERESIS]
1263 case '\u0168': // Ũ [LATIN CAPITAL LETTER U WITH TILDE]
1264 case '\u016A': // Ū [LATIN CAPITAL LETTER U WITH MACRON]
1265 case '\u016C': // Ŭ [LATIN CAPITAL LETTER U WITH BREVE]
1266 case '\u016E': // Ů [LATIN CAPITAL LETTER U WITH RING ABOVE]
1267 case '\u0170': // Ű [LATIN CAPITAL LETTER U WITH DOUBLE ACUTE]
1268 case '\u0172': // Ų [LATIN CAPITAL LETTER U WITH OGONEK]
1269 case '\u01AF': // Ư [LATIN CAPITAL LETTER U WITH HORN]
1270 case '\u01D3': // Ǔ [LATIN CAPITAL LETTER U WITH CARON]
1271 case '\u01D5': // Ǖ [LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON]
1272 case '\u01D7': // Ǘ [LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE]
1273 case '\u01D9': // Ǚ [LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON]
1274 case '\u01DB': // Ǜ [LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE]
1275 case '\u0214': // Ȕ [LATIN CAPITAL LETTER U WITH DOUBLE GRAVE]
1276 case '\u0216': // Ȗ [LATIN CAPITAL LETTER U WITH INVERTED BREVE]
1277 case '\u0244': // Ʉ [LATIN CAPITAL LETTER U BAR]
1278 case '\u1D1C': // ᴜ [LATIN LETTER SMALL CAPITAL U]
1279 case '\u1D7E': // ᵾ [LATIN SMALL CAPITAL LETTER U WITH STROKE]
1280 case '\u1E72': // Ṳ [LATIN CAPITAL LETTER U WITH DIAERESIS BELOW]
1281 case '\u1E74': // Ṵ [LATIN CAPITAL LETTER U WITH TILDE BELOW]
1282 case '\u1E76': // Ṷ [LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW]
1283 case '\u1E78': // Ṹ [LATIN CAPITAL LETTER U WITH TILDE AND ACUTE]
1284 case '\u1E7A': // Ṻ [LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS]
1285 case '\u1EE4': // Ụ [LATIN CAPITAL LETTER U WITH DOT BELOW]
1286 case '\u1EE6': // Ủ [LATIN CAPITAL LETTER U WITH HOOK ABOVE]
1287 case '\u1EE8': // Ứ [LATIN CAPITAL LETTER U WITH HORN AND ACUTE]
1288 case '\u1EEA': // Ừ [LATIN CAPITAL LETTER U WITH HORN AND GRAVE]
1289 case '\u1EEC': // Ử [LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE]
1290 case '\u1EEE': // Ữ [LATIN CAPITAL LETTER U WITH HORN AND TILDE]
1291 case '\u1EF0': // Ự [LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW]
1292 case '\u24CA': // Ⓤ [CIRCLED LATIN CAPITAL LETTER U]
1293 case '\uFF35': // U [FULLWIDTH LATIN CAPITAL LETTER U]
1294 output[outputPos++] = 'U';
1296 case '\u00F9': // ù [LATIN SMALL LETTER U WITH GRAVE]
1297 case '\u00FA': // ú [LATIN SMALL LETTER U WITH ACUTE]
1298 case '\u00FB': // û [LATIN SMALL LETTER U WITH CIRCUMFLEX]
1299 case '\u00FC': // ü [LATIN SMALL LETTER U WITH DIAERESIS]
1300 case '\u0169': // ũ [LATIN SMALL LETTER U WITH TILDE]
1301 case '\u016B': // ū [LATIN SMALL LETTER U WITH MACRON]
1302 case '\u016D': // ŭ [LATIN SMALL LETTER U WITH BREVE]
1303 case '\u016F': // ů [LATIN SMALL LETTER U WITH RING ABOVE]
1304 case '\u0171': // ű [LATIN SMALL LETTER U WITH DOUBLE ACUTE]
1305 case '\u0173': // ų [LATIN SMALL LETTER U WITH OGONEK]
1306 case '\u01B0': // ư [LATIN SMALL LETTER U WITH HORN]
1307 case '\u01D4': // ǔ [LATIN SMALL LETTER U WITH CARON]
1308 case '\u01D6': // ǖ [LATIN SMALL LETTER U WITH DIAERESIS AND MACRON]
1309 case '\u01D8': // ǘ [LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE]
1310 case '\u01DA': // ǚ [LATIN SMALL LETTER U WITH DIAERESIS AND CARON]
1311 case '\u01DC': // ǜ [LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE]
1312 case '\u0215': // ȕ [LATIN SMALL LETTER U WITH DOUBLE GRAVE]
1313 case '\u0217': // ȗ [LATIN SMALL LETTER U WITH INVERTED BREVE]
1314 case '\u0289': // ʉ [LATIN SMALL LETTER U BAR]
1315 case '\u1D64': // ᵤ [LATIN SUBSCRIPT SMALL LETTER U]
1316 case '\u1D99': // ᶙ [LATIN SMALL LETTER U WITH RETROFLEX HOOK]
1317 case '\u1E73': // ṳ [LATIN SMALL LETTER U WITH DIAERESIS BELOW]
1318 case '\u1E75': // ṵ [LATIN SMALL LETTER U WITH TILDE BELOW]
1319 case '\u1E77': // ṷ [LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW]
1320 case '\u1E79': // ṹ [LATIN SMALL LETTER U WITH TILDE AND ACUTE]
1321 case '\u1E7B': // ṻ [LATIN SMALL LETTER U WITH MACRON AND DIAERESIS]
1322 case '\u1EE5': // ụ [LATIN SMALL LETTER U WITH DOT BELOW]
1323 case '\u1EE7': // ủ [LATIN SMALL LETTER U WITH HOOK ABOVE]
1324 case '\u1EE9': // ứ [LATIN SMALL LETTER U WITH HORN AND ACUTE]
1325 case '\u1EEB': // ừ [LATIN SMALL LETTER U WITH HORN AND GRAVE]
1326 case '\u1EED': // ử [LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE]
1327 case '\u1EEF': // ữ [LATIN SMALL LETTER U WITH HORN AND TILDE]
1328 case '\u1EF1': // ự [LATIN SMALL LETTER U WITH HORN AND DOT BELOW]
1329 case '\u24E4': // ⓤ [CIRCLED LATIN SMALL LETTER U]
1330 case '\uFF55': // u [FULLWIDTH LATIN SMALL LETTER U]
1331 output[outputPos++] = 'u';
1333 case '\u24B0': // ⒰ [PARENTHESIZED LATIN SMALL LETTER U]
1334 output[outputPos++] = '(';
1335 output[outputPos++] = 'u';
1336 output[outputPos++] = ')';
1338 case '\u1D6B': // ᵫ [LATIN SMALL LETTER UE]
1339 output[outputPos++] = 'u';
1340 output[outputPos++] = 'e';
1342 case '\u01B2': // Ʋ [LATIN CAPITAL LETTER V WITH HOOK]
1343 case '\u0245': // Ʌ [LATIN CAPITAL LETTER TURNED V]
1344 case '\u1D20': // ᴠ [LATIN LETTER SMALL CAPITAL V]
1345 case '\u1E7C': // Ṽ [LATIN CAPITAL LETTER V WITH TILDE]
1346 case '\u1E7E': // Ṿ [LATIN CAPITAL LETTER V WITH DOT BELOW]
1347 case '\u1EFC': // Ỽ [LATIN CAPITAL LETTER MIDDLE-WELSH V]
1348 case '\u24CB': // Ⓥ [CIRCLED LATIN CAPITAL LETTER V]
1349 case '\uA75E': // Ꝟ [LATIN CAPITAL LETTER V WITH DIAGONAL STROKE]
1350 case '\uA768': // Ꝩ [LATIN CAPITAL LETTER VEND]
1351 case '\uFF36': // V [FULLWIDTH LATIN CAPITAL LETTER V]
1352 output[outputPos++] = 'V';
1354 case '\u028B': // ʋ [LATIN SMALL LETTER V WITH HOOK]
1355 case '\u028C': // ʌ [LATIN SMALL LETTER TURNED V]
1356 case '\u1D65': // ᵥ [LATIN SUBSCRIPT SMALL LETTER V]
1357 case '\u1D8C': // ᶌ [LATIN SMALL LETTER V WITH PALATAL HOOK]
1358 case '\u1E7D': // ṽ [LATIN SMALL LETTER V WITH TILDE]
1359 case '\u1E7F': // ṿ [LATIN SMALL LETTER V WITH DOT BELOW]
1360 case '\u24E5': // ⓥ [CIRCLED LATIN SMALL LETTER V]
1361 case '\u2C71': // ⱱ [LATIN SMALL LETTER V WITH RIGHT HOOK]
1362 case '\u2C74': // ⱴ [LATIN SMALL LETTER V WITH CURL]
1363 case '\uA75F': // ꝟ [LATIN SMALL LETTER V WITH DIAGONAL STROKE]
1364 case '\uFF56': // v [FULLWIDTH LATIN SMALL LETTER V]
1365 output[outputPos++] = 'v';
1367 case '\uA760': // Ꝡ [LATIN CAPITAL LETTER VY]
1368 output[outputPos++] = 'V';
1369 output[outputPos++] = 'Y';
1371 case '\u24B1': // ⒱ [PARENTHESIZED LATIN SMALL LETTER V]
1372 output[outputPos++] = '(';
1373 output[outputPos++] = 'v';
1374 output[outputPos++] = ')';
1376 case '\uA761': // ꝡ [LATIN SMALL LETTER VY]
1377 output[outputPos++] = 'v';
1378 output[outputPos++] = 'y';
1380 case '\u0174': // Ŵ [LATIN CAPITAL LETTER W WITH CIRCUMFLEX]
1381 case '\u01F7': // Ƿ http://en.wikipedia.org/wiki/Wynn [LATIN CAPITAL LETTER WYNN]
1382 case '\u1D21': // ᴡ [LATIN LETTER SMALL CAPITAL W]
1383 case '\u1E80': // Ẁ [LATIN CAPITAL LETTER W WITH GRAVE]
1384 case '\u1E82': // Ẃ [LATIN CAPITAL LETTER W WITH ACUTE]
1385 case '\u1E84': // Ẅ [LATIN CAPITAL LETTER W WITH DIAERESIS]
1386 case '\u1E86': // Ẇ [LATIN CAPITAL LETTER W WITH DOT ABOVE]
1387 case '\u1E88': // Ẉ [LATIN CAPITAL LETTER W WITH DOT BELOW]
1388 case '\u24CC': // Ⓦ [CIRCLED LATIN CAPITAL LETTER W]
1389 case '\u2C72': // Ⱳ [LATIN CAPITAL LETTER W WITH HOOK]
1390 case '\uFF37': // W [FULLWIDTH LATIN CAPITAL LETTER W]
1391 output[outputPos++] = 'W';
1393 case '\u0175': // ŵ [LATIN SMALL LETTER W WITH CIRCUMFLEX]
1394 case '\u01BF': // ƿ http://en.wikipedia.org/wiki/Wynn [LATIN LETTER WYNN]
1395 case '\u028D': // ʍ [LATIN SMALL LETTER TURNED W]
1396 case '\u1E81': // ẁ [LATIN SMALL LETTER W WITH GRAVE]
1397 case '\u1E83': // ẃ [LATIN SMALL LETTER W WITH ACUTE]
1398 case '\u1E85': // ẅ [LATIN SMALL LETTER W WITH DIAERESIS]
1399 case '\u1E87': // ẇ [LATIN SMALL LETTER W WITH DOT ABOVE]
1400 case '\u1E89': // ẉ [LATIN SMALL LETTER W WITH DOT BELOW]
1401 case '\u1E98': // ẘ [LATIN SMALL LETTER W WITH RING ABOVE]
1402 case '\u24E6': // ⓦ [CIRCLED LATIN SMALL LETTER W]
1403 case '\u2C73': // ⱳ [LATIN SMALL LETTER W WITH HOOK]
1404 case '\uFF57': // w [FULLWIDTH LATIN SMALL LETTER W]
1405 output[outputPos++] = 'w';
1407 case '\u24B2': // ⒲ [PARENTHESIZED LATIN SMALL LETTER W]
1408 output[outputPos++] = '(';
1409 output[outputPos++] = 'w';
1410 output[outputPos++] = ')';
1412 case '\u1E8A': // Ẋ [LATIN CAPITAL LETTER X WITH DOT ABOVE]
1413 case '\u1E8C': // Ẍ [LATIN CAPITAL LETTER X WITH DIAERESIS]
1414 case '\u24CD': // Ⓧ [CIRCLED LATIN CAPITAL LETTER X]
1415 case '\uFF38': // X [FULLWIDTH LATIN CAPITAL LETTER X]
1416 output[outputPos++] = 'X';
1418 case '\u1D8D': // ᶍ [LATIN SMALL LETTER X WITH PALATAL HOOK]
1419 case '\u1E8B': // ẋ [LATIN SMALL LETTER X WITH DOT ABOVE]
1420 case '\u1E8D': // ẍ [LATIN SMALL LETTER X WITH DIAERESIS]
1421 case '\u2093': // ₓ [LATIN SUBSCRIPT SMALL LETTER X]
1422 case '\u24E7': // ⓧ [CIRCLED LATIN SMALL LETTER X]
1423 case '\uFF58': // x [FULLWIDTH LATIN SMALL LETTER X]
1424 output[outputPos++] = 'x';
1426 case '\u24B3': // ⒳ [PARENTHESIZED LATIN SMALL LETTER X]
1427 output[outputPos++] = '(';
1428 output[outputPos++] = 'x';
1429 output[outputPos++] = ')';
1431 case '\u00DD': // Ý [LATIN CAPITAL LETTER Y WITH ACUTE]
1432 case '\u0176': // Ŷ [LATIN CAPITAL LETTER Y WITH CIRCUMFLEX]
1433 case '\u0178': // Ÿ [LATIN CAPITAL LETTER Y WITH DIAERESIS]
1434 case '\u01B3': // Ƴ [LATIN CAPITAL LETTER Y WITH HOOK]
1435 case '\u0232': // Ȳ [LATIN CAPITAL LETTER Y WITH MACRON]
1436 case '\u024E': // Ɏ [LATIN CAPITAL LETTER Y WITH STROKE]
1437 case '\u028F': // ʏ [LATIN LETTER SMALL CAPITAL Y]
1438 case '\u1E8E': // Ẏ [LATIN CAPITAL LETTER Y WITH DOT ABOVE]
1439 case '\u1EF2': // Ỳ [LATIN CAPITAL LETTER Y WITH GRAVE]
1440 case '\u1EF4': // Ỵ [LATIN CAPITAL LETTER Y WITH DOT BELOW]
1441 case '\u1EF6': // Ỷ [LATIN CAPITAL LETTER Y WITH HOOK ABOVE]
1442 case '\u1EF8': // Ỹ [LATIN CAPITAL LETTER Y WITH TILDE]
1443 case '\u1EFE': // Ỿ [LATIN CAPITAL LETTER Y WITH LOOP]
1444 case '\u24CE': // Ⓨ [CIRCLED LATIN CAPITAL LETTER Y]
1445 case '\uFF39': // Y [FULLWIDTH LATIN CAPITAL LETTER Y]
1446 output[outputPos++] = 'Y';
1448 case '\u00FD': // ý [LATIN SMALL LETTER Y WITH ACUTE]
1449 case '\u00FF': // ÿ [LATIN SMALL LETTER Y WITH DIAERESIS]
1450 case '\u0177': // ŷ [LATIN SMALL LETTER Y WITH CIRCUMFLEX]
1451 case '\u01B4': // ƴ [LATIN SMALL LETTER Y WITH HOOK]
1452 case '\u0233': // ȳ [LATIN SMALL LETTER Y WITH MACRON]
1453 case '\u024F': // ɏ [LATIN SMALL LETTER Y WITH STROKE]
1454 case '\u028E': // ʎ [LATIN SMALL LETTER TURNED Y]
1455 case '\u1E8F': // ẏ [LATIN SMALL LETTER Y WITH DOT ABOVE]
1456 case '\u1E99': // ẙ [LATIN SMALL LETTER Y WITH RING ABOVE]
1457 case '\u1EF3': // ỳ [LATIN SMALL LETTER Y WITH GRAVE]
1458 case '\u1EF5': // ỵ [LATIN SMALL LETTER Y WITH DOT BELOW]
1459 case '\u1EF7': // ỷ [LATIN SMALL LETTER Y WITH HOOK ABOVE]
1460 case '\u1EF9': // ỹ [LATIN SMALL LETTER Y WITH TILDE]
1461 case '\u1EFF': // ỿ [LATIN SMALL LETTER Y WITH LOOP]
1462 case '\u24E8': // ⓨ [CIRCLED LATIN SMALL LETTER Y]
1463 case '\uFF59': // y [FULLWIDTH LATIN SMALL LETTER Y]
1464 output[outputPos++] = 'y';
1466 case '\u24B4': // ⒴ [PARENTHESIZED LATIN SMALL LETTER Y]
1467 output[outputPos++] = '(';
1468 output[outputPos++] = 'y';
1469 output[outputPos++] = ')';
1471 case '\u0179': // Ź [LATIN CAPITAL LETTER Z WITH ACUTE]
1472 case '\u017B': // Ż [LATIN CAPITAL LETTER Z WITH DOT ABOVE]
1473 case '\u017D': // Ž [LATIN CAPITAL LETTER Z WITH CARON]
1474 case '\u01B5': // Ƶ [LATIN CAPITAL LETTER Z WITH STROKE]
1475 case '\u021C': // Ȝ http://en.wikipedia.org/wiki/Yogh [LATIN CAPITAL LETTER YOGH]
1476 case '\u0224': // Ȥ [LATIN CAPITAL LETTER Z WITH HOOK]
1477 case '\u1D22': // ᴢ [LATIN LETTER SMALL CAPITAL Z]
1478 case '\u1E90': // Ẑ [LATIN CAPITAL LETTER Z WITH CIRCUMFLEX]
1479 case '\u1E92': // Ẓ [LATIN CAPITAL LETTER Z WITH DOT BELOW]
1480 case '\u1E94': // Ẕ [LATIN CAPITAL LETTER Z WITH LINE BELOW]
1481 case '\u24CF': // Ⓩ [CIRCLED LATIN CAPITAL LETTER Z]
1482 case '\u2C6B': // Ⱬ [LATIN CAPITAL LETTER Z WITH DESCENDER]
1483 case '\uA762': // Ꝣ [LATIN CAPITAL LETTER VISIGOTHIC Z]
1484 case '\uFF3A': // Z [FULLWIDTH LATIN CAPITAL LETTER Z]
1485 output[outputPos++] = 'Z';
1487 case '\u017A': // ź [LATIN SMALL LETTER Z WITH ACUTE]
1488 case '\u017C': // ż [LATIN SMALL LETTER Z WITH DOT ABOVE]
1489 case '\u017E': // ž [LATIN SMALL LETTER Z WITH CARON]
1490 case '\u01B6': // ƶ [LATIN SMALL LETTER Z WITH STROKE]
1491 case '\u021D': // ȝ http://en.wikipedia.org/wiki/Yogh [LATIN SMALL LETTER YOGH]
1492 case '\u0225': // ȥ [LATIN SMALL LETTER Z WITH HOOK]
1493 case '\u0240': // ɀ [LATIN SMALL LETTER Z WITH SWASH TAIL]
1494 case '\u0290': // ʐ [LATIN SMALL LETTER Z WITH RETROFLEX HOOK]
1495 case '\u0291': // ʑ [LATIN SMALL LETTER Z WITH CURL]
1496 case '\u1D76': // ᵶ [LATIN SMALL LETTER Z WITH MIDDLE TILDE]
1497 case '\u1D8E': // ᶎ [LATIN SMALL LETTER Z WITH PALATAL HOOK]
1498 case '\u1E91': // ẑ [LATIN SMALL LETTER Z WITH CIRCUMFLEX]
1499 case '\u1E93': // ẓ [LATIN SMALL LETTER Z WITH DOT BELOW]
1500 case '\u1E95': // ẕ [LATIN SMALL LETTER Z WITH LINE BELOW]
1501 case '\u24E9': // ⓩ [CIRCLED LATIN SMALL LETTER Z]
1502 case '\u2C6C': // ⱬ [LATIN SMALL LETTER Z WITH DESCENDER]
1503 case '\uA763': // ꝣ [LATIN SMALL LETTER VISIGOTHIC Z]
1504 case '\uFF5A': // z [FULLWIDTH LATIN SMALL LETTER Z]
1505 output[outputPos++] = 'z';
1507 case '\u24B5': // ⒵ [PARENTHESIZED LATIN SMALL LETTER Z]
1508 output[outputPos++] = '(';
1509 output[outputPos++] = 'z';
1510 output[outputPos++] = ')';
1512 case '\u2070': // ⁰ [SUPERSCRIPT ZERO]
1513 case '\u2080': // ₀ [SUBSCRIPT ZERO]
1514 case '\u24EA': // ⓪ [CIRCLED DIGIT ZERO]
1515 case '\u24FF': // ⓿ [NEGATIVE CIRCLED DIGIT ZERO]
1516 case '\uFF10': // 0 [FULLWIDTH DIGIT ZERO]
1517 output[outputPos++] = '0';
1519 case '\u00B9': // ¹ [SUPERSCRIPT ONE]
1520 case '\u2081': // ₁ [SUBSCRIPT ONE]
1521 case '\u2460': // ① [CIRCLED DIGIT ONE]
1522 case '\u24F5': // ⓵ [DOUBLE CIRCLED DIGIT ONE]
1523 case '\u2776': // ❶ [DINGBAT NEGATIVE CIRCLED DIGIT ONE]
1524 case '\u2780': // ➀ [DINGBAT CIRCLED SANS-SERIF DIGIT ONE]
1525 case '\u278A': // ➊ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE]
1526 case '\uFF11': // 1 [FULLWIDTH DIGIT ONE]
1527 output[outputPos++] = '1';
1529 case '\u2488': // ⒈ [DIGIT ONE FULL STOP]
1530 output[outputPos++] = '1';
1531 output[outputPos++] = '.';
1533 case '\u2474': // ⑴ [PARENTHESIZED DIGIT ONE]
1534 output[outputPos++] = '(';
1535 output[outputPos++] = '1';
1536 output[outputPos++] = ')';
1538 case '\u00B2': // ² [SUPERSCRIPT TWO]
1539 case '\u2082': // ₂ [SUBSCRIPT TWO]
1540 case '\u2461': // ② [CIRCLED DIGIT TWO]
1541 case '\u24F6': // ⓶ [DOUBLE CIRCLED DIGIT TWO]
1542 case '\u2777': // ❷ [DINGBAT NEGATIVE CIRCLED DIGIT TWO]
1543 case '\u2781': // ➁ [DINGBAT CIRCLED SANS-SERIF DIGIT TWO]
1544 case '\u278B': // ➋ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO]
1545 case '\uFF12': // 2 [FULLWIDTH DIGIT TWO]
1546 output[outputPos++] = '2';
1548 case '\u2489': // ⒉ [DIGIT TWO FULL STOP]
1549 output[outputPos++] = '2';
1550 output[outputPos++] = '.';
1552 case '\u2475': // ⑵ [PARENTHESIZED DIGIT TWO]
1553 output[outputPos++] = '(';
1554 output[outputPos++] = '2';
1555 output[outputPos++] = ')';
1557 case '\u00B3': // ³ [SUPERSCRIPT THREE]
1558 case '\u2083': // ₃ [SUBSCRIPT THREE]
1559 case '\u2462': // ③ [CIRCLED DIGIT THREE]
1560 case '\u24F7': // ⓷ [DOUBLE CIRCLED DIGIT THREE]
1561 case '\u2778': // ❸ [DINGBAT NEGATIVE CIRCLED DIGIT THREE]
1562 case '\u2782': // ➂ [DINGBAT CIRCLED SANS-SERIF DIGIT THREE]
1563 case '\u278C': // ➌ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE]
1564 case '\uFF13': // 3 [FULLWIDTH DIGIT THREE]
1565 output[outputPos++] = '3';
1567 case '\u248A': // ⒊ [DIGIT THREE FULL STOP]
1568 output[outputPos++] = '3';
1569 output[outputPos++] = '.';
1571 case '\u2476': // ⑶ [PARENTHESIZED DIGIT THREE]
1572 output[outputPos++] = '(';
1573 output[outputPos++] = '3';
1574 output[outputPos++] = ')';
1576 case '\u2074': // ⁴ [SUPERSCRIPT FOUR]
1577 case '\u2084': // ₄ [SUBSCRIPT FOUR]
1578 case '\u2463': // ④ [CIRCLED DIGIT FOUR]
1579 case '\u24F8': // ⓸ [DOUBLE CIRCLED DIGIT FOUR]
1580 case '\u2779': // ❹ [DINGBAT NEGATIVE CIRCLED DIGIT FOUR]
1581 case '\u2783': // ➃ [DINGBAT CIRCLED SANS-SERIF DIGIT FOUR]
1582 case '\u278D': // ➍ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR]
1583 case '\uFF14': // 4 [FULLWIDTH DIGIT FOUR]
1584 output[outputPos++] = '4';
1586 case '\u248B': // ⒋ [DIGIT FOUR FULL STOP]
1587 output[outputPos++] = '4';
1588 output[outputPos++] = '.';
1590 case '\u2477': // ⑷ [PARENTHESIZED DIGIT FOUR]
1591 output[outputPos++] = '(';
1592 output[outputPos++] = '4';
1593 output[outputPos++] = ')';
1595 case '\u2075': // ⁵ [SUPERSCRIPT FIVE]
1596 case '\u2085': // ₅ [SUBSCRIPT FIVE]
1597 case '\u2464': // ⑤ [CIRCLED DIGIT FIVE]
1598 case '\u24F9': // ⓹ [DOUBLE CIRCLED DIGIT FIVE]
1599 case '\u277A': // ❺ [DINGBAT NEGATIVE CIRCLED DIGIT FIVE]
1600 case '\u2784': // ➄ [DINGBAT CIRCLED SANS-SERIF DIGIT FIVE]
1601 case '\u278E': // ➎ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE]
1602 case '\uFF15': // 5 [FULLWIDTH DIGIT FIVE]
1603 output[outputPos++] = '5';
1605 case '\u248C': // ⒌ [DIGIT FIVE FULL STOP]
1606 output[outputPos++] = '5';
1607 output[outputPos++] = '.';
1609 case '\u2478': // ⑸ [PARENTHESIZED DIGIT FIVE]
1610 output[outputPos++] = '(';
1611 output[outputPos++] = '5';
1612 output[outputPos++] = ')';
1614 case '\u2076': // ⁶ [SUPERSCRIPT SIX]
1615 case '\u2086': // ₆ [SUBSCRIPT SIX]
1616 case '\u2465': // ⑥ [CIRCLED DIGIT SIX]
1617 case '\u24FA': // ⓺ [DOUBLE CIRCLED DIGIT SIX]
1618 case '\u277B': // ❻ [DINGBAT NEGATIVE CIRCLED DIGIT SIX]
1619 case '\u2785': // ➅ [DINGBAT CIRCLED SANS-SERIF DIGIT SIX]
1620 case '\u278F': // ➏ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX]
1621 case '\uFF16': // 6 [FULLWIDTH DIGIT SIX]
1622 output[outputPos++] = '6';
1624 case '\u248D': // ⒍ [DIGIT SIX FULL STOP]
1625 output[outputPos++] = '6';
1626 output[outputPos++] = '.';
1628 case '\u2479': // ⑹ [PARENTHESIZED DIGIT SIX]
1629 output[outputPos++] = '(';
1630 output[outputPos++] = '6';
1631 output[outputPos++] = ')';
1633 case '\u2077': // ⁷ [SUPERSCRIPT SEVEN]
1634 case '\u2087': // ₇ [SUBSCRIPT SEVEN]
1635 case '\u2466': // ⑦ [CIRCLED DIGIT SEVEN]
1636 case '\u24FB': // ⓻ [DOUBLE CIRCLED DIGIT SEVEN]
1637 case '\u277C': // ❼ [DINGBAT NEGATIVE CIRCLED DIGIT SEVEN]
1638 case '\u2786': // ➆ [DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN]
1639 case '\u2790': // ➐ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SEVEN]
1640 case '\uFF17': // 7 [FULLWIDTH DIGIT SEVEN]
1641 output[outputPos++] = '7';
1643 case '\u248E': // ⒎ [DIGIT SEVEN FULL STOP]
1644 output[outputPos++] = '7';
1645 output[outputPos++] = '.';
1647 case '\u247A': // ⑺ [PARENTHESIZED DIGIT SEVEN]
1648 output[outputPos++] = '(';
1649 output[outputPos++] = '7';
1650 output[outputPos++] = ')';
1652 case '\u2078': // ⁸ [SUPERSCRIPT EIGHT]
1653 case '\u2088': // ₈ [SUBSCRIPT EIGHT]
1654 case '\u2467': // ⑧ [CIRCLED DIGIT EIGHT]
1655 case '\u24FC': // ⓼ [DOUBLE CIRCLED DIGIT EIGHT]
1656 case '\u277D': // ❽ [DINGBAT NEGATIVE CIRCLED DIGIT EIGHT]
1657 case '\u2787': // ➇ [DINGBAT CIRCLED SANS-SERIF DIGIT EIGHT]
1658 case '\u2791': // ➑ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT]
1659 case '\uFF18': // 8 [FULLWIDTH DIGIT EIGHT]
1660 output[outputPos++] = '8';
1662 case '\u248F': // ⒏ [DIGIT EIGHT FULL STOP]
1663 output[outputPos++] = '8';
1664 output[outputPos++] = '.';
1666 case '\u247B': // ⑻ [PARENTHESIZED DIGIT EIGHT]
1667 output[outputPos++] = '(';
1668 output[outputPos++] = '8';
1669 output[outputPos++] = ')';
1671 case '\u2079': // ⁹ [SUPERSCRIPT NINE]
1672 case '\u2089': // ₉ [SUBSCRIPT NINE]
1673 case '\u2468': // ⑨ [CIRCLED DIGIT NINE]
1674 case '\u24FD': // ⓽ [DOUBLE CIRCLED DIGIT NINE]
1675 case '\u277E': // ❾ [DINGBAT NEGATIVE CIRCLED DIGIT NINE]
1676 case '\u2788': // ➈ [DINGBAT CIRCLED SANS-SERIF DIGIT NINE]
1677 case '\u2792': // ➒ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE]
1678 case '\uFF19': // 9 [FULLWIDTH DIGIT NINE]
1679 output[outputPos++] = '9';
1681 case '\u2490': // ⒐ [DIGIT NINE FULL STOP]
1682 output[outputPos++] = '9';
1683 output[outputPos++] = '.';
1685 case '\u247C': // ⑼ [PARENTHESIZED DIGIT NINE]
1686 output[outputPos++] = '(';
1687 output[outputPos++] = '9';
1688 output[outputPos++] = ')';
1690 case '\u2469': // ⑩ [CIRCLED NUMBER TEN]
1691 case '\u24FE': // ⓾ [DOUBLE CIRCLED NUMBER TEN]
1692 case '\u277F': // ❿ [DINGBAT NEGATIVE CIRCLED NUMBER TEN]
1693 case '\u2789': // ➉ [DINGBAT CIRCLED SANS-SERIF NUMBER TEN]
1694 case '\u2793': // ➓ [DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN]
1695 output[outputPos++] = '1';
1696 output[outputPos++] = '0';
1698 case '\u2491': // ⒑ [NUMBER TEN FULL STOP]
1699 output[outputPos++] = '1';
1700 output[outputPos++] = '0';
1701 output[outputPos++] = '.';
1703 case '\u247D': // ⑽ [PARENTHESIZED NUMBER TEN]
1704 output[outputPos++] = '(';
1705 output[outputPos++] = '1';
1706 output[outputPos++] = '0';
1707 output[outputPos++] = ')';
1709 case '\u246A': // ⑪ [CIRCLED NUMBER ELEVEN]
1710 case '\u24EB': // ⓫ [NEGATIVE CIRCLED NUMBER ELEVEN]
1711 output[outputPos++] = '1';
1712 output[outputPos++] = '1';
1714 case '\u2492': // ⒒ [NUMBER ELEVEN FULL STOP]
1715 output[outputPos++] = '1';
1716 output[outputPos++] = '1';
1717 output[outputPos++] = '.';
1719 case '\u247E': // ⑾ [PARENTHESIZED NUMBER ELEVEN]
1720 output[outputPos++] = '(';
1721 output[outputPos++] = '1';
1722 output[outputPos++] = '1';
1723 output[outputPos++] = ')';
1725 case '\u246B': // ⑫ [CIRCLED NUMBER TWELVE]
1726 case '\u24EC': // ⓬ [NEGATIVE CIRCLED NUMBER TWELVE]
1727 output[outputPos++] = '1';
1728 output[outputPos++] = '2';
1730 case '\u2493': // ⒓ [NUMBER TWELVE FULL STOP]
1731 output[outputPos++] = '1';
1732 output[outputPos++] = '2';
1733 output[outputPos++] = '.';
1735 case '\u247F': // ⑿ [PARENTHESIZED NUMBER TWELVE]
1736 output[outputPos++] = '(';
1737 output[outputPos++] = '1';
1738 output[outputPos++] = '2';
1739 output[outputPos++] = ')';
1741 case '\u246C': // ⑬ [CIRCLED NUMBER THIRTEEN]
1742 case '\u24ED': // ⓭ [NEGATIVE CIRCLED NUMBER THIRTEEN]
1743 output[outputPos++] = '1';
1744 output[outputPos++] = '3';
1746 case '\u2494': // ⒔ [NUMBER THIRTEEN FULL STOP]
1747 output[outputPos++] = '1';
1748 output[outputPos++] = '3';
1749 output[outputPos++] = '.';
1751 case '\u2480': // ⒀ [PARENTHESIZED NUMBER THIRTEEN]
1752 output[outputPos++] = '(';
1753 output[outputPos++] = '1';
1754 output[outputPos++] = '3';
1755 output[outputPos++] = ')';
1757 case '\u246D': // ⑭ [CIRCLED NUMBER FOURTEEN]
1758 case '\u24EE': // ⓮ [NEGATIVE CIRCLED NUMBER FOURTEEN]
1759 output[outputPos++] = '1';
1760 output[outputPos++] = '4';
1762 case '\u2495': // ⒕ [NUMBER FOURTEEN FULL STOP]
1763 output[outputPos++] = '1';
1764 output[outputPos++] = '4';
1765 output[outputPos++] = '.';
1767 case '\u2481': // ⒁ [PARENTHESIZED NUMBER FOURTEEN]
1768 output[outputPos++] = '(';
1769 output[outputPos++] = '1';
1770 output[outputPos++] = '4';
1771 output[outputPos++] = ')';
1773 case '\u246E': // ⑮ [CIRCLED NUMBER FIFTEEN]
1774 case '\u24EF': // ⓯ [NEGATIVE CIRCLED NUMBER FIFTEEN]
1775 output[outputPos++] = '1';
1776 output[outputPos++] = '5';
1778 case '\u2496': // ⒖ [NUMBER FIFTEEN FULL STOP]
1779 output[outputPos++] = '1';
1780 output[outputPos++] = '5';
1781 output[outputPos++] = '.';
1783 case '\u2482': // ⒂ [PARENTHESIZED NUMBER FIFTEEN]
1784 output[outputPos++] = '(';
1785 output[outputPos++] = '1';
1786 output[outputPos++] = '5';
1787 output[outputPos++] = ')';
1789 case '\u246F': // ⑯ [CIRCLED NUMBER SIXTEEN]
1790 case '\u24F0': // ⓰ [NEGATIVE CIRCLED NUMBER SIXTEEN]
1791 output[outputPos++] = '1';
1792 output[outputPos++] = '6';
1794 case '\u2497': // ⒗ [NUMBER SIXTEEN FULL STOP]
1795 output[outputPos++] = '1';
1796 output[outputPos++] = '6';
1797 output[outputPos++] = '.';
1799 case '\u2483': // ⒃ [PARENTHESIZED NUMBER SIXTEEN]
1800 output[outputPos++] = '(';
1801 output[outputPos++] = '1';
1802 output[outputPos++] = '6';
1803 output[outputPos++] = ')';
1805 case '\u2470': // ⑰ [CIRCLED NUMBER SEVENTEEN]
1806 case '\u24F1': // ⓱ [NEGATIVE CIRCLED NUMBER SEVENTEEN]
1807 output[outputPos++] = '1';
1808 output[outputPos++] = '7';
1810 case '\u2498': // ⒘ [NUMBER SEVENTEEN FULL STOP]
1811 output[outputPos++] = '1';
1812 output[outputPos++] = '7';
1813 output[outputPos++] = '.';
1815 case '\u2484': // ⒄ [PARENTHESIZED NUMBER SEVENTEEN]
1816 output[outputPos++] = '(';
1817 output[outputPos++] = '1';
1818 output[outputPos++] = '7';
1819 output[outputPos++] = ')';
1821 case '\u2471': // ⑱ [CIRCLED NUMBER EIGHTEEN]
1822 case '\u24F2': // ⓲ [NEGATIVE CIRCLED NUMBER EIGHTEEN]
1823 output[outputPos++] = '1';
1824 output[outputPos++] = '8';
1826 case '\u2499': // ⒙ [NUMBER EIGHTEEN FULL STOP]
1827 output[outputPos++] = '1';
1828 output[outputPos++] = '8';
1829 output[outputPos++] = '.';
1831 case '\u2485': // ⒅ [PARENTHESIZED NUMBER EIGHTEEN]
1832 output[outputPos++] = '(';
1833 output[outputPos++] = '1';
1834 output[outputPos++] = '8';
1835 output[outputPos++] = ')';
1837 case '\u2472': // ⑲ [CIRCLED NUMBER NINETEEN]
1838 case '\u24F3': // ⓳ [NEGATIVE CIRCLED NUMBER NINETEEN]
1839 output[outputPos++] = '1';
1840 output[outputPos++] = '9';
1842 case '\u249A': // ⒚ [NUMBER NINETEEN FULL STOP]
1843 output[outputPos++] = '1';
1844 output[outputPos++] = '9';
1845 output[outputPos++] = '.';
1847 case '\u2486': // ⒆ [PARENTHESIZED NUMBER NINETEEN]
1848 output[outputPos++] = '(';
1849 output[outputPos++] = '1';
1850 output[outputPos++] = '9';
1851 output[outputPos++] = ')';
1853 case '\u2473': // ⑳ [CIRCLED NUMBER TWENTY]
1854 case '\u24F4': // ⓴ [NEGATIVE CIRCLED NUMBER TWENTY]
1855 output[outputPos++] = '2';
1856 output[outputPos++] = '0';
1858 case '\u249B': // ⒛ [NUMBER TWENTY FULL STOP]
1859 output[outputPos++] = '2';
1860 output[outputPos++] = '0';
1861 output[outputPos++] = '.';
1863 case '\u2487': // ⒇ [PARENTHESIZED NUMBER TWENTY]
1864 output[outputPos++] = '(';
1865 output[outputPos++] = '2';
1866 output[outputPos++] = '0';
1867 output[outputPos++] = ')';
1869 case '\u00AB': // « [LEFT-POINTING DOUBLE ANGLE QUOTATION MARK]
1870 case '\u00BB': // » [RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK]
1871 case '\u201C': // “ [LEFT DOUBLE QUOTATION MARK]
1872 case '\u201D': // ” [RIGHT DOUBLE QUOTATION MARK]
1873 case '\u201E': // „ [DOUBLE LOW-9 QUOTATION MARK]
1874 case '\u2033': // ″ [DOUBLE PRIME]
1875 case '\u2036': // ‶ [REVERSED DOUBLE PRIME]
1876 case '\u275D': // ❝ [HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT]
1877 case '\u275E': // ❞ [HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT]
1878 case '\u276E': // ❮ [HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT]
1879 case '\u276F': // ❯ [HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT]
1880 case '\uFF02': // " [FULLWIDTH QUOTATION MARK]
1881 output[outputPos++] = '"';
1883 case '\u2018': // ‘ [LEFT SINGLE QUOTATION MARK]
1884 case '\u2019': // ’ [RIGHT SINGLE QUOTATION MARK]
1885 case '\u201A': // ‚ [SINGLE LOW-9 QUOTATION MARK]
1886 case '\u201B': // ‛ [SINGLE HIGH-REVERSED-9 QUOTATION MARK]
1887 case '\u2032': // ′ [PRIME]
1888 case '\u2035': // ‵ [REVERSED PRIME]
1889 case '\u2039': // ‹ [SINGLE LEFT-POINTING ANGLE QUOTATION MARK]
1890 case '\u203A': // › [SINGLE RIGHT-POINTING ANGLE QUOTATION MARK]
1891 case '\u275B': // ❛ [HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT]
1892 case '\u275C': // ❜ [HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT]
1893 case '\uFF07': // ' [FULLWIDTH APOSTROPHE]
1894 output[outputPos++] = '\'';
1896 case '\u2010': // ‐ [HYPHEN]
1897 case '\u2011': // ‑ [NON-BREAKING HYPHEN]
1898 case '\u2012': // ‒ [FIGURE DASH]
1899 case '\u2013': // – [EN DASH]
1900 case '\u2014': // — [EM DASH]
1901 case '\u207B': // ⁻ [SUPERSCRIPT MINUS]
1902 case '\u208B': // ₋ [SUBSCRIPT MINUS]
1903 case '\uFF0D': // - [FULLWIDTH HYPHEN-MINUS]
1904 output[outputPos++] = '-';
1906 case '\u2045': // ⁅ [LEFT SQUARE BRACKET WITH QUILL]
1907 case '\u2772': // ❲ [LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT]
1908 case '\uFF3B': // [ [FULLWIDTH LEFT SQUARE BRACKET]
1909 output[outputPos++] = '[';
1911 case '\u2046': // ⁆ [RIGHT SQUARE BRACKET WITH QUILL]
1912 case '\u2773': // ❳ [LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT]
1913 case '\uFF3D': // ] [FULLWIDTH RIGHT SQUARE BRACKET]
1914 output[outputPos++] = ']';
1916 case '\u207D': // ⁽ [SUPERSCRIPT LEFT PARENTHESIS]
1917 case '\u208D': // ₍ [SUBSCRIPT LEFT PARENTHESIS]
1918 case '\u2768': // ❨ [MEDIUM LEFT PARENTHESIS ORNAMENT]
1919 case '\u276A': // ❪ [MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT]
1920 case '\uFF08': // ( [FULLWIDTH LEFT PARENTHESIS]
1921 output[outputPos++] = '(';
1923 case '\u2E28': // ⸨ [LEFT DOUBLE PARENTHESIS]
1924 output[outputPos++] = '(';
1925 output[outputPos++] = '(';
1927 case '\u207E': // ⁾ [SUPERSCRIPT RIGHT PARENTHESIS]
1928 case '\u208E': // ₎ [SUBSCRIPT RIGHT PARENTHESIS]
1929 case '\u2769': // ❩ [MEDIUM RIGHT PARENTHESIS ORNAMENT]
1930 case '\u276B': // ❫ [MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT]
1931 case '\uFF09': // ) [FULLWIDTH RIGHT PARENTHESIS]
1932 output[outputPos++] = ')';
1934 case '\u2E29': // ⸩ [RIGHT DOUBLE PARENTHESIS]
1935 output[outputPos++] = ')';
1936 output[outputPos++] = ')';
1938 case '\u276C': // ❬ [MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT]
1939 case '\u2770': // ❰ [HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT]
1940 case '\uFF1C': // < [FULLWIDTH LESS-THAN SIGN]
1941 output[outputPos++] = '<';
1943 case '\u276D': // ❭ [MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT]
1944 case '\u2771': // ❱ [HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT]
1945 case '\uFF1E': // > [FULLWIDTH GREATER-THAN SIGN]
1946 output[outputPos++] = '>';
1948 case '\u2774': // ❴ [MEDIUM LEFT CURLY BRACKET ORNAMENT]
1949 case '\uFF5B': // { [FULLWIDTH LEFT CURLY BRACKET]
1950 output[outputPos++] = '{';
1952 case '\u2775': // ❵ [MEDIUM RIGHT CURLY BRACKET ORNAMENT]
1953 case '\uFF5D': // } [FULLWIDTH RIGHT CURLY BRACKET]
1954 output[outputPos++] = '}';
1956 case '\u207A': // ⁺ [SUPERSCRIPT PLUS SIGN]
1957 case '\u208A': // ₊ [SUBSCRIPT PLUS SIGN]
1958 case '\uFF0B': // + [FULLWIDTH PLUS SIGN]
1959 output[outputPos++] = '+';
1961 case '\u207C': // ⁼ [SUPERSCRIPT EQUALS SIGN]
1962 case '\u208C': // ₌ [SUBSCRIPT EQUALS SIGN]
1963 case '\uFF1D': // = [FULLWIDTH EQUALS SIGN]
1964 output[outputPos++] = '=';
1966 case '\uFF01': // ! [FULLWIDTH EXCLAMATION MARK]
1967 output[outputPos++] = '!';
1969 case '\u203C': // ‼ [DOUBLE EXCLAMATION MARK]
1970 output[outputPos++] = '!';
1971 output[outputPos++] = '!';
1973 case '\u2049': // ⁉ [EXCLAMATION QUESTION MARK]
1974 output[outputPos++] = '!';
1975 output[outputPos++] = '?';
1977 case '\uFF03': // # [FULLWIDTH NUMBER SIGN]
1978 output[outputPos++] = '#';
1980 case '\uFF04': // $ [FULLWIDTH DOLLAR SIGN]
1981 output[outputPos++] = '$';
1983 case '\u2052': // ⁒ [COMMERCIAL MINUS SIGN]
1984 case '\uFF05': // % [FULLWIDTH PERCENT SIGN]
1985 output[outputPos++] = '%';
1987 case '\uFF06': // & [FULLWIDTH AMPERSAND]
1988 output[outputPos++] = '&';
1990 case '\u204E': // ⁎ [LOW ASTERISK]
1991 case '\uFF0A': // * [FULLWIDTH ASTERISK]
1992 output[outputPos++] = '*';
1994 case '\uFF0C': // , [FULLWIDTH COMMA]
1995 output[outputPos++] = ',';
1997 case '\uFF0E': // . [FULLWIDTH FULL STOP]
1998 output[outputPos++] = '.';
2000 case '\u2044': // ⁄ [FRACTION SLASH]
2001 case '\uFF0F': // / [FULLWIDTH SOLIDUS]
2002 output[outputPos++] = '/';
2004 case '\uFF1A': // : [FULLWIDTH COLON]
2005 output[outputPos++] = ':';
2007 case '\u204F': // ⁏ [REVERSED SEMICOLON]
2008 case '\uFF1B': // ; [FULLWIDTH SEMICOLON]
2009 output[outputPos++] = ';';
2011 case '\uFF1F': // ? [FULLWIDTH QUESTION MARK]
2012 output[outputPos++] = '?';
2014 case '\u2047': // ⁇ [DOUBLE QUESTION MARK]
2015 output[outputPos++] = '?';
2016 output[outputPos++] = '?';
2018 case '\u2048': // ⁈ [QUESTION EXCLAMATION MARK]
2019 output[outputPos++] = '?';
2020 output[outputPos++] = '!';
2022 case '\uFF20': // @ [FULLWIDTH COMMERCIAL AT]
2023 output[outputPos++] = '@';
2025 case '\uFF3C': // \ [FULLWIDTH REVERSE SOLIDUS]
2026 output[outputPos++] = '\\';
2028 case '\u2038': // ‸ [CARET]
2029 case '\uFF3E': // ^ [FULLWIDTH CIRCUMFLEX ACCENT]
2030 output[outputPos++] = '^';
2032 case '\uFF3F': // _ [FULLWIDTH LOW LINE]
2033 output[outputPos++] = '_';
2035 case '\u2053': // ⁓ [SWUNG DASH]
2036 case '\uFF5E': // ~ [FULLWIDTH TILDE]
2037 output[outputPos++] = '~';
2040 output[outputPos++] = c;