Save This Page
Home » lucene-3.0.1-src » org.apache » lucene » analysis » [javadoc | source]
    1   package org.apache.lucene.analysis;
    2   
    3   import java.io.IOException;
    4   
    5   import org.apache.lucene.analysis.tokenattributes.TermAttribute;
    6   import org.apache.lucene.util.ArrayUtil;
    7   
    8   /**
    9    * Licensed to the Apache Software Foundation (ASF) under one or more
   10    * contributor license agreements.  See the NOTICE file distributed with
   11    * this work for additional information regarding copyright ownership.
   12    * The ASF licenses this file to You under the Apache License, Version 2.0
   13    * (the "License"); you may not use this file except in compliance with
   14    * the License.  You may obtain a copy of the License at
   15    *
   16    *     http://www.apache.org/licenses/LICENSE-2.0
   17    *
   18    * Unless required by applicable law or agreed to in writing, software
   19    * distributed under the License is distributed on an "AS IS" BASIS,
   20    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   21    * See the License for the specific language governing permissions and
   22    * limitations under the License.
   23    */
   24   
   25   /**
   26    * This class converts alphabetic, numeric, and symbolic Unicode characters
   27    * which are not in the first 128 ASCII characters (the "Basic Latin" Unicode
   28    * block) into their ASCII equivalents, if one exists.
   29    *
   30    * Characters from the following Unicode blocks are converted; however, only
   31    * those characters with reasonable ASCII alternatives are converted:
   32    *
   33    * <ul>
   34    *   <li>C1 Controls and Latin-1 Supplement: <a href="http://www.unicode.org/charts/PDF/U0080.pdf">http://www.unicode.org/charts/PDF/U0080.pdf</a>
   35    *   <li>Latin Extended-A: <a href="http://www.unicode.org/charts/PDF/U0100.pdf">http://www.unicode.org/charts/PDF/U0100.pdf</a>
   36    *   <li>Latin Extended-B: <a href="http://www.unicode.org/charts/PDF/U0180.pdf">http://www.unicode.org/charts/PDF/U0180.pdf</a>
   37    *   <li>Latin Extended Additional: <a href="http://www.unicode.org/charts/PDF/U1E00.pdf">http://www.unicode.org/charts/PDF/U1E00.pdf</a>
   38    *   <li>Latin Extended-C: <a href="http://www.unicode.org/charts/PDF/U2C60.pdf">http://www.unicode.org/charts/PDF/U2C60.pdf</a>
   39    *   <li>Latin Extended-D: <a href="http://www.unicode.org/charts/PDF/UA720.pdf">http://www.unicode.org/charts/PDF/UA720.pdf</a>
   40    *   <li>IPA Extensions: <a href="http://www.unicode.org/charts/PDF/U0250.pdf">http://www.unicode.org/charts/PDF/U0250.pdf</a>
   41    *   <li>Phonetic Extensions: <a href="http://www.unicode.org/charts/PDF/U1D00.pdf">http://www.unicode.org/charts/PDF/U1D00.pdf</a>
   42    *   <li>Phonetic Extensions Supplement: <a href="http://www.unicode.org/charts/PDF/U1D80.pdf">http://www.unicode.org/charts/PDF/U1D80.pdf</a>
   43    *   <li>General Punctuation: <a href="http://www.unicode.org/charts/PDF/U2000.pdf">http://www.unicode.org/charts/PDF/U2000.pdf</a>
   44    *   <li>Superscripts and Subscripts: <a href="http://www.unicode.org/charts/PDF/U2070.pdf">http://www.unicode.org/charts/PDF/U2070.pdf</a>
   45    *   <li>Enclosed Alphanumerics: <a href="http://www.unicode.org/charts/PDF/U2460.pdf">http://www.unicode.org/charts/PDF/U2460.pdf</a>
   46    *   <li>Dingbats: <a href="http://www.unicode.org/charts/PDF/U2700.pdf">http://www.unicode.org/charts/PDF/U2700.pdf</a>
   47    *   <li>Supplemental Punctuation: <a href="http://www.unicode.org/charts/PDF/U2E00.pdf">http://www.unicode.org/charts/PDF/U2E00.pdf</a>
   48    *   <li>Alphabetic Presentation Forms: <a href="http://www.unicode.org/charts/PDF/UFB00.pdf">http://www.unicode.org/charts/PDF/UFB00.pdf</a>
   49    *   <li>Halfwidth and Fullwidth Forms: <a href="http://www.unicode.org/charts/PDF/UFF00.pdf">http://www.unicode.org/charts/PDF/UFF00.pdf</a>
   50    * </ul>
   51    *  
   52    * See: <a href="http://en.wikipedia.org/wiki/Latin_characters_in_Unicode">http://en.wikipedia.org/wiki/Latin_characters_in_Unicode</a>
   53    *
   54    * The set of character conversions supported by this class is a superset of
   55    * those supported by Lucene's {@link ISOLatin1AccentFilter} which strips
   56    * accents from Latin1 characters.  For example, '&agrave;' will be replaced by
   57    * 'a'.
   58    */
   59   public final class ASCIIFoldingFilter extends TokenFilter {
   /**
    * Builds a folding filter on top of the given token stream.
    *
    * The stream is handed to the superclass constructor, and the
    * {@link TermAttribute} returned by {@code addAttribute} is kept in
    * {@code termAtt} so that {@link #incrementToken()} can read and
    * rewrite the current term.
    *
    * @param input the token stream this filter wraps
    */
   public ASCIIFoldingFilter(TokenStream input) {
     super(input);
     this.termAtt = addAttribute(TermAttribute.class);
   }
   65   
   66     private char[] output = new char[512]; // scratch buffer that foldToASCII writes into; replaced there by a larger array when shorter than 4 * length
   67     private int outputPos; // write index into output: reset to 0 at the start of foldToASCII, then used as the folded length by incrementToken
   68     private TermAttribute termAtt; // assigned in the constructor via addAttribute; incrementToken reads the term from it and stores the folded text back
   69   
   70     @Override
   71     public boolean incrementToken() throws IOException {
   72       if (input.incrementToken()) {
   73         final char[] buffer = termAtt.termBuffer();
   74         final int length = termAtt.termLength();
   75   
   76         // If no characters actually require rewriting then we
   77         // just return token as-is:
   78         for(int i = 0 ; i < length ; ++i) {
   79           final char c = buffer[i];
   80           if (c >= '\u0080')
   81           {
   82             foldToASCII(buffer, length);
   83             termAtt.setTermBuffer(output, 0, outputPos);
   84             break;
   85           }
   86         }
   87         return true;
   88       } else {
   89         return false;
   90       }
   91     }
   92   
   93     /**
   94      * Converts characters above ASCII to their ASCII equivalents.  For example,
   95      * accents are removed from accented characters.
   96      * @param input The string to fold
   97      * @param length The number of characters in the input string
   98      */
   99     public void foldToASCII(char[] input, int length)
  100     {
  101       // Worst-case length required:
  102       final int maxSizeNeeded = 4 * length;
  103       if (output.length < maxSizeNeeded) {
  104         output = new char[ArrayUtil.getNextSize(maxSizeNeeded)];
  105       }
  106   
  107       outputPos = 0;
  108   
  109       for (int pos = 0 ; pos < length ; ++pos) {
  110         final char c = input[pos];
  111   
  112         // Quick test: if it's not in range then just keep current character
  113         if (c < '\u0080') {
  114           output[outputPos++] = c;
  115         } else {
  116           switch (c) {
  117             case '\u00C0': //   [LATIN CAPITAL LETTER A WITH GRAVE]
  118             case '\u00C1': //   [LATIN CAPITAL LETTER A WITH ACUTE]
  119             case '\u00C2': //   [LATIN CAPITAL LETTER A WITH CIRCUMFLEX]
  120             case '\u00C3': //   [LATIN CAPITAL LETTER A WITH TILDE]
  121             case '\u00C4': //   [LATIN CAPITAL LETTER A WITH DIAERESIS]
  122             case '\u00C5': //   [LATIN CAPITAL LETTER A WITH RING ABOVE]
  123             case '\u0100': // ?  [LATIN CAPITAL LETTER A WITH MACRON]
  124             case '\u0102': // ?  [LATIN CAPITAL LETTER A WITH BREVE]
  125             case '\u0104': // ?  [LATIN CAPITAL LETTER A WITH OGONEK]
  126             case '\u018F': // ?  http://en.wikipedia.org/wiki/Schwa  [LATIN CAPITAL LETTER SCHWA]
  127             case '\u01CD': // ?  [LATIN CAPITAL LETTER A WITH CARON]
  128             case '\u01DE': // ?  [LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON]
  129             case '\u01E0': // ?  [LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON]
  130             case '\u01FA': // ?  [LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE]
  131             case '\u0200': // ?  [LATIN CAPITAL LETTER A WITH DOUBLE GRAVE]
  132             case '\u0202': // ?  [LATIN CAPITAL LETTER A WITH INVERTED BREVE]
  133             case '\u0226': // ?  [LATIN CAPITAL LETTER A WITH DOT ABOVE]
  134             case '\u023A': // ?  [LATIN CAPITAL LETTER A WITH STROKE]
  135             case '\u1D00': // ?  [LATIN LETTER SMALL CAPITAL A]
  136             case '\u1E00': // ?  [LATIN CAPITAL LETTER A WITH RING BELOW]
  137             case '\u1EA0': // ?  [LATIN CAPITAL LETTER A WITH DOT BELOW]
  138             case '\u1EA2': // ?  [LATIN CAPITAL LETTER A WITH HOOK ABOVE]
  139             case '\u1EA4': // ?  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE]
  140             case '\u1EA6': // ?  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE]
  141             case '\u1EA8': // ?  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE]
  142             case '\u1EAA': // ?  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE]
  143             case '\u1EAC': // ?  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW]
  144             case '\u1EAE': // ?  [LATIN CAPITAL LETTER A WITH BREVE AND ACUTE]
  145             case '\u1EB0': // ?  [LATIN CAPITAL LETTER A WITH BREVE AND GRAVE]
  146             case '\u1EB2': // ?  [LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE]
  147             case '\u1EB4': // ?  [LATIN CAPITAL LETTER A WITH BREVE AND TILDE]
  148             case '\u1EB6': // ?  [LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW]
  149             case '\u24B6': // ?  [CIRCLED LATIN CAPITAL LETTER A]
  150             case '\uFF21': // ?  [FULLWIDTH LATIN CAPITAL LETTER A]
  151               output[outputPos++] = 'A';
  152               break;
  153             case '\u00E0': //   [LATIN SMALL LETTER A WITH GRAVE]
  154             case '\u00E1': //   [LATIN SMALL LETTER A WITH ACUTE]
  155             case '\u00E2': //   [LATIN SMALL LETTER A WITH CIRCUMFLEX]
  156             case '\u00E3': //   [LATIN SMALL LETTER A WITH TILDE]
  157             case '\u00E4': //   [LATIN SMALL LETTER A WITH DIAERESIS]
  158             case '\u00E5': //   [LATIN SMALL LETTER A WITH RING ABOVE]
  159             case '\u0101': // ?  [LATIN SMALL LETTER A WITH MACRON]
  160             case '\u0103': // ?  [LATIN SMALL LETTER A WITH BREVE]
  161             case '\u0105': // ?  [LATIN SMALL LETTER A WITH OGONEK]
  162             case '\u01CE': // ?  [LATIN SMALL LETTER A WITH CARON]
  163             case '\u01DF': // ?  [LATIN SMALL LETTER A WITH DIAERESIS AND MACRON]
  164             case '\u01E1': // ?  [LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON]
  165             case '\u01FB': // ?  [LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE]
  166             case '\u0201': // ?  [LATIN SMALL LETTER A WITH DOUBLE GRAVE]
  167             case '\u0203': // ?  [LATIN SMALL LETTER A WITH INVERTED BREVE]
  168             case '\u0227': // ?  [LATIN SMALL LETTER A WITH DOT ABOVE]
  169             case '\u0250': // ?  [LATIN SMALL LETTER TURNED A]
  170             case '\u0259': // ?  [LATIN SMALL LETTER SCHWA]
  171             case '\u025A': // ?  [LATIN SMALL LETTER SCHWA WITH HOOK]
  172             case '\u1D8F': // ?  [LATIN SMALL LETTER A WITH RETROFLEX HOOK]
  173             case '\u1D95': // ?  [LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK]
  174             case '\u1E01': // ?  [LATIN SMALL LETTER A WITH RING BELOW]
  175             case '\u1E9A': // ?  [LATIN SMALL LETTER A WITH RIGHT HALF RING]
  176             case '\u1EA1': // ?  [LATIN SMALL LETTER A WITH DOT BELOW]
  177             case '\u1EA3': // ?  [LATIN SMALL LETTER A WITH HOOK ABOVE]
  178             case '\u1EA5': // ?  [LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE]
  179             case '\u1EA7': // ?  [LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE]
  180             case '\u1EA9': // ?  [LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE]
  181             case '\u1EAB': // ?  [LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE]
  182             case '\u1EAD': // ?  [LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW]
  183             case '\u1EAF': // ?  [LATIN SMALL LETTER A WITH BREVE AND ACUTE]
  184             case '\u1EB1': // ?  [LATIN SMALL LETTER A WITH BREVE AND GRAVE]
  185             case '\u1EB3': // ?  [LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE]
  186             case '\u1EB5': // ?  [LATIN SMALL LETTER A WITH BREVE AND TILDE]
  187             case '\u1EB7': // ?  [LATIN SMALL LETTER A WITH BREVE AND DOT BELOW]
  188             case '\u2090': // ?  [LATIN SUBSCRIPT SMALL LETTER A]
  189             case '\u2094': // ?  [LATIN SUBSCRIPT SMALL LETTER SCHWA]
  190             case '\u24D0': // ?  [CIRCLED LATIN SMALL LETTER A]
  191             case '\u2C65': // ?  [LATIN SMALL LETTER A WITH STROKE]
  192             case '\u2C6F': // ?  [LATIN CAPITAL LETTER TURNED A]
  193             case '\uFF41': // ?  [FULLWIDTH LATIN SMALL LETTER A]
  194               output[outputPos++] = 'a';
  195               break;
  196             case '\uA732': // ?  [LATIN CAPITAL LETTER AA]
  197               output[outputPos++] = 'A';
  198               output[outputPos++] = 'A';
  199               break;
  200             case '\u00C6': //   [LATIN CAPITAL LETTER AE]
  201             case '\u01E2': // ?  [LATIN CAPITAL LETTER AE WITH MACRON]
  202             case '\u01FC': // ?  [LATIN CAPITAL LETTER AE WITH ACUTE]
  203             case '\u1D01': // ?  [LATIN LETTER SMALL CAPITAL AE]
  204               output[outputPos++] = 'A';
  205               output[outputPos++] = 'E';
  206               break;
  207             case '\uA734': // ?  [LATIN CAPITAL LETTER AO]
  208               output[outputPos++] = 'A';                    
  209               output[outputPos++] = 'O';
  210               break;
  211             case '\uA736': // ?  [LATIN CAPITAL LETTER AU]
  212               output[outputPos++] = 'A';
  213               output[outputPos++] = 'U';
  214               break;
  215             case '\uA738': // ?  [LATIN CAPITAL LETTER AV]
  216             case '\uA73A': // ?  [LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR]
  217               output[outputPos++] = 'A';
  218               output[outputPos++] = 'V';
  219               break;
  220             case '\uA73C': // ?  [LATIN CAPITAL LETTER AY]
  221               output[outputPos++] = 'A';
  222               output[outputPos++] = 'Y';
  223               break;
  224             case '\u249C': // ?  [PARENTHESIZED LATIN SMALL LETTER A]
  225               output[outputPos++] = '(';
  226               output[outputPos++] = 'a';
  227               output[outputPos++] = ')';
  228               break;
  229             case '\uA733': // ?  [LATIN SMALL LETTER AA]
  230               output[outputPos++] = 'a';
  231               output[outputPos++] = 'a';
  232               break;
  233             case '\u00E6': //   [LATIN SMALL LETTER AE]
  234             case '\u01E3': // ?  [LATIN SMALL LETTER AE WITH MACRON]
  235             case '\u01FD': // ?  [LATIN SMALL LETTER AE WITH ACUTE]
  236             case '\u1D02': // ?  [LATIN SMALL LETTER TURNED AE]
  237               output[outputPos++] = 'a';
  238               output[outputPos++] = 'e';
  239               break;
  240             case '\uA735': // ?  [LATIN SMALL LETTER AO]
  241               output[outputPos++] = 'a';
  242               output[outputPos++] = 'o';
  243               break;
  244             case '\uA737': // ?  [LATIN SMALL LETTER AU]
  245               output[outputPos++] = 'a';
  246               output[outputPos++] = 'u';
  247               break;
  248             case '\uA739': // ?  [LATIN SMALL LETTER AV]
  249             case '\uA73B': // ?  [LATIN SMALL LETTER AV WITH HORIZONTAL BAR]
  250               output[outputPos++] = 'a';
  251               output[outputPos++] = 'v';
  252               break;
  253             case '\uA73D': // ?  [LATIN SMALL LETTER AY]
  254               output[outputPos++] = 'a';
  255               output[outputPos++] = 'y';
  256               break;
  257             case '\u0181': // ?  [LATIN CAPITAL LETTER B WITH HOOK]
  258             case '\u0182': // ?  [LATIN CAPITAL LETTER B WITH TOPBAR]
  259             case '\u0243': // ?  [LATIN CAPITAL LETTER B WITH STROKE]
  260             case '\u0299': // ?  [LATIN LETTER SMALL CAPITAL B]
  261             case '\u1D03': // ?  [LATIN LETTER SMALL CAPITAL BARRED B]
  262             case '\u1E02': // ?  [LATIN CAPITAL LETTER B WITH DOT ABOVE]
  263             case '\u1E04': // ?  [LATIN CAPITAL LETTER B WITH DOT BELOW]
  264             case '\u1E06': // ?  [LATIN CAPITAL LETTER B WITH LINE BELOW]
  265             case '\u24B7': // ?  [CIRCLED LATIN CAPITAL LETTER B]
  266             case '\uFF22': // ?  [FULLWIDTH LATIN CAPITAL LETTER B]
  267               output[outputPos++] = 'B';
  268               break;
  269             case '\u0180': // ?  [LATIN SMALL LETTER B WITH STROKE]
  270             case '\u0183': // ?  [LATIN SMALL LETTER B WITH TOPBAR]
  271             case '\u0253': // ?  [LATIN SMALL LETTER B WITH HOOK]
  272             case '\u1D6C': // ?  [LATIN SMALL LETTER B WITH MIDDLE TILDE]
  273             case '\u1D80': // ?  [LATIN SMALL LETTER B WITH PALATAL HOOK]
  274             case '\u1E03': // ?  [LATIN SMALL LETTER B WITH DOT ABOVE]
  275             case '\u1E05': // ?  [LATIN SMALL LETTER B WITH DOT BELOW]
  276             case '\u1E07': // ?  [LATIN SMALL LETTER B WITH LINE BELOW]
  277             case '\u24D1': // ?  [CIRCLED LATIN SMALL LETTER B]
  278             case '\uFF42': // ?  [FULLWIDTH LATIN SMALL LETTER B]
  279               output[outputPos++] = 'b';
  280               break;
  281             case '\u249D': // ?  [PARENTHESIZED LATIN SMALL LETTER B]
  282               output[outputPos++] = '(';                    
  283               output[outputPos++] = 'b';
  284               output[outputPos++] = ')';
  285               break;
  286             case '\u00C7': //   [LATIN CAPITAL LETTER C WITH CEDILLA]
  287             case '\u0106': // ?  [LATIN CAPITAL LETTER C WITH ACUTE]
  288             case '\u0108': // ?  [LATIN CAPITAL LETTER C WITH CIRCUMFLEX]
  289             case '\u010A': // ?  [LATIN CAPITAL LETTER C WITH DOT ABOVE]
  290             case '\u010C': // ?  [LATIN CAPITAL LETTER C WITH CARON]
  291             case '\u0187': // ?  [LATIN CAPITAL LETTER C WITH HOOK]
  292             case '\u023B': // ?  [LATIN CAPITAL LETTER C WITH STROKE]
  293             case '\u0297': // ?  [LATIN LETTER STRETCHED C]
  294             case '\u1D04': // ?  [LATIN LETTER SMALL CAPITAL C]
  295             case '\u1E08': // ?  [LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE]
  296             case '\u24B8': // ?  [CIRCLED LATIN CAPITAL LETTER C]
  297             case '\uFF23': // ?  [FULLWIDTH LATIN CAPITAL LETTER C]
  298               output[outputPos++] = 'C';
  299               break;
  300             case '\u00E7': //   [LATIN SMALL LETTER C WITH CEDILLA]
  301             case '\u0107': // ?  [LATIN SMALL LETTER C WITH ACUTE]
  302             case '\u0109': // ?  [LATIN SMALL LETTER C WITH CIRCUMFLEX]
  303             case '\u010B': // ?  [LATIN SMALL LETTER C WITH DOT ABOVE]
  304             case '\u010D': // ?  [LATIN SMALL LETTER C WITH CARON]
  305             case '\u0188': // ?  [LATIN SMALL LETTER C WITH HOOK]
  306             case '\u023C': // ?  [LATIN SMALL LETTER C WITH STROKE]
  307             case '\u0255': // ?  [LATIN SMALL LETTER C WITH CURL]
  308             case '\u1E09': // ?  [LATIN SMALL LETTER C WITH CEDILLA AND ACUTE]
  309             case '\u2184': // ?  [LATIN SMALL LETTER REVERSED C]
  310             case '\u24D2': // ?  [CIRCLED LATIN SMALL LETTER C]
  311             case '\uA73E': // ?  [LATIN CAPITAL LETTER REVERSED C WITH DOT]
  312             case '\uA73F': // ?  [LATIN SMALL LETTER REVERSED C WITH DOT]
  313             case '\uFF43': // ?  [FULLWIDTH LATIN SMALL LETTER C]
  314               output[outputPos++] = 'c';
  315               break;
  316             case '\u249E': // ?  [PARENTHESIZED LATIN SMALL LETTER C]
  317               output[outputPos++] = '(';
  318               output[outputPos++] = 'c';
  319               output[outputPos++] = ')';
  320               break;
  321             case '\u00D0': //   [LATIN CAPITAL LETTER ETH]
  322             case '\u010E': // ?  [LATIN CAPITAL LETTER D WITH CARON]
  323             case '\u0110': // ?  [LATIN CAPITAL LETTER D WITH STROKE]
  324             case '\u0189': // ?  [LATIN CAPITAL LETTER AFRICAN D]
  325             case '\u018A': // ?  [LATIN CAPITAL LETTER D WITH HOOK]
  326             case '\u018B': // ?  [LATIN CAPITAL LETTER D WITH TOPBAR]
  327             case '\u1D05': // ?  [LATIN LETTER SMALL CAPITAL D]
  328             case '\u1D06': // ?  [LATIN LETTER SMALL CAPITAL ETH]
  329             case '\u1E0A': // ?  [LATIN CAPITAL LETTER D WITH DOT ABOVE]
  330             case '\u1E0C': // ?  [LATIN CAPITAL LETTER D WITH DOT BELOW]
  331             case '\u1E0E': // ?  [LATIN CAPITAL LETTER D WITH LINE BELOW]
  332             case '\u1E10': // ?  [LATIN CAPITAL LETTER D WITH CEDILLA]
  333             case '\u1E12': // ?  [LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW]
  334             case '\u24B9': // ?  [CIRCLED LATIN CAPITAL LETTER D]
  335             case '\uA779': // ?  [LATIN CAPITAL LETTER INSULAR D]
  336             case '\uFF24': // ?  [FULLWIDTH LATIN CAPITAL LETTER D]
  337               output[outputPos++] = 'D';
  338               break;
  339             case '\u00F0': //   [LATIN SMALL LETTER ETH]
  340             case '\u010F': // ?  [LATIN SMALL LETTER D WITH CARON]
  341             case '\u0111': // ?  [LATIN SMALL LETTER D WITH STROKE]
  342             case '\u018C': // ?  [LATIN SMALL LETTER D WITH TOPBAR]
  343             case '\u0221': // ?  [LATIN SMALL LETTER D WITH CURL]
  344             case '\u0256': // ?  [LATIN SMALL LETTER D WITH TAIL]
  345             case '\u0257': // ?  [LATIN SMALL LETTER D WITH HOOK]
  346             case '\u1D6D': // ?  [LATIN SMALL LETTER D WITH MIDDLE TILDE]
  347             case '\u1D81': // ?  [LATIN SMALL LETTER D WITH PALATAL HOOK]
  348             case '\u1D91': // ?  [LATIN SMALL LETTER D WITH HOOK AND TAIL]
  349             case '\u1E0B': // ?  [LATIN SMALL LETTER D WITH DOT ABOVE]
  350             case '\u1E0D': // ?  [LATIN SMALL LETTER D WITH DOT BELOW]
  351             case '\u1E0F': // ?  [LATIN SMALL LETTER D WITH LINE BELOW]
  352             case '\u1E11': // ?  [LATIN SMALL LETTER D WITH CEDILLA]
  353             case '\u1E13': // ?  [LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW]
  354             case '\u24D3': // ?  [CIRCLED LATIN SMALL LETTER D]
  355             case '\uA77A': // ?  [LATIN SMALL LETTER INSULAR D]
  356             case '\uFF44': // ?  [FULLWIDTH LATIN SMALL LETTER D]
  357               output[outputPos++] = 'd';
  358               break;
  359             case '\u01C4': // ?  [LATIN CAPITAL LETTER DZ WITH CARON]
  360             case '\u01F1': // ?  [LATIN CAPITAL LETTER DZ]
  361               output[outputPos++] = 'D';
  362               output[outputPos++] = 'Z';
  363               break;
  364             case '\u01C5': // ?  [LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON]
  365             case '\u01F2': // ?  [LATIN CAPITAL LETTER D WITH SMALL LETTER Z]
  366               output[outputPos++] = 'D';
  367               output[outputPos++] = 'z';
  368               break;
  369             case '\u249F': // ?  [PARENTHESIZED LATIN SMALL LETTER D]
  370               output[outputPos++] = '(';
  371               output[outputPos++] = 'd';
  372               output[outputPos++] = ')';
  373               break;
  374             case '\u0238': // ?  [LATIN SMALL LETTER DB DIGRAPH]
  375               output[outputPos++] = 'd';
  376               output[outputPos++] = 'b';
  377               break;
  378             case '\u01C6': // ?  [LATIN SMALL LETTER DZ WITH CARON]
  379             case '\u01F3': // ?  [LATIN SMALL LETTER DZ]
  380             case '\u02A3': // ?  [LATIN SMALL LETTER DZ DIGRAPH]
  381             case '\u02A5': // ?  [LATIN SMALL LETTER DZ DIGRAPH WITH CURL]
  382               output[outputPos++] = 'd';
  383               output[outputPos++] = 'z';
  384               break;
  385             case '\u00C8': //   [LATIN CAPITAL LETTER E WITH GRAVE]
  386             case '\u00C9': //   [LATIN CAPITAL LETTER E WITH ACUTE]
  387             case '\u00CA': //   [LATIN CAPITAL LETTER E WITH CIRCUMFLEX]
  388             case '\u00CB': //   [LATIN CAPITAL LETTER E WITH DIAERESIS]
  389             case '\u0112': // ?  [LATIN CAPITAL LETTER E WITH MACRON]
  390             case '\u0114': // ?  [LATIN CAPITAL LETTER E WITH BREVE]
  391             case '\u0116': // ?  [LATIN CAPITAL LETTER E WITH DOT ABOVE]
  392             case '\u0118': // ?  [LATIN CAPITAL LETTER E WITH OGONEK]
  393             case '\u011A': // ?  [LATIN CAPITAL LETTER E WITH CARON]
  394             case '\u018E': // ?  [LATIN CAPITAL LETTER REVERSED E]
  395             case '\u0190': // ?  [LATIN CAPITAL LETTER OPEN E]
  396             case '\u0204': // ?  [LATIN CAPITAL LETTER E WITH DOUBLE GRAVE]
  397             case '\u0206': // ?  [LATIN CAPITAL LETTER E WITH INVERTED BREVE]
  398             case '\u0228': // ?  [LATIN CAPITAL LETTER E WITH CEDILLA]
  399             case '\u0246': // ?  [LATIN CAPITAL LETTER E WITH STROKE]
  400             case '\u1D07': // ?  [LATIN LETTER SMALL CAPITAL E]
  401             case '\u1E14': // ?  [LATIN CAPITAL LETTER E WITH MACRON AND GRAVE]
  402             case '\u1E16': // ?  [LATIN CAPITAL LETTER E WITH MACRON AND ACUTE]
  403             case '\u1E18': // ?  [LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW]
  404             case '\u1E1A': // ?  [LATIN CAPITAL LETTER E WITH TILDE BELOW]
  405             case '\u1E1C': // ?  [LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE]
  406             case '\u1EB8': // ?  [LATIN CAPITAL LETTER E WITH DOT BELOW]
  407             case '\u1EBA': // ?  [LATIN CAPITAL LETTER E WITH HOOK ABOVE]
  408             case '\u1EBC': // ?  [LATIN CAPITAL LETTER E WITH TILDE]
  409             case '\u1EBE': // ?  [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE]
  410             case '\u1EC0': // ?  [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE]
  411             case '\u1EC2': // ?  [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE]
  412             case '\u1EC4': // ?  [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE]
  413             case '\u1EC6': // ?  [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW]
  414             case '\u24BA': // ?  [CIRCLED LATIN CAPITAL LETTER E]
  415             case '\u2C7B': // ?  [LATIN LETTER SMALL CAPITAL TURNED E]
  416             case '\uFF25': // ?  [FULLWIDTH LATIN CAPITAL LETTER E]
  417               output[outputPos++] = 'E';
  418               break;
  419             case '\u00E8': //   [LATIN SMALL LETTER E WITH GRAVE]
  420             case '\u00E9': //   [LATIN SMALL LETTER E WITH ACUTE]
  421             case '\u00EA': //   [LATIN SMALL LETTER E WITH CIRCUMFLEX]
  422             case '\u00EB': //   [LATIN SMALL LETTER E WITH DIAERESIS]
  423             case '\u0113': // ?  [LATIN SMALL LETTER E WITH MACRON]
  424             case '\u0115': // ?  [LATIN SMALL LETTER E WITH BREVE]
  425             case '\u0117': // ?  [LATIN SMALL LETTER E WITH DOT ABOVE]
  426             case '\u0119': // ?  [LATIN SMALL LETTER E WITH OGONEK]
  427             case '\u011B': // ?  [LATIN SMALL LETTER E WITH CARON]
  428             case '\u01DD': // ?  [LATIN SMALL LETTER TURNED E]
  429             case '\u0205': // ?  [LATIN SMALL LETTER E WITH DOUBLE GRAVE]
  430             case '\u0207': // ?  [LATIN SMALL LETTER E WITH INVERTED BREVE]
  431             case '\u0229': // ?  [LATIN SMALL LETTER E WITH CEDILLA]
  432             case '\u0247': // ?  [LATIN SMALL LETTER E WITH STROKE]
  433             case '\u0258': // ?  [LATIN SMALL LETTER REVERSED E]
  434             case '\u025B': // ?  [LATIN SMALL LETTER OPEN E]
  435             case '\u025C': // ?  [LATIN SMALL LETTER REVERSED OPEN E]
  436             case '\u025D': // ?  [LATIN SMALL LETTER REVERSED OPEN E WITH HOOK]
  437             case '\u025E': // ?  [LATIN SMALL LETTER CLOSED REVERSED OPEN E]
  438             case '\u029A': // ?  [LATIN SMALL LETTER CLOSED OPEN E]
  439             case '\u1D08': // ?  [LATIN SMALL LETTER TURNED OPEN E]
  440             case '\u1D92': // ?  [LATIN SMALL LETTER E WITH RETROFLEX HOOK]
  441             case '\u1D93': // ?  [LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK]
  442             case '\u1D94': // ?  [LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK]
  443             case '\u1E15': // ?  [LATIN SMALL LETTER E WITH MACRON AND GRAVE]
  444             case '\u1E17': // ?  [LATIN SMALL LETTER E WITH MACRON AND ACUTE]
  445             case '\u1E19': // ?  [LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW]
  446             case '\u1E1B': // ?  [LATIN SMALL LETTER E WITH TILDE BELOW]
  447             case '\u1E1D': // ?  [LATIN SMALL LETTER E WITH CEDILLA AND BREVE]
  448             case '\u1EB9': // ?  [LATIN SMALL LETTER E WITH DOT BELOW]
  449             case '\u1EBB': // ?  [LATIN SMALL LETTER E WITH HOOK ABOVE]
  450             case '\u1EBD': // ?  [LATIN SMALL LETTER E WITH TILDE]
  451             case '\u1EBF': // ?  [LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE]
  452             case '\u1EC1': // ?  [LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE]
  453             case '\u1EC3': // ?  [LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE]
  454             case '\u1EC5': // ?  [LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE]
  455             case '\u1EC7': // ?  [LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW]
  456             case '\u2091': // ?  [LATIN SUBSCRIPT SMALL LETTER E]
  457             case '\u24D4': // ?  [CIRCLED LATIN SMALL LETTER E]
  458             case '\u2C78': // ?  [LATIN SMALL LETTER E WITH NOTCH]
  459             case '\uFF45': // ?  [FULLWIDTH LATIN SMALL LETTER E]
  460               output[outputPos++] = 'e';
  461               break;
  462             case '\u24A0': // ?  [PARENTHESIZED LATIN SMALL LETTER E]
  463               output[outputPos++] = '(';
  464               output[outputPos++] = 'e';
  465               output[outputPos++] = ')';
  466               break;
  467             case '\u0191': // ?  [LATIN CAPITAL LETTER F WITH HOOK]
  468             case '\u1E1E': // ?  [LATIN CAPITAL LETTER F WITH DOT ABOVE]
  469             case '\u24BB': // ?  [CIRCLED LATIN CAPITAL LETTER F]
  470             case '\uA730': // ?  [LATIN LETTER SMALL CAPITAL F]
  471             case '\uA77B': // ?  [LATIN CAPITAL LETTER INSULAR F]
  472             case '\uA7FB': // ?  [LATIN EPIGRAPHIC LETTER REVERSED F]
  473             case '\uFF26': // ?  [FULLWIDTH LATIN CAPITAL LETTER F]
  474               output[outputPos++] = 'F';
  475               break;
  476             case '\u0192': // ?  [LATIN SMALL LETTER F WITH HOOK]
  477             case '\u1D6E': // ?  [LATIN SMALL LETTER F WITH MIDDLE TILDE]
  478             case '\u1D82': // ?  [LATIN SMALL LETTER F WITH PALATAL HOOK]
  479             case '\u1E1F': // ?  [LATIN SMALL LETTER F WITH DOT ABOVE]
  480             case '\u1E9B': // ?  [LATIN SMALL LETTER LONG S WITH DOT ABOVE]
  481             case '\u24D5': // ?  [CIRCLED LATIN SMALL LETTER F]
  482             case '\uA77C': // ?  [LATIN SMALL LETTER INSULAR F]
  483             case '\uFF46': // ?  [FULLWIDTH LATIN SMALL LETTER F]
  484               output[outputPos++] = 'f';
  485               break;
  486             case '\u24A1': // ?  [PARENTHESIZED LATIN SMALL LETTER F]
  487               output[outputPos++] = '(';
  488               output[outputPos++] = 'f';
  489               output[outputPos++] = ')';
  490               break;
  491             case '\uFB00': // ?  [LATIN SMALL LIGATURE FF]
  492               output[outputPos++] = 'f';
  493               output[outputPos++] = 'f';
  494               break;
  495             case '\uFB03': // ?  [LATIN SMALL LIGATURE FFI]
  496               output[outputPos++] = 'f';
  497               output[outputPos++] = 'f';
  498               output[outputPos++] = 'i';
  499               break;
  500             case '\uFB04': // ?  [LATIN SMALL LIGATURE FFL]
  501               output[outputPos++] = 'f';
  502               output[outputPos++] = 'f';
  503               output[outputPos++] = 'l';
  504               break;
  505             case '\uFB01': // ?  [LATIN SMALL LIGATURE FI]
  506               output[outputPos++] = 'f';
  507               output[outputPos++] = 'i';
  508               break;
  509             case '\uFB02': // ?  [LATIN SMALL LIGATURE FL]
  510               output[outputPos++] = 'f';
  511               output[outputPos++] = 'l';
  512               break;
  513             case '\u011C': // ?  [LATIN CAPITAL LETTER G WITH CIRCUMFLEX]
  514             case '\u011E': // ?  [LATIN CAPITAL LETTER G WITH BREVE]
  515             case '\u0120': // ?  [LATIN CAPITAL LETTER G WITH DOT ABOVE]
  516             case '\u0122': // ?  [LATIN CAPITAL LETTER G WITH CEDILLA]
  517             case '\u0193': // ?  [LATIN CAPITAL LETTER G WITH HOOK]
  518             case '\u01E4': // ?  [LATIN CAPITAL LETTER G WITH STROKE]
  519             case '\u01E5': // ?  [LATIN SMALL LETTER G WITH STROKE]
  520             case '\u01E6': // ?  [LATIN CAPITAL LETTER G WITH CARON]
  521             case '\u01E7': // ?  [LATIN SMALL LETTER G WITH CARON]
  522             case '\u01F4': // ?  [LATIN CAPITAL LETTER G WITH ACUTE]
  523             case '\u0262': // ?  [LATIN LETTER SMALL CAPITAL G]
  524             case '\u029B': // ?  [LATIN LETTER SMALL CAPITAL G WITH HOOK]
  525             case '\u1E20': // ?  [LATIN CAPITAL LETTER G WITH MACRON]
  526             case '\u24BC': // ?  [CIRCLED LATIN CAPITAL LETTER G]
  527             case '\uA77D': // ?  [LATIN CAPITAL LETTER INSULAR G]
  528             case '\uA77E': // ?  [LATIN CAPITAL LETTER TURNED INSULAR G]
  529             case '\uFF27': // ?  [FULLWIDTH LATIN CAPITAL LETTER G]
  530               output[outputPos++] = 'G';
  531               break;
  532             case '\u011D': // ?  [LATIN SMALL LETTER G WITH CIRCUMFLEX]
  533             case '\u011F': // ?  [LATIN SMALL LETTER G WITH BREVE]
  534             case '\u0121': // ?  [LATIN SMALL LETTER G WITH DOT ABOVE]
  535             case '\u0123': // ?  [LATIN SMALL LETTER G WITH CEDILLA]
  536             case '\u01F5': // ?  [LATIN SMALL LETTER G WITH ACUTE]
  537             case '\u0260': // ?  [LATIN SMALL LETTER G WITH HOOK]
  538             case '\u0261': // ?  [LATIN SMALL LETTER SCRIPT G]
  539             case '\u1D77': // ?  [LATIN SMALL LETTER TURNED G]
  540             case '\u1D79': // ?  [LATIN SMALL LETTER INSULAR G]
  541             case '\u1D83': // ?  [LATIN SMALL LETTER G WITH PALATAL HOOK]
  542             case '\u1E21': // ?  [LATIN SMALL LETTER G WITH MACRON]
  543             case '\u24D6': // ?  [CIRCLED LATIN SMALL LETTER G]
  544             case '\uA77F': // ?  [LATIN SMALL LETTER TURNED INSULAR G]
  545             case '\uFF47': // ?  [FULLWIDTH LATIN SMALL LETTER G]
  546               output[outputPos++] = 'g';
  547               break;
  548             case '\u24A2': // ?  [PARENTHESIZED LATIN SMALL LETTER G]
  549               output[outputPos++] = '(';
  550               output[outputPos++] = 'g';
  551               output[outputPos++] = ')';
  552               break;
  553             case '\u0124': // ?  [LATIN CAPITAL LETTER H WITH CIRCUMFLEX]
  554             case '\u0126': // ?  [LATIN CAPITAL LETTER H WITH STROKE]
  555             case '\u021E': // ?  [LATIN CAPITAL LETTER H WITH CARON]
  556             case '\u029C': // ?  [LATIN LETTER SMALL CAPITAL H]
  557             case '\u1E22': // ?  [LATIN CAPITAL LETTER H WITH DOT ABOVE]
  558             case '\u1E24': // ?  [LATIN CAPITAL LETTER H WITH DOT BELOW]
  559             case '\u1E26': // ?  [LATIN CAPITAL LETTER H WITH DIAERESIS]
  560             case '\u1E28': // ?  [LATIN CAPITAL LETTER H WITH CEDILLA]
  561             case '\u1E2A': // ?  [LATIN CAPITAL LETTER H WITH BREVE BELOW]
  562             case '\u24BD': // ?  [CIRCLED LATIN CAPITAL LETTER H]
  563             case '\u2C67': // ?  [LATIN CAPITAL LETTER H WITH DESCENDER]
  564             case '\u2C75': // ?  [LATIN CAPITAL LETTER HALF H]
  565             case '\uFF28': // ?  [FULLWIDTH LATIN CAPITAL LETTER H]
  566               output[outputPos++] = 'H';
  567               break;
  568             case '\u0125': // ?  [LATIN SMALL LETTER H WITH CIRCUMFLEX]
  569             case '\u0127': // ?  [LATIN SMALL LETTER H WITH STROKE]
  570             case '\u021F': // ?  [LATIN SMALL LETTER H WITH CARON]
  571             case '\u0265': // ?  [LATIN SMALL LETTER TURNED H]
  572             case '\u0266': // ?  [LATIN SMALL LETTER H WITH HOOK]
  573             case '\u02AE': // ?  [LATIN SMALL LETTER TURNED H WITH FISHHOOK]
  574             case '\u02AF': // ?  [LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL]
  575             case '\u1E23': // ?  [LATIN SMALL LETTER H WITH DOT ABOVE]
  576             case '\u1E25': // ?  [LATIN SMALL LETTER H WITH DOT BELOW]
  577             case '\u1E27': // ?  [LATIN SMALL LETTER H WITH DIAERESIS]
  578             case '\u1E29': // ?  [LATIN SMALL LETTER H WITH CEDILLA]
  579             case '\u1E2B': // ?  [LATIN SMALL LETTER H WITH BREVE BELOW]
  580             case '\u1E96': // ?  [LATIN SMALL LETTER H WITH LINE BELOW]
  581             case '\u24D7': // ?  [CIRCLED LATIN SMALL LETTER H]
  582             case '\u2C68': // ?  [LATIN SMALL LETTER H WITH DESCENDER]
  583             case '\u2C76': // ?  [LATIN SMALL LETTER HALF H]
  584             case '\uFF48': // ?  [FULLWIDTH LATIN SMALL LETTER H]
  585               output[outputPos++] = 'h';
  586               break;
  587             case '\u01F6': // ?  http://en.wikipedia.org/wiki/Hwair  [LATIN CAPITAL LETTER HWAIR]
  588               output[outputPos++] = 'H';
  589               output[outputPos++] = 'V';
  590               break;
  591             case '\u24A3': // ?  [PARENTHESIZED LATIN SMALL LETTER H]
  592               output[outputPos++] = '(';
  593               output[outputPos++] = 'h';
  594               output[outputPos++] = ')';
  595               break;
  596             case '\u0195': // ?  [LATIN SMALL LETTER HV]
  597               output[outputPos++] = 'h';
  598               output[outputPos++] = 'v';
  599               break;
  600             case '\u00CC': //   [LATIN CAPITAL LETTER I WITH GRAVE]
  601             case '\u00CD': //   [LATIN CAPITAL LETTER I WITH ACUTE]
  602             case '\u00CE': //   [LATIN CAPITAL LETTER I WITH CIRCUMFLEX]
  603             case '\u00CF': //   [LATIN CAPITAL LETTER I WITH DIAERESIS]
  604             case '\u0128': // ?  [LATIN CAPITAL LETTER I WITH TILDE]
  605             case '\u012A': // ?  [LATIN CAPITAL LETTER I WITH MACRON]
  606             case '\u012C': // ?  [LATIN CAPITAL LETTER I WITH BREVE]
  607             case '\u012E': // ?  [LATIN CAPITAL LETTER I WITH OGONEK]
  608             case '\u0130': // ?  [LATIN CAPITAL LETTER I WITH DOT ABOVE]
  609             case '\u0196': // ?  [LATIN CAPITAL LETTER IOTA]
  610             case '\u0197': // ?  [LATIN CAPITAL LETTER I WITH STROKE]
  611             case '\u01CF': // ?  [LATIN CAPITAL LETTER I WITH CARON]
  612             case '\u0208': // ?  [LATIN CAPITAL LETTER I WITH DOUBLE GRAVE]
  613             case '\u020A': // ?  [LATIN CAPITAL LETTER I WITH INVERTED BREVE]
  614             case '\u026A': // ?  [LATIN LETTER SMALL CAPITAL I]
  615             case '\u1D7B': // ?  [LATIN SMALL CAPITAL LETTER I WITH STROKE]
  616             case '\u1E2C': // ?  [LATIN CAPITAL LETTER I WITH TILDE BELOW]
  617             case '\u1E2E': // ?  [LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE]
  618             case '\u1EC8': // ?  [LATIN CAPITAL LETTER I WITH HOOK ABOVE]
  619             case '\u1ECA': // ?  [LATIN CAPITAL LETTER I WITH DOT BELOW]
  620             case '\u24BE': // ?  [CIRCLED LATIN CAPITAL LETTER I]
  621             case '\uA7FE': // ?  [LATIN EPIGRAPHIC LETTER I LONGA]
  622             case '\uFF29': // ?  [FULLWIDTH LATIN CAPITAL LETTER I]
  623               output[outputPos++] = 'I';
  624               break;
  625             case '\u00EC': //   [LATIN SMALL LETTER I WITH GRAVE]
  626             case '\u00ED': //   [LATIN SMALL LETTER I WITH ACUTE]
  627             case '\u00EE': //   [LATIN SMALL LETTER I WITH CIRCUMFLEX]
  628             case '\u00EF': //   [LATIN SMALL LETTER I WITH DIAERESIS]
  629             case '\u0129': // ?  [LATIN SMALL LETTER I WITH TILDE]
  630             case '\u012B': // ?  [LATIN SMALL LETTER I WITH MACRON]
  631             case '\u012D': // ?  [LATIN SMALL LETTER I WITH BREVE]
  632             case '\u012F': // ?  [LATIN SMALL LETTER I WITH OGONEK]
  633             case '\u0131': // ?  [LATIN SMALL LETTER DOTLESS I]
  634             case '\u01D0': // ?  [LATIN SMALL LETTER I WITH CARON]
  635             case '\u0209': // ?  [LATIN SMALL LETTER I WITH DOUBLE GRAVE]
  636             case '\u020B': // ?  [LATIN SMALL LETTER I WITH INVERTED BREVE]
  637             case '\u0268': // ?  [LATIN SMALL LETTER I WITH STROKE]
  638             case '\u1D09': // ?  [LATIN SMALL LETTER TURNED I]
  639             case '\u1D62': // ?  [LATIN SUBSCRIPT SMALL LETTER I]
  640             case '\u1D7C': // ?  [LATIN SMALL LETTER IOTA WITH STROKE]
  641             case '\u1D96': // ?  [LATIN SMALL LETTER I WITH RETROFLEX HOOK]
  642             case '\u1E2D': // ?  [LATIN SMALL LETTER I WITH TILDE BELOW]
  643             case '\u1E2F': // ?  [LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE]
  644             case '\u1EC9': // ?  [LATIN SMALL LETTER I WITH HOOK ABOVE]
  645             case '\u1ECB': // ?  [LATIN SMALL LETTER I WITH DOT BELOW]
  646             case '\u2071': // ?  [SUPERSCRIPT LATIN SMALL LETTER I]
  647             case '\u24D8': // ?  [CIRCLED LATIN SMALL LETTER I]
  648             case '\uFF49': // ?  [FULLWIDTH LATIN SMALL LETTER I]
  649               output[outputPos++] = 'i';
  650               break;
  651             case '\u0132': // ?  [LATIN CAPITAL LIGATURE IJ]
  652               output[outputPos++] = 'I';
  653               output[outputPos++] = 'J';
  654               break;
  655             case '\u24A4': // ?  [PARENTHESIZED LATIN SMALL LETTER I]
  656               output[outputPos++] = '(';
  657               output[outputPos++] = 'i';
  658               output[outputPos++] = ')';
  659               break;
  660             case '\u0133': // ?  [LATIN SMALL LIGATURE IJ]
  661               output[outputPos++] = 'i';
  662               output[outputPos++] = 'j';
  663               break;
  664             case '\u0134': // ?  [LATIN CAPITAL LETTER J WITH CIRCUMFLEX]
  665             case '\u0248': // ?  [LATIN CAPITAL LETTER J WITH STROKE]
  666             case '\u1D0A': // ?  [LATIN LETTER SMALL CAPITAL J]
  667             case '\u24BF': // ?  [CIRCLED LATIN CAPITAL LETTER J]
  668             case '\uFF2A': // ?  [FULLWIDTH LATIN CAPITAL LETTER J]
  669               output[outputPos++] = 'J';
  670               break;
  671             case '\u0135': // ?  [LATIN SMALL LETTER J WITH CIRCUMFLEX]
  672             case '\u01F0': // ?  [LATIN SMALL LETTER J WITH CARON]
  673             case '\u0237': // ?  [LATIN SMALL LETTER DOTLESS J]
  674             case '\u0249': // ?  [LATIN SMALL LETTER J WITH STROKE]
  675             case '\u025F': // ?  [LATIN SMALL LETTER DOTLESS J WITH STROKE]
  676             case '\u0284': // ?  [LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK]
  677             case '\u029D': // ?  [LATIN SMALL LETTER J WITH CROSSED-TAIL]
  678             case '\u24D9': // ?  [CIRCLED LATIN SMALL LETTER J]
  679             case '\u2C7C': // ?  [LATIN SUBSCRIPT SMALL LETTER J]
  680             case '\uFF4A': // ?  [FULLWIDTH LATIN SMALL LETTER J]
  681               output[outputPos++] = 'j';
  682               break;
  683             case '\u24A5': // ?  [PARENTHESIZED LATIN SMALL LETTER J]
  684               output[outputPos++] = '(';
  685               output[outputPos++] = 'j';
  686               output[outputPos++] = ')';
  687               break;
  688             case '\u0136': // ?  [LATIN CAPITAL LETTER K WITH CEDILLA]
  689             case '\u0198': // ?  [LATIN CAPITAL LETTER K WITH HOOK]
  690             case '\u01E8': // ?  [LATIN CAPITAL LETTER K WITH CARON]
  691             case '\u1D0B': // ?  [LATIN LETTER SMALL CAPITAL K]
  692             case '\u1E30': // ?  [LATIN CAPITAL LETTER K WITH ACUTE]
  693             case '\u1E32': // ?  [LATIN CAPITAL LETTER K WITH DOT BELOW]
  694             case '\u1E34': // ?  [LATIN CAPITAL LETTER K WITH LINE BELOW]
  695             case '\u24C0': // ?  [CIRCLED LATIN CAPITAL LETTER K]
  696             case '\u2C69': // ?  [LATIN CAPITAL LETTER K WITH DESCENDER]
  697             case '\uA740': // ?  [LATIN CAPITAL LETTER K WITH STROKE]
  698             case '\uA742': // ?  [LATIN CAPITAL LETTER K WITH DIAGONAL STROKE]
  699             case '\uA744': // ?  [LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE]
  700             case '\uFF2B': // ?  [FULLWIDTH LATIN CAPITAL LETTER K]
  701               output[outputPos++] = 'K';
  702               break;
  703             case '\u0137': // ?  [LATIN SMALL LETTER K WITH CEDILLA]
  704             case '\u0199': // ?  [LATIN SMALL LETTER K WITH HOOK]
  705             case '\u01E9': // ?  [LATIN SMALL LETTER K WITH CARON]
  706             case '\u029E': // ?  [LATIN SMALL LETTER TURNED K]
  707             case '\u1D84': // ?  [LATIN SMALL LETTER K WITH PALATAL HOOK]
  708             case '\u1E31': // ?  [LATIN SMALL LETTER K WITH ACUTE]
  709             case '\u1E33': // ?  [LATIN SMALL LETTER K WITH DOT BELOW]
  710             case '\u1E35': // ?  [LATIN SMALL LETTER K WITH LINE BELOW]
  711             case '\u24DA': // ?  [CIRCLED LATIN SMALL LETTER K]
  712             case '\u2C6A': // ?  [LATIN SMALL LETTER K WITH DESCENDER]
  713             case '\uA741': // ?  [LATIN SMALL LETTER K WITH STROKE]
  714             case '\uA743': // ?  [LATIN SMALL LETTER K WITH DIAGONAL STROKE]
  715             case '\uA745': // ?  [LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE]
  716             case '\uFF4B': // ?  [FULLWIDTH LATIN SMALL LETTER K]
  717               output[outputPos++] = 'k';
  718               break;
  719             case '\u24A6': // ?  [PARENTHESIZED LATIN SMALL LETTER K]
  720               output[outputPos++] = '(';
  721               output[outputPos++] = 'k';
  722               output[outputPos++] = ')';
  723               break;
  724             case '\u0139': // ?  [LATIN CAPITAL LETTER L WITH ACUTE]
  725             case '\u013B': // ?  [LATIN CAPITAL LETTER L WITH CEDILLA]
  726             case '\u013D': // ?  [LATIN CAPITAL LETTER L WITH CARON]
  727             case '\u013F': // ?  [LATIN CAPITAL LETTER L WITH MIDDLE DOT]
  728             case '\u0141': // ?  [LATIN CAPITAL LETTER L WITH STROKE]
  729             case '\u023D': // ?  [LATIN CAPITAL LETTER L WITH BAR]
  730             case '\u029F': // ?  [LATIN LETTER SMALL CAPITAL L]
  731             case '\u1D0C': // ?  [LATIN LETTER SMALL CAPITAL L WITH STROKE]
  732             case '\u1E36': // ?  [LATIN CAPITAL LETTER L WITH DOT BELOW]
  733             case '\u1E38': // ?  [LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON]
  734             case '\u1E3A': // ?  [LATIN CAPITAL LETTER L WITH LINE BELOW]
  735             case '\u1E3C': // ?  [LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW]
  736             case '\u24C1': // ?  [CIRCLED LATIN CAPITAL LETTER L]
  737             case '\u2C60': // ?  [LATIN CAPITAL LETTER L WITH DOUBLE BAR]
  738             case '\u2C62': // ?  [LATIN CAPITAL LETTER L WITH MIDDLE TILDE]
  739             case '\uA746': // ?  [LATIN CAPITAL LETTER BROKEN L]
  740             case '\uA748': // ?  [LATIN CAPITAL LETTER L WITH HIGH STROKE]
  741             case '\uA780': // ?  [LATIN CAPITAL LETTER TURNED L]
  742             case '\uFF2C': // ?  [FULLWIDTH LATIN CAPITAL LETTER L]
  743               output[outputPos++] = 'L';
  744               break;
  745             case '\u013A': // ?  [LATIN SMALL LETTER L WITH ACUTE]
  746             case '\u013C': // ?  [LATIN SMALL LETTER L WITH CEDILLA]
  747             case '\u013E': // ?  [LATIN SMALL LETTER L WITH CARON]
  748             case '\u0140': // ?  [LATIN SMALL LETTER L WITH MIDDLE DOT]
  749             case '\u0142': // ?  [LATIN SMALL LETTER L WITH STROKE]
  750             case '\u019A': // ?  [LATIN SMALL LETTER L WITH BAR]
  751             case '\u0234': // ?  [LATIN SMALL LETTER L WITH CURL]
  752             case '\u026B': // ?  [LATIN SMALL LETTER L WITH MIDDLE TILDE]
  753             case '\u026C': // ?  [LATIN SMALL LETTER L WITH BELT]
  754             case '\u026D': // ?  [LATIN SMALL LETTER L WITH RETROFLEX HOOK]
  755             case '\u1D85': // ?  [LATIN SMALL LETTER L WITH PALATAL HOOK]
  756             case '\u1E37': // ?  [LATIN SMALL LETTER L WITH DOT BELOW]
  757             case '\u1E39': // ?  [LATIN SMALL LETTER L WITH DOT BELOW AND MACRON]
  758             case '\u1E3B': // ?  [LATIN SMALL LETTER L WITH LINE BELOW]
  759             case '\u1E3D': // ?  [LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW]
  760             case '\u24DB': // ?  [CIRCLED LATIN SMALL LETTER L]
  761             case '\u2C61': // ?  [LATIN SMALL LETTER L WITH DOUBLE BAR]
  762             case '\uA747': // ?  [LATIN SMALL LETTER BROKEN L]
  763             case '\uA749': // ?  [LATIN SMALL LETTER L WITH HIGH STROKE]
  764             case '\uA781': // ?  [LATIN SMALL LETTER TURNED L]
  765             case '\uFF4C': // ?  [FULLWIDTH LATIN SMALL LETTER L]
  766               output[outputPos++] = 'l';
  767               break;
  768             case '\u01C7': // ?  [LATIN CAPITAL LETTER LJ]
  769               output[outputPos++] = 'L';
  770               output[outputPos++] = 'J';
  771               break;
  772             case '\u1EFA': // ?  [LATIN CAPITAL LETTER MIDDLE-WELSH LL]
  773               output[outputPos++] = 'L';
  774               output[outputPos++] = 'L';
  775               break;
  776             case '\u01C8': // ?  [LATIN CAPITAL LETTER L WITH SMALL LETTER J]
  777               output[outputPos++] = 'L';
  778               output[outputPos++] = 'j';
  779               break;
  780             case '\u24A7': // ?  [PARENTHESIZED LATIN SMALL LETTER L]
  781               output[outputPos++] = '(';
  782               output[outputPos++] = 'l';
  783               output[outputPos++] = ')';
  784               break;
  785             case '\u01C9': // ?  [LATIN SMALL LETTER LJ]
  786               output[outputPos++] = 'l';
  787               output[outputPos++] = 'j';
  788               break;
  789             case '\u1EFB': // ?  [LATIN SMALL LETTER MIDDLE-WELSH LL]
  790               output[outputPos++] = 'l';
  791               output[outputPos++] = 'l';
  792               break;
  793             case '\u02AA': // ?  [LATIN SMALL LETTER LS DIGRAPH]
  794               output[outputPos++] = 'l';
  795               output[outputPos++] = 's';
  796               break;
  797             case '\u02AB': // ?  [LATIN SMALL LETTER LZ DIGRAPH]
  798               output[outputPos++] = 'l';
  799               output[outputPos++] = 'z';
  800               break;
  801             case '\u019C': // ?  [LATIN CAPITAL LETTER TURNED M]
  802             case '\u1D0D': // ?  [LATIN LETTER SMALL CAPITAL M]
  803             case '\u1E3E': // ?  [LATIN CAPITAL LETTER M WITH ACUTE]
  804             case '\u1E40': // ?  [LATIN CAPITAL LETTER M WITH DOT ABOVE]
  805             case '\u1E42': // ?  [LATIN CAPITAL LETTER M WITH DOT BELOW]
  806             case '\u24C2': // ?  [CIRCLED LATIN CAPITAL LETTER M]
  807             case '\u2C6E': // ?  [LATIN CAPITAL LETTER M WITH HOOK]
  808             case '\uA7FD': // ?  [LATIN EPIGRAPHIC LETTER INVERTED M]
  809             case '\uA7FF': // ?  [LATIN EPIGRAPHIC LETTER ARCHAIC M]
  810             case '\uFF2D': // ?  [FULLWIDTH LATIN CAPITAL LETTER M]
  811               output[outputPos++] = 'M';
  812               break;
  813             case '\u026F': // ?  [LATIN SMALL LETTER TURNED M]
  814             case '\u0270': // ?  [LATIN SMALL LETTER TURNED M WITH LONG LEG]
  815             case '\u0271': // ?  [LATIN SMALL LETTER M WITH HOOK]
  816             case '\u1D6F': // ?  [LATIN SMALL LETTER M WITH MIDDLE TILDE]
  817             case '\u1D86': // ?  [LATIN SMALL LETTER M WITH PALATAL HOOK]
  818             case '\u1E3F': // ?  [LATIN SMALL LETTER M WITH ACUTE]
  819             case '\u1E41': // ?  [LATIN SMALL LETTER M WITH DOT ABOVE]
  820             case '\u1E43': // ?  [LATIN SMALL LETTER M WITH DOT BELOW]
  821             case '\u24DC': // ?  [CIRCLED LATIN SMALL LETTER M]
  822             case '\uFF4D': // ?  [FULLWIDTH LATIN SMALL LETTER M]
  823               output[outputPos++] = 'm';
  824               break;
  825             case '\u24A8': // ?  [PARENTHESIZED LATIN SMALL LETTER M]
  826               output[outputPos++] = '(';
  827               output[outputPos++] = 'm';
  828               output[outputPos++] = ')';
  829               break;
  830             case '\u00D1': //   [LATIN CAPITAL LETTER N WITH TILDE]
  831             case '\u0143': // ?  [LATIN CAPITAL LETTER N WITH ACUTE]
  832             case '\u0145': // ?  [LATIN CAPITAL LETTER N WITH CEDILLA]
  833             case '\u0147': // ?  [LATIN CAPITAL LETTER N WITH CARON]
  834             case '\u014A': // ?  http://en.wikipedia.org/wiki/Eng_(letter)  [LATIN CAPITAL LETTER ENG]
  835             case '\u019D': // ?  [LATIN CAPITAL LETTER N WITH LEFT HOOK]
  836             case '\u01F8': // ?  [LATIN CAPITAL LETTER N WITH GRAVE]
  837             case '\u0220': // ?  [LATIN CAPITAL LETTER N WITH LONG RIGHT LEG]
  838             case '\u0274': // ?  [LATIN LETTER SMALL CAPITAL N]
  839             case '\u1D0E': // ?  [LATIN LETTER SMALL CAPITAL REVERSED N]
  840             case '\u1E44': // ?  [LATIN CAPITAL LETTER N WITH DOT ABOVE]
  841             case '\u1E46': // ?  [LATIN CAPITAL LETTER N WITH DOT BELOW]
  842             case '\u1E48': // ?  [LATIN CAPITAL LETTER N WITH LINE BELOW]
  843             case '\u1E4A': // ?  [LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW]
  844             case '\u24C3': // ?  [CIRCLED LATIN CAPITAL LETTER N]
  845             case '\uFF2E': // ?  [FULLWIDTH LATIN CAPITAL LETTER N]
  846               output[outputPos++] = 'N';
  847               break;
  848             case '\u00F1': //   [LATIN SMALL LETTER N WITH TILDE]
  849             case '\u0144': // ?  [LATIN SMALL LETTER N WITH ACUTE]
  850             case '\u0146': // ?  [LATIN SMALL LETTER N WITH CEDILLA]
  851             case '\u0148': // ?  [LATIN SMALL LETTER N WITH CARON]
  852             case '\u0149': // ?  [LATIN SMALL LETTER N PRECEDED BY APOSTROPHE]
  853             case '\u014B': // ?  http://en.wikipedia.org/wiki/Eng_(letter)  [LATIN SMALL LETTER ENG]
  854             case '\u019E': // ?  [LATIN SMALL LETTER N WITH LONG RIGHT LEG]
  855             case '\u01F9': // ?  [LATIN SMALL LETTER N WITH GRAVE]
  856             case '\u0235': // ?  [LATIN SMALL LETTER N WITH CURL]
  857             case '\u0272': // ?  [LATIN SMALL LETTER N WITH LEFT HOOK]
  858             case '\u0273': // ?  [LATIN SMALL LETTER N WITH RETROFLEX HOOK]
  859             case '\u1D70': // ?  [LATIN SMALL LETTER N WITH MIDDLE TILDE]
  860             case '\u1D87': // ?  [LATIN SMALL LETTER N WITH PALATAL HOOK]
  861             case '\u1E45': // ?  [LATIN SMALL LETTER N WITH DOT ABOVE]
  862             case '\u1E47': // ?  [LATIN SMALL LETTER N WITH DOT BELOW]
  863             case '\u1E49': // ?  [LATIN SMALL LETTER N WITH LINE BELOW]
  864             case '\u1E4B': // ?  [LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW]
  865             case '\u207F': // ?  [SUPERSCRIPT LATIN SMALL LETTER N]
  866             case '\u24DD': // ?  [CIRCLED LATIN SMALL LETTER N]
  867             case '\uFF4E': // ?  [FULLWIDTH LATIN SMALL LETTER N]
  868               output[outputPos++] = 'n';
  869               break;
  870             case '\u01CA': // ?  [LATIN CAPITAL LETTER NJ]
  871               output[outputPos++] = 'N';
  872               output[outputPos++] = 'J';
  873               break;
  874             case '\u01CB': // ?  [LATIN CAPITAL LETTER N WITH SMALL LETTER J]
  875               output[outputPos++] = 'N';
  876               output[outputPos++] = 'j';
  877               break;
  878             case '\u24A9': // ?  [PARENTHESIZED LATIN SMALL LETTER N]
  879               output[outputPos++] = '(';
  880               output[outputPos++] = 'n';
  881               output[outputPos++] = ')';
  882               break;
  883             case '\u01CC': // ?  [LATIN SMALL LETTER NJ]
  884               output[outputPos++] = 'n';
  885               output[outputPos++] = 'j';
  886               break;
  887             case '\u00D2': //   [LATIN CAPITAL LETTER O WITH GRAVE]
  888             case '\u00D3': //   [LATIN CAPITAL LETTER O WITH ACUTE]
  889             case '\u00D4': //   [LATIN CAPITAL LETTER O WITH CIRCUMFLEX]
  890             case '\u00D5': //   [LATIN CAPITAL LETTER O WITH TILDE]
  891             case '\u00D6': //   [LATIN CAPITAL LETTER O WITH DIAERESIS]
  892             case '\u00D8': //   [LATIN CAPITAL LETTER O WITH STROKE]
  893             case '\u014C': // ?  [LATIN CAPITAL LETTER O WITH MACRON]
  894             case '\u014E': // ?  [LATIN CAPITAL LETTER O WITH BREVE]
  895             case '\u0150': // ?  [LATIN CAPITAL LETTER O WITH DOUBLE ACUTE]
  896             case '\u0186': // ?  [LATIN CAPITAL LETTER OPEN O]
  897             case '\u019F': // ?  [LATIN CAPITAL LETTER O WITH MIDDLE TILDE]
  898             case '\u01A0': // ?  [LATIN CAPITAL LETTER O WITH HORN]
  899             case '\u01D1': // ?  [LATIN CAPITAL LETTER O WITH CARON]
  900             case '\u01EA': // ?  [LATIN CAPITAL LETTER O WITH OGONEK]
  901             case '\u01EC': // ?  [LATIN CAPITAL LETTER O WITH OGONEK AND MACRON]
  902             case '\u01FE': // ?  [LATIN CAPITAL LETTER O WITH STROKE AND ACUTE]
  903             case '\u020C': // ?  [LATIN CAPITAL LETTER O WITH DOUBLE GRAVE]
  904             case '\u020E': // ?  [LATIN CAPITAL LETTER O WITH INVERTED BREVE]
  905             case '\u022A': // ?  [LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON]
  906             case '\u022C': // ?  [LATIN CAPITAL LETTER O WITH TILDE AND MACRON]
  907             case '\u022E': // ?  [LATIN CAPITAL LETTER O WITH DOT ABOVE]
  908             case '\u0230': // ?  [LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON]
  909             case '\u1D0F': // ?  [LATIN LETTER SMALL CAPITAL O]
  910             case '\u1D10': // ?  [LATIN LETTER SMALL CAPITAL OPEN O]
  911             case '\u1E4C': // ?  [LATIN CAPITAL LETTER O WITH TILDE AND ACUTE]
  912             case '\u1E4E': // ?  [LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS]
  913             case '\u1E50': // ?  [LATIN CAPITAL LETTER O WITH MACRON AND GRAVE]
  914             case '\u1E52': // ?  [LATIN CAPITAL LETTER O WITH MACRON AND ACUTE]
  915             case '\u1ECC': // ?  [LATIN CAPITAL LETTER O WITH DOT BELOW]
  916             case '\u1ECE': // ?  [LATIN CAPITAL LETTER O WITH HOOK ABOVE]
  917             case '\u1ED0': // ?  [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE]
  918             case '\u1ED2': // ?  [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE]
  919             case '\u1ED4': // ?  [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE]
  920             case '\u1ED6': // ?  [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE]
  921             case '\u1ED8': // ?  [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW]
  922             case '\u1EDA': // ?  [LATIN CAPITAL LETTER O WITH HORN AND ACUTE]
  923             case '\u1EDC': // ?  [LATIN CAPITAL LETTER O WITH HORN AND GRAVE]
  924             case '\u1EDE': // ?  [LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE]
  925             case '\u1EE0': // ?  [LATIN CAPITAL LETTER O WITH HORN AND TILDE]
  926             case '\u1EE2': // ?  [LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW]
  927             case '\u24C4': // ?  [CIRCLED LATIN CAPITAL LETTER O]
  928             case '\uA74A': // ?  [LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY]
  929             case '\uA74C': // ?  [LATIN CAPITAL LETTER O WITH LOOP]
  930             case '\uFF2F': // ?  [FULLWIDTH LATIN CAPITAL LETTER O]
  931               output[outputPos++] = 'O';
  932               break;
  933             case '\u00F2': //   [LATIN SMALL LETTER O WITH GRAVE]
  934             case '\u00F3': //   [LATIN SMALL LETTER O WITH ACUTE]
  935             case '\u00F4': //   [LATIN SMALL LETTER O WITH CIRCUMFLEX]
  936             case '\u00F5': //   [LATIN SMALL LETTER O WITH TILDE]
  937             case '\u00F6': //   [LATIN SMALL LETTER O WITH DIAERESIS]
  938             case '\u00F8': //   [LATIN SMALL LETTER O WITH STROKE]
  939             case '\u014D': // ?  [LATIN SMALL LETTER O WITH MACRON]
  940             case '\u014F': // ?  [LATIN SMALL LETTER O WITH BREVE]
  941             case '\u0151': // ?  [LATIN SMALL LETTER O WITH DOUBLE ACUTE]
  942             case '\u01A1': // ?  [LATIN SMALL LETTER O WITH HORN]
  943             case '\u01D2': // ?  [LATIN SMALL LETTER O WITH CARON]
  944             case '\u01EB': // ?  [LATIN SMALL LETTER O WITH OGONEK]
  945             case '\u01ED': // ?  [LATIN SMALL LETTER O WITH OGONEK AND MACRON]
  946             case '\u01FF': // ?  [LATIN SMALL LETTER O WITH STROKE AND ACUTE]
  947             case '\u020D': // ?  [LATIN SMALL LETTER O WITH DOUBLE GRAVE]
  948             case '\u020F': // ?  [LATIN SMALL LETTER O WITH INVERTED BREVE]
  949             case '\u022B': // ?  [LATIN SMALL LETTER O WITH DIAERESIS AND MACRON]
  950             case '\u022D': // ?  [LATIN SMALL LETTER O WITH TILDE AND MACRON]
  951             case '\u022F': // ?  [LATIN SMALL LETTER O WITH DOT ABOVE]
  952             case '\u0231': // ?  [LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON]
  953             case '\u0254': // ?  [LATIN SMALL LETTER OPEN O]
  954             case '\u0275': // ?  [LATIN SMALL LETTER BARRED O]
  955             case '\u1D16': // ?  [LATIN SMALL LETTER TOP HALF O]
  956             case '\u1D17': // ?  [LATIN SMALL LETTER BOTTOM HALF O]
  957             case '\u1D97': // ?  [LATIN SMALL LETTER OPEN O WITH RETROFLEX HOOK]
  958             case '\u1E4D': // ?  [LATIN SMALL LETTER O WITH TILDE AND ACUTE]
  959             case '\u1E4F': // ?  [LATIN SMALL LETTER O WITH TILDE AND DIAERESIS]
  960             case '\u1E51': // ?  [LATIN SMALL LETTER O WITH MACRON AND GRAVE]
  961             case '\u1E53': // ?  [LATIN SMALL LETTER O WITH MACRON AND ACUTE]
  962             case '\u1ECD': // ?  [LATIN SMALL LETTER O WITH DOT BELOW]
  963             case '\u1ECF': // ?  [LATIN SMALL LETTER O WITH HOOK ABOVE]
  964             case '\u1ED1': // ?  [LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE]
  965             case '\u1ED3': // ?  [LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE]
  966             case '\u1ED5': // ?  [LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE]
  967             case '\u1ED7': // ?  [LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE]
  968             case '\u1ED9': // ?  [LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW]
  969             case '\u1EDB': // ?  [LATIN SMALL LETTER O WITH HORN AND ACUTE]
  970             case '\u1EDD': // ?  [LATIN SMALL LETTER O WITH HORN AND GRAVE]
  971             case '\u1EDF': // ?  [LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE]
  972             case '\u1EE1': // ?  [LATIN SMALL LETTER O WITH HORN AND TILDE]
  973             case '\u1EE3': // ?  [LATIN SMALL LETTER O WITH HORN AND DOT BELOW]
  974             case '\u2092': // ?  [LATIN SUBSCRIPT SMALL LETTER O]
  975             case '\u24DE': // ?  [CIRCLED LATIN SMALL LETTER O]
  976             case '\u2C7A': // ?  [LATIN SMALL LETTER O WITH LOW RING INSIDE]
  977             case '\uA74B': // ?  [LATIN SMALL LETTER O WITH LONG STROKE OVERLAY]
  978             case '\uA74D': // ?  [LATIN SMALL LETTER O WITH LOOP]
  979             case '\uFF4F': // ?  [FULLWIDTH LATIN SMALL LETTER O]
  980               output[outputPos++] = 'o';
  981               break;
  982             case '\u0152': // ?  [LATIN CAPITAL LIGATURE OE]
  983             case '\u0276': // ?  [LATIN LETTER SMALL CAPITAL OE]
  984               output[outputPos++] = 'O';
  985               output[outputPos++] = 'E';
  986               break;
  987             case '\uA74E': // ?  [LATIN CAPITAL LETTER OO]
  988               output[outputPos++] = 'O';
  989               output[outputPos++] = 'O';
  990               break;
  991             case '\u0222': // ?  http://en.wikipedia.org/wiki/OU  [LATIN CAPITAL LETTER OU]
  992             case '\u1D15': // ?  [LATIN LETTER SMALL CAPITAL OU]
  993               output[outputPos++] = 'O';
  994               output[outputPos++] = 'U';
  995               break;
  996             case '\u24AA': // ?  [PARENTHESIZED LATIN SMALL LETTER O]
  997               output[outputPos++] = '(';
  998               output[outputPos++] = 'o';
  999               output[outputPos++] = ')';
 1000               break;
 1001             case '\u0153': // ?  [LATIN SMALL LIGATURE OE]
 1002             case '\u1D14': // ?  [LATIN SMALL LETTER TURNED OE]
 1003               output[outputPos++] = 'o';
 1004               output[outputPos++] = 'e';
 1005               break;
 1006             case '\uA74F': // ?  [LATIN SMALL LETTER OO]
 1007               output[outputPos++] = 'o';
 1008               output[outputPos++] = 'o';
 1009               break;
 1010             case '\u0223': // ?  http://en.wikipedia.org/wiki/OU  [LATIN SMALL LETTER OU]
 1011               output[outputPos++] = 'o';
 1012               output[outputPos++] = 'u';
 1013               break;
 1014             case '\u01A4': // ?  [LATIN CAPITAL LETTER P WITH HOOK]
 1015             case '\u1D18': // ?  [LATIN LETTER SMALL CAPITAL P]
 1016             case '\u1E54': // ?  [LATIN CAPITAL LETTER P WITH ACUTE]
 1017             case '\u1E56': // ?  [LATIN CAPITAL LETTER P WITH DOT ABOVE]
 1018             case '\u24C5': // ?  [CIRCLED LATIN CAPITAL LETTER P]
 1019             case '\u2C63': // ?  [LATIN CAPITAL LETTER P WITH STROKE]
 1020             case '\uA750': // ?  [LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER]
 1021             case '\uA752': // ?  [LATIN CAPITAL LETTER P WITH FLOURISH]
 1022             case '\uA754': // ?  [LATIN CAPITAL LETTER P WITH SQUIRREL TAIL]
 1023             case '\uFF30': // ?  [FULLWIDTH LATIN CAPITAL LETTER P]
 1024               output[outputPos++] = 'P';
 1025               break;
 1026             case '\u01A5': // ?  [LATIN SMALL LETTER P WITH HOOK]
 1027             case '\u1D71': // ?  [LATIN SMALL LETTER P WITH MIDDLE TILDE]
 1028             case '\u1D7D': // ?  [LATIN SMALL LETTER P WITH STROKE]
 1029             case '\u1D88': // ?  [LATIN SMALL LETTER P WITH PALATAL HOOK]
 1030             case '\u1E55': // ?  [LATIN SMALL LETTER P WITH ACUTE]
 1031             case '\u1E57': // ?  [LATIN SMALL LETTER P WITH DOT ABOVE]
 1032             case '\u24DF': // ?  [CIRCLED LATIN SMALL LETTER P]
 1033             case '\uA751': // ?  [LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER]
 1034             case '\uA753': // ?  [LATIN SMALL LETTER P WITH FLOURISH]
 1035             case '\uA755': // ?  [LATIN SMALL LETTER P WITH SQUIRREL TAIL]
 1036             case '\uA7FC': // ?  [LATIN EPIGRAPHIC LETTER REVERSED P]
 1037             case '\uFF50': // ?  [FULLWIDTH LATIN SMALL LETTER P]
 1038               output[outputPos++] = 'p';
 1039               break;
 1040             case '\u24AB': // ?  [PARENTHESIZED LATIN SMALL LETTER P]
 1041               output[outputPos++] = '(';
 1042               output[outputPos++] = 'p';
 1043               output[outputPos++] = ')';
 1044               break;
 1045             case '\u024A': // ?  [LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL]
 1046             case '\u24C6': // ?  [CIRCLED LATIN CAPITAL LETTER Q]
 1047             case '\uA756': // ?  [LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER]
 1048             case '\uA758': // ?  [LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE]
 1049             case '\uFF31': // ?  [FULLWIDTH LATIN CAPITAL LETTER Q]
 1050               output[outputPos++] = 'Q';
 1051               break;
 1052             case '\u0138': // ?  http://en.wikipedia.org/wiki/Kra_(letter)  [LATIN SMALL LETTER KRA]
 1053             case '\u024B': // ?  [LATIN SMALL LETTER Q WITH HOOK TAIL]
 1054             case '\u02A0': // ?  [LATIN SMALL LETTER Q WITH HOOK]
 1055             case '\u24E0': // ?  [CIRCLED LATIN SMALL LETTER Q]
 1056             case '\uA757': // ?  [LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER]
 1057             case '\uA759': // ?  [LATIN SMALL LETTER Q WITH DIAGONAL STROKE]
 1058             case '\uFF51': // ?  [FULLWIDTH LATIN SMALL LETTER Q]
 1059               output[outputPos++] = 'q';
 1060               break;
 1061             case '\u24AC': // ?  [PARENTHESIZED LATIN SMALL LETTER Q]
 1062               output[outputPos++] = '(';
 1063               output[outputPos++] = 'q';
 1064               output[outputPos++] = ')';
 1065               break;
 1066             case '\u0239': // ?  [LATIN SMALL LETTER QP DIGRAPH]
 1067               output[outputPos++] = 'q';
 1068               output[outputPos++] = 'p';
 1069               break;
 1070             case '\u0154': // ?  [LATIN CAPITAL LETTER R WITH ACUTE]
 1071             case '\u0156': // ?  [LATIN CAPITAL LETTER R WITH CEDILLA]
 1072             case '\u0158': // ?  [LATIN CAPITAL LETTER R WITH CARON]
 1073             case '\u0210': // ?  [LATIN CAPITAL LETTER R WITH DOUBLE GRAVE]
 1074             case '\u0212': // ?  [LATIN CAPITAL LETTER R WITH INVERTED BREVE]
 1075             case '\u024C': // ?  [LATIN CAPITAL LETTER R WITH STROKE]
 1076             case '\u0280': // ?  [LATIN LETTER SMALL CAPITAL R]
 1077             case '\u0281': // ?  [LATIN LETTER SMALL CAPITAL INVERTED R]
 1078             case '\u1D19': // ?  [LATIN LETTER SMALL CAPITAL REVERSED R]
 1079             case '\u1D1A': // ?  [LATIN LETTER SMALL CAPITAL TURNED R]
 1080             case '\u1E58': // ?  [LATIN CAPITAL LETTER R WITH DOT ABOVE]
 1081             case '\u1E5A': // ?  [LATIN CAPITAL LETTER R WITH DOT BELOW]
 1082             case '\u1E5C': // ?  [LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON]
 1083             case '\u1E5E': // ?  [LATIN CAPITAL LETTER R WITH LINE BELOW]
 1084             case '\u24C7': // ?  [CIRCLED LATIN CAPITAL LETTER R]
 1085             case '\u2C64': // ?  [LATIN CAPITAL LETTER R WITH TAIL]
 1086             case '\uA75A': // ?  [LATIN CAPITAL LETTER R ROTUNDA]
 1087             case '\uA782': // ?  [LATIN CAPITAL LETTER INSULAR R]
 1088             case '\uFF32': // ?  [FULLWIDTH LATIN CAPITAL LETTER R]
 1089               output[outputPos++] = 'R';
 1090               break;
 1091             case '\u0155': // ?  [LATIN SMALL LETTER R WITH ACUTE]
 1092             case '\u0157': // ?  [LATIN SMALL LETTER R WITH CEDILLA]
 1093             case '\u0159': // ?  [LATIN SMALL LETTER R WITH CARON]
 1094             case '\u0211': // ?  [LATIN SMALL LETTER R WITH DOUBLE GRAVE]
 1095             case '\u0213': // ?  [LATIN SMALL LETTER R WITH INVERTED BREVE]
 1096             case '\u024D': // ?  [LATIN SMALL LETTER R WITH STROKE]
 1097             case '\u027C': // ?  [LATIN SMALL LETTER R WITH LONG LEG]
 1098             case '\u027D': // ?  [LATIN SMALL LETTER R WITH TAIL]
 1099             case '\u027E': // ?  [LATIN SMALL LETTER R WITH FISHHOOK]
 1100             case '\u027F': // ?  [LATIN SMALL LETTER REVERSED R WITH FISHHOOK]
 1101             case '\u1D63': // ?  [LATIN SUBSCRIPT SMALL LETTER R]
 1102             case '\u1D72': // ?  [LATIN SMALL LETTER R WITH MIDDLE TILDE]
 1103             case '\u1D73': // ?  [LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE]
 1104             case '\u1D89': // ?  [LATIN SMALL LETTER R WITH PALATAL HOOK]
 1105             case '\u1E59': // ?  [LATIN SMALL LETTER R WITH DOT ABOVE]
 1106             case '\u1E5B': // ?  [LATIN SMALL LETTER R WITH DOT BELOW]
 1107             case '\u1E5D': // ?  [LATIN SMALL LETTER R WITH DOT BELOW AND MACRON]
 1108             case '\u1E5F': // ?  [LATIN SMALL LETTER R WITH LINE BELOW]
 1109             case '\u24E1': // ?  [CIRCLED LATIN SMALL LETTER R]
 1110             case '\uA75B': // ?  [LATIN SMALL LETTER R ROTUNDA]
 1111             case '\uA783': // ?  [LATIN SMALL LETTER INSULAR R]
 1112             case '\uFF52': // ?  [FULLWIDTH LATIN SMALL LETTER R]
 1113               output[outputPos++] = 'r';
 1114               break;
 1115             case '\u24AD': // ?  [PARENTHESIZED LATIN SMALL LETTER R]
 1116               output[outputPos++] = '(';
 1117               output[outputPos++] = 'r';
 1118               output[outputPos++] = ')';
 1119               break;
 1120             case '\u015A': // ?  [LATIN CAPITAL LETTER S WITH ACUTE]
 1121             case '\u015C': // ?  [LATIN CAPITAL LETTER S WITH CIRCUMFLEX]
 1122             case '\u015E': // ?  [LATIN CAPITAL LETTER S WITH CEDILLA]
 1123             case '\u0160': // ?  [LATIN CAPITAL LETTER S WITH CARON]
 1124             case '\u0218': // ?  [LATIN CAPITAL LETTER S WITH COMMA BELOW]
 1125             case '\u1E60': // ?  [LATIN CAPITAL LETTER S WITH DOT ABOVE]
 1126             case '\u1E62': // ?  [LATIN CAPITAL LETTER S WITH DOT BELOW]
 1127             case '\u1E64': // ?  [LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE]
 1128             case '\u1E66': // ?  [LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE]
 1129             case '\u1E68': // ?  [LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE]
 1130             case '\u24C8': // ?  [CIRCLED LATIN CAPITAL LETTER S]
 1131             case '\uA731': // ?  [LATIN LETTER SMALL CAPITAL S]
 1132             case '\uA785': // ?  [LATIN SMALL LETTER INSULAR S]
 1133             case '\uFF33': // ?  [FULLWIDTH LATIN CAPITAL LETTER S]
 1134               output[outputPos++] = 'S';
 1135               break;
 1136             case '\u015B': // ?  [LATIN SMALL LETTER S WITH ACUTE]
 1137             case '\u015D': // ?  [LATIN SMALL LETTER S WITH CIRCUMFLEX]
 1138             case '\u015F': // ?  [LATIN SMALL LETTER S WITH CEDILLA]
 1139             case '\u0161': // ?  [LATIN SMALL LETTER S WITH CARON]
 1140             case '\u017F': // ?  http://en.wikipedia.org/wiki/Long_S  [LATIN SMALL LETTER LONG S]
 1141             case '\u0219': // ?  [LATIN SMALL LETTER S WITH COMMA BELOW]
 1142             case '\u023F': // ?  [LATIN SMALL LETTER S WITH SWASH TAIL]
 1143             case '\u0282': // ?  [LATIN SMALL LETTER S WITH HOOK]
 1144             case '\u1D74': // ?  [LATIN SMALL LETTER S WITH MIDDLE TILDE]
 1145             case '\u1D8A': // ?  [LATIN SMALL LETTER S WITH PALATAL HOOK]
 1146             case '\u1E61': // ?  [LATIN SMALL LETTER S WITH DOT ABOVE]
 1147             case '\u1E63': // ?  [LATIN SMALL LETTER S WITH DOT BELOW]
 1148             case '\u1E65': // ?  [LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE]
 1149             case '\u1E67': // ?  [LATIN SMALL LETTER S WITH CARON AND DOT ABOVE]
 1150             case '\u1E69': // ?  [LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE]
 1151             case '\u1E9C': // ?  [LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE]
 1152             case '\u1E9D': // ?  [LATIN SMALL LETTER LONG S WITH HIGH STROKE]
 1153             case '\u24E2': // ?  [CIRCLED LATIN SMALL LETTER S]
 1154             case '\uA784': // ?  [LATIN CAPITAL LETTER INSULAR S]
 1155             case '\uFF53': // ?  [FULLWIDTH LATIN SMALL LETTER S]
 1156               output[outputPos++] = 's';
 1157               break;
 1158             case '\u1E9E': // ?  [LATIN CAPITAL LETTER SHARP S]
 1159               output[outputPos++] = 'S';
 1160               output[outputPos++] = 'S';
 1161               break;
 1162             case '\u24AE': // ?  [PARENTHESIZED LATIN SMALL LETTER S]
 1163               output[outputPos++] = '(';
 1164               output[outputPos++] = 's';
 1165               output[outputPos++] = ')';
 1166               break;
 1167             case '\u00DF': //   [LATIN SMALL LETTER SHARP S]
 1168               output[outputPos++] = 's';
 1169               output[outputPos++] = 's';
 1170               break;
 1171             case '\uFB06': // ?  [LATIN SMALL LIGATURE ST]
 1172               output[outputPos++] = 's';
 1173               output[outputPos++] = 't';
 1174               break;
 1175             case '\u0162': // ?  [LATIN CAPITAL LETTER T WITH CEDILLA]
 1176             case '\u0164': // ?  [LATIN CAPITAL LETTER T WITH CARON]
 1177             case '\u0166': // ?  [LATIN CAPITAL LETTER T WITH STROKE]
 1178             case '\u01AC': // ?  [LATIN CAPITAL LETTER T WITH HOOK]
 1179             case '\u01AE': // ?  [LATIN CAPITAL LETTER T WITH RETROFLEX HOOK]
 1180             case '\u021A': // ?  [LATIN CAPITAL LETTER T WITH COMMA BELOW]
 1181             case '\u023E': // ?  [LATIN CAPITAL LETTER T WITH DIAGONAL STROKE]
 1182             case '\u1D1B': // ?  [LATIN LETTER SMALL CAPITAL T]
 1183             case '\u1E6A': // ?  [LATIN CAPITAL LETTER T WITH DOT ABOVE]
 1184             case '\u1E6C': // ?  [LATIN CAPITAL LETTER T WITH DOT BELOW]
 1185             case '\u1E6E': // ?  [LATIN CAPITAL LETTER T WITH LINE BELOW]
 1186             case '\u1E70': // ?  [LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW]
 1187             case '\u24C9': // ?  [CIRCLED LATIN CAPITAL LETTER T]
 1188             case '\uA786': // ?  [LATIN CAPITAL LETTER INSULAR T]
 1189             case '\uFF34': // ?  [FULLWIDTH LATIN CAPITAL LETTER T]
 1190               output[outputPos++] = 'T';
 1191               break;
 1192             case '\u0163': // ?  [LATIN SMALL LETTER T WITH CEDILLA]
 1193             case '\u0165': // ?  [LATIN SMALL LETTER T WITH CARON]
 1194             case '\u0167': // ?  [LATIN SMALL LETTER T WITH STROKE]
 1195             case '\u01AB': // ?  [LATIN SMALL LETTER T WITH PALATAL HOOK]
 1196             case '\u01AD': // ?  [LATIN SMALL LETTER T WITH HOOK]
 1197             case '\u021B': // ?  [LATIN SMALL LETTER T WITH COMMA BELOW]
 1198             case '\u0236': // ?  [LATIN SMALL LETTER T WITH CURL]
 1199             case '\u0287': // ?  [LATIN SMALL LETTER TURNED T]
 1200             case '\u0288': // ?  [LATIN SMALL LETTER T WITH RETROFLEX HOOK]
 1201             case '\u1D75': // ?  [LATIN SMALL LETTER T WITH MIDDLE TILDE]
 1202             case '\u1E6B': // ?  [LATIN SMALL LETTER T WITH DOT ABOVE]
 1203             case '\u1E6D': // ?  [LATIN SMALL LETTER T WITH DOT BELOW]
 1204             case '\u1E6F': // ?  [LATIN SMALL LETTER T WITH LINE BELOW]
 1205             case '\u1E71': // ?  [LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW]
 1206             case '\u1E97': // ?  [LATIN SMALL LETTER T WITH DIAERESIS]
 1207             case '\u24E3': // ?  [CIRCLED LATIN SMALL LETTER T]
 1208             case '\u2C66': // ?  [LATIN SMALL LETTER T WITH DIAGONAL STROKE]
 1209             case '\uFF54': // ?  [FULLWIDTH LATIN SMALL LETTER T]
 1210               output[outputPos++] = 't';
 1211               break;
 1212             case '\u00DE': //   [LATIN CAPITAL LETTER THORN]
 1213             case '\uA766': // ?  [LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER]
 1214               output[outputPos++] = 'T';
 1215               output[outputPos++] = 'H';
 1216               break;
 1217             case '\uA728': // ?  [LATIN CAPITAL LETTER TZ]
 1218               output[outputPos++] = 'T';
 1219               output[outputPos++] = 'Z';
 1220               break;
 1221             case '\u24AF': // ?  [PARENTHESIZED LATIN SMALL LETTER T]
 1222               output[outputPos++] = '(';
 1223               output[outputPos++] = 't';
 1224               output[outputPos++] = ')';
 1225               break;
 1226             case '\u02A8': // ?  [LATIN SMALL LETTER TC DIGRAPH WITH CURL]
 1227               output[outputPos++] = 't';
 1228               output[outputPos++] = 'c';
 1229               break;
 1230             case '\u00FE': //   [LATIN SMALL LETTER THORN]
 1231             case '\u1D7A': // ?  [LATIN SMALL LETTER TH WITH STRIKETHROUGH]
 1232             case '\uA767': // ?  [LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER]
 1233               output[outputPos++] = 't';
 1234               output[outputPos++] = 'h';
 1235               break;
 1236             case '\u02A6': // ?  [LATIN SMALL LETTER TS DIGRAPH]
 1237               output[outputPos++] = 't';
 1238               output[outputPos++] = 's';
 1239               break;
 1240             case '\uA729': // ?  [LATIN SMALL LETTER TZ]
 1241               output[outputPos++] = 't';
 1242               output[outputPos++] = 'z';
 1243               break;
 1244             case '\u00D9': //   [LATIN CAPITAL LETTER U WITH GRAVE]
 1245             case '\u00DA': //   [LATIN CAPITAL LETTER U WITH ACUTE]
 1246             case '\u00DB': //   [LATIN CAPITAL LETTER U WITH CIRCUMFLEX]
 1247             case '\u00DC': //   [LATIN CAPITAL LETTER U WITH DIAERESIS]
 1248             case '\u0168': // ?  [LATIN CAPITAL LETTER U WITH TILDE]
 1249             case '\u016A': // ?  [LATIN CAPITAL LETTER U WITH MACRON]
 1250             case '\u016C': // ?  [LATIN CAPITAL LETTER U WITH BREVE]
 1251             case '\u016E': // ?  [LATIN CAPITAL LETTER U WITH RING ABOVE]
 1252             case '\u0170': // ?  [LATIN CAPITAL LETTER U WITH DOUBLE ACUTE]
 1253             case '\u0172': // ?  [LATIN CAPITAL LETTER U WITH OGONEK]
 1254             case '\u01AF': // ?  [LATIN CAPITAL LETTER U WITH HORN]
 1255             case '\u01D3': // ?  [LATIN CAPITAL LETTER U WITH CARON]
 1256             case '\u01D5': // ?  [LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON]
 1257             case '\u01D7': // ?  [LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE]
 1258             case '\u01D9': // ?  [LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON]
 1259             case '\u01DB': // ?  [LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE]
 1260             case '\u0214': // ?  [LATIN CAPITAL LETTER U WITH DOUBLE GRAVE]
 1261             case '\u0216': // ?  [LATIN CAPITAL LETTER U WITH INVERTED BREVE]
 1262             case '\u0244': // ?  [LATIN CAPITAL LETTER U BAR]
 1263             case '\u1D1C': // ?  [LATIN LETTER SMALL CAPITAL U]
 1264             case '\u1D7E': // ?  [LATIN SMALL CAPITAL LETTER U WITH STROKE]
 1265             case '\u1E72': // ?  [LATIN CAPITAL LETTER U WITH DIAERESIS BELOW]
 1266             case '\u1E74': // ?  [LATIN CAPITAL LETTER U WITH TILDE BELOW]
 1267             case '\u1E76': // ?  [LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW]
 1268             case '\u1E78': // ?  [LATIN CAPITAL LETTER U WITH TILDE AND ACUTE]
 1269             case '\u1E7A': // ?  [LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS]
 1270             case '\u1EE4': // ?  [LATIN CAPITAL LETTER U WITH DOT BELOW]
 1271             case '\u1EE6': // ?  [LATIN CAPITAL LETTER U WITH HOOK ABOVE]
 1272             case '\u1EE8': // ?  [LATIN CAPITAL LETTER U WITH HORN AND ACUTE]
 1273             case '\u1EEA': // ?  [LATIN CAPITAL LETTER U WITH HORN AND GRAVE]
 1274             case '\u1EEC': // ?  [LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE]
 1275             case '\u1EEE': // ?  [LATIN CAPITAL LETTER U WITH HORN AND TILDE]
 1276             case '\u1EF0': // ?  [LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW]
 1277             case '\u24CA': // ?  [CIRCLED LATIN CAPITAL LETTER U]
 1278             case '\uFF35': // ?  [FULLWIDTH LATIN CAPITAL LETTER U]
 1279               output[outputPos++] = 'U';
 1280               break;
 1281             case '\u00F9': //   [LATIN SMALL LETTER U WITH GRAVE]
 1282             case '\u00FA': //   [LATIN SMALL LETTER U WITH ACUTE]
 1283             case '\u00FB': //   [LATIN SMALL LETTER U WITH CIRCUMFLEX]
 1284             case '\u00FC': //   [LATIN SMALL LETTER U WITH DIAERESIS]
 1285             case '\u0169': // ?  [LATIN SMALL LETTER U WITH TILDE]
 1286             case '\u016B': // ?  [LATIN SMALL LETTER U WITH MACRON]
 1287             case '\u016D': // ?  [LATIN SMALL LETTER U WITH BREVE]
 1288             case '\u016F': // ?  [LATIN SMALL LETTER U WITH RING ABOVE]
 1289             case '\u0171': // ?  [LATIN SMALL LETTER U WITH DOUBLE ACUTE]
 1290             case '\u0173': // ?  [LATIN SMALL LETTER U WITH OGONEK]
 1291             case '\u01B0': // ?  [LATIN SMALL LETTER U WITH HORN]
 1292             case '\u01D4': // ?  [LATIN SMALL LETTER U WITH CARON]
 1293             case '\u01D6': // ?  [LATIN SMALL LETTER U WITH DIAERESIS AND MACRON]
 1294             case '\u01D8': // ?  [LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE]
 1295             case '\u01DA': // ?  [LATIN SMALL LETTER U WITH DIAERESIS AND CARON]
 1296             case '\u01DC': // ?  [LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE]
 1297             case '\u0215': // ?  [LATIN SMALL LETTER U WITH DOUBLE GRAVE]
 1298             case '\u0217': // ?  [LATIN SMALL LETTER U WITH INVERTED BREVE]
 1299             case '\u0289': // ?  [LATIN SMALL LETTER U BAR]
 1300             case '\u1D64': // ?  [LATIN SUBSCRIPT SMALL LETTER U]
 1301             case '\u1D99': // ?  [LATIN SMALL LETTER U WITH RETROFLEX HOOK]
 1302             case '\u1E73': // ?  [LATIN SMALL LETTER U WITH DIAERESIS BELOW]
 1303             case '\u1E75': // ?  [LATIN SMALL LETTER U WITH TILDE BELOW]
 1304             case '\u1E77': // ?  [LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW]
 1305             case '\u1E79': // ?  [LATIN SMALL LETTER U WITH TILDE AND ACUTE]
 1306             case '\u1E7B': // ?  [LATIN SMALL LETTER U WITH MACRON AND DIAERESIS]
 1307             case '\u1EE5': // ?  [LATIN SMALL LETTER U WITH DOT BELOW]
 1308             case '\u1EE7': // ?  [LATIN SMALL LETTER U WITH HOOK ABOVE]
 1309             case '\u1EE9': // ?  [LATIN SMALL LETTER U WITH HORN AND ACUTE]
 1310             case '\u1EEB': // ?  [LATIN SMALL LETTER U WITH HORN AND GRAVE]
 1311             case '\u1EED': // ?  [LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE]
 1312             case '\u1EEF': // ?  [LATIN SMALL LETTER U WITH HORN AND TILDE]
 1313             case '\u1EF1': // ?  [LATIN SMALL LETTER U WITH HORN AND DOT BELOW]
 1314             case '\u24E4': // ?  [CIRCLED LATIN SMALL LETTER U]
 1315             case '\uFF55': // ?  [FULLWIDTH LATIN SMALL LETTER U]
 1316               output[outputPos++] = 'u';
 1317               break;
 1318             case '\u24B0': // ?  [PARENTHESIZED LATIN SMALL LETTER U]
 1319               output[outputPos++] = '(';
 1320               output[outputPos++] = 'u';
 1321               output[outputPos++] = ')';
 1322               break;
 1323             case '\u1D6B': // ?  [LATIN SMALL LETTER UE]
 1324               output[outputPos++] = 'u';
 1325               output[outputPos++] = 'e';
 1326               break;
 1327             case '\u01B2': // ?  [LATIN CAPITAL LETTER V WITH HOOK]
 1328             case '\u0245': // ?  [LATIN CAPITAL LETTER TURNED V]
 1329             case '\u1D20': // ?  [LATIN LETTER SMALL CAPITAL V]
 1330             case '\u1E7C': // ?  [LATIN CAPITAL LETTER V WITH TILDE]
 1331             case '\u1E7E': // ?  [LATIN CAPITAL LETTER V WITH DOT BELOW]
 1332             case '\u1EFC': // ?  [LATIN CAPITAL LETTER MIDDLE-WELSH V]
 1333             case '\u24CB': // ?  [CIRCLED LATIN CAPITAL LETTER V]
 1334             case '\uA75E': // ?  [LATIN CAPITAL LETTER V WITH DIAGONAL STROKE]
 1335             case '\uA768': // ?  [LATIN CAPITAL LETTER VEND]
 1336             case '\uFF36': // ?  [FULLWIDTH LATIN CAPITAL LETTER V]
 1337               output[outputPos++] = 'V';
 1338               break;
 1339             case '\u028B': // ?  [LATIN SMALL LETTER V WITH HOOK]
 1340             case '\u028C': // ?  [LATIN SMALL LETTER TURNED V]
 1341             case '\u1D65': // ?  [LATIN SUBSCRIPT SMALL LETTER V]
 1342             case '\u1D8C': // ?  [LATIN SMALL LETTER V WITH PALATAL HOOK]
 1343             case '\u1E7D': // ?  [LATIN SMALL LETTER V WITH TILDE]
 1344             case '\u1E7F': // ?  [LATIN SMALL LETTER V WITH DOT BELOW]
 1345             case '\u24E5': // ?  [CIRCLED LATIN SMALL LETTER V]
 1346             case '\u2C71': // ?  [LATIN SMALL LETTER V WITH RIGHT HOOK]
 1347             case '\u2C74': // ?  [LATIN SMALL LETTER V WITH CURL]
 1348             case '\uA75F': // ?  [LATIN SMALL LETTER V WITH DIAGONAL STROKE]
 1349             case '\uFF56': // ?  [FULLWIDTH LATIN SMALL LETTER V]
 1350               output[outputPos++] = 'v';
 1351               break;
 1352             case '\uA760': // ?  [LATIN CAPITAL LETTER VY]
 1353               output[outputPos++] = 'V';
 1354               output[outputPos++] = 'Y';
 1355               break;
 1356             case '\u24B1': // ?  [PARENTHESIZED LATIN SMALL LETTER V]
 1357               output[outputPos++] = '(';
 1358               output[outputPos++] = 'v';
 1359               output[outputPos++] = ')';
 1360               break;
 1361             case '\uA761': // ?  [LATIN SMALL LETTER VY]
 1362               output[outputPos++] = 'v';
 1363               output[outputPos++] = 'y';
 1364               break;
 1365             case '\u0174': // ?  [LATIN CAPITAL LETTER W WITH CIRCUMFLEX]
 1366             case '\u01F7': // ?  http://en.wikipedia.org/wiki/Wynn  [LATIN CAPITAL LETTER WYNN]
 1367             case '\u1D21': // ?  [LATIN LETTER SMALL CAPITAL W]
 1368             case '\u1E80': // ?  [LATIN CAPITAL LETTER W WITH GRAVE]
 1369             case '\u1E82': // ?  [LATIN CAPITAL LETTER W WITH ACUTE]
 1370             case '\u1E84': // ?  [LATIN CAPITAL LETTER W WITH DIAERESIS]
 1371             case '\u1E86': // ?  [LATIN CAPITAL LETTER W WITH DOT ABOVE]
 1372             case '\u1E88': // ?  [LATIN CAPITAL LETTER W WITH DOT BELOW]
 1373             case '\u24CC': // ?  [CIRCLED LATIN CAPITAL LETTER W]
 1374             case '\u2C72': // ?  [LATIN CAPITAL LETTER W WITH HOOK]
 1375             case '\uFF37': // ?  [FULLWIDTH LATIN CAPITAL LETTER W]
 1376               output[outputPos++] = 'W';
 1377               break;
 1378             case '\u0175': // ?  [LATIN SMALL LETTER W WITH CIRCUMFLEX]
 1379             case '\u01BF': // ?  http://en.wikipedia.org/wiki/Wynn  [LATIN LETTER WYNN]
 1380             case '\u028D': // ?  [LATIN SMALL LETTER TURNED W]
 1381             case '\u1E81': // ?  [LATIN SMALL LETTER W WITH GRAVE]
 1382             case '\u1E83': // ?  [LATIN SMALL LETTER W WITH ACUTE]
 1383             case '\u1E85': // ?  [LATIN SMALL LETTER W WITH DIAERESIS]
 1384             case '\u1E87': // ?  [LATIN SMALL LETTER W WITH DOT ABOVE]
 1385             case '\u1E89': // ?  [LATIN SMALL LETTER W WITH DOT BELOW]
 1386             case '\u1E98': // ?  [LATIN SMALL LETTER W WITH RING ABOVE]
 1387             case '\u24E6': // ?  [CIRCLED LATIN SMALL LETTER W]
 1388             case '\u2C73': // ?  [LATIN SMALL LETTER W WITH HOOK]
 1389             case '\uFF57': // ?  [FULLWIDTH LATIN SMALL LETTER W]
 1390               output[outputPos++] = 'w';
 1391               break;
 1392             case '\u24B2': // ?  [PARENTHESIZED LATIN SMALL LETTER W]
 1393               output[outputPos++] = '(';
 1394               output[outputPos++] = 'w';
 1395               output[outputPos++] = ')';
 1396               break;
 1397             case '\u1E8A': // ?  [LATIN CAPITAL LETTER X WITH DOT ABOVE]
 1398             case '\u1E8C': // ?  [LATIN CAPITAL LETTER X WITH DIAERESIS]
 1399             case '\u24CD': // ?  [CIRCLED LATIN CAPITAL LETTER X]
 1400             case '\uFF38': // ?  [FULLWIDTH LATIN CAPITAL LETTER X]
 1401               output[outputPos++] = 'X';
 1402               break;
 1403             case '\u1D8D': // ?  [LATIN SMALL LETTER X WITH PALATAL HOOK]
 1404             case '\u1E8B': // ?  [LATIN SMALL LETTER X WITH DOT ABOVE]
 1405             case '\u1E8D': // ?  [LATIN SMALL LETTER X WITH DIAERESIS]
 1406             case '\u2093': // ?  [LATIN SUBSCRIPT SMALL LETTER X]
 1407             case '\u24E7': // ?  [CIRCLED LATIN SMALL LETTER X]
 1408             case '\uFF58': // ?  [FULLWIDTH LATIN SMALL LETTER X]
 1409               output[outputPos++] = 'x';
 1410               break;
 1411             case '\u24B3': // ?  [PARENTHESIZED LATIN SMALL LETTER X]
 1412               output[outputPos++] = '(';
 1413               output[outputPos++] = 'x';
 1414               output[outputPos++] = ')';
 1415               break;
 1416             case '\u00DD': //   [LATIN CAPITAL LETTER Y WITH ACUTE]
 1417             case '\u0176': // ?  [LATIN CAPITAL LETTER Y WITH CIRCUMFLEX]
 1418             case '\u0178': // ?  [LATIN CAPITAL LETTER Y WITH DIAERESIS]
 1419             case '\u01B3': // ?  [LATIN CAPITAL LETTER Y WITH HOOK]
 1420             case '\u0232': // ?  [LATIN CAPITAL LETTER Y WITH MACRON]
 1421             case '\u024E': // ?  [LATIN CAPITAL LETTER Y WITH STROKE]
 1422             case '\u028F': // ?  [LATIN LETTER SMALL CAPITAL Y]
 1423             case '\u1E8E': // ?  [LATIN CAPITAL LETTER Y WITH DOT ABOVE]
 1424             case '\u1EF2': // ?  [LATIN CAPITAL LETTER Y WITH GRAVE]
 1425             case '\u1EF4': // ?  [LATIN CAPITAL LETTER Y WITH DOT BELOW]
 1426             case '\u1EF6': // ?  [LATIN CAPITAL LETTER Y WITH HOOK ABOVE]
 1427             case '\u1EF8': // ?  [LATIN CAPITAL LETTER Y WITH TILDE]
 1428             case '\u1EFE': // ?  [LATIN CAPITAL LETTER Y WITH LOOP]
 1429             case '\u24CE': // ?  [CIRCLED LATIN CAPITAL LETTER Y]
 1430             case '\uFF39': // ?  [FULLWIDTH LATIN CAPITAL LETTER Y]
 1431               output[outputPos++] = 'Y';
 1432               break;
 1433             case '\u00FD': //   [LATIN SMALL LETTER Y WITH ACUTE]
 1434             case '\u00FF': //   [LATIN SMALL LETTER Y WITH DIAERESIS]
 1435             case '\u0177': // ?  [LATIN SMALL LETTER Y WITH CIRCUMFLEX]
 1436             case '\u01B4': // ?  [LATIN SMALL LETTER Y WITH HOOK]
 1437             case '\u0233': // ?  [LATIN SMALL LETTER Y WITH MACRON]
 1438             case '\u024F': // ?  [LATIN SMALL LETTER Y WITH STROKE]
 1439             case '\u028E': // ?  [LATIN SMALL LETTER TURNED Y]
 1440             case '\u1E8F': // ?  [LATIN SMALL LETTER Y WITH DOT ABOVE]
 1441             case '\u1E99': // ?  [LATIN SMALL LETTER Y WITH RING ABOVE]
 1442             case '\u1EF3': // ?  [LATIN SMALL LETTER Y WITH GRAVE]
 1443             case '\u1EF5': // ?  [LATIN SMALL LETTER Y WITH DOT BELOW]
 1444             case '\u1EF7': // ?  [LATIN SMALL LETTER Y WITH HOOK ABOVE]
 1445             case '\u1EF9': // ?  [LATIN SMALL LETTER Y WITH TILDE]
 1446             case '\u1EFF': // ?  [LATIN SMALL LETTER Y WITH LOOP]
 1447             case '\u24E8': // ?  [CIRCLED LATIN SMALL LETTER Y]
 1448             case '\uFF59': // ?  [FULLWIDTH LATIN SMALL LETTER Y]
 1449               output[outputPos++] = 'y';
 1450               break;
 1451             case '\u24B4': // ?  [PARENTHESIZED LATIN SMALL LETTER Y]
 1452               output[outputPos++] = '(';
 1453               output[outputPos++] = 'y';
 1454               output[outputPos++] = ')';
 1455               break;
 1456             case '\u0179': // ?  [LATIN CAPITAL LETTER Z WITH ACUTE]
 1457             case '\u017B': // ?  [LATIN CAPITAL LETTER Z WITH DOT ABOVE]
 1458             case '\u017D': // ?  [LATIN CAPITAL LETTER Z WITH CARON]
 1459             case '\u01B5': // ?  [LATIN CAPITAL LETTER Z WITH STROKE]
 1460             case '\u021C': // ?  http://en.wikipedia.org/wiki/Yogh  [LATIN CAPITAL LETTER YOGH]
 1461             case '\u0224': // ?  [LATIN CAPITAL LETTER Z WITH HOOK]
 1462             case '\u1D22': // ?  [LATIN LETTER SMALL CAPITAL Z]
 1463             case '\u1E90': // ?  [LATIN CAPITAL LETTER Z WITH CIRCUMFLEX]
 1464             case '\u1E92': // ?  [LATIN CAPITAL LETTER Z WITH DOT BELOW]
 1465             case '\u1E94': // ?  [LATIN CAPITAL LETTER Z WITH LINE BELOW]
 1466             case '\u24CF': // ?  [CIRCLED LATIN CAPITAL LETTER Z]
 1467             case '\u2C6B': // ?  [LATIN CAPITAL LETTER Z WITH DESCENDER]
 1468             case '\uA762': // ?  [LATIN CAPITAL LETTER VISIGOTHIC Z]
 1469             case '\uFF3A': // ?  [FULLWIDTH LATIN CAPITAL LETTER Z]
 1470               output[outputPos++] = 'Z';
 1471               break;
 1472             case '\u017A': // ?  [LATIN SMALL LETTER Z WITH ACUTE]
 1473             case '\u017C': // ?  [LATIN SMALL LETTER Z WITH DOT ABOVE]
 1474             case '\u017E': // ?  [LATIN SMALL LETTER Z WITH CARON]
 1475             case '\u01B6': // ?  [LATIN SMALL LETTER Z WITH STROKE]
 1476             case '\u021D': // ?  http://en.wikipedia.org/wiki/Yogh  [LATIN SMALL LETTER YOGH]
 1477             case '\u0225': // ?  [LATIN SMALL LETTER Z WITH HOOK]
 1478             case '\u0240': // ?  [LATIN SMALL LETTER Z WITH SWASH TAIL]
 1479             case '\u0290': // ?  [LATIN SMALL LETTER Z WITH RETROFLEX HOOK]
 1480             case '\u0291': // ?  [LATIN SMALL LETTER Z WITH CURL]
 1481             case '\u1D76': // ?  [LATIN SMALL LETTER Z WITH MIDDLE TILDE]
 1482             case '\u1D8E': // ?  [LATIN SMALL LETTER Z WITH PALATAL HOOK]
 1483             case '\u1E91': // ?  [LATIN SMALL LETTER Z WITH CIRCUMFLEX]
 1484             case '\u1E93': // ?  [LATIN SMALL LETTER Z WITH DOT BELOW]
 1485             case '\u1E95': // ?  [LATIN SMALL LETTER Z WITH LINE BELOW]
 1486             case '\u24E9': // ?  [CIRCLED LATIN SMALL LETTER Z]
 1487             case '\u2C6C': // ?  [LATIN SMALL LETTER Z WITH DESCENDER]
 1488             case '\uA763': // ?  [LATIN SMALL LETTER VISIGOTHIC Z]
 1489             case '\uFF5A': // ?  [FULLWIDTH LATIN SMALL LETTER Z]
 1490               output[outputPos++] = 'z';
 1491               break;
 1492             case '\u24B5': // ?  [PARENTHESIZED LATIN SMALL LETTER Z]
 1493               output[outputPos++] = '(';
 1494               output[outputPos++] = 'z';
 1495               output[outputPos++] = ')';
 1496               break;
 1497             case '\u2070': // ?  [SUPERSCRIPT ZERO]
 1498             case '\u2080': // ?  [SUBSCRIPT ZERO]
 1499             case '\u24EA': // ?  [CIRCLED DIGIT ZERO]
 1500             case '\u24FF': // ?  [NEGATIVE CIRCLED DIGIT ZERO]
 1501             case '\uFF10': // ?  [FULLWIDTH DIGIT ZERO]
 1502               output[outputPos++] = '0';
 1503               break;
 1504             case '\u00B9': //   [SUPERSCRIPT ONE]
 1505             case '\u2081': // ?  [SUBSCRIPT ONE]
 1506             case '\u2460': // ?  [CIRCLED DIGIT ONE]
 1507             case '\u24F5': // ?  [DOUBLE CIRCLED DIGIT ONE]
 1508             case '\u2776': // ?  [DINGBAT NEGATIVE CIRCLED DIGIT ONE]
 1509             case '\u2780': // ?  [DINGBAT CIRCLED SANS-SERIF DIGIT ONE]
 1510             case '\u278A': // ?  [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE]
 1511             case '\uFF11': // ?  [FULLWIDTH DIGIT ONE]
 1512               output[outputPos++] = '1';
 1513               break;
 1514             case '\u2488': // ?  [DIGIT ONE FULL STOP]
 1515               output[outputPos++] = '1';
 1516               output[outputPos++] = '.';
 1517               break;
 1518             case '\u2474': // ?  [PARENTHESIZED DIGIT ONE]
 1519               output[outputPos++] = '(';
 1520               output[outputPos++] = '1';
 1521               output[outputPos++] = ')';
 1522               break;
 1523             case '\u00B2': //   [SUPERSCRIPT TWO]
 1524             case '\u2082': // ?  [SUBSCRIPT TWO]
 1525             case '\u2461': // ?  [CIRCLED DIGIT TWO]
 1526             case '\u24F6': // ?  [DOUBLE CIRCLED DIGIT TWO]
 1527             case '\u2777': // ?  [DINGBAT NEGATIVE CIRCLED DIGIT TWO]
 1528             case '\u2781': // ?  [DINGBAT CIRCLED SANS-SERIF DIGIT TWO]
 1529             case '\u278B': // ?  [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO]
 1530             case '\uFF12': // ?  [FULLWIDTH DIGIT TWO]
 1531               output[outputPos++] = '2';
 1532               break;
 1533             case '\u2489': // ?  [DIGIT TWO FULL STOP]
 1534               output[outputPos++] = '2';
 1535               output[outputPos++] = '.';
 1536               break;
 1537             case '\u2475': // ?  [PARENTHESIZED DIGIT TWO]
 1538               output[outputPos++] = '(';
 1539               output[outputPos++] = '2';
 1540               output[outputPos++] = ')';
 1541               break;
 1542             case '\u00B3': //   [SUPERSCRIPT THREE]
 1543             case '\u2083': // ?  [SUBSCRIPT THREE]
 1544             case '\u2462': // ?  [CIRCLED DIGIT THREE]
 1545             case '\u24F7': // ?  [DOUBLE CIRCLED DIGIT THREE]
 1546             case '\u2778': // ?  [DINGBAT NEGATIVE CIRCLED DIGIT THREE]
 1547             case '\u2782': // ?  [DINGBAT CIRCLED SANS-SERIF DIGIT THREE]
 1548             case '\u278C': // ?  [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE]
 1549             case '\uFF13': // ?  [FULLWIDTH DIGIT THREE]
 1550               output[outputPos++] = '3';
 1551               break;
 1552             case '\u248A': // ?  [DIGIT THREE FULL STOP]
 1553               output[outputPos++] = '3';
 1554               output[outputPos++] = '.';
 1555               break;
 1556             case '\u2476': // ?  [PARENTHESIZED DIGIT THREE]
 1557               output[outputPos++] = '(';
 1558               output[outputPos++] = '3';
 1559               output[outputPos++] = ')';
 1560               break;
 1561             case '\u2074': // ?  [SUPERSCRIPT FOUR]
 1562             case '\u2084': // ?  [SUBSCRIPT FOUR]
 1563             case '\u2463': // ?  [CIRCLED DIGIT FOUR]
 1564             case '\u24F8': // ?  [DOUBLE CIRCLED DIGIT FOUR]
 1565             case '\u2779': // ?  [DINGBAT NEGATIVE CIRCLED DIGIT FOUR]
 1566             case '\u2783': // ?  [DINGBAT CIRCLED SANS-SERIF DIGIT FOUR]
 1567             case '\u278D': // ?  [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR]
 1568             case '\uFF14': // ?  [FULLWIDTH DIGIT FOUR]
 1569               output[outputPos++] = '4';
 1570               break;
 1571             case '\u248B': // ?  [DIGIT FOUR FULL STOP]
 1572               output[outputPos++] = '4';
 1573               output[outputPos++] = '.';
 1574               break;
 1575             case '\u2477': // ?  [PARENTHESIZED DIGIT FOUR]
 1576               output[outputPos++] = '(';
 1577               output[outputPos++] = '4';
 1578               output[outputPos++] = ')';
 1579               break;
 1580             case '\u2075': // ?  [SUPERSCRIPT FIVE]
 1581             case '\u2085': // ?  [SUBSCRIPT FIVE]
 1582             case '\u2464': // ?  [CIRCLED DIGIT FIVE]
 1583             case '\u24F9': // ?  [DOUBLE CIRCLED DIGIT FIVE]
 1584             case '\u277A': // ?  [DINGBAT NEGATIVE CIRCLED DIGIT FIVE]
 1585             case '\u2784': // ?  [DINGBAT CIRCLED SANS-SERIF DIGIT FIVE]
 1586             case '\u278E': // ?  [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE]
 1587             case '\uFF15': // ?  [FULLWIDTH DIGIT FIVE]
 1588               output[outputPos++] = '5';
 1589               break;
 1590             case '\u248C': // ?  [DIGIT FIVE FULL STOP]
 1591               output[outputPos++] = '5';
 1592               output[outputPos++] = '.';
 1593               break;
 1594             case '\u2478': // ?  [PARENTHESIZED DIGIT FIVE]
 1595               output[outputPos++] = '(';
 1596               output[outputPos++] = '5';
 1597               output[outputPos++] = ')';
 1598               break;
 1599             case '\u2076': // ?  [SUPERSCRIPT SIX]
 1600             case '\u2086': // ?  [SUBSCRIPT SIX]
 1601             case '\u2465': // ?  [CIRCLED DIGIT SIX]
 1602             case '\u24FA': // ?  [DOUBLE CIRCLED DIGIT SIX]
 1603             case '\u277B': // ?  [DINGBAT NEGATIVE CIRCLED DIGIT SIX]
 1604             case '\u2785': // ?  [DINGBAT CIRCLED SANS-SERIF DIGIT SIX]
 1605             case '\u278F': // ?  [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX]
 1606             case '\uFF16': // ?  [FULLWIDTH DIGIT SIX]
 1607               output[outputPos++] = '6';
 1608               break;
 1609             case '\u248D': // ?  [DIGIT SIX FULL STOP]
 1610               output[outputPos++] = '6';
 1611               output[outputPos++] = '.';
 1612               break;
 1613             case '\u2479': // ?  [PARENTHESIZED DIGIT SIX]
 1614               output[outputPos++] = '(';
 1615               output[outputPos++] = '6';
 1616               output[outputPos++] = ')';
 1617               break;
 1618             case '\u2077': // ?  [SUPERSCRIPT SEVEN]
 1619             case '\u2087': // ?  [SUBSCRIPT SEVEN]
 1620             case '\u2466': // ?  [CIRCLED DIGIT SEVEN]
 1621             case '\u24FB': // ?  [DOUBLE CIRCLED DIGIT SEVEN]
 1622             case '\u277C': // ?  [DINGBAT NEGATIVE CIRCLED DIGIT SEVEN]
 1623             case '\u2786': // ?  [DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN]
 1624             case '\u2790': // ?  [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SEVEN]
 1625             case '\uFF17': // ?  [FULLWIDTH DIGIT SEVEN]
 1626               output[outputPos++] = '7';
 1627               break;
 1628             case '\u248E': // ?  [DIGIT SEVEN FULL STOP]
 1629               output[outputPos++] = '7';
 1630               output[outputPos++] = '.';
 1631               break;
 1632             case '\u247A': // ?  [PARENTHESIZED DIGIT SEVEN]
 1633               output[outputPos++] = '(';
 1634               output[outputPos++] = '7';
 1635               output[outputPos++] = ')';
 1636               break;
 1637             case '\u2078': // ?  [SUPERSCRIPT EIGHT]
 1638             case '\u2088': // ?  [SUBSCRIPT EIGHT]
 1639             case '\u2467': // ?  [CIRCLED DIGIT EIGHT]
 1640             case '\u24FC': // ?  [DOUBLE CIRCLED DIGIT EIGHT]
 1641             case '\u277D': // ?  [DINGBAT NEGATIVE CIRCLED DIGIT EIGHT]
 1642             case '\u2787': // ?  [DINGBAT CIRCLED SANS-SERIF DIGIT EIGHT]
 1643             case '\u2791': // ?  [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT]
 1644             case '\uFF18': // ?  [FULLWIDTH DIGIT EIGHT]
 1645               output[outputPos++] = '8';
 1646               break;
 1647             case '\u248F': // ?  [DIGIT EIGHT FULL STOP]
 1648               output[outputPos++] = '8';
 1649               output[outputPos++] = '.';
 1650               break;
 1651             case '\u247B': // ?  [PARENTHESIZED DIGIT EIGHT]
 1652               output[outputPos++] = '(';
 1653               output[outputPos++] = '8';
 1654               output[outputPos++] = ')';
 1655               break;
 1656             case '\u2079': // ?  [SUPERSCRIPT NINE]
 1657             case '\u2089': // ?  [SUBSCRIPT NINE]
 1658             case '\u2468': // ?  [CIRCLED DIGIT NINE]
 1659             case '\u24FD': // ?  [DOUBLE CIRCLED DIGIT NINE]
 1660             case '\u277E': // ?  [DINGBAT NEGATIVE CIRCLED DIGIT NINE]
 1661             case '\u2788': // ?  [DINGBAT CIRCLED SANS-SERIF DIGIT NINE]
 1662             case '\u2792': // ?  [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE]
 1663             case '\uFF19': // ?  [FULLWIDTH DIGIT NINE]
 1664               output[outputPos++] = '9';
 1665               break;
 1666             case '\u2490': // ?  [DIGIT NINE FULL STOP]
 1667               output[outputPos++] = '9';
 1668               output[outputPos++] = '.';
 1669               break;
 1670             case '\u247C': // ?  [PARENTHESIZED DIGIT NINE]
 1671               output[outputPos++] = '(';
 1672               output[outputPos++] = '9';
 1673               output[outputPos++] = ')';
 1674               break;
 1675             case '\u2469': // ?  [CIRCLED NUMBER TEN]
 1676             case '\u24FE': // ?  [DOUBLE CIRCLED NUMBER TEN]
 1677             case '\u277F': // ?  [DINGBAT NEGATIVE CIRCLED NUMBER TEN]
 1678             case '\u2789': // ?  [DINGBAT CIRCLED SANS-SERIF NUMBER TEN]
 1679             case '\u2793': // ?  [DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN]
 1680               output[outputPos++] = '1';
 1681               output[outputPos++] = '0';
 1682               break;
 1683             case '\u2491': // ?  [NUMBER TEN FULL STOP]
 1684               output[outputPos++] = '1';
 1685               output[outputPos++] = '0';
 1686               output[outputPos++] = '.';
 1687               break;
 1688             case '\u247D': // ?  [PARENTHESIZED NUMBER TEN]
 1689               output[outputPos++] = '(';
 1690               output[outputPos++] = '1';
 1691               output[outputPos++] = '0';
 1692               output[outputPos++] = ')';
 1693               break;
 1694             case '\u246A': // ?  [CIRCLED NUMBER ELEVEN]
 1695             case '\u24EB': // ?  [NEGATIVE CIRCLED NUMBER ELEVEN]
 1696               output[outputPos++] = '1';
 1697               output[outputPos++] = '1';
 1698               break;
 1699             case '\u2492': // ?  [NUMBER ELEVEN FULL STOP]
 1700               output[outputPos++] = '1';
 1701               output[outputPos++] = '1';
 1702               output[outputPos++] = '.';
 1703               break;
 1704             case '\u247E': // ?  [PARENTHESIZED NUMBER ELEVEN]
 1705               output[outputPos++] = '(';
 1706               output[outputPos++] = '1';
 1707               output[outputPos++] = '1';
 1708               output[outputPos++] = ')';
 1709               break;
 1710             case '\u246B': // ?  [CIRCLED NUMBER TWELVE]
 1711             case '\u24EC': // ?  [NEGATIVE CIRCLED NUMBER TWELVE]
 1712               output[outputPos++] = '1';
 1713               output[outputPos++] = '2';
 1714               break;
 1715             case '\u2493': // ?  [NUMBER TWELVE FULL STOP]
 1716               output[outputPos++] = '1';
 1717               output[outputPos++] = '2';
 1718               output[outputPos++] = '.';
 1719               break;
 1720             case '\u247F': // ?  [PARENTHESIZED NUMBER TWELVE]
 1721               output[outputPos++] = '(';
 1722               output[outputPos++] = '1';
 1723               output[outputPos++] = '2';
 1724               output[outputPos++] = ')';
 1725               break;
 1726             case '\u246C': // ?  [CIRCLED NUMBER THIRTEEN]
 1727             case '\u24ED': // ?  [NEGATIVE CIRCLED NUMBER THIRTEEN]
 1728               output[outputPos++] = '1';
 1729               output[outputPos++] = '3';
 1730               break;
 1731             case '\u2494': // ?  [NUMBER THIRTEEN FULL STOP]
 1732               output[outputPos++] = '1';
 1733               output[outputPos++] = '3';
 1734               output[outputPos++] = '.';
 1735               break;
 1736             case '\u2480': // ?  [PARENTHESIZED NUMBER THIRTEEN]
 1737               output[outputPos++] = '(';
 1738               output[outputPos++] = '1';
 1739               output[outputPos++] = '3';
 1740               output[outputPos++] = ')';
 1741               break;
 1742             case '\u246D': // ?  [CIRCLED NUMBER FOURTEEN]
 1743             case '\u24EE': // ?  [NEGATIVE CIRCLED NUMBER FOURTEEN]
 1744               output[outputPos++] = '1';
 1745               output[outputPos++] = '4';
 1746               break;
 1747             case '\u2495': // ?  [NUMBER FOURTEEN FULL STOP]
 1748               output[outputPos++] = '1';
 1749               output[outputPos++] = '4';
 1750               output[outputPos++] = '.';
 1751               break;
 1752             case '\u2481': // ?  [PARENTHESIZED NUMBER FOURTEEN]
 1753               output[outputPos++] = '(';
 1754               output[outputPos++] = '1';
 1755               output[outputPos++] = '4';
 1756               output[outputPos++] = ')';
 1757               break;
 1758             case '\u246E': // ?  [CIRCLED NUMBER FIFTEEN]
 1759             case '\u24EF': // ?  [NEGATIVE CIRCLED NUMBER FIFTEEN]
 1760               output[outputPos++] = '1';
 1761               output[outputPos++] = '5';
 1762               break;
 1763             case '\u2496': // ?  [NUMBER FIFTEEN FULL STOP]
 1764               output[outputPos++] = '1';
 1765               output[outputPos++] = '5';
 1766               output[outputPos++] = '.';
 1767               break;
 1768             case '\u2482': // ?  [PARENTHESIZED NUMBER FIFTEEN]
 1769               output[outputPos++] = '(';
 1770               output[outputPos++] = '1';
 1771               output[outputPos++] = '5';
 1772               output[outputPos++] = ')';
 1773               break;
 1774             case '\u246F': // ?  [CIRCLED NUMBER SIXTEEN]
 1775             case '\u24F0': // ?  [NEGATIVE CIRCLED NUMBER SIXTEEN]
 1776               output[outputPos++] = '1';
 1777               output[outputPos++] = '6';
 1778               break;
 1779             case '\u2497': // ?  [NUMBER SIXTEEN FULL STOP]
 1780               output[outputPos++] = '1';
 1781               output[outputPos++] = '6';
 1782               output[outputPos++] = '.';
 1783               break;
 1784             case '\u2483': // ?  [PARENTHESIZED NUMBER SIXTEEN]
 1785               output[outputPos++] = '(';
 1786               output[outputPos++] = '1';
 1787               output[outputPos++] = '6';
 1788               output[outputPos++] = ')';
 1789               break;
 1790             case '\u2470': // ?  [CIRCLED NUMBER SEVENTEEN]
 1791             case '\u24F1': // ?  [NEGATIVE CIRCLED NUMBER SEVENTEEN]
 1792               output[outputPos++] = '1';
 1793               output[outputPos++] = '7';
 1794               break;
 1795             case '\u2498': // ?  [NUMBER SEVENTEEN FULL STOP]
 1796               output[outputPos++] = '1';
 1797               output[outputPos++] = '7';
 1798               output[outputPos++] = '.';
 1799               break;
 1800             case '\u2484': // ?  [PARENTHESIZED NUMBER SEVENTEEN]
 1801               output[outputPos++] = '(';
 1802               output[outputPos++] = '1';
 1803               output[outputPos++] = '7';
 1804               output[outputPos++] = ')';
 1805               break;
 1806             case '\u2471': // ?  [CIRCLED NUMBER EIGHTEEN]
 1807             case '\u24F2': // ?  [NEGATIVE CIRCLED NUMBER EIGHTEEN]
 1808               output[outputPos++] = '1';
 1809               output[outputPos++] = '8';
 1810               break;
 1811             case '\u2499': // ?  [NUMBER EIGHTEEN FULL STOP]
 1812               output[outputPos++] = '1';
 1813               output[outputPos++] = '8';
 1814               output[outputPos++] = '.';
 1815               break;
 1816             case '\u2485': // ?  [PARENTHESIZED NUMBER EIGHTEEN]
 1817               output[outputPos++] = '(';
 1818               output[outputPos++] = '1';
 1819               output[outputPos++] = '8';
 1820               output[outputPos++] = ')';
 1821               break;
 1822             case '\u2472': // ?  [CIRCLED NUMBER NINETEEN]
 1823             case '\u24F3': // ?  [NEGATIVE CIRCLED NUMBER NINETEEN]
 1824               output[outputPos++] = '1';
 1825               output[outputPos++] = '9';
 1826               break;
 1827             case '\u249A': // ?  [NUMBER NINETEEN FULL STOP]
 1828               output[outputPos++] = '1';
 1829               output[outputPos++] = '9';
 1830               output[outputPos++] = '.';
 1831               break;
 1832             case '\u2486': // ?  [PARENTHESIZED NUMBER NINETEEN]
 1833               output[outputPos++] = '(';
 1834               output[outputPos++] = '1';
 1835               output[outputPos++] = '9';
 1836               output[outputPos++] = ')';
 1837               break;
 1838             case '\u2473': // ?  [CIRCLED NUMBER TWENTY]
 1839             case '\u24F4': // ?  [NEGATIVE CIRCLED NUMBER TWENTY]
 1840               output[outputPos++] = '2';
 1841               output[outputPos++] = '0';
 1842               break;
 1843             case '\u249B': // ?  [NUMBER TWENTY FULL STOP]
 1844               output[outputPos++] = '2';
 1845               output[outputPos++] = '0';
 1846               output[outputPos++] = '.';
 1847               break;
 1848             case '\u2487': // ?  [PARENTHESIZED NUMBER TWENTY]
 1849               output[outputPos++] = '(';
 1850               output[outputPos++] = '2';
 1851               output[outputPos++] = '0';
 1852               output[outputPos++] = ')';
 1853               break;
 1854             case '\u00AB': //   [LEFT-POINTING DOUBLE ANGLE QUOTATION MARK]
 1855             case '\u00BB': //   [RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK]
 1856             case '\u201C': // ?  [LEFT DOUBLE QUOTATION MARK]
 1857             case '\u201D': // ?  [RIGHT DOUBLE QUOTATION MARK]
 1858             case '\u201E': // ?  [DOUBLE LOW-9 QUOTATION MARK]
 1859             case '\u2033': // ?  [DOUBLE PRIME]
 1860             case '\u2036': // ?  [REVERSED DOUBLE PRIME]
 1861             case '\u275D': // ?  [HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT]
 1862             case '\u275E': // ?  [HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT]
 1863             case '\u276E': // ?  [HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT]
 1864             case '\u276F': // ?  [HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT]
 1865             case '\uFF02': // ?  [FULLWIDTH QUOTATION MARK]
 1866               output[outputPos++] = '"';
 1867               break;
 1868             case '\u2018': // ?  [LEFT SINGLE QUOTATION MARK]
 1869             case '\u2019': // ?  [RIGHT SINGLE QUOTATION MARK]
 1870             case '\u201A': // ?  [SINGLE LOW-9 QUOTATION MARK]
 1871             case '\u201B': // ?  [SINGLE HIGH-REVERSED-9 QUOTATION MARK]
 1872             case '\u2032': // ?  [PRIME]
 1873             case '\u2035': // ?  [REVERSED PRIME]
 1874             case '\u2039': // ?  [SINGLE LEFT-POINTING ANGLE QUOTATION MARK]
 1875             case '\u203A': // ?  [SINGLE RIGHT-POINTING ANGLE QUOTATION MARK]
 1876             case '\u275B': // ?  [HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT]
 1877             case '\u275C': // ?  [HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT]
 1878             case '\uFF07': // ?  [FULLWIDTH APOSTROPHE]
 1879               output[outputPos++] = '\'';
 1880               break;
 1881             case '\u2010': // ?  [HYPHEN]
 1882             case '\u2011': // ?  [NON-BREAKING HYPHEN]
 1883             case '\u2012': // ?  [FIGURE DASH]
 1884             case '\u2013': // ?  [EN DASH]
 1885             case '\u2014': // ?  [EM DASH]
 1886             case '\u207B': // ?  [SUPERSCRIPT MINUS]
 1887             case '\u208B': // ?  [SUBSCRIPT MINUS]
 1888             case '\uFF0D': // ?  [FULLWIDTH HYPHEN-MINUS]
 1889               output[outputPos++] = '-';
 1890               break;
 1891             case '\u2045': // ?  [LEFT SQUARE BRACKET WITH QUILL]
 1892             case '\u2772': // ?  [LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT]
 1893             case '\uFF3B': // ?  [FULLWIDTH LEFT SQUARE BRACKET]
 1894               output[outputPos++] = '[';
 1895               break;
 1896             case '\u2046': // ?  [RIGHT SQUARE BRACKET WITH QUILL]
 1897             case '\u2773': // ?  [LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT]
 1898             case '\uFF3D': // ?  [FULLWIDTH RIGHT SQUARE BRACKET]
 1899               output[outputPos++] = ']';
 1900               break;
 1901             case '\u207D': // ?  [SUPERSCRIPT LEFT PARENTHESIS]
 1902             case '\u208D': // ?  [SUBSCRIPT LEFT PARENTHESIS]
 1903             case '\u2768': // ?  [MEDIUM LEFT PARENTHESIS ORNAMENT]
 1904             case '\u276A': // ?  [MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT]
 1905             case '\uFF08': // ?  [FULLWIDTH LEFT PARENTHESIS]
 1906               output[outputPos++] = '(';
 1907               break;
 1908             case '\u2E28': // ?  [LEFT DOUBLE PARENTHESIS]
 1909               output[outputPos++] = '(';
 1910               output[outputPos++] = '(';
 1911               break;
 1912             case '\u207E': // ?  [SUPERSCRIPT RIGHT PARENTHESIS]
 1913             case '\u208E': // ?  [SUBSCRIPT RIGHT PARENTHESIS]
 1914             case '\u2769': // ?  [MEDIUM RIGHT PARENTHESIS ORNAMENT]
 1915             case '\u276B': // ?  [MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT]
 1916             case '\uFF09': // ?  [FULLWIDTH RIGHT PARENTHESIS]
 1917               output[outputPos++] = ')';
 1918               break;
 1919             case '\u2E29': // ?  [RIGHT DOUBLE PARENTHESIS]
 1920               output[outputPos++] = ')';
 1921               output[outputPos++] = ')';
 1922               break;
 1923             case '\u276C': // ?  [MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT]
 1924             case '\u2770': // ?  [HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT]
 1925             case '\uFF1C': // ?  [FULLWIDTH LESS-THAN SIGN]
 1926               output[outputPos++] = '<';
 1927               break;
 1928             case '\u276D': // ?  [MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT]
 1929             case '\u2771': // ?  [HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT]
 1930             case '\uFF1E': // ?  [FULLWIDTH GREATER-THAN SIGN]
 1931               output[outputPos++] = '>';
 1932               break;
 1933             case '\u2774': // ?  [MEDIUM LEFT CURLY BRACKET ORNAMENT]
 1934             case '\uFF5B': // ?  [FULLWIDTH LEFT CURLY BRACKET]
 1935               output[outputPos++] = '{';
 1936               break;
 1937             case '\u2775': // ?  [MEDIUM RIGHT CURLY BRACKET ORNAMENT]
 1938             case '\uFF5D': // ?  [FULLWIDTH RIGHT CURLY BRACKET]
 1939               output[outputPos++] = '}';
 1940               break;
 1941             case '\u207A': // ?  [SUPERSCRIPT PLUS SIGN]
 1942             case '\u208A': // ?  [SUBSCRIPT PLUS SIGN]
 1943             case '\uFF0B': // ?  [FULLWIDTH PLUS SIGN]
 1944               output[outputPos++] = '+';
 1945               break;
 1946             case '\u207C': // ?  [SUPERSCRIPT EQUALS SIGN]
 1947             case '\u208C': // ?  [SUBSCRIPT EQUALS SIGN]
 1948             case '\uFF1D': // ?  [FULLWIDTH EQUALS SIGN]
 1949               output[outputPos++] = '=';
 1950               break;
 1951             case '\uFF01': // ?  [FULLWIDTH EXCLAMATION MARK]
 1952               output[outputPos++] = '!';
 1953               break;
 1954             case '\u203C': // ?  [DOUBLE EXCLAMATION MARK]
 1955               output[outputPos++] = '!';
 1956               output[outputPos++] = '!';
 1957               break;
 1958             case '\u2049': // ?  [EXCLAMATION QUESTION MARK]
 1959               output[outputPos++] = '!';
 1960               output[outputPos++] = '?';
 1961               break;
 1962             case '\uFF03': // ?  [FULLWIDTH NUMBER SIGN]
 1963               output[outputPos++] = '#';
 1964               break;
 1965             case '\uFF04': // ?  [FULLWIDTH DOLLAR SIGN]
 1966               output[outputPos++] = '$';
 1967               break;
 1968             case '\u2052': // ?  [COMMERCIAL MINUS SIGN]
 1969             case '\uFF05': // ?  [FULLWIDTH PERCENT SIGN]
 1970               output[outputPos++] = '%';
 1971               break;
 1972             case '\uFF06': // ?  [FULLWIDTH AMPERSAND]
 1973               output[outputPos++] = '&';
 1974               break;
 1975             case '\u204E': // ?  [LOW ASTERISK]
 1976             case '\uFF0A': // ?  [FULLWIDTH ASTERISK]
 1977               output[outputPos++] = '*';
 1978               break;
 1979             case '\uFF0C': // ?  [FULLWIDTH COMMA]
 1980               output[outputPos++] = ',';
 1981               break;
 1982             case '\uFF0E': // ?  [FULLWIDTH FULL STOP]
 1983               output[outputPos++] = '.';
 1984               break;
 1985             case '\u2044': // ?  [FRACTION SLASH]
 1986             case '\uFF0F': // ?  [FULLWIDTH SOLIDUS]
 1987               output[outputPos++] = '/';
 1988               break;
 1989             case '\uFF1A': // ?  [FULLWIDTH COLON]
 1990               output[outputPos++] = ':';
 1991               break;
 1992             case '\u204F': // ?  [REVERSED SEMICOLON]
 1993             case '\uFF1B': // ?  [FULLWIDTH SEMICOLON]
 1994               output[outputPos++] = ';';
 1995               break;
 1996             case '\uFF1F': // ?  [FULLWIDTH QUESTION MARK]
 1997               output[outputPos++] = '?';
 1998               break;
 1999             case '\u2047': // ?  [DOUBLE QUESTION MARK]
 2000               output[outputPos++] = '?';
 2001               output[outputPos++] = '?';
 2002               break;
 2003             case '\u2048': // ?  [QUESTION EXCLAMATION MARK]
 2004               output[outputPos++] = '?';
 2005               output[outputPos++] = '!';
 2006               break;
 2007             case '\uFF20': // ?  [FULLWIDTH COMMERCIAL AT]
 2008               output[outputPos++] = '@';
 2009               break;
 2010             case '\uFF3C': // ?  [FULLWIDTH REVERSE SOLIDUS]
 2011               output[outputPos++] = '\\';
 2012               break;
 2013             case '\u2038': // ?  [CARET]
 2014             case '\uFF3E': // ?  [FULLWIDTH CIRCUMFLEX ACCENT]
 2015               output[outputPos++] = '^';
 2016               break;
 2017             case '\uFF3F': // ?  [FULLWIDTH LOW LINE]
 2018               output[outputPos++] = '_';
 2019               break;
 2020             case '\u2053': // ?  [SWUNG DASH]
 2021             case '\uFF5E': // ?  [FULLWIDTH TILDE]
 2022               output[outputPos++] = '~';
 2023               break;
 2024             default:
 2025               output[outputPos++] = c;
 2026               break;
 2027           }
 2028         }
 2029       }
 2030     }
 2031   }

Save This Page
Home » lucene-3.0.1-src » org.apache » lucene » analysis » [javadoc | source]