Ruby 1.9.3p327 (2012-11-10 revision 37606)
enc/trans/utf_16_32.c
Go to the documentation of this file.
00001 /* autogenerated. */
00002 /* src="transcode-tblgen.rb", len=28123, checksum=30477 */
00003 /* src="utf_16_32.trans", len=15312, checksum=29120 */
00004 
00005 #include "transcode_data.h"
00006 
00007 
00008 
/*
 * Byte table shared by every transcoder defined in this file (1288 entries).
 * Autogenerated from utf_16_32.trans by transcode-tblgen.rb; do not edit by
 * hand.
 *
 * The table is a concatenation of sections.  Each "#define ..._offsets N"
 * records the index N at which the section that follows it begins.  A
 * section is two leading bytes followed by one entry for every value in the
 * inclusive range those two bytes span (e.g. "220, 223" is followed by 4
 * entries, "0, 255" by 256 entries, "0, 244" by 245 entries).
 *
 * NOTE(review): each entry looks like an index into the "_infos" run that
 * utf_16_32_word_array pairs with the section -- confirm against the
 * lookup macros in transcode_data.h.
 */
00009 static const unsigned char
00010 utf_16_32_byte_array[1288] = {
00011 #define from_UTF_16LE_00toFF_D8toDB_00toFF_offsets 0
00012 220, 223,
00013       1,  1,  1,  1,
00014 
00015 #define from_UTF_16LE_00toFF_D8toDB_offsets 6
00016 0, 255,
00017       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00018       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00019       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00020       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00021       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00022       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00023       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00024       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00025       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00026       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00027       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00028       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00029       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00030       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00031       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00032       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00033 
00034 #define from_UTF_16LE_00toFF_offsets 264
00035 0, 255,
00036       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00037       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00038       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00039       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00040       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00041       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00042       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00043       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00044       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00045       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00046       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00047       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00048       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00049       0,  0,  0,  0,  0,  0,  0,  0,    1,  1,  1,  1,  2,  2,  2,  2,
00050       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00051       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00052 
00053 #define from_UTF_32LE_00toFF_00toD7_00_offsets 522
00054 0, 0,
00055       0,
00056 
00057 #define from_UTF_32LE_00toFF_00toD7_offsets 525
00058 0, 16,
00059       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00060       0,
00061 
00062 #define from_UTF_32LE_00toFF_D8toDF_offsets 544
00063 1, 16,
00064       1,  1,  1,  1,  1,  1,  1,  1,    1,  1,  1,  1,  1,  1,  1,  1,
00065 
00066 #define from_UTF_32LE_00toFF_offsets 562
00067 0, 255,
00068       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00069       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00070       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00071       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00072       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00073       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00074       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00075       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00076       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00077       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00078       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00079       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00080       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00081       0,  0,  0,  0,  0,  0,  0,  0,    1,  1,  1,  1,  1,  1,  1,  1,
00082       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00083       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00084 
00085 #define from_UTF_32BE_00_offsets 820
00086 0, 16,
00087       0,  1,  1,  1,  1,  1,  1,  1,    1,  1,  1,  1,  1,  1,  1,  1,
00088       1,
00089 
00090 #define from_UTF_8_C2toDF_offsets 839
00091 128, 191,
00092       1,  1,  1,  1,  1,  1,  1,  1,    1,  1,  1,  1,  1,  1,  1,  1,
00093       1,  1,  1,  1,  1,  1,  1,  1,    1,  1,  1,  1,  1,  1,  1,  1,
00094       1,  1,  1,  1,  1,  1,  1,  1,    1,  1,  1,  1,  1,  1,  1,  1,
00095       1,  1,  1,  1,  1,  1,  1,  1,    1,  1,  1,  1,  1,  1,  1,  1,
00096 
00097 #define from_UTF_8_E0_offsets 905
00098 160, 191,
00099       1,  1,  1,  1,  1,  1,  1,  1,    1,  1,  1,  1,  1,  1,  1,  1,
00100       1,  1,  1,  1,  1,  1,  1,  1,    1,  1,  1,  1,  1,  1,  1,  1,
00101 
00102 #define from_UTF_8_ED_offsets 939
00103 128, 159,
00104       1,  1,  1,  1,  1,  1,  1,  1,    1,  1,  1,  1,  1,  1,  1,  1,
00105       1,  1,  1,  1,  1,  1,  1,  1,    1,  1,  1,  1,  1,  1,  1,  1,
00106 
00107 #define from_UTF_8_F0_offsets 973
00108 144, 191,
00109       1,  1,  1,  1,  1,  1,  1,  1,    1,  1,  1,  1,  1,  1,  1,  1,
00110       1,  1,  1,  1,  1,  1,  1,  1,    1,  1,  1,  1,  1,  1,  1,  1,
00111       1,  1,  1,  1,  1,  1,  1,  1,    1,  1,  1,  1,  1,  1,  1,  1,
00112 
00113 #define from_UTF_8_F4_offsets 1023
00114 128, 143,
00115       1,  1,  1,  1,  1,  1,  1,  1,    1,  1,  1,  1,  1,  1,  1,  1,
00116 
00117 #define from_UTF_8_offsets 1041
00118 0, 244,
00119       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00120       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00121       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00122       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00123       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00124       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00125       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00126       0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
00127       1,  1,  1,  1,  1,  1,  1,  1,    1,  1,  1,  1,  1,  1,  1,  1,
00128       1,  1,  1,  1,  1,  1,  1,  1,    1,  1,  1,  1,  1,  1,  1,  1,
00129       1,  1,  1,  1,  1,  1,  1,  1,    1,  1,  1,  1,  1,  1,  1,  1,
00130       1,  1,  1,  1,  1,  1,  1,  1,    1,  1,  1,  1,  1,  1,  1,  1,
00131       1,  1,  2,  2,  2,  2,  2,  2,    2,  2,  2,  2,  2,  2,  2,  2,
00132       2,  2,  2,  2,  2,  2,  2,  2,    2,  2,  2,  2,  2,  2,  2,  2,
00133       3,  4,  4,  4,  4,  4,  4,  4,    4,  4,  4,  4,  4,  5,  4,  4,
00134       6,  7,  7,  7,  8,
00135 
00136 };
/*
 * Word table shared by every transcoder defined in this file (106 entries).
 * Autogenerated from utf_16_32.trans by transcode-tblgen.rb; do not edit by
 * hand.
 *
 * Two kinds of macro mark positions in the table via WORDINDEX2INFO(i):
 *   - "<node>_infos" marks a run of action words (FUNso, FUNsi, INVALID, or
 *     a reference to another node);
 *   - "<node>" marks a two-word pair: an "_offsets" section of
 *     utf_16_32_byte_array followed by an "_infos" run.
 * Several nodes reuse the byte sections or info runs of other nodes (for
 * example from_UTF_16BE reuses from_UTF_16LE_00toFF_offsets).
 *
 * NOTE(review): WORDINDEX2INFO and the action constants come from
 * transcode_data.h, which is not visible here -- confirm their exact
 * meaning there.
 */
00137 static const unsigned int
00138 utf_16_32_word_array[106] = {
00139 #define from_UTF_16LE_00toFF_D8toDB_00toFF_infos WORDINDEX2INFO(0)
00140      INVALID,   FUNso,
00141 
00142 #define from_UTF_16LE_00toFF_D8toDB_00toFF WORDINDEX2INFO(2)
00143     from_UTF_16LE_00toFF_D8toDB_00toFF_offsets,
00144     from_UTF_16LE_00toFF_D8toDB_00toFF_infos,
00145 
00146 #define from_UTF_16LE_00toFF_D8toDB_infos WORDINDEX2INFO(4)
00147      from_UTF_16LE_00toFF_D8toDB_00toFF,
00148 
00149 #define from_UTF_16LE_00toFF_D8toDB WORDINDEX2INFO(5)
00150     from_UTF_16LE_00toFF_D8toDB_offsets,
00151     from_UTF_16LE_00toFF_D8toDB_infos,
00152 
00153 #define from_UTF_16LE_00toFF_infos WORDINDEX2INFO(7)
00154                            FUNso, from_UTF_16LE_00toFF_D8toDB,
00155                          INVALID,
00156 
00157 #define from_UTF_16LE_00toFF WORDINDEX2INFO(10)
00158     from_UTF_16LE_00toFF_offsets,
00159     from_UTF_16LE_00toFF_infos,
00160 
00161 #define from_UTF_16LE_infos WORDINDEX2INFO(12)
00162      from_UTF_16LE_00toFF,
00163 
00164 #define from_UTF_16LE WORDINDEX2INFO(13)
00165     from_UTF_16LE_00toFF_D8toDB_offsets,
00166     from_UTF_16LE_infos,
00167 
00168 #define from_UTF_32LE_00toFF_00toD7_00_infos WORDINDEX2INFO(15)
00169        FUNso, INVALID,
00170 
00171 #define from_UTF_32LE_00toFF_00toD7_00 WORDINDEX2INFO(17)
00172     from_UTF_32LE_00toFF_00toD7_00_offsets,
00173     from_UTF_32LE_00toFF_00toD7_00_infos,
00174 
00175 #define from_UTF_32LE_00toFF_00toD7_infos WORDINDEX2INFO(19)
00176      from_UTF_32LE_00toFF_00toD7_00,                        INVALID,
00177 
00178 #define from_UTF_32LE_00toFF_00toD7 WORDINDEX2INFO(21)
00179     from_UTF_32LE_00toFF_00toD7_offsets,
00180     from_UTF_32LE_00toFF_00toD7_infos,
00181 
00182 #define from_UTF_32LE_00toFF_D8toDF_infos WORDINDEX2INFO(23)
00183                             INVALID, from_UTF_32LE_00toFF_00toD7_00,
00184 
00185 #define from_UTF_32LE_00toFF_D8toDF WORDINDEX2INFO(25)
00186     from_UTF_32LE_00toFF_D8toDF_offsets,
00187     from_UTF_32LE_00toFF_D8toDF_infos,
00188 
00189 #define from_UTF_32LE_00toFF_infos WORDINDEX2INFO(27)
00190      from_UTF_32LE_00toFF_00toD7, from_UTF_32LE_00toFF_D8toDF,
00191 
00192 #define from_UTF_32LE_00toFF WORDINDEX2INFO(29)
00193     from_UTF_32LE_00toFF_offsets,
00194     from_UTF_32LE_00toFF_infos,
00195 
00196 #define from_UTF_32LE_infos WORDINDEX2INFO(31)
00197      from_UTF_32LE_00toFF,
00198 
00199 #define from_UTF_32LE WORDINDEX2INFO(32)
00200     from_UTF_16LE_00toFF_D8toDB_offsets,
00201     from_UTF_32LE_infos,
00202 
00203 #define from_UTF_16BE_00toD7_infos WORDINDEX2INFO(34)
00204      FUNso,
00205 
00206 #define from_UTF_16BE_00toD7 WORDINDEX2INFO(35)
00207     from_UTF_16LE_00toFF_D8toDB_offsets,
00208     from_UTF_16BE_00toD7_infos,
00209 
00210 #define from_UTF_16BE_D8toDB_00toFF_infos WORDINDEX2INFO(37)
00211                   INVALID, from_UTF_16BE_00toD7,
00212 
00213 #define from_UTF_16BE_D8toDB_00toFF WORDINDEX2INFO(39)
00214     from_UTF_16LE_00toFF_D8toDB_00toFF_offsets,
00215     from_UTF_16BE_D8toDB_00toFF_infos,
00216 
00217 #define from_UTF_16BE_D8toDB_infos WORDINDEX2INFO(41)
00218      from_UTF_16BE_D8toDB_00toFF,
00219 
00220 #define from_UTF_16BE_D8toDB WORDINDEX2INFO(42)
00221     from_UTF_16LE_00toFF_D8toDB_offsets,
00222     from_UTF_16BE_D8toDB_infos,
00223 
00224 #define from_UTF_16BE_infos WORDINDEX2INFO(44)
00225      from_UTF_16BE_00toD7, from_UTF_16BE_D8toDB,
00226                   INVALID,
00227 
00228 #define from_UTF_16BE WORDINDEX2INFO(47)
00229     from_UTF_16LE_00toFF_offsets,
00230     from_UTF_16BE_infos,
00231 
00232 #define from_UTF_32BE_00_00_infos WORDINDEX2INFO(49)
00233      from_UTF_16BE_00toD7,              INVALID,
00234 
00235 #define from_UTF_32BE_00_00 WORDINDEX2INFO(51)
00236     from_UTF_32LE_00toFF_offsets,
00237     from_UTF_32BE_00_00_infos,
00238 
00239 #define from_UTF_32BE_00_01to10_infos WORDINDEX2INFO(53)
00240      from_UTF_16BE_00toD7,
00241 
00242 #define from_UTF_32BE_00_01to10 WORDINDEX2INFO(54)
00243     from_UTF_16LE_00toFF_D8toDB_offsets,
00244     from_UTF_32BE_00_01to10_infos,
00245 
00246 #define from_UTF_32BE_00_infos WORDINDEX2INFO(56)
00247          from_UTF_32BE_00_00, from_UTF_32BE_00_01to10,
00248                      INVALID,
00249 
00250 #define from_UTF_32BE_00 WORDINDEX2INFO(59)
00251     from_UTF_32BE_00_offsets,
00252     from_UTF_32BE_00_infos,
00253 
00254 #define from_UTF_32BE_infos WORDINDEX2INFO(61)
00255      from_UTF_32BE_00,          INVALID,
00256 
00257 #define from_UTF_32BE WORDINDEX2INFO(63)
00258     from_UTF_32LE_00toFF_00toD7_00_offsets,
00259     from_UTF_32BE_infos,
00260 
00261 #define from_UTF_16_00toFF_infos WORDINDEX2INFO(65)
00262      FUNsi,
00263 
00264 #define from_UTF_16_00toFF WORDINDEX2INFO(66)
00265     from_UTF_16LE_00toFF_D8toDB_offsets,
00266     from_UTF_16_00toFF_infos,
00267 
00268 #define from_UTF_16_infos WORDINDEX2INFO(68)
00269      from_UTF_16_00toFF,
00270 
00271 #define from_UTF_16 WORDINDEX2INFO(69)
00272     from_UTF_16LE_00toFF_D8toDB_offsets,
00273     from_UTF_16_infos,
00274 
00275 #define from_UTF_32_00toFF_infos WORDINDEX2INFO(71)
00276      from_UTF_16,
00277 
00278 #define from_UTF_32_00toFF WORDINDEX2INFO(72)
00279     from_UTF_16LE_00toFF_D8toDB_offsets,
00280     from_UTF_32_00toFF_infos,
00281 
00282 #define from_UTF_32_infos WORDINDEX2INFO(74)
00283      from_UTF_32_00toFF,
00284 
00285 #define from_UTF_32 WORDINDEX2INFO(75)
00286     from_UTF_16LE_00toFF_D8toDB_offsets,
00287     from_UTF_32_infos,
00288 
00289 #define from_UTF_8_C2toDF WORDINDEX2INFO(77)
00290     from_UTF_8_C2toDF_offsets,
00291     from_UTF_16LE_00toFF_D8toDB_00toFF_infos,
00292 
00293 #define from_UTF_8_E0_infos WORDINDEX2INFO(79)
00294                INVALID, from_UTF_8_C2toDF,
00295 
00296 #define from_UTF_8_E0 WORDINDEX2INFO(81)
00297     from_UTF_8_E0_offsets,
00298     from_UTF_8_E0_infos,
00299 
00300 #define from_UTF_8_E1toEC WORDINDEX2INFO(83)
00301     from_UTF_8_C2toDF_offsets,
00302     from_UTF_8_E0_infos,
00303 
00304 #define from_UTF_8_ED WORDINDEX2INFO(85)
00305     from_UTF_8_ED_offsets,
00306     from_UTF_8_E0_infos,
00307 
00308 #define from_UTF_8_F0_infos WORDINDEX2INFO(87)
00309                INVALID, from_UTF_8_E1toEC,
00310 
00311 #define from_UTF_8_F0 WORDINDEX2INFO(89)
00312     from_UTF_8_F0_offsets,
00313     from_UTF_8_F0_infos,
00314 
00315 #define from_UTF_8_F1toF3 WORDINDEX2INFO(91)
00316     from_UTF_8_C2toDF_offsets,
00317     from_UTF_8_F0_infos,
00318 
00319 #define from_UTF_8_F4 WORDINDEX2INFO(93)
00320     from_UTF_8_F4_offsets,
00321     from_UTF_8_F0_infos,
00322 
00323 #define from_UTF_8_infos WORDINDEX2INFO(95)
00324                  FUNso,           INVALID,
00325      from_UTF_8_C2toDF,     from_UTF_8_E0,
00326      from_UTF_8_E1toEC,     from_UTF_8_ED,
00327          from_UTF_8_F0, from_UTF_8_F1toF3,
00328          from_UTF_8_F4,
00329 
00330 #define from_UTF_8 WORDINDEX2INFO(104)
00331     from_UTF_8_offsets,
00332     from_UTF_8_infos,
00333 
00334 };
/*
 * Five comma-separated initializer values: the byte table and its length,
 * the word table and its length, and the size of one word.  Dropped into
 * each rb_transcoder initializer in this file.
 */
00335 #define TRANSCODE_TABLE_INFO utf_16_32_byte_array, 1288, utf_16_32_word_array, 106, ((int)sizeof(unsigned int))
00336 
00337 
/*
 * Re-encode one UTF-16BE character as UTF-8.
 *
 * s points at one 16-bit unit (high byte first), or at a surrogate pair
 * (4 bytes) when the first byte is in 0xD8..0xDF.  The UTF-8 bytes are
 * stored at o and the number of bytes stored (1..4) is returned.
 * statep, l and osize are not used; o is not checked against osize.
 */
static ssize_t
fun_so_from_utf_16be(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    const unsigned char hi = s[0];
    const unsigned char lo = s[1];

    /* U+0000..U+007F: copied through as a single byte. */
    if (hi == 0 && lo < 0x80) {
        o[0] = lo;
        return 1;
    }

    /* U+0080..U+07FF: 11 payload bits split 5/6. */
    if (hi < 0x08) {
        o[0] = 0xC0 | (hi << 2) | (lo >> 6);
        o[1] = 0x80 | (lo & 0x3F);
        return 2;
    }

    /* Remaining non-surrogate units: 16 payload bits split 4/6/6. */
    if ((hi & 0xF8) != 0xD8) {
        o[0] = 0xE0 | (hi >> 4);
        o[1] = 0x80 | ((hi & 0x0F) << 2) | (lo >> 6);
        o[2] = 0x80 | (lo & 0x3F);
        return 3;
    }

    /* Surrogate pair: the 4 plane bits of the high surrogate are stored
     * minus one, so add one back before spreading them over the output. */
    {
        const unsigned char hi2 = s[2];
        const unsigned char lo2 = s[3];
        unsigned int plane = (((hi & 0x03) << 2) | (lo >> 6)) + 1;

        o[0] = 0xF0 | (plane >> 2);
        o[1] = 0x80 | ((plane & 0x03) << 4) | ((lo >> 2) & 0x0F);
        o[2] = 0x80 | ((lo & 0x03) << 4) | ((hi2 & 0x03) << 2) | (lo2 >> 6);
        o[3] = 0x80 | (lo2 & 0x3F);
        return 4;
    }
}
00365 
/*
 * Re-encode one UTF-8 character as UTF-16BE.
 *
 * The lead byte s[0] selects the sequence length (1 to 4 bytes).  Two bytes
 * are stored at o for 1-, 2- and 3-byte sequences and a surrogate pair
 * (4 bytes) for 4-byte sequences; the count stored is returned.
 * statep, l and osize are not used; o is not checked against osize.
 */
static ssize_t
fun_so_to_utf_16be(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    const unsigned char lead = s[0];

    /* 1-byte sequence. */
    if ((lead & 0x80) == 0) {
        o[0] = 0x00;
        o[1] = lead;
        return 2;
    }

    /* 2-byte sequence. */
    if ((lead & 0xE0) == 0xC0) {
        o[0] = (lead >> 2) & 0x07;
        o[1] = ((lead & 0x03) << 6) | (s[1] & 0x3F);
        return 2;
    }

    /* 3-byte sequence; the XORs strip the 0x80 continuation marker. */
    if ((lead & 0xF0) == 0xE0) {
        o[0] = (lead << 4) | ((s[1] >> 2) ^ 0x20);
        o[1] = (s[1] << 6) | (s[2] ^ 0x80);
        return 2;
    }

    /* 4-byte sequence: emit a surrogate pair.  The high surrogate carries
     * the plane number minus one. */
    {
        int w = (((lead & 0x07) << 2) | ((s[1] >> 4) & 0x03)) - 1;

        o[0] = 0xD8 | (w >> 2);
        o[1] = (w << 6) | ((s[1] & 0x0F) << 2) | ((s[2] >> 4) - 8);
        o[2] = 0xDC | ((s[2] >> 2) & 0x03);
        o[3] = (s[2] << 6) | (s[3] & 0x7F);
        return 4;
    }
}
00393 
/*
 * Re-encode one UTF-16LE character as UTF-8.
 *
 * s points at one 16-bit unit (low byte first), or at a surrogate pair
 * (4 bytes) when the second byte is in 0xD8..0xDF.  The UTF-8 bytes are
 * stored at o and the number of bytes stored (1..4) is returned.
 * statep, l and osize are not used; o is not checked against osize.
 */
static ssize_t
fun_so_from_utf_16le(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    const unsigned char lo = s[0];
    const unsigned char hi = s[1];

    /* U+0000..U+007F: copied through as a single byte. */
    if (hi == 0 && lo < 0x80) {
        o[0] = lo;
        return 1;
    }

    /* U+0080..U+07FF. */
    if (hi < 0x08) {
        o[0] = 0xC0 | (hi << 2) | (lo >> 6);
        o[1] = 0x80 | (lo & 0x3F);
        return 2;
    }

    /* Remaining non-surrogate units. */
    if ((hi & 0xF8) != 0xD8) {
        o[0] = 0xE0 | (hi >> 4);
        o[1] = 0x80 | ((hi & 0x0F) << 2) | (lo >> 6);
        o[2] = 0x80 | (lo & 0x3F);
        return 3;
    }

    /* Surrogate pair: plane bits are stored minus one in the high
     * surrogate, so add one back. */
    {
        const unsigned char lo2 = s[2];
        const unsigned char hi2 = s[3];
        unsigned int plane = (((hi & 0x03) << 2) | (lo >> 6)) + 1;

        o[0] = 0xF0 | (plane >> 2);
        o[1] = 0x80 | ((plane & 0x03) << 4) | ((lo >> 2) & 0x0F);
        o[2] = 0x80 | ((lo & 0x03) << 4) | ((hi2 & 0x03) << 2) | (lo2 >> 6);
        o[3] = 0x80 | (lo2 & 0x3F);
        return 4;
    }
}
00421 
/*
 * Re-encode one UTF-8 character as UTF-16LE.
 *
 * Same bit manipulation as the big-endian variant with the two bytes of
 * every 16-bit unit swapped.  Two bytes are stored at o for 1-, 2- and
 * 3-byte sequences, four (a surrogate pair) for 4-byte sequences; the
 * count stored is returned.
 * statep, l and osize are not used; o is not checked against osize.
 */
static ssize_t
fun_so_to_utf_16le(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    const unsigned char lead = s[0];

    /* 1-byte sequence. */
    if ((lead & 0x80) == 0) {
        o[1] = 0x00;
        o[0] = lead;
        return 2;
    }

    /* 2-byte sequence. */
    if ((lead & 0xE0) == 0xC0) {
        o[1] = (lead >> 2) & 0x07;
        o[0] = ((lead & 0x03) << 6) | (s[1] & 0x3F);
        return 2;
    }

    /* 3-byte sequence; the XORs strip the 0x80 continuation marker. */
    if ((lead & 0xF0) == 0xE0) {
        o[1] = (lead << 4) | ((s[1] >> 2) ^ 0x20);
        o[0] = (s[1] << 6) | (s[2] ^ 0x80);
        return 2;
    }

    /* 4-byte sequence: emit a surrogate pair, low byte of each unit first. */
    {
        int w = (((lead & 0x07) << 2) | ((s[1] >> 4) & 0x03)) - 1;

        o[1] = 0xD8 | (w >> 2);
        o[0] = (w << 6) | ((s[1] & 0x0F) << 2) | ((s[2] >> 4) - 8);
        o[3] = 0xDC | ((s[2] >> 2) & 0x03);
        o[2] = (s[2] << 6) | (s[3] & 0x7F);
        return 4;
    }
}
00449 
/*
 * Re-encode one UTF-32BE character (4 bytes at s) as UTF-8.
 *
 * s[0] is never examined.  When s[1] is zero the value fits in 16 bits and
 * 1 to 3 bytes are produced; otherwise 4 bytes are produced.  The bytes are
 * stored at o and their count is returned.
 * statep, l and osize are not used; o is not checked against osize.
 */
static ssize_t
fun_so_from_utf_32be(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    const unsigned char plane = s[1];
    const unsigned char hi = s[2];
    const unsigned char lo = s[3];

    /* Beyond 16 bits: 4-byte form. */
    if (plane != 0) {
        o[0] = 0xF0 | (plane >> 2);
        o[1] = 0x80 | ((plane & 0x03) << 4) | (hi >> 4);
        o[2] = 0x80 | ((hi & 0x0F) << 2) | (lo >> 6);
        o[3] = 0x80 | (lo & 0x3F);
        return 4;
    }

    /* U+0000..U+007F. */
    if (hi == 0 && lo < 0x80) {
        o[0] = lo;
        return 1;
    }

    /* U+0080..U+07FF. */
    if (hi < 0x08) {
        o[0] = 0xC0 | (hi << 2) | (lo >> 6);
        o[1] = 0x80 | (lo & 0x3F);
        return 2;
    }

    /* U+0800..U+FFFF. */
    o[0] = 0xE0 | (hi >> 4);
    o[1] = 0x80 | ((hi & 0x0F) << 2) | (lo >> 6);
    o[2] = 0x80 | (lo & 0x3F);
    return 3;
}
00478 
/*
 * Re-encode one UTF-8 character as UTF-32BE.
 *
 * The lead byte s[0] selects the sequence length (1 to 4 bytes).  Exactly
 * four bytes are stored at o, the first of which is always zero, and 4 is
 * returned.
 * statep, l and osize are not used; o is not checked against osize.
 */
static ssize_t
fun_so_to_utf_32be(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    const unsigned char lead = s[0];
    unsigned char plane;
    unsigned char hi;
    unsigned char lo;

    if ((lead & 0x80) == 0) {
        /* 1-byte sequence. */
        plane = 0x00;
        hi = 0x00;
        lo = lead;
    }
    else if ((lead & 0xE0) == 0xC0) {
        /* 2-byte sequence. */
        plane = 0x00;
        hi = (lead >> 2) & 0x07;
        lo = ((lead & 0x03) << 6) | (s[1] & 0x3F);
    }
    else if ((lead & 0xF0) == 0xE0) {
        /* 3-byte sequence; the XORs strip the 0x80 continuation marker. */
        plane = 0x00;
        hi = (lead << 4) | ((s[1] >> 2) ^ 0x20);
        lo = (s[1] << 6) | (s[2] ^ 0x80);
    }
    else {
        /* 4-byte sequence. */
        plane = ((lead & 0x07) << 2) | ((s[1] >> 4) & 0x03);
        hi = ((s[1] & 0x0F) << 4) | ((s[2] >> 2) & 0x0F);
        lo = ((s[2] & 0x03) << 6) | (s[3] & 0x3F);
    }

    o[0] = 0;
    o[1] = plane;
    o[2] = hi;
    o[3] = lo;
    return 4;
}
00504 
/*
 * Re-encode one UTF-32LE character (4 bytes at s) as UTF-8.
 *
 * s[3] is never examined.  When s[2] is zero the value fits in 16 bits and
 * 1 to 3 bytes are produced; otherwise 4 bytes are produced.  The bytes are
 * stored at o and their count is returned.
 * statep, l and osize are not used; o is not checked against osize.
 */
static ssize_t
fun_so_from_utf_32le(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    const unsigned char lo = s[0];
    const unsigned char hi = s[1];
    const unsigned char plane = s[2];

    /* Beyond 16 bits: 4-byte form. */
    if (plane != 0) {
        o[0] = 0xF0 | (plane >> 2);
        o[1] = 0x80 | ((plane & 0x03) << 4) | (hi >> 4);
        o[2] = 0x80 | ((hi & 0x0F) << 2) | (lo >> 6);
        o[3] = 0x80 | (lo & 0x3F);
        return 4;
    }

    /* U+0000..U+007F. */
    if (hi == 0 && lo < 0x80) {
        o[0] = lo;
        return 1;
    }

    /* U+0080..U+07FF. */
    if (hi < 0x08) {
        o[0] = 0xC0 | (hi << 2) | (lo >> 6);
        o[1] = 0x80 | (lo & 0x3F);
        return 2;
    }

    /* U+0800..U+FFFF. */
    o[0] = 0xE0 | (hi >> 4);
    o[1] = 0x80 | ((hi & 0x0F) << 2) | (lo >> 6);
    o[2] = 0x80 | (lo & 0x3F);
    return 3;
}
00533 
/*
 * Re-encode one UTF-8 character as UTF-32LE.
 *
 * The lead byte s[0] selects the sequence length (1 to 4 bytes).  Exactly
 * four bytes are stored at o, the last of which is always zero, and 4 is
 * returned.
 * statep, l and osize are not used; o is not checked against osize.
 */
static ssize_t
fun_so_to_utf_32le(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    const unsigned char lead = s[0];
    unsigned char plane;
    unsigned char hi;
    unsigned char lo;

    if ((lead & 0x80) == 0) {
        /* 1-byte sequence. */
        plane = 0x00;
        hi = 0x00;
        lo = lead;
    }
    else if ((lead & 0xE0) == 0xC0) {
        /* 2-byte sequence. */
        plane = 0x00;
        hi = (lead >> 2) & 0x07;
        lo = ((lead & 0x03) << 6) | (s[1] & 0x3F);
    }
    else if ((lead & 0xF0) == 0xE0) {
        /* 3-byte sequence; the XORs strip the 0x80 continuation marker. */
        plane = 0x00;
        hi = (lead << 4) | ((s[1] >> 2) ^ 0x20);
        lo = (s[1] << 6) | (s[2] ^ 0x80);
    }
    else {
        /* 4-byte sequence. */
        plane = ((lead & 0x07) << 2) | ((s[1] >> 4) & 0x03);
        hi = ((s[1] & 0x0F) << 4) | ((s[2] >> 2) & 0x0F);
        lo = ((s[2] & 0x03) << 6) | (s[3] & 0x3F);
    }

    o[3] = 0;
    o[2] = plane;
    o[1] = hi;
    o[0] = lo;
    return 4;
}
00559 
/*
 * Reset the one-byte transcoder state that statep points at to 0.
 * Always returns 0.
 */
static int
state_init(void *statep)
{
    *(unsigned char *)statep = 0;
    return 0;
}
00567 
00568 static VALUE
00569 fun_si_from_utf_16(void *statep, const unsigned char *s, size_t l)
00570 {
00571     #define BE 1
00572     #define LE 2
00573     unsigned char *sp = statep;
00574     switch (*sp) {
00575     case 0:
00576         if (s[0] == 0xFE && s[1] == 0xFF) {
00577             *sp = BE;
00578             return ZERObt;
00579         }
00580         else if (s[0] == 0xFF && s[1] == 0xFE) {
00581             *sp = LE;
00582             return ZERObt;
00583         }
00584         break;
00585     case BE:
00586         if (s[0] < 0xD8 || 0xDF < s[0]) {
00587             return (VALUE)FUNso;
00588         }
00589         else if (s[0] <= 0xDB) {
00590             return (VALUE)from_UTF_16BE_D8toDB_00toFF;
00591         }
00592         break;
00593     case LE:
00594         if (s[1] < 0xD8 || 0xDF < s[1]) {
00595             return (VALUE)FUNso;
00596         }
00597         else if (s[1] <= 0xDB) {
00598             return (VALUE)from_UTF_16LE_00toFF_D8toDB;
00599         }
00600         break;
00601     }
00602     return (VALUE)INVALID;
00603 }
00604 
00605 static ssize_t
00606 fun_so_from_utf_16(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
00607 {
00608     unsigned char *sp = statep;
00609     switch (*sp) {
00610     case BE:
00611         return fun_so_from_utf_16be(statep, s, l, o, osize);
00612     case LE:
00613         return fun_so_from_utf_16le(statep, s, l, o, osize);
00614     }
00615     return 0;
00616 }
00617 
00618 static VALUE
00619 fun_si_from_utf_32(void *statep, const unsigned char *s, size_t l)
00620 {
00621     unsigned char *sp = statep;
00622     switch (*sp) {
00623     case 0:
00624         if (s[0] == 0 && s[1] == 0 && s[2] == 0xFE && s[3] == 0xFF) {
00625             *sp = BE;
00626             return ZERObt;
00627         }
00628         else if (s[0] == 0xFF && s[1] == 0xFE && s[2] == 0 && s[3] == 0) {
00629             *sp = LE;
00630             return ZERObt;
00631         }
00632         break;
00633     case BE:
00634         if (s[0] == 0 && ((0 < s[1] && s[1] <= 0x10) ||
00635               (s[1] == 0 && (s[2] < 0xD8 || 0xDF < s[2]))))
00636             return (VALUE)FUNso;
00637         break;
00638     case LE:
00639         if (s[3] == 0 && ((0 < s[2] && s[2] <= 0x10) ||
00640               (s[2] == 0 && (s[1] < 0xD8 || 0xDF < s[1]))))
00641             return (VALUE)FUNso;
00642         break;
00643     }
00644     return (VALUE)INVALID;
00645 }
00646 
00647 static ssize_t
00648 fun_so_from_utf_32(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
00649 {
00650     unsigned char *sp = statep;
00651     switch (*sp) {
00652     case BE:
00653         return fun_so_from_utf_32be(statep, s, l, o, osize);
00654     case LE:
00655         return fun_so_from_utf_32le(statep, s, l, o, osize);
00656     }
00657     return 0;
00658 }
00659 
/*
 * Convert one UTF-8 character to UTF-16, big-endian, emitting a byte-order
 * mark first.
 *
 * While the state byte at statep is 0, the bytes FE FF are stored ahead of
 * the character and the state is set to 1; later calls store only the
 * character.  Returns the total number of bytes stored at o, which is up to
 * 6 on the first call (2 for the mark plus up to 4 from
 * fun_so_to_utf_16be).  osize is forwarded unchanged.
 */
static ssize_t
fun_so_to_utf_16(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    unsigned char *bom_done = statep;
    ssize_t bom_len = 0;

    if (*bom_done == 0) {
        o[0] = 0xFE;
        o[1] = 0xFF;
        o += 2;
        *bom_done = 1;
        bom_len = 2;
    }
    return bom_len + fun_so_to_utf_16be(statep, s, l, o, osize);
}
00672 
/*
 * Convert one UTF-8 character to UTF-32, big-endian, emitting a byte-order
 * mark first.
 *
 * While the state byte at statep is 0, the bytes 00 00 FE FF are stored
 * ahead of the character and the state is set to 1; later calls store only
 * the character.  Returns the total number of bytes stored at o, which is
 * 8 on the first call (4 for the mark plus 4 from fun_so_to_utf_32be).
 * osize is forwarded unchanged.
 */
static ssize_t
fun_so_to_utf_32(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    unsigned char *bom_done = statep;
    ssize_t bom_len = 0;

    if (*bom_done == 0) {
        o[0] = 0x00;
        o[1] = 0x00;
        o[2] = 0xFE;
        o[3] = 0xFF;
        o += 4;
        *bom_done = 1;
        bom_len = 4;
    }
    return bom_len + fun_so_to_utf_32be(statep, s, l, o, osize);
}
00687 
00688 static const rb_transcoder
00689 rb_from_UTF_16BE = {
00690     "UTF-16BE", "UTF-8", from_UTF_16BE,
00691     TRANSCODE_TABLE_INFO,
00692     2, /* input_unit_length */
00693     4, /* max_input */
00694     4, /* max_output */
00695     asciicompat_decoder, /* asciicompat_type */
00696     0, NULL, NULL, /* state_size, state_init, state_fini */
00697     NULL, NULL, NULL, fun_so_from_utf_16be
00698 };
00699 
00700 static const rb_transcoder
00701 rb_to_UTF_16BE = {
00702     "UTF-8", "UTF-16BE", from_UTF_8,
00703     TRANSCODE_TABLE_INFO,
00704     1, /* input_unit_length */
00705     4, /* max_input */
00706     4, /* max_output */
00707     asciicompat_encoder, /* asciicompat_type */
00708     0, NULL, NULL, /* state_size, state_init, state_fini */
00709     NULL, NULL, NULL, fun_so_to_utf_16be
00710 };
00711 
00712 static const rb_transcoder
00713 rb_from_UTF_16LE = {
00714     "UTF-16LE", "UTF-8", from_UTF_16LE,
00715     TRANSCODE_TABLE_INFO,
00716     2, /* input_unit_length */
00717     4, /* max_input */
00718     4, /* max_output */
00719     asciicompat_decoder, /* asciicompat_type */
00720     0, NULL, NULL, /* state_size, state_init, state_fini */
00721     NULL, NULL, NULL, fun_so_from_utf_16le
00722 };
00723 
00724 static const rb_transcoder
00725 rb_to_UTF_16LE = {
00726     "UTF-8", "UTF-16LE", from_UTF_8,
00727     TRANSCODE_TABLE_INFO,
00728     1, /* input_unit_length */
00729     4, /* max_input */
00730     4, /* max_output */
00731     asciicompat_encoder, /* asciicompat_type */
00732     0, NULL, NULL, /* state_size, state_init, state_fini */
00733     NULL, NULL, NULL, fun_so_to_utf_16le
00734 };
00735 
00736 static const rb_transcoder
00737 rb_from_UTF_32BE = {
00738     "UTF-32BE", "UTF-8", from_UTF_32BE,
00739     TRANSCODE_TABLE_INFO,
00740     4, /* input_unit_length */
00741     4, /* max_input */
00742     4, /* max_output */
00743     asciicompat_decoder, /* asciicompat_type */
00744     0, NULL, NULL, /* state_size, state_init, state_fini */
00745     NULL, NULL, NULL, fun_so_from_utf_32be
00746 };
00747 
00748 static const rb_transcoder
00749 rb_to_UTF_32BE = {
00750     "UTF-8", "UTF-32BE", from_UTF_8,
00751     TRANSCODE_TABLE_INFO,
00752     1, /* input_unit_length */
00753     4, /* max_input */
00754     4, /* max_output */
00755     asciicompat_encoder, /* asciicompat_type */
00756     0, NULL, NULL, /* state_size, state_init, state_fini */
00757     NULL, NULL, NULL, fun_so_to_utf_32be
00758 };
00759 
00760 static const rb_transcoder
00761 rb_from_UTF_32LE = {
00762     "UTF-32LE", "UTF-8", from_UTF_32LE,
00763     TRANSCODE_TABLE_INFO,
00764     4, /* input_unit_length */
00765     4, /* max_input */
00766     4, /* max_output */
00767     asciicompat_decoder, /* asciicompat_type */
00768     0, NULL, NULL, /* state_size, state_init, state_fini */
00769     NULL, NULL, NULL, fun_so_from_utf_32le
00770 };
00771 
00772 static const rb_transcoder
00773 rb_to_UTF_32LE = {
00774     "UTF-8", "UTF-32LE", from_UTF_8,
00775     TRANSCODE_TABLE_INFO,
00776     1, /* input_unit_length */
00777     4, /* max_input */
00778     4, /* max_output */
00779     asciicompat_encoder, /* asciicompat_type */
00780     0, NULL, NULL, /* state_size, state_init, state_fini */
00781     NULL, NULL, NULL, fun_so_to_utf_32le
00782 };
00783 
00784 static const rb_transcoder
00785 rb_from_UTF_16 = {
00786     "UTF-16", "UTF-8", from_UTF_16,
00787     TRANSCODE_TABLE_INFO,
00788     2, /* input_unit_length */
00789     4, /* max_input */
00790     4, /* max_output */
00791     asciicompat_decoder, /* asciicompat_type */
00792     1, state_init, NULL, /* state_size, state_init, state_fini */
00793     NULL, fun_si_from_utf_16, NULL, fun_so_from_utf_16
00794 };
00795 
00796 static const rb_transcoder
00797 rb_from_UTF_32 = {
00798     "UTF-32", "UTF-8", from_UTF_32,
00799     TRANSCODE_TABLE_INFO,
00800     4, /* input_unit_length */
00801     4, /* max_input */
00802     4, /* max_output */
00803     asciicompat_decoder, /* asciicompat_type */
00804     1, state_init, NULL, /* state_size, state_init, state_fini */
00805     NULL, fun_si_from_utf_32, NULL, fun_so_from_utf_32
00806 };
00807 
00808 static const rb_transcoder
00809 rb_to_UTF_16 = {
00810     "UTF-8", "UTF-16", from_UTF_8,
00811     TRANSCODE_TABLE_INFO,
00812     1, /* input_unit_length */
00813     4, /* max_input */
00814     4, /* max_output */
00815     asciicompat_encoder, /* asciicompat_type */
00816     1, state_init, NULL, /* state_size, state_init, state_fini */
00817     NULL, NULL, NULL, fun_so_to_utf_16
00818 };
00819 
00820 static const rb_transcoder
00821 rb_to_UTF_32 = {
00822     "UTF-8", "UTF-32", from_UTF_8,
00823     TRANSCODE_TABLE_INFO,
00824     1, /* input_unit_length */
00825     4, /* max_input */
00826     4, /* max_output */
00827     asciicompat_encoder, /* asciicompat_type */
00828     1, state_init, NULL, /* state_size, state_init, state_fini */
00829     NULL, NULL, NULL, fun_so_to_utf_32
00830 };
00831 
00832 void
00833 Init_utf_16_32(void)
00834 {
00835     rb_register_transcoder(&rb_from_UTF_16BE);
00836     rb_register_transcoder(&rb_to_UTF_16BE);
00837     rb_register_transcoder(&rb_from_UTF_16LE);
00838     rb_register_transcoder(&rb_to_UTF_16LE);
00839     rb_register_transcoder(&rb_from_UTF_32BE);
00840     rb_register_transcoder(&rb_to_UTF_32BE);
00841     rb_register_transcoder(&rb_from_UTF_32LE);
00842     rb_register_transcoder(&rb_to_UTF_32LE);
00843     rb_register_transcoder(&rb_from_UTF_16);
00844     rb_register_transcoder(&rb_to_UTF_16);
00845     rb_register_transcoder(&rb_from_UTF_32);
00846     rb_register_transcoder(&rb_to_UTF_32);
00847 }
00848 
00849