Ruby 1.9.3p327 (2012-11-10 revision 37606)
|
00001 /* autogenerated. */ 00002 /* src="transcode-tblgen.rb", len=28123, checksum=30477 */ 00003 /* src="utf_16_32.trans", len=15312, checksum=29120 */ 00004 00005 #include "transcode_data.h" 00006 00007 00008 00009 static const unsigned char 00010 utf_16_32_byte_array[1288] = { 00011 #define from_UTF_16LE_00toFF_D8toDB_00toFF_offsets 0 00012 220, 223, 00013 1, 1, 1, 1, 00014 00015 #define from_UTF_16LE_00toFF_D8toDB_offsets 6 00016 0, 255, 00017 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00018 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00019 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00020 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00021 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00022 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00023 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00024 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00025 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00026 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00027 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00028 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00029 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00030 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00031 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00032 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00033 00034 #define from_UTF_16LE_00toFF_offsets 264 00035 0, 255, 00036 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00037 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00038 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00039 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00040 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00041 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00042 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00043 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00044 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00045 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00046 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00047 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 00048 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00049 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 00050 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00051 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00052 00053 #define from_UTF_32LE_00toFF_00toD7_00_offsets 522 00054 0, 0, 00055 0, 00056 00057 #define from_UTF_32LE_00toFF_00toD7_offsets 525 00058 0, 16, 00059 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00060 0, 00061 00062 #define from_UTF_32LE_00toFF_D8toDF_offsets 544 00063 1, 16, 00064 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00065 00066 #define from_UTF_32LE_00toFF_offsets 562 00067 0, 255, 00068 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00069 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00070 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00071 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00072 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00073 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00074 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00075 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00076 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00077 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00078 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00079 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00080 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00081 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 00082 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00083 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00084 00085 #define from_UTF_32BE_00_offsets 820 00086 0, 16, 00087 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00088 1, 00089 00090 #define from_UTF_8_C2toDF_offsets 839 00091 128, 191, 00092 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00093 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00094 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00095 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00096 00097 #define from_UTF_8_E0_offsets 905 00098 160, 191, 00099 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00100 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00101 00102 #define from_UTF_8_ED_offsets 939 00103 128, 159, 00104 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00105 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00106 00107 #define from_UTF_8_F0_offsets 973 00108 144, 191, 00109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00110 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00111 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00112 00113 #define from_UTF_8_F4_offsets 1023 00114 128, 143, 00115 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00116 00117 #define from_UTF_8_offsets 1041 00118 0, 244, 00119 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00120 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00121 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00122 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00123 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00124 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00125 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00126 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00127 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00128 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00129 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00130 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00131 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 00132 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 00133 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 4, 4, 00134 6, 7, 7, 7, 8, 00135 00136 }; 00137 static const unsigned int 00138 utf_16_32_word_array[106] = { 00139 #define from_UTF_16LE_00toFF_D8toDB_00toFF_infos WORDINDEX2INFO(0) 00140 INVALID, FUNso, 00141 00142 #define from_UTF_16LE_00toFF_D8toDB_00toFF WORDINDEX2INFO(2) 00143 from_UTF_16LE_00toFF_D8toDB_00toFF_offsets, 00144 from_UTF_16LE_00toFF_D8toDB_00toFF_infos, 00145 00146 #define from_UTF_16LE_00toFF_D8toDB_infos WORDINDEX2INFO(4) 00147 from_UTF_16LE_00toFF_D8toDB_00toFF, 00148 00149 #define 
from_UTF_16LE_00toFF_D8toDB WORDINDEX2INFO(5) 00150 from_UTF_16LE_00toFF_D8toDB_offsets, 00151 from_UTF_16LE_00toFF_D8toDB_infos, 00152 00153 #define from_UTF_16LE_00toFF_infos WORDINDEX2INFO(7) 00154 FUNso, from_UTF_16LE_00toFF_D8toDB, 00155 INVALID, 00156 00157 #define from_UTF_16LE_00toFF WORDINDEX2INFO(10) 00158 from_UTF_16LE_00toFF_offsets, 00159 from_UTF_16LE_00toFF_infos, 00160 00161 #define from_UTF_16LE_infos WORDINDEX2INFO(12) 00162 from_UTF_16LE_00toFF, 00163 00164 #define from_UTF_16LE WORDINDEX2INFO(13) 00165 from_UTF_16LE_00toFF_D8toDB_offsets, 00166 from_UTF_16LE_infos, 00167 00168 #define from_UTF_32LE_00toFF_00toD7_00_infos WORDINDEX2INFO(15) 00169 FUNso, INVALID, 00170 00171 #define from_UTF_32LE_00toFF_00toD7_00 WORDINDEX2INFO(17) 00172 from_UTF_32LE_00toFF_00toD7_00_offsets, 00173 from_UTF_32LE_00toFF_00toD7_00_infos, 00174 00175 #define from_UTF_32LE_00toFF_00toD7_infos WORDINDEX2INFO(19) 00176 from_UTF_32LE_00toFF_00toD7_00, INVALID, 00177 00178 #define from_UTF_32LE_00toFF_00toD7 WORDINDEX2INFO(21) 00179 from_UTF_32LE_00toFF_00toD7_offsets, 00180 from_UTF_32LE_00toFF_00toD7_infos, 00181 00182 #define from_UTF_32LE_00toFF_D8toDF_infos WORDINDEX2INFO(23) 00183 INVALID, from_UTF_32LE_00toFF_00toD7_00, 00184 00185 #define from_UTF_32LE_00toFF_D8toDF WORDINDEX2INFO(25) 00186 from_UTF_32LE_00toFF_D8toDF_offsets, 00187 from_UTF_32LE_00toFF_D8toDF_infos, 00188 00189 #define from_UTF_32LE_00toFF_infos WORDINDEX2INFO(27) 00190 from_UTF_32LE_00toFF_00toD7, from_UTF_32LE_00toFF_D8toDF, 00191 00192 #define from_UTF_32LE_00toFF WORDINDEX2INFO(29) 00193 from_UTF_32LE_00toFF_offsets, 00194 from_UTF_32LE_00toFF_infos, 00195 00196 #define from_UTF_32LE_infos WORDINDEX2INFO(31) 00197 from_UTF_32LE_00toFF, 00198 00199 #define from_UTF_32LE WORDINDEX2INFO(32) 00200 from_UTF_16LE_00toFF_D8toDB_offsets, 00201 from_UTF_32LE_infos, 00202 00203 #define from_UTF_16BE_00toD7_infos WORDINDEX2INFO(34) 00204 FUNso, 00205 00206 #define from_UTF_16BE_00toD7 
WORDINDEX2INFO(35) 00207 from_UTF_16LE_00toFF_D8toDB_offsets, 00208 from_UTF_16BE_00toD7_infos, 00209 00210 #define from_UTF_16BE_D8toDB_00toFF_infos WORDINDEX2INFO(37) 00211 INVALID, from_UTF_16BE_00toD7, 00212 00213 #define from_UTF_16BE_D8toDB_00toFF WORDINDEX2INFO(39) 00214 from_UTF_16LE_00toFF_D8toDB_00toFF_offsets, 00215 from_UTF_16BE_D8toDB_00toFF_infos, 00216 00217 #define from_UTF_16BE_D8toDB_infos WORDINDEX2INFO(41) 00218 from_UTF_16BE_D8toDB_00toFF, 00219 00220 #define from_UTF_16BE_D8toDB WORDINDEX2INFO(42) 00221 from_UTF_16LE_00toFF_D8toDB_offsets, 00222 from_UTF_16BE_D8toDB_infos, 00223 00224 #define from_UTF_16BE_infos WORDINDEX2INFO(44) 00225 from_UTF_16BE_00toD7, from_UTF_16BE_D8toDB, 00226 INVALID, 00227 00228 #define from_UTF_16BE WORDINDEX2INFO(47) 00229 from_UTF_16LE_00toFF_offsets, 00230 from_UTF_16BE_infos, 00231 00232 #define from_UTF_32BE_00_00_infos WORDINDEX2INFO(49) 00233 from_UTF_16BE_00toD7, INVALID, 00234 00235 #define from_UTF_32BE_00_00 WORDINDEX2INFO(51) 00236 from_UTF_32LE_00toFF_offsets, 00237 from_UTF_32BE_00_00_infos, 00238 00239 #define from_UTF_32BE_00_01to10_infos WORDINDEX2INFO(53) 00240 from_UTF_16BE_00toD7, 00241 00242 #define from_UTF_32BE_00_01to10 WORDINDEX2INFO(54) 00243 from_UTF_16LE_00toFF_D8toDB_offsets, 00244 from_UTF_32BE_00_01to10_infos, 00245 00246 #define from_UTF_32BE_00_infos WORDINDEX2INFO(56) 00247 from_UTF_32BE_00_00, from_UTF_32BE_00_01to10, 00248 INVALID, 00249 00250 #define from_UTF_32BE_00 WORDINDEX2INFO(59) 00251 from_UTF_32BE_00_offsets, 00252 from_UTF_32BE_00_infos, 00253 00254 #define from_UTF_32BE_infos WORDINDEX2INFO(61) 00255 from_UTF_32BE_00, INVALID, 00256 00257 #define from_UTF_32BE WORDINDEX2INFO(63) 00258 from_UTF_32LE_00toFF_00toD7_00_offsets, 00259 from_UTF_32BE_infos, 00260 00261 #define from_UTF_16_00toFF_infos WORDINDEX2INFO(65) 00262 FUNsi, 00263 00264 #define from_UTF_16_00toFF WORDINDEX2INFO(66) 00265 from_UTF_16LE_00toFF_D8toDB_offsets, 00266 from_UTF_16_00toFF_infos, 00267 00268 
#define from_UTF_16_infos WORDINDEX2INFO(68) 00269 from_UTF_16_00toFF, 00270 00271 #define from_UTF_16 WORDINDEX2INFO(69) 00272 from_UTF_16LE_00toFF_D8toDB_offsets, 00273 from_UTF_16_infos, 00274 00275 #define from_UTF_32_00toFF_infos WORDINDEX2INFO(71) 00276 from_UTF_16, 00277 00278 #define from_UTF_32_00toFF WORDINDEX2INFO(72) 00279 from_UTF_16LE_00toFF_D8toDB_offsets, 00280 from_UTF_32_00toFF_infos, 00281 00282 #define from_UTF_32_infos WORDINDEX2INFO(74) 00283 from_UTF_32_00toFF, 00284 00285 #define from_UTF_32 WORDINDEX2INFO(75) 00286 from_UTF_16LE_00toFF_D8toDB_offsets, 00287 from_UTF_32_infos, 00288 00289 #define from_UTF_8_C2toDF WORDINDEX2INFO(77) 00290 from_UTF_8_C2toDF_offsets, 00291 from_UTF_16LE_00toFF_D8toDB_00toFF_infos, 00292 00293 #define from_UTF_8_E0_infos WORDINDEX2INFO(79) 00294 INVALID, from_UTF_8_C2toDF, 00295 00296 #define from_UTF_8_E0 WORDINDEX2INFO(81) 00297 from_UTF_8_E0_offsets, 00298 from_UTF_8_E0_infos, 00299 00300 #define from_UTF_8_E1toEC WORDINDEX2INFO(83) 00301 from_UTF_8_C2toDF_offsets, 00302 from_UTF_8_E0_infos, 00303 00304 #define from_UTF_8_ED WORDINDEX2INFO(85) 00305 from_UTF_8_ED_offsets, 00306 from_UTF_8_E0_infos, 00307 00308 #define from_UTF_8_F0_infos WORDINDEX2INFO(87) 00309 INVALID, from_UTF_8_E1toEC, 00310 00311 #define from_UTF_8_F0 WORDINDEX2INFO(89) 00312 from_UTF_8_F0_offsets, 00313 from_UTF_8_F0_infos, 00314 00315 #define from_UTF_8_F1toF3 WORDINDEX2INFO(91) 00316 from_UTF_8_C2toDF_offsets, 00317 from_UTF_8_F0_infos, 00318 00319 #define from_UTF_8_F4 WORDINDEX2INFO(93) 00320 from_UTF_8_F4_offsets, 00321 from_UTF_8_F0_infos, 00322 00323 #define from_UTF_8_infos WORDINDEX2INFO(95) 00324 FUNso, INVALID, 00325 from_UTF_8_C2toDF, from_UTF_8_E0, 00326 from_UTF_8_E1toEC, from_UTF_8_ED, 00327 from_UTF_8_F0, from_UTF_8_F1toF3, 00328 from_UTF_8_F4, 00329 00330 #define from_UTF_8 WORDINDEX2INFO(104) 00331 from_UTF_8_offsets, 00332 from_UTF_8_infos, 00333 00334 }; 00335 #define TRANSCODE_TABLE_INFO utf_16_32_byte_array, 1288, 
utf_16_32_word_array, 106, ((int)sizeof(unsigned int)) 00336 00337 00338 static ssize_t 00339 fun_so_from_utf_16be(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize) 00340 { 00341 if (!s[0] && s[1]<0x80) { 00342 o[0] = s[1]; 00343 return 1; 00344 } 00345 else if (s[0]<0x08) { 00346 o[0] = 0xC0 | (s[0]<<2) | (s[1]>>6); 00347 o[1] = 0x80 | (s[1]&0x3F); 00348 return 2; 00349 } 00350 else if ((s[0]&0xF8)!=0xD8) { 00351 o[0] = 0xE0 | (s[0]>>4); 00352 o[1] = 0x80 | ((s[0]&0x0F)<<2) | (s[1]>>6); 00353 o[2] = 0x80 | (s[1]&0x3F); 00354 return 3; 00355 } 00356 else { 00357 unsigned int u = (((s[0]&0x03)<<2)|(s[1]>>6)) + 1; 00358 o[0] = 0xF0 | (u>>2); 00359 o[1] = 0x80 | ((u&0x03)<<4) | ((s[1]>>2)&0x0F); 00360 o[2] = 0x80 | ((s[1]&0x03)<<4) | ((s[2]&0x03)<<2) | (s[3]>>6); 00361 o[3] = 0x80 | (s[3]&0x3F); 00362 return 4; 00363 } 00364 } 00365 00366 static ssize_t 00367 fun_so_to_utf_16be(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize) 00368 { 00369 if (!(s[0]&0x80)) { 00370 o[0] = 0x00; 00371 o[1] = s[0]; 00372 return 2; 00373 } 00374 else if ((s[0]&0xE0)==0xC0) { 00375 o[0] = (s[0]>>2)&0x07; 00376 o[1] = ((s[0]&0x03)<<6) | (s[1]&0x3F); 00377 return 2; 00378 } 00379 else if ((s[0]&0xF0)==0xE0) { 00380 o[0] = (s[0]<<4) | ((s[1]>>2)^0x20); 00381 o[1] = (s[1]<<6) | (s[2]^0x80); 00382 return 2; 00383 } 00384 else { 00385 int w = (((s[0]&0x07)<<2) | ((s[1]>>4)&0x03)) - 1; 00386 o[0] = 0xD8 | (w>>2); 00387 o[1] = (w<<6) | ((s[1]&0x0F)<<2) | ((s[2]>>4)-8); 00388 o[2] = 0xDC | ((s[2]>>2)&0x03); 00389 o[3] = (s[2]<<6) | (s[3]&~0x80); 00390 return 4; 00391 } 00392 } 00393 00394 static ssize_t 00395 fun_so_from_utf_16le(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize) 00396 { 00397 if (!s[1] && s[0]<0x80) { 00398 o[0] = s[0]; 00399 return 1; 00400 } 00401 else if (s[1]<0x08) { 00402 o[0] = 0xC0 | (s[1]<<2) | (s[0]>>6); 00403 o[1] = 0x80 | (s[0]&0x3F); 00404 return 2; 00405 } 00406 else if 
((s[1]&0xF8)!=0xD8) { 00407 o[0] = 0xE0 | (s[1]>>4); 00408 o[1] = 0x80 | ((s[1]&0x0F)<<2) | (s[0]>>6); 00409 o[2] = 0x80 | (s[0]&0x3F); 00410 return 3; 00411 } 00412 else { 00413 unsigned int u = (((s[1]&0x03)<<2)|(s[0]>>6)) + 1; 00414 o[0] = 0xF0 | u>>2; 00415 o[1] = 0x80 | ((u&0x03)<<4) | ((s[0]>>2)&0x0F); 00416 o[2] = 0x80 | ((s[0]&0x03)<<4) | ((s[3]&0x03)<<2) | (s[2]>>6); 00417 o[3] = 0x80 | (s[2]&0x3F); 00418 return 4; 00419 } 00420 } 00421 00422 static ssize_t 00423 fun_so_to_utf_16le(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize) 00424 { 00425 if (!(s[0]&0x80)) { 00426 o[1] = 0x00; 00427 o[0] = s[0]; 00428 return 2; 00429 } 00430 else if ((s[0]&0xE0)==0xC0) { 00431 o[1] = (s[0]>>2)&0x07; 00432 o[0] = ((s[0]&0x03)<<6) | (s[1]&0x3F); 00433 return 2; 00434 } 00435 else if ((s[0]&0xF0)==0xE0) { 00436 o[1] = (s[0]<<4) | ((s[1]>>2)^0x20); 00437 o[0] = (s[1]<<6) | (s[2]^0x80); 00438 return 2; 00439 } 00440 else { 00441 int w = (((s[0]&0x07)<<2) | ((s[1]>>4)&0x03)) - 1; 00442 o[1] = 0xD8 | (w>>2); 00443 o[0] = (w<<6) | ((s[1]&0x0F)<<2) | ((s[2]>>4)-8); 00444 o[3] = 0xDC | ((s[2]>>2)&0x03); 00445 o[2] = (s[2]<<6) | (s[3]&~0x80); 00446 return 4; 00447 } 00448 } 00449 00450 static ssize_t 00451 fun_so_from_utf_32be(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize) 00452 { 00453 if (!s[1]) { 00454 if (s[2]==0 && s[3]<0x80) { 00455 o[0] = s[3]; 00456 return 1; 00457 } 00458 else if (s[2]<0x08) { 00459 o[0] = 0xC0 | (s[2]<<2) | (s[3]>>6); 00460 o[1] = 0x80 | (s[3]&0x3F); 00461 return 2; 00462 } 00463 else { 00464 o[0] = 0xE0 | (s[2]>>4); 00465 o[1] = 0x80 | ((s[2]&0x0F)<<2) | (s[3]>>6); 00466 o[2] = 0x80 | (s[3]&0x3F); 00467 return 3; 00468 } 00469 } 00470 else { 00471 o[0] = 0xF0 | (s[1]>>2); 00472 o[1] = 0x80 | ((s[1]&0x03)<<4) | (s[2]>>4); 00473 o[2] = 0x80 | ((s[2]&0x0F)<<2) | (s[3]>>6); 00474 o[3] = 0x80 | (s[3]&0x3F); 00475 return 4; 00476 } 00477 } 00478 00479 static ssize_t 00480 
fun_so_to_utf_32be(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize) 00481 { 00482 o[0] = 0; 00483 if (!(s[0]&0x80)) { 00484 o[1] = o[2] = 0x00; 00485 o[3] = s[0]; 00486 } 00487 else if ((s[0]&0xE0)==0xC0) { 00488 o[1] = 0x00; 00489 o[2] = (s[0]>>2)&0x07; 00490 o[3] = ((s[0]&0x03)<<6) | (s[1]&0x3F); 00491 } 00492 else if ((s[0]&0xF0)==0xE0) { 00493 o[1] = 0x00; 00494 o[2] = (s[0]<<4) | ((s[1]>>2)^0x20); 00495 o[3] = (s[1]<<6) | (s[2]^0x80); 00496 } 00497 else { 00498 o[1] = ((s[0]&0x07)<<2) | ((s[1]>>4)&0x03); 00499 o[2] = ((s[1]&0x0F)<<4) | ((s[2]>>2)&0x0F); 00500 o[3] = ((s[2]&0x03)<<6) | (s[3]&0x3F); 00501 } 00502 return 4; 00503 } 00504 00505 static ssize_t 00506 fun_so_from_utf_32le(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize) 00507 { 00508 if (!s[2]) { 00509 if (s[1]==0 && s[0]<0x80) { 00510 o[0] = s[0]; 00511 return 1; 00512 } 00513 else if (s[1]<0x08) { 00514 o[0] = 0xC0 | (s[1]<<2) | (s[0]>>6); 00515 o[1] = 0x80 | (s[0]&0x3F); 00516 return 2; 00517 } 00518 else { 00519 o[0] = 0xE0 | (s[1]>>4); 00520 o[1] = 0x80 | ((s[1]&0x0F)<<2) | (s[0]>>6); 00521 o[2] = 0x80 | (s[0]&0x3F); 00522 return 3; 00523 } 00524 } 00525 else { 00526 o[0] = 0xF0 | (s[2]>>2); 00527 o[1] = 0x80 | ((s[2]&0x03)<<4) | (s[1]>>4); 00528 o[2] = 0x80 | ((s[1]&0x0F)<<2) | (s[0]>>6); 00529 o[3] = 0x80 | (s[0]&0x3F); 00530 return 4; 00531 } 00532 } 00533 00534 static ssize_t 00535 fun_so_to_utf_32le(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize) 00536 { 00537 o[3] = 0; 00538 if (!(s[0]&0x80)) { 00539 o[2] = o[1] = 0x00; 00540 o[0] = s[0]; 00541 } 00542 else if ((s[0]&0xE0)==0xC0) { 00543 o[2] = 0x00; 00544 o[1] = (s[0]>>2)&0x07; 00545 o[0] = ((s[0]&0x03)<<6) | (s[1]&0x3F); 00546 } 00547 else if ((s[0]&0xF0)==0xE0) { 00548 o[2] = 0x00; 00549 o[1] = (s[0]<<4) | ((s[1]>>2)^0x20); 00550 o[0] = (s[1]<<6) | (s[2]^0x80); 00551 } 00552 else { 00553 o[2] = ((s[0]&0x07)<<2) | ((s[1]>>4)&0x03); 00554 o[1] = 
((s[1]&0x0F)<<4) | ((s[2]>>2)&0x0F); 00555 o[0] = ((s[2]&0x03)<<6) | (s[3]&0x3F); 00556 } 00557 return 4; 00558 } 00559 00560 static int 00561 state_init(void *statep) 00562 { 00563 unsigned char *sp = statep; 00564 *sp = 0; 00565 return 0; 00566 } 00567 00568 static VALUE 00569 fun_si_from_utf_16(void *statep, const unsigned char *s, size_t l) 00570 { 00571 #define BE 1 00572 #define LE 2 00573 unsigned char *sp = statep; 00574 switch (*sp) { 00575 case 0: 00576 if (s[0] == 0xFE && s[1] == 0xFF) { 00577 *sp = BE; 00578 return ZERObt; 00579 } 00580 else if (s[0] == 0xFF && s[1] == 0xFE) { 00581 *sp = LE; 00582 return ZERObt; 00583 } 00584 break; 00585 case BE: 00586 if (s[0] < 0xD8 || 0xDF < s[0]) { 00587 return (VALUE)FUNso; 00588 } 00589 else if (s[0] <= 0xDB) { 00590 return (VALUE)from_UTF_16BE_D8toDB_00toFF; 00591 } 00592 break; 00593 case LE: 00594 if (s[1] < 0xD8 || 0xDF < s[1]) { 00595 return (VALUE)FUNso; 00596 } 00597 else if (s[1] <= 0xDB) { 00598 return (VALUE)from_UTF_16LE_00toFF_D8toDB; 00599 } 00600 break; 00601 } 00602 return (VALUE)INVALID; 00603 } 00604 00605 static ssize_t 00606 fun_so_from_utf_16(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize) 00607 { 00608 unsigned char *sp = statep; 00609 switch (*sp) { 00610 case BE: 00611 return fun_so_from_utf_16be(statep, s, l, o, osize); 00612 case LE: 00613 return fun_so_from_utf_16le(statep, s, l, o, osize); 00614 } 00615 return 0; 00616 } 00617 00618 static VALUE 00619 fun_si_from_utf_32(void *statep, const unsigned char *s, size_t l) 00620 { 00621 unsigned char *sp = statep; 00622 switch (*sp) { 00623 case 0: 00624 if (s[0] == 0 && s[1] == 0 && s[2] == 0xFE && s[3] == 0xFF) { 00625 *sp = BE; 00626 return ZERObt; 00627 } 00628 else if (s[0] == 0xFF && s[1] == 0xFE && s[2] == 0 && s[3] == 0) { 00629 *sp = LE; 00630 return ZERObt; 00631 } 00632 break; 00633 case BE: 00634 if (s[0] == 0 && ((0 < s[1] && s[1] <= 0x10) || 00635 (s[1] == 0 && (s[2] < 0xD8 || 0xDF < s[2])))) 
00636 return (VALUE)FUNso; 00637 break; 00638 case LE: 00639 if (s[3] == 0 && ((0 < s[2] && s[2] <= 0x10) || 00640 (s[2] == 0 && (s[1] < 0xD8 || 0xDF < s[1])))) 00641 return (VALUE)FUNso; 00642 break; 00643 } 00644 return (VALUE)INVALID; 00645 } 00646 00647 static ssize_t 00648 fun_so_from_utf_32(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize) 00649 { 00650 unsigned char *sp = statep; 00651 switch (*sp) { 00652 case BE: 00653 return fun_so_from_utf_32be(statep, s, l, o, osize); 00654 case LE: 00655 return fun_so_from_utf_32le(statep, s, l, o, osize); 00656 } 00657 return 0; 00658 } 00659 00660 static ssize_t 00661 fun_so_to_utf_16(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize) 00662 { 00663 unsigned char *sp = statep; 00664 if (*sp == 0) { 00665 *o++ = 0xFE; 00666 *o++ = 0xFF; 00667 *sp = 1; 00668 return 2 + fun_so_to_utf_16be(statep, s, l, o, osize); 00669 } 00670 return fun_so_to_utf_16be(statep, s, l, o, osize); 00671 } 00672 00673 static ssize_t 00674 fun_so_to_utf_32(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize) 00675 { 00676 unsigned char *sp = statep; 00677 if (*sp == 0) { 00678 *o++ = 0x00; 00679 *o++ = 0x00; 00680 *o++ = 0xFE; 00681 *o++ = 0xFF; 00682 *sp = 1; 00683 return 4 + fun_so_to_utf_32be(statep, s, l, o, osize); 00684 } 00685 return fun_so_to_utf_32be(statep, s, l, o, osize); 00686 } 00687 00688 static const rb_transcoder 00689 rb_from_UTF_16BE = { 00690 "UTF-16BE", "UTF-8", from_UTF_16BE, 00691 TRANSCODE_TABLE_INFO, 00692 2, /* input_unit_length */ 00693 4, /* max_input */ 00694 4, /* max_output */ 00695 asciicompat_decoder, /* asciicompat_type */ 00696 0, NULL, NULL, /* state_size, state_init, state_fini */ 00697 NULL, NULL, NULL, fun_so_from_utf_16be 00698 }; 00699 00700 static const rb_transcoder 00701 rb_to_UTF_16BE = { 00702 "UTF-8", "UTF-16BE", from_UTF_8, 00703 TRANSCODE_TABLE_INFO, 00704 1, /* input_unit_length */ 00705 4, /* max_input */ 
00706 4, /* max_output */ 00707 asciicompat_encoder, /* asciicompat_type */ 00708 0, NULL, NULL, /* state_size, state_init, state_fini */ 00709 NULL, NULL, NULL, fun_so_to_utf_16be 00710 }; 00711 00712 static const rb_transcoder 00713 rb_from_UTF_16LE = { 00714 "UTF-16LE", "UTF-8", from_UTF_16LE, 00715 TRANSCODE_TABLE_INFO, 00716 2, /* input_unit_length */ 00717 4, /* max_input */ 00718 4, /* max_output */ 00719 asciicompat_decoder, /* asciicompat_type */ 00720 0, NULL, NULL, /* state_size, state_init, state_fini */ 00721 NULL, NULL, NULL, fun_so_from_utf_16le 00722 }; 00723 00724 static const rb_transcoder 00725 rb_to_UTF_16LE = { 00726 "UTF-8", "UTF-16LE", from_UTF_8, 00727 TRANSCODE_TABLE_INFO, 00728 1, /* input_unit_length */ 00729 4, /* max_input */ 00730 4, /* max_output */ 00731 asciicompat_encoder, /* asciicompat_type */ 00732 0, NULL, NULL, /* state_size, state_init, state_fini */ 00733 NULL, NULL, NULL, fun_so_to_utf_16le 00734 }; 00735 00736 static const rb_transcoder 00737 rb_from_UTF_32BE = { 00738 "UTF-32BE", "UTF-8", from_UTF_32BE, 00739 TRANSCODE_TABLE_INFO, 00740 4, /* input_unit_length */ 00741 4, /* max_input */ 00742 4, /* max_output */ 00743 asciicompat_decoder, /* asciicompat_type */ 00744 0, NULL, NULL, /* state_size, state_init, state_fini */ 00745 NULL, NULL, NULL, fun_so_from_utf_32be 00746 }; 00747 00748 static const rb_transcoder 00749 rb_to_UTF_32BE = { 00750 "UTF-8", "UTF-32BE", from_UTF_8, 00751 TRANSCODE_TABLE_INFO, 00752 1, /* input_unit_length */ 00753 4, /* max_input */ 00754 4, /* max_output */ 00755 asciicompat_encoder, /* asciicompat_type */ 00756 0, NULL, NULL, /* state_size, state_init, state_fini */ 00757 NULL, NULL, NULL, fun_so_to_utf_32be 00758 }; 00759 00760 static const rb_transcoder 00761 rb_from_UTF_32LE = { 00762 "UTF-32LE", "UTF-8", from_UTF_32LE, 00763 TRANSCODE_TABLE_INFO, 00764 4, /* input_unit_length */ 00765 4, /* max_input */ 00766 4, /* max_output */ 00767 asciicompat_decoder, /* asciicompat_type */ 00768 0, 
NULL, NULL, /* state_size, state_init, state_fini */ 00769 NULL, NULL, NULL, fun_so_from_utf_32le 00770 }; 00771 00772 static const rb_transcoder 00773 rb_to_UTF_32LE = { 00774 "UTF-8", "UTF-32LE", from_UTF_8, 00775 TRANSCODE_TABLE_INFO, 00776 1, /* input_unit_length */ 00777 4, /* max_input */ 00778 4, /* max_output */ 00779 asciicompat_encoder, /* asciicompat_type */ 00780 0, NULL, NULL, /* state_size, state_init, state_fini */ 00781 NULL, NULL, NULL, fun_so_to_utf_32le 00782 }; 00783 00784 static const rb_transcoder 00785 rb_from_UTF_16 = { 00786 "UTF-16", "UTF-8", from_UTF_16, 00787 TRANSCODE_TABLE_INFO, 00788 2, /* input_unit_length */ 00789 4, /* max_input */ 00790 4, /* max_output */ 00791 asciicompat_decoder, /* asciicompat_type */ 00792 1, state_init, NULL, /* state_size, state_init, state_fini */ 00793 NULL, fun_si_from_utf_16, NULL, fun_so_from_utf_16 00794 }; 00795 00796 static const rb_transcoder 00797 rb_from_UTF_32 = { 00798 "UTF-32", "UTF-8", from_UTF_32, 00799 TRANSCODE_TABLE_INFO, 00800 4, /* input_unit_length */ 00801 4, /* max_input */ 00802 4, /* max_output */ 00803 asciicompat_decoder, /* asciicompat_type */ 00804 1, state_init, NULL, /* state_size, state_init, state_fini */ 00805 NULL, fun_si_from_utf_32, NULL, fun_so_from_utf_32 00806 }; 00807 00808 static const rb_transcoder 00809 rb_to_UTF_16 = { 00810 "UTF-8", "UTF-16", from_UTF_8, 00811 TRANSCODE_TABLE_INFO, 00812 1, /* input_unit_length */ 00813 4, /* max_input */ 00814 4, /* max_output */ 00815 asciicompat_encoder, /* asciicompat_type */ 00816 1, state_init, NULL, /* state_size, state_init, state_fini */ 00817 NULL, NULL, NULL, fun_so_to_utf_16 00818 }; 00819 00820 static const rb_transcoder 00821 rb_to_UTF_32 = { 00822 "UTF-8", "UTF-32", from_UTF_8, 00823 TRANSCODE_TABLE_INFO, 00824 1, /* input_unit_length */ 00825 4, /* max_input */ 00826 4, /* max_output */ 00827 asciicompat_encoder, /* asciicompat_type */ 00828 1, state_init, NULL, /* state_size, state_init, state_fini */ 00829 
NULL, NULL, NULL, fun_so_to_utf_32 00830 }; 00831 00832 void 00833 Init_utf_16_32(void) 00834 { 00835 rb_register_transcoder(&rb_from_UTF_16BE); 00836 rb_register_transcoder(&rb_to_UTF_16BE); 00837 rb_register_transcoder(&rb_from_UTF_16LE); 00838 rb_register_transcoder(&rb_to_UTF_16LE); 00839 rb_register_transcoder(&rb_from_UTF_32BE); 00840 rb_register_transcoder(&rb_to_UTF_32BE); 00841 rb_register_transcoder(&rb_from_UTF_32LE); 00842 rb_register_transcoder(&rb_to_UTF_32LE); 00843 rb_register_transcoder(&rb_from_UTF_16); 00844 rb_register_transcoder(&rb_to_UTF_16); 00845 rb_register_transcoder(&rb_from_UTF_32); 00846 rb_register_transcoder(&rb_to_UTF_32); 00847 } 00848 00849