Ruby 1.9.3p327(2012-11-10revision37606)
|
00001 /* -*- mode:c; c-file-style:"ruby" -*- */ 00002 /********************************************************************** 00003 00004 iconv.c - 00005 00006 $Author: nobu $ 00007 created at: Wed Dec 1 20:28:09 JST 1999 00008 00009 All the files in this distribution are covered under the Ruby's 00010 license (see the file COPYING). 00011 00012 Documentation by Yukihiro Matsumoto and Gavin Sinclair. 00013 00014 **********************************************************************/ 00015 00016 #include "ruby/ruby.h" 00017 #include <errno.h> 00018 #include <iconv.h> 00019 #include <assert.h> 00020 #include "ruby/st.h" 00021 #include "ruby/encoding.h" 00022 00023 /* 00024 * Document-class: Iconv 00025 * 00026 * == Summary 00027 * 00028 * Ruby extension for charset conversion. 00029 * 00030 * == Abstract 00031 * 00032 * Iconv is a wrapper class for the UNIX 95 <tt>iconv()</tt> function family, 00033 * which translates string between various encoding systems. 00034 * 00035 * See Open Group's on-line documents for more details. 00036 * * <tt>iconv.h</tt>: http://www.opengroup.org/onlinepubs/007908799/xsh/iconv.h.html 00037 * * <tt>iconv_open()</tt>: http://www.opengroup.org/onlinepubs/007908799/xsh/iconv_open.html 00038 * * <tt>iconv()</tt>: http://www.opengroup.org/onlinepubs/007908799/xsh/iconv.html 00039 * * <tt>iconv_close()</tt>: http://www.opengroup.org/onlinepubs/007908799/xsh/iconv_close.html 00040 * 00041 * Which coding systems are available is platform-dependent. 00042 * 00043 * == Examples 00044 * 00045 * 1. Simple conversion between two charsets. 00046 * 00047 * converted_text = Iconv.conv('iso-8859-15', 'utf-8', text) 00048 * 00049 * 2. Instantiate a new Iconv and use method Iconv#iconv. 00050 * 00051 * cd = Iconv.new(to, from) 00052 * begin 00053 * input.each { |s| output << cd.iconv(s) } 00054 * output << cd.iconv(nil) # Don't forget this! 00055 * ensure 00056 * cd.close 00057 * end 00058 * 00059 * 3. Invoke Iconv.open with a block. 
 *
 *   Iconv.open(to, from) do |cd|
 *     input.each { |s| output << cd.iconv(s) }
 *     output << cd.iconv(nil)
 *   end
 *
 * 4. Shorthand for (3).
 *
 *     Iconv.iconv(to, from, *input.to_a)
 *
 * == Attentions
 *
 * Even if some implementation-dependent extensions are useful,
 * DON'T USE those extensions in libraries and scripts that are meant to be
 * widely distributed.
 * If you want to use those features, use String#encode.
 */

/* Invalid value for iconv_t is -1 but 0 for VALUE, I hope VALUE is
   big enough to keep iconv_t.
   Both macros XOR with -1, so a zero VALUE corresponds to (iconv_t)-1
   and the mapping is its own inverse. */
#define VALUE2ICONV(v) ((iconv_t)((VALUE)(v) ^ -1))
#define ICONV2VALUE(c) ((VALUE)(c) ^ -1)

/* State shared between Iconv.iconv / Iconv.conv, the conversion loop and
   the failure constructor. */
struct iconv_env_t
{
    iconv_t cd;                         /* conversion descriptor in use */
    int argc;                           /* number of inputs not yet consumed */
    VALUE *argv;                        /* points at the input being converted */
    VALUE ret;                          /* accumulated result (array or string) */
    int toidx;                          /* encoding index of the destination, or negative */
    VALUE (*append)_((VALUE, VALUE));   /* appends one converted piece to ret */
};

/* Converter options; a member is Qundef when the option was not given. */
struct rb_iconv_opt_t
{
    VALUE transliterate;
    VALUE discard_ilseq;
};

static ID id_transliterate, id_discard_ilseq;

static VALUE rb_eIconvInvalidEncoding;
static VALUE rb_eIconvFailure;
static VALUE rb_eIconvIllegalSeq;
static VALUE rb_eIconvInvalidChar;
static VALUE rb_eIconvOutOfRange;
static VALUE rb_eIconvBrokenLibrary;

/* instance variable names holding the "success" and "failed" attributes
   of a failure exception */
static ID rb_success, rb_failed;
static VALUE iconv_fail _((VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, const char *mesg));
static VALUE iconv_fail_retry _((VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, const char *mesg));
static VALUE iconv_failure_initialize _((VALUE error, VALUE mesg, VALUE success, VALUE failed));
static VALUE iconv_failure_success _((VALUE self));
static VALUE iconv_failure_failed _((VALUE self));

static iconv_t iconv_create _((VALUE to, VALUE from, struct rb_iconv_opt_t *opt, int *idx));
static void iconv_dfree _((void *cd));
static VALUE iconv_free _((VALUE cd));
static VALUE iconv_try _((iconv_t cd, const char **inptr, size_t *inlen, char **outptr, size_t *outlen));
static VALUE rb_str_derive _((VALUE str, const char* ptr, long len));
static VALUE iconv_convert _((iconv_t cd, VALUE str, long start, long length, int toidx,
                              struct iconv_env_t* env));
static VALUE iconv_s_allocate _((VALUE klass));
static VALUE iconv_initialize _((int argc, VALUE *argv, VALUE self));
static VALUE iconv_s_open _((int argc, VALUE *argv, VALUE self));
static VALUE iconv_s_convert _((struct iconv_env_t* env));
static VALUE iconv_s_iconv _((int argc, VALUE *argv, VALUE self));
static VALUE iconv_init_state _((VALUE cd));
static VALUE iconv_finish _((VALUE self));
static VALUE iconv_iconv _((int argc, VALUE *argv, VALUE self));
static VALUE iconv_conv _((int argc, VALUE *argv, VALUE self));

/* hash consulted by map_charset() to translate encoding names */
static VALUE charset_map;

/*
 * Document-method: charset_map
 * call-seq: Iconv.charset_map
 *
 * Returns the map from canonical name to system dependent name.
00138 */ 00139 static VALUE 00140 charset_map_get(void) 00141 { 00142 return charset_map; 00143 } 00144 00145 static VALUE 00146 strip_glibc_option(VALUE *code) 00147 { 00148 VALUE val = StringValue(*code); 00149 const char *ptr = RSTRING_PTR(val), *pend = RSTRING_END(val); 00150 const char *slash = memchr(ptr, '/', pend - ptr); 00151 00152 if (slash && slash < pend - 1 && slash[1] == '/') { 00153 VALUE opt = rb_str_subseq(val, slash - ptr, pend - slash); 00154 val = rb_str_subseq(val, 0, slash - ptr); 00155 *code = val; 00156 return opt; 00157 } 00158 return 0; 00159 } 00160 00161 static char * 00162 map_charset(VALUE *code) 00163 { 00164 VALUE val = StringValue(*code); 00165 00166 if (RHASH_SIZE(charset_map)) { 00167 st_data_t data; 00168 VALUE key = rb_funcall2(val, rb_intern("downcase"), 0, 0); 00169 StringValuePtr(key); 00170 if (st_lookup(RHASH_TBL(charset_map), key, &data)) { 00171 *code = (VALUE)data; 00172 } 00173 } 00174 return StringValuePtr(*code); 00175 } 00176 00177 NORETURN(static void rb_iconv_sys_fail(const char *s)); 00178 static void 00179 rb_iconv_sys_fail(const char *s) 00180 { 00181 if (errno == 0) { 00182 rb_exc_raise(iconv_fail(rb_eIconvBrokenLibrary, Qnil, Qnil, NULL, s)); 00183 } 00184 rb_sys_fail(s); 00185 } 00186 00187 #define rb_sys_fail(s) rb_iconv_sys_fail(s) 00188 00189 static iconv_t 00190 iconv_create(VALUE to, VALUE from, struct rb_iconv_opt_t *opt, int *idx) 00191 { 00192 VALUE toopt = strip_glibc_option(&to); 00193 VALUE fromopt = strip_glibc_option(&from); 00194 VALUE toenc = 0, fromenc = 0; 00195 const char* tocode = map_charset(&to); 00196 const char* fromcode = map_charset(&from); 00197 iconv_t cd; 00198 int retry = 0; 00199 00200 *idx = rb_enc_find_index(tocode); 00201 00202 if (toopt) { 00203 toenc = rb_str_plus(to, toopt); 00204 tocode = RSTRING_PTR(toenc); 00205 } 00206 if (fromopt) { 00207 fromenc = rb_str_plus(from, fromopt); 00208 fromcode = RSTRING_PTR(fromenc); 00209 } 00210 while ((cd = iconv_open(tocode, 
fromcode)) == (iconv_t)-1) { 00211 int inval = 0; 00212 switch (errno) { 00213 case EMFILE: 00214 case ENFILE: 00215 case ENOMEM: 00216 if (!retry++) { 00217 rb_gc(); 00218 continue; 00219 } 00220 break; 00221 case EINVAL: 00222 retry = 0; 00223 inval = 1; 00224 if (toenc) { 00225 tocode = RSTRING_PTR(to); 00226 rb_str_resize(toenc, 0); 00227 toenc = 0; 00228 continue; 00229 } 00230 if (fromenc) { 00231 fromcode = RSTRING_PTR(from); 00232 rb_str_resize(fromenc, 0); 00233 fromenc = 0; 00234 continue; 00235 } 00236 break; 00237 } 00238 { 00239 const char *s = inval ? "invalid encoding " : "iconv"; 00240 volatile VALUE msg = rb_str_new(0, strlen(s) + RSTRING_LEN(to) + 00241 RSTRING_LEN(from) + 8); 00242 00243 sprintf(RSTRING_PTR(msg), "%s(\"%s\", \"%s\")", 00244 s, RSTRING_PTR(to), RSTRING_PTR(from)); 00245 s = RSTRING_PTR(msg); 00246 rb_str_set_len(msg, strlen(s)); 00247 if (!inval) rb_sys_fail(s); 00248 rb_exc_raise(iconv_fail(rb_eIconvInvalidEncoding, Qnil, 00249 rb_ary_new3(2, to, from), NULL, s)); 00250 } 00251 } 00252 00253 if (toopt || fromopt) { 00254 if (toopt && fromopt && RTEST(rb_str_equal(toopt, fromopt))) { 00255 fromopt = 0; 00256 } 00257 if (toopt && fromopt) { 00258 rb_warning("encoding option isn't portable: %s, %s", 00259 RSTRING_PTR(toopt) + 2, RSTRING_PTR(fromopt) + 2); 00260 } 00261 else { 00262 rb_warning("encoding option isn't portable: %s", 00263 (toopt ? 
RSTRING_PTR(toopt) : RSTRING_PTR(fromopt)) + 2); 00264 } 00265 } 00266 00267 if (opt) { 00268 #ifdef ICONV_SET_TRANSLITERATE 00269 if (opt->transliterate != Qundef) { 00270 int flag = RTEST(opt->transliterate); 00271 rb_warning("encoding option isn't portable: transliterate"); 00272 if (iconvctl(cd, ICONV_SET_TRANSLITERATE, (void *)&flag)) 00273 rb_sys_fail("ICONV_SET_TRANSLITERATE"); 00274 } 00275 #endif 00276 #ifdef ICONV_SET_DISCARD_ILSEQ 00277 if (opt->discard_ilseq != Qundef) { 00278 int flag = RTEST(opt->discard_ilseq); 00279 rb_warning("encoding option isn't portable: discard_ilseq"); 00280 if (iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, (void *)&flag)) 00281 rb_sys_fail("ICONV_SET_DISCARD_ILSEQ"); 00282 } 00283 #endif 00284 } 00285 00286 return cd; 00287 } 00288 00289 static void 00290 iconv_dfree(void *cd) 00291 { 00292 iconv_close(VALUE2ICONV(cd)); 00293 } 00294 00295 #define ICONV_FREE iconv_dfree 00296 00297 static VALUE 00298 iconv_free(VALUE cd) 00299 { 00300 if (cd && iconv_close(VALUE2ICONV(cd)) == -1) 00301 rb_sys_fail("iconv_close"); 00302 return Qnil; 00303 } 00304 00305 static VALUE 00306 check_iconv(VALUE obj) 00307 { 00308 Check_Type(obj, T_DATA); 00309 if (RDATA(obj)->dfree != ICONV_FREE) { 00310 rb_raise(rb_eArgError, "Iconv expected (%s)", rb_class2name(CLASS_OF(obj))); 00311 } 00312 return (VALUE)DATA_PTR(obj); 00313 } 00314 00315 static VALUE 00316 iconv_try(iconv_t cd, const char **inptr, size_t *inlen, char **outptr, size_t *outlen) 00317 { 00318 #ifdef ICONV_INPTR_CONST 00319 #define ICONV_INPTR_CAST 00320 #else 00321 #define ICONV_INPTR_CAST (char **) 00322 #endif 00323 size_t ret; 00324 00325 errno = 0; 00326 ret = iconv(cd, ICONV_INPTR_CAST inptr, inlen, outptr, outlen); 00327 if (ret == (size_t)-1) { 00328 if (!*inlen) 00329 return Qfalse; 00330 switch (errno) { 00331 case E2BIG: 00332 /* try the left in next loop */ 00333 break; 00334 case EILSEQ: 00335 return rb_eIconvIllegalSeq; 00336 case EINVAL: 00337 return rb_eIconvInvalidChar; 
00338 case 0: 00339 return rb_eIconvBrokenLibrary; 00340 default: 00341 rb_sys_fail("iconv"); 00342 } 00343 } 00344 else if (*inlen > 0) { 00345 /* something goes wrong */ 00346 return rb_eIconvIllegalSeq; 00347 } 00348 else if (ret) { 00349 return Qnil; /* conversion */ 00350 } 00351 return Qfalse; 00352 } 00353 00354 #define FAILED_MAXLEN 16 00355 00356 static VALUE 00357 iconv_failure_initialize(VALUE error, VALUE mesg, VALUE success, VALUE failed) 00358 { 00359 rb_call_super(1, &mesg); 00360 rb_ivar_set(error, rb_success, success); 00361 rb_ivar_set(error, rb_failed, failed); 00362 return error; 00363 } 00364 00365 static VALUE 00366 iconv_fail(VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, const char *mesg) 00367 { 00368 VALUE args[3]; 00369 00370 if (mesg && *mesg) { 00371 args[0] = rb_str_new2(mesg); 00372 } 00373 else if (TYPE(failed) != T_STRING || RSTRING_LEN(failed) < FAILED_MAXLEN) { 00374 args[0] = rb_inspect(failed); 00375 } 00376 else { 00377 args[0] = rb_inspect(rb_str_substr(failed, 0, FAILED_MAXLEN)); 00378 rb_str_cat2(args[0], "..."); 00379 } 00380 args[1] = success; 00381 args[2] = failed; 00382 if (env) { 00383 args[1] = env->append(rb_obj_dup(env->ret), success); 00384 if (env->argc > 0) { 00385 *(env->argv) = failed; 00386 args[2] = rb_ary_new4(env->argc, env->argv); 00387 } 00388 } 00389 return rb_class_new_instance(3, args, error); 00390 } 00391 00392 static VALUE 00393 iconv_fail_retry(VALUE error, VALUE success, VALUE failed, struct iconv_env_t* env, const char *mesg) 00394 { 00395 error = iconv_fail(error, success, failed, env, mesg); 00396 if (!rb_block_given_p()) rb_exc_raise(error); 00397 rb_set_errinfo(error); 00398 return rb_yield(failed); 00399 } 00400 00401 static VALUE 00402 rb_str_derive(VALUE str, const char* ptr, long len) 00403 { 00404 VALUE ret; 00405 00406 if (NIL_P(str)) 00407 return rb_str_new(ptr, len); 00408 if (RSTRING_PTR(str) + RSTRING_LEN(str) == ptr + len) 00409 ret = rb_str_subseq(str, ptr - 
RSTRING_PTR(str), len); 00410 else 00411 ret = rb_str_new(ptr, len); 00412 OBJ_INFECT(ret, str); 00413 return ret; 00414 } 00415 00416 static VALUE 00417 iconv_convert(iconv_t cd, VALUE str, long start, long length, int toidx, struct iconv_env_t* env) 00418 { 00419 VALUE ret = Qfalse; 00420 VALUE error = Qfalse; 00421 VALUE rescue; 00422 const char *inptr, *instart; 00423 size_t inlen; 00424 /* I believe ONE CHARACTER never exceed this. */ 00425 char buffer[BUFSIZ]; 00426 char *outptr; 00427 size_t outlen; 00428 00429 if (cd == (iconv_t)-1) 00430 rb_raise(rb_eArgError, "closed iconv"); 00431 00432 if (NIL_P(str)) { 00433 /* Reset output pointer or something. */ 00434 inptr = ""; 00435 inlen = 0; 00436 outptr = buffer; 00437 outlen = sizeof(buffer); 00438 error = iconv_try(cd, &inptr, &inlen, &outptr, &outlen); 00439 if (RTEST(error)) { 00440 unsigned int i; 00441 rescue = iconv_fail_retry(error, Qnil, Qnil, env, 0); 00442 if (TYPE(rescue) == T_ARRAY) { 00443 str = RARRAY_LEN(rescue) > 0 ? 
RARRAY_PTR(rescue)[0] : Qnil; 00444 } 00445 if (FIXNUM_P(str) && (i = FIX2INT(str)) <= 0xff) { 00446 char c = i; 00447 str = rb_str_new(&c, 1); 00448 } 00449 else if (!NIL_P(str)) { 00450 StringValue(str); 00451 } 00452 } 00453 00454 inptr = NULL; 00455 length = 0; 00456 } 00457 else { 00458 long slen; 00459 00460 StringValue(str); 00461 slen = RSTRING_LEN(str); 00462 inptr = RSTRING_PTR(str); 00463 00464 inptr += start; 00465 if (length < 0 || length > start + slen) 00466 length = slen - start; 00467 } 00468 instart = inptr; 00469 inlen = length; 00470 00471 do { 00472 char errmsg[50]; 00473 const char *tmpstart = inptr; 00474 outptr = buffer; 00475 outlen = sizeof(buffer); 00476 00477 errmsg[0] = 0; 00478 error = iconv_try(cd, &inptr, &inlen, &outptr, &outlen); 00479 00480 if ( 00481 #if SIGNEDNESS_OF_SIZE_T < 0 00482 0 <= outlen && 00483 #endif 00484 outlen <= sizeof(buffer)) { 00485 outlen = sizeof(buffer) - outlen; 00486 if (NIL_P(error) || /* something converted */ 00487 outlen > (size_t)(inptr - tmpstart) || /* input can't contain output */ 00488 (outlen < (size_t)(inptr - tmpstart) && inlen > 0) || /* something skipped */ 00489 memcmp(buffer, tmpstart, outlen)) /* something differs */ 00490 { 00491 if (NIL_P(str)) { 00492 ret = rb_str_new(buffer, outlen); 00493 if (toidx >= 0) rb_enc_associate_index(ret, toidx); 00494 } 00495 else { 00496 if (ret) { 00497 ret = rb_str_buf_cat(ret, instart, tmpstart - instart); 00498 } 00499 else { 00500 ret = rb_str_new(instart, tmpstart - instart); 00501 if (toidx >= 0) rb_enc_associate_index(ret, toidx); 00502 OBJ_INFECT(ret, str); 00503 } 00504 ret = rb_str_buf_cat(ret, buffer, outlen); 00505 instart = inptr; 00506 } 00507 } 00508 else if (!inlen) { 00509 inptr = tmpstart + outlen; 00510 } 00511 } 00512 else { 00513 /* Some iconv() have a bug, return *outlen out of range */ 00514 sprintf(errmsg, "bug?(output length = %ld)", (long)(sizeof(buffer) - outlen)); 00515 error = rb_eIconvOutOfRange; 00516 } 00517 00518 if 
(RTEST(error)) { 00519 long len = 0; 00520 00521 if (!ret) { 00522 ret = rb_str_derive(str, instart, inptr - instart); 00523 if (toidx >= 0) rb_enc_associate_index(ret, toidx); 00524 } 00525 else if (inptr > instart) { 00526 rb_str_cat(ret, instart, inptr - instart); 00527 } 00528 str = rb_str_derive(str, inptr, inlen); 00529 rescue = iconv_fail_retry(error, ret, str, env, errmsg); 00530 if (TYPE(rescue) == T_ARRAY) { 00531 if ((len = RARRAY_LEN(rescue)) > 0) 00532 rb_str_concat(ret, RARRAY_PTR(rescue)[0]); 00533 if (len > 1 && !NIL_P(str = RARRAY_PTR(rescue)[1])) { 00534 StringValue(str); 00535 inlen = length = RSTRING_LEN(str); 00536 instart = inptr = RSTRING_PTR(str); 00537 continue; 00538 } 00539 } 00540 else if (!NIL_P(rescue)) { 00541 rb_str_concat(ret, rescue); 00542 } 00543 break; 00544 } 00545 } while (inlen > 0); 00546 00547 if (!ret) { 00548 ret = rb_str_derive(str, instart, inptr - instart); 00549 if (toidx >= 0) rb_enc_associate_index(ret, toidx); 00550 } 00551 else if (inptr > instart) { 00552 rb_str_cat(ret, instart, inptr - instart); 00553 } 00554 return ret; 00555 } 00556 00557 static VALUE 00558 iconv_s_allocate(VALUE klass) 00559 { 00560 return Data_Wrap_Struct(klass, 0, ICONV_FREE, 0); 00561 } 00562 00563 static VALUE 00564 get_iconv_opt_i(VALUE i, VALUE arg) 00565 { 00566 struct rb_iconv_opt_t *opt = (struct rb_iconv_opt_t *)arg; 00567 VALUE name, val; 00568 00569 (void)opt; 00570 i = rb_Array(i); 00571 name = rb_ary_entry(i, 0); 00572 val = rb_ary_entry(i, 1); 00573 do { 00574 if (SYMBOL_P(name)) { 00575 ID id = SYM2ID(name); 00576 if (id == id_transliterate) { 00577 #ifdef ICONV_SET_TRANSLITERATE 00578 opt->transliterate = val; 00579 #else 00580 rb_notimplement(); 00581 #endif 00582 break; 00583 } 00584 if (id == id_discard_ilseq) { 00585 #ifdef ICONV_SET_DISCARD_ILSEQ 00586 opt->discard_ilseq = val; 00587 #else 00588 rb_notimplement(); 00589 #endif 00590 break; 00591 } 00592 } 00593 else { 00594 const char *s = StringValueCStr(name); 00595 
if (strcmp(s, "transliterate") == 0) { 00596 #ifdef ICONV_SET_TRANSLITERATE 00597 opt->transliterate = val; 00598 #else 00599 rb_notimplement(); 00600 #endif 00601 break; 00602 } 00603 if (strcmp(s, "discard_ilseq") == 0) { 00604 #ifdef ICONV_SET_DISCARD_ILSEQ 00605 opt->discard_ilseq = val; 00606 #else 00607 rb_notimplement(); 00608 #endif 00609 break; 00610 } 00611 } 00612 name = rb_inspect(name); 00613 rb_raise(rb_eArgError, "unknown option - %s", StringValueCStr(name)); 00614 } while (0); 00615 return Qnil; 00616 } 00617 00618 static void 00619 get_iconv_opt(struct rb_iconv_opt_t *opt, VALUE options) 00620 { 00621 opt->transliterate = Qundef; 00622 opt->discard_ilseq = Qundef; 00623 if (!NIL_P(options)) { 00624 rb_block_call(options, rb_intern("each"), 0, 0, get_iconv_opt_i, (VALUE)opt); 00625 } 00626 } 00627 00628 #define iconv_ctl(self, func, val) (\ 00629 iconvctl(VALUE2ICONV(check_iconv(self)), func, (void *)&(val)) ? \ 00630 rb_sys_fail(#func) : (void)0) 00631 00632 /* 00633 * Document-method: new 00634 * call-seq: Iconv.new(to, from, [options]) 00635 * 00636 * Creates new code converter from a coding-system designated with +from+ 00637 * to another one designated with +to+. 
00638 * 00639 * === Parameters 00640 * 00641 * +to+:: encoding name for destination 00642 * +from+:: encoding name for source 00643 * +options+:: options for converter 00644 * 00645 * === Exceptions 00646 * 00647 * TypeError:: if +to+ or +from+ aren't String 00648 * InvalidEncoding:: if designated converter couldn't find out 00649 * SystemCallError:: if <tt>iconv_open(3)</tt> fails 00650 */ 00651 static VALUE 00652 iconv_initialize(int argc, VALUE *argv, VALUE self) 00653 { 00654 VALUE to, from, options; 00655 struct rb_iconv_opt_t opt; 00656 int idx; 00657 00658 rb_scan_args(argc, argv, "21", &to, &from, &options); 00659 get_iconv_opt(&opt, options); 00660 iconv_free(check_iconv(self)); 00661 DATA_PTR(self) = NULL; 00662 DATA_PTR(self) = (void *)ICONV2VALUE(iconv_create(to, from, &opt, &idx)); 00663 if (idx >= 0) ENCODING_SET(self, idx); 00664 return self; 00665 } 00666 00667 /* 00668 * Document-method: open 00669 * call-seq: Iconv.open(to, from) { |iconv| ... } 00670 * 00671 * Equivalent to Iconv.new except that when it is called with a block, it 00672 * yields with the new instance and closes it, and returns the result which 00673 * returned from the block. 
00674 */ 00675 static VALUE 00676 iconv_s_open(int argc, VALUE *argv, VALUE self) 00677 { 00678 VALUE to, from, options, cd; 00679 struct rb_iconv_opt_t opt; 00680 int idx; 00681 00682 rb_scan_args(argc, argv, "21", &to, &from, &options); 00683 get_iconv_opt(&opt, options); 00684 cd = ICONV2VALUE(iconv_create(to, from, &opt, &idx)); 00685 00686 self = Data_Wrap_Struct(self, NULL, ICONV_FREE, (void *)cd); 00687 if (idx >= 0) ENCODING_SET(self, idx); 00688 00689 if (rb_block_given_p()) { 00690 return rb_ensure(rb_yield, self, (VALUE(*)())iconv_finish, self); 00691 } 00692 else { 00693 return self; 00694 } 00695 } 00696 00697 static VALUE 00698 iconv_s_convert(struct iconv_env_t* env) 00699 { 00700 VALUE last = 0; 00701 00702 for (; env->argc > 0; --env->argc, ++env->argv) { 00703 VALUE s = iconv_convert(env->cd, last = *(env->argv), 00704 0, -1, env->toidx, env); 00705 env->append(env->ret, s); 00706 } 00707 00708 if (!NIL_P(last)) { 00709 VALUE s = iconv_convert(env->cd, Qnil, 0, 0, env->toidx, env); 00710 if (RSTRING_LEN(s)) 00711 env->append(env->ret, s); 00712 } 00713 00714 return env->ret; 00715 } 00716 00717 /* 00718 * Document-method: Iconv::iconv 00719 * call-seq: Iconv.iconv(to, from, *strs) 00720 * 00721 * Shorthand for 00722 * Iconv.open(to, from) { |cd| 00723 * (strs + [nil]).collect { |s| cd.iconv(s) } 00724 * } 00725 * 00726 * === Parameters 00727 * 00728 * <tt>to, from</tt>:: see Iconv.new 00729 * <tt>strs</tt>:: strings to be converted 00730 * 00731 * === Exceptions 00732 * 00733 * Exceptions thrown by Iconv.new, Iconv.open and Iconv#iconv. 
00734 */ 00735 static VALUE 00736 iconv_s_iconv(int argc, VALUE *argv, VALUE self) 00737 { 00738 struct iconv_env_t arg; 00739 00740 if (argc < 2) /* needs `to' and `from' arguments at least */ 00741 rb_raise(rb_eArgError, "wrong number of arguments (%d for %d)", argc, 2); 00742 00743 arg.argc = argc -= 2; 00744 arg.argv = argv + 2; 00745 arg.append = rb_ary_push; 00746 arg.ret = rb_ary_new2(argc); 00747 arg.cd = iconv_create(argv[0], argv[1], NULL, &arg.toidx); 00748 return rb_ensure(iconv_s_convert, (VALUE)&arg, iconv_free, ICONV2VALUE(arg.cd)); 00749 } 00750 00751 /* 00752 * Document-method: Iconv::conv 00753 * call-seq: Iconv.conv(to, from, str) 00754 * 00755 * Shorthand for 00756 * Iconv.iconv(to, from, str).join 00757 * See Iconv.iconv. 00758 */ 00759 static VALUE 00760 iconv_s_conv(VALUE self, VALUE to, VALUE from, VALUE str) 00761 { 00762 struct iconv_env_t arg; 00763 00764 arg.argc = 1; 00765 arg.argv = &str; 00766 arg.append = rb_str_append; 00767 arg.ret = rb_str_new(0, 0); 00768 arg.cd = iconv_create(to, from, NULL, &arg.toidx); 00769 return rb_ensure(iconv_s_convert, (VALUE)&arg, iconv_free, ICONV2VALUE(arg.cd)); 00770 } 00771 00772 /* 00773 * Document-method: list 00774 * call-seq: Iconv.list {|*aliases| ... } 00775 * 00776 * Iterates each alias sets. 
00777 */ 00778 00779 #ifdef HAVE_ICONVLIST 00780 struct iconv_name_list 00781 { 00782 unsigned int namescount; 00783 const char *const *names; 00784 VALUE array; 00785 }; 00786 00787 static VALUE 00788 list_iconv_i(VALUE ptr) 00789 { 00790 struct iconv_name_list *p = (struct iconv_name_list *)ptr; 00791 unsigned int i, namescount = p->namescount; 00792 const char *const *names = p->names; 00793 VALUE ary = rb_ary_new2(namescount); 00794 00795 for (i = 0; i < namescount; i++) { 00796 rb_ary_push(ary, rb_str_new2(names[i])); 00797 } 00798 if (p->array) { 00799 return rb_ary_push(p->array, ary); 00800 } 00801 return rb_yield(ary); 00802 } 00803 00804 static int 00805 list_iconv(unsigned int namescount, const char *const *names, void *data) 00806 { 00807 int *state = data; 00808 struct iconv_name_list list; 00809 00810 list.namescount = namescount; 00811 list.names = names; 00812 list.array = ((VALUE *)data)[1]; 00813 rb_protect(list_iconv_i, (VALUE)&list, state); 00814 return *state; 00815 } 00816 #endif 00817 00818 #if defined(HAVE_ICONVLIST) || defined(HAVE___ICONV_FREE_LIST) 00819 static VALUE 00820 iconv_s_list(void) 00821 { 00822 #ifdef HAVE_ICONVLIST 00823 int state; 00824 VALUE args[2]; 00825 00826 args[1] = rb_block_given_p() ? 
0 : rb_ary_new(); 00827 iconvlist(list_iconv, args); 00828 state = *(int *)args; 00829 if (state) rb_jump_tag(state); 00830 if (args[1]) return args[1]; 00831 #elif defined(HAVE___ICONV_FREE_LIST) 00832 char **list; 00833 size_t sz, i; 00834 VALUE ary; 00835 00836 if (__iconv_get_list(&list, &sz)) return Qnil; 00837 00838 ary = rb_ary_new2(sz); 00839 for (i = 0; i < sz; i++) { 00840 rb_ary_push(ary, rb_str_new2(list[i])); 00841 } 00842 __iconv_free_list(list, sz); 00843 00844 if (!rb_block_given_p()) 00845 return ary; 00846 for (i = 0; i < RARRAY_LEN(ary); i++) { 00847 rb_yield(RARRAY_PTR(ary)[i]); 00848 } 00849 #endif 00850 return Qnil; 00851 } 00852 #else 00853 #define iconv_s_list rb_f_notimplement 00854 #endif 00855 00856 /* 00857 * Document-method: close 00858 * 00859 * Finishes conversion. 00860 * 00861 * After calling this, calling Iconv#iconv will cause an exception, but 00862 * multiple calls of #close are guaranteed to end successfully. 00863 * 00864 * Returns a string containing the byte sequence to change the output buffer to 00865 * its initial shift state. 00866 */ 00867 static VALUE 00868 iconv_init_state(VALUE self) 00869 { 00870 iconv_t cd = VALUE2ICONV((VALUE)DATA_PTR(self)); 00871 DATA_PTR(self) = NULL; 00872 return iconv_convert(cd, Qnil, 0, 0, ENCODING_GET(self), NULL); 00873 } 00874 00875 static VALUE 00876 iconv_finish(VALUE self) 00877 { 00878 VALUE cd = check_iconv(self); 00879 00880 if (!cd) return Qnil; 00881 return rb_ensure(iconv_init_state, self, iconv_free, cd); 00882 } 00883 00884 /* 00885 * Document-method: Iconv#iconv 00886 * call-seq: iconv(str, start=0, length=-1) 00887 * 00888 * Converts string and returns the result. 00889 * * If +str+ is a String, converts <tt>str[start, length]</tt> and returns the converted string. 00890 * * If +str+ is +nil+, places converter itself into initial shift state and 00891 * just returns a string containing the byte sequence to change the output 00892 * buffer to its initial shift state. 
00893 * * Otherwise, raises an exception. 00894 * 00895 * === Parameters 00896 * 00897 * str:: string to be converted, or nil 00898 * start:: starting offset 00899 * length:: conversion length; nil or -1 means whole the string from start 00900 * 00901 * === Exceptions 00902 * 00903 * * IconvIllegalSequence 00904 * * IconvInvalidCharacter 00905 * * IconvOutOfRange 00906 * 00907 * === Examples 00908 * 00909 * See the Iconv documentation. 00910 */ 00911 static VALUE 00912 iconv_iconv(int argc, VALUE *argv, VALUE self) 00913 { 00914 VALUE str, n1, n2; 00915 VALUE cd = check_iconv(self); 00916 long start = 0, length = 0, slen = 0; 00917 00918 rb_scan_args(argc, argv, "12", &str, &n1, &n2); 00919 if (!NIL_P(str)) { 00920 VALUE n = rb_str_length(StringValue(str)); 00921 slen = NUM2LONG(n); 00922 } 00923 if (argc != 2 || !RTEST(rb_range_beg_len(n1, &start, &length, slen, 0))) { 00924 if (NIL_P(n1) || ((start = NUM2LONG(n1)) < 0 ? (start += slen) >= 0 : start < slen)) { 00925 length = NIL_P(n2) ? -1 : NUM2LONG(n2); 00926 } 00927 } 00928 if (start > 0 || length > 0) { 00929 rb_encoding *enc = rb_enc_get(str); 00930 const char *s = RSTRING_PTR(str), *e = s + RSTRING_LEN(str); 00931 const char *ps = s; 00932 if (start > 0) { 00933 start = (ps = rb_enc_nth(s, e, start, enc)) - s; 00934 } 00935 if (length > 0) { 00936 length = rb_enc_nth(ps, e, length, enc) - ps; 00937 } 00938 } 00939 00940 return iconv_convert(VALUE2ICONV(cd), str, start, length, ENCODING_GET(self), NULL); 00941 } 00942 00943 /* 00944 * Document-method: conv 00945 * call-seq: conv(str...) 
00946 * 00947 * Equivalent to 00948 * 00949 * iconv(nil, str..., nil).join 00950 */ 00951 static VALUE 00952 iconv_conv(int argc, VALUE *argv, VALUE self) 00953 { 00954 iconv_t cd = VALUE2ICONV(check_iconv(self)); 00955 VALUE str, s; 00956 int toidx = ENCODING_GET(self); 00957 00958 str = iconv_convert(cd, Qnil, 0, 0, toidx, NULL); 00959 if (argc > 0) { 00960 do { 00961 s = iconv_convert(cd, *argv++, 0, -1, toidx, NULL); 00962 if (RSTRING_LEN(s)) 00963 rb_str_buf_append(str, s); 00964 } while (--argc); 00965 s = iconv_convert(cd, Qnil, 0, 0, toidx, NULL); 00966 if (RSTRING_LEN(s)) 00967 rb_str_buf_append(str, s); 00968 } 00969 00970 return str; 00971 } 00972 00973 #ifdef ICONV_TRIVIALP 00974 /* 00975 * Document-method: trivial? 00976 * call-seq: trivial? 00977 * 00978 * Returns trivial flag. 00979 */ 00980 static VALUE 00981 iconv_trivialp(VALUE self) 00982 { 00983 int trivial = 0; 00984 iconv_ctl(self, ICONV_TRIVIALP, trivial); 00985 if (trivial) return Qtrue; 00986 return Qfalse; 00987 } 00988 #else 00989 #define iconv_trivialp rb_f_notimplement 00990 #endif 00991 00992 #ifdef ICONV_GET_TRANSLITERATE 00993 /* 00994 * Document-method: transliterate? 00995 * call-seq: transliterate? 00996 * 00997 * Returns transliterate flag. 00998 */ 00999 static VALUE 01000 iconv_get_transliterate(VALUE self) 01001 { 01002 int trans = 0; 01003 iconv_ctl(self, ICONV_GET_TRANSLITERATE, trans); 01004 if (trans) return Qtrue; 01005 return Qfalse; 01006 } 01007 #else 01008 #define iconv_get_transliterate rb_f_notimplement 01009 #endif 01010 01011 #ifdef ICONV_SET_TRANSLITERATE 01012 /* 01013 * Document-method: transliterate= 01014 * call-seq: cd.transliterate = flag 01015 * 01016 * Sets transliterate flag. 
01017 */ 01018 static VALUE 01019 iconv_set_transliterate(VALUE self, VALUE transliterate) 01020 { 01021 int trans = RTEST(transliterate); 01022 iconv_ctl(self, ICONV_SET_TRANSLITERATE, trans); 01023 return self; 01024 } 01025 #else 01026 #define iconv_set_transliterate rb_f_notimplement 01027 #endif 01028 01029 #ifdef ICONV_GET_DISCARD_ILSEQ 01030 /* 01031 * Document-method: discard_ilseq? 01032 * call-seq: discard_ilseq? 01033 * 01034 * Returns discard_ilseq flag. 01035 */ 01036 static VALUE 01037 iconv_get_discard_ilseq(VALUE self) 01038 { 01039 int dis = 0; 01040 iconv_ctl(self, ICONV_GET_DISCARD_ILSEQ, dis); 01041 if (dis) return Qtrue; 01042 return Qfalse; 01043 } 01044 #else 01045 #define iconv_get_discard_ilseq rb_f_notimplement 01046 #endif 01047 01048 #ifdef ICONV_SET_DISCARD_ILSEQ 01049 /* 01050 * Document-method: discard_ilseq= 01051 * call-seq: cd.discard_ilseq = flag 01052 * 01053 * Sets discard_ilseq flag. 01054 */ 01055 static VALUE 01056 iconv_set_discard_ilseq(VALUE self, VALUE discard_ilseq) 01057 { 01058 int dis = RTEST(discard_ilseq); 01059 iconv_ctl(self, ICONV_SET_DISCARD_ILSEQ, dis); 01060 return self; 01061 } 01062 #else 01063 #define iconv_set_discard_ilseq rb_f_notimplement 01064 #endif 01065 01066 /* 01067 * Document-method: ctlmethods 01068 * call-seq: Iconv.ctlmethods => array 01069 * 01070 * Returns available iconvctl() method list. 
01071 */ 01072 static VALUE 01073 iconv_s_ctlmethods(VALUE klass) 01074 { 01075 VALUE ary = rb_ary_new(); 01076 #ifdef ICONV_TRIVIALP 01077 rb_ary_push(ary, ID2SYM(rb_intern("trivial?"))); 01078 #endif 01079 #ifdef ICONV_GET_TRANSLITERATE 01080 rb_ary_push(ary, ID2SYM(rb_intern("transliterate?"))); 01081 #endif 01082 #ifdef ICONV_SET_TRANSLITERATE 01083 rb_ary_push(ary, ID2SYM(rb_intern("transliterate="))); 01084 #endif 01085 #ifdef ICONV_GET_DISCARD_ILSEQ 01086 rb_ary_push(ary, ID2SYM(rb_intern("discard_ilseq?"))); 01087 #endif 01088 #ifdef ICONV_SET_DISCARD_ILSEQ 01089 rb_ary_push(ary, ID2SYM(rb_intern("discard_ilseq="))); 01090 #endif 01091 return ary; 01092 } 01093 01094 /* 01095 * Document-class: Iconv::Failure 01096 * 01097 * Base attributes for Iconv exceptions. 01098 */ 01099 01100 /* 01101 * Document-method: success 01102 * call-seq: success 01103 * 01104 * Returns string(s) translated successfully until the exception occurred. 01105 * * In the case of failure occurred within Iconv.iconv, returned 01106 * value is an array of strings translated successfully preceding 01107 * failure and the last element is string on the way. 01108 */ 01109 static VALUE 01110 iconv_failure_success(VALUE self) 01111 { 01112 return rb_attr_get(self, rb_success); 01113 } 01114 01115 /* 01116 * Document-method: failed 01117 * call-seq: failed 01118 * 01119 * Returns substring of the original string passed to Iconv that starts at the 01120 * character caused the exception. 
01121 */ 01122 static VALUE 01123 iconv_failure_failed(VALUE self) 01124 { 01125 return rb_attr_get(self, rb_failed); 01126 } 01127 01128 /* 01129 * Document-method: inspect 01130 * call-seq: inspect 01131 * 01132 * Returns inspected string like as: #<_class_: _success_, _failed_> 01133 */ 01134 static VALUE 01135 iconv_failure_inspect(VALUE self) 01136 { 01137 const char *cname = rb_class2name(CLASS_OF(self)); 01138 VALUE success = rb_attr_get(self, rb_success); 01139 VALUE failed = rb_attr_get(self, rb_failed); 01140 VALUE str = rb_str_buf_cat2(rb_str_new2("#<"), cname); 01141 str = rb_str_buf_cat(str, ": ", 2); 01142 str = rb_str_buf_append(str, rb_inspect(success)); 01143 str = rb_str_buf_cat(str, ", ", 2); 01144 str = rb_str_buf_append(str, rb_inspect(failed)); 01145 return rb_str_buf_cat(str, ">", 1); 01146 } 01147 01148 /* 01149 * Document-class: Iconv::InvalidEncoding 01150 * 01151 * Requested coding-system is not available on this system. 01152 */ 01153 01154 /* 01155 * Document-class: Iconv::IllegalSequence 01156 * 01157 * Input conversion stopped due to an input byte that does not belong to 01158 * the input codeset, or the output codeset does not contain the 01159 * character. 01160 */ 01161 01162 /* 01163 * Document-class: Iconv::InvalidCharacter 01164 * 01165 * Input conversion stopped due to an incomplete character or shift 01166 * sequence at the end of the input buffer. 01167 */ 01168 01169 /* 01170 * Document-class: Iconv::OutOfRange 01171 * 01172 * Iconv library internal error. Must not occur. 01173 */ 01174 01175 /* 01176 * Document-class: Iconv::BrokenLibrary 01177 * 01178 * Detected a bug of underlying iconv(3) libray. 
01179 * * returns an error without setting errno properly 01180 */ 01181 01182 static void 01183 warn_deprecated(void) 01184 { 01185 static const char message[] = 01186 ": iconv will be deprecated in the future, use String#encode instead.\n"; 01187 VALUE msg = Qnil, caller = rb_make_backtrace(); 01188 long i; 01189 01190 for (i = 1; i < RARRAY_LEN(caller); ++i) { 01191 VALUE s = RARRAY_PTR(caller)[i]; 01192 if (strncmp(RSTRING_PTR(s), "<internal:", 10) != 0) { 01193 msg = s; 01194 break; 01195 } 01196 } 01197 if (NIL_P(msg)) { 01198 msg = rb_str_new_cstr(message + 2); 01199 } 01200 else { 01201 rb_str_cat(msg, message, sizeof(message) - 1); 01202 } 01203 rb_io_puts(1, &msg, rb_stderr); 01204 } 01205 01206 void 01207 Init_iconv(void) 01208 { 01209 VALUE rb_cIconv = rb_define_class("Iconv", rb_cData); 01210 01211 if (!NIL_P(ruby_verbose)) { 01212 warn_deprecated(); 01213 } 01214 rb_define_alloc_func(rb_cIconv, iconv_s_allocate); 01215 rb_define_singleton_method(rb_cIconv, "open", iconv_s_open, -1); 01216 rb_define_singleton_method(rb_cIconv, "iconv", iconv_s_iconv, -1); 01217 rb_define_singleton_method(rb_cIconv, "conv", iconv_s_conv, 3); 01218 rb_define_singleton_method(rb_cIconv, "list", iconv_s_list, 0); 01219 rb_define_singleton_method(rb_cIconv, "ctlmethods", iconv_s_ctlmethods, 0); 01220 rb_define_method(rb_cIconv, "initialize", iconv_initialize, -1); 01221 rb_define_method(rb_cIconv, "close", iconv_finish, 0); 01222 rb_define_method(rb_cIconv, "iconv", iconv_iconv, -1); 01223 rb_define_method(rb_cIconv, "conv", iconv_conv, -1); 01224 rb_define_method(rb_cIconv, "trivial?", iconv_trivialp, 0); 01225 rb_define_method(rb_cIconv, "transliterate?", iconv_get_transliterate, 0); 01226 rb_define_method(rb_cIconv, "transliterate=", iconv_set_transliterate, 1); 01227 rb_define_method(rb_cIconv, "discard_ilseq?", iconv_get_discard_ilseq, 0); 01228 rb_define_method(rb_cIconv, "discard_ilseq=", iconv_set_discard_ilseq, 1); 01229 01230 rb_eIconvFailure = 
rb_define_module_under(rb_cIconv, "Failure"); 01231 rb_define_method(rb_eIconvFailure, "initialize", iconv_failure_initialize, 3); 01232 rb_define_method(rb_eIconvFailure, "success", iconv_failure_success, 0); 01233 rb_define_method(rb_eIconvFailure, "failed", iconv_failure_failed, 0); 01234 rb_define_method(rb_eIconvFailure, "inspect", iconv_failure_inspect, 0); 01235 01236 rb_eIconvInvalidEncoding = rb_define_class_under(rb_cIconv, "InvalidEncoding", rb_eArgError); 01237 rb_eIconvIllegalSeq = rb_define_class_under(rb_cIconv, "IllegalSequence", rb_eArgError); 01238 rb_eIconvInvalidChar = rb_define_class_under(rb_cIconv, "InvalidCharacter", rb_eArgError); 01239 rb_eIconvOutOfRange = rb_define_class_under(rb_cIconv, "OutOfRange", rb_eRuntimeError); 01240 rb_eIconvBrokenLibrary = rb_define_class_under(rb_cIconv, "BrokenLibrary", rb_eRuntimeError); 01241 rb_include_module(rb_eIconvInvalidEncoding, rb_eIconvFailure); 01242 rb_include_module(rb_eIconvIllegalSeq, rb_eIconvFailure); 01243 rb_include_module(rb_eIconvInvalidChar, rb_eIconvFailure); 01244 rb_include_module(rb_eIconvOutOfRange, rb_eIconvFailure); 01245 rb_include_module(rb_eIconvBrokenLibrary, rb_eIconvFailure); 01246 01247 rb_success = rb_intern("success"); 01248 rb_failed = rb_intern("failed"); 01249 id_transliterate = rb_intern("transliterate"); 01250 id_discard_ilseq = rb_intern("discard_ilseq"); 01251 01252 rb_gc_register_address(&charset_map); 01253 charset_map = rb_hash_new(); 01254 rb_define_singleton_method(rb_cIconv, "charset_map", charset_map_get, 0); 01255 } 01256 01257