Ruby 1.9.3p327 (2012-11-10 revision 37606)
|
00001 /********************************************************************** 00002 00003 marshal.c - 00004 00005 $Author: drbrain $ 00006 created at: Thu Apr 27 16:30:01 JST 1995 00007 00008 Copyright (C) 1993-2007 Yukihiro Matsumoto 00009 00010 **********************************************************************/ 00011 00012 #include "ruby/ruby.h" 00013 #include "ruby/io.h" 00014 #include "ruby/st.h" 00015 #include "ruby/util.h" 00016 #include "ruby/encoding.h" 00017 #include "internal.h" 00018 00019 #include <math.h> 00020 #ifdef HAVE_FLOAT_H 00021 #include <float.h> 00022 #endif 00023 #ifdef HAVE_IEEEFP_H 00024 #include <ieeefp.h> 00025 #endif 00026 00027 #define BITSPERSHORT (2*CHAR_BIT) 00028 #define SHORTMASK ((1<<BITSPERSHORT)-1) 00029 #define SHORTDN(x) RSHIFT((x),BITSPERSHORT) 00030 00031 #if SIZEOF_SHORT == SIZEOF_BDIGITS 00032 #define SHORTLEN(x) (x) 00033 #else 00034 static long 00035 shortlen(long len, BDIGIT *ds) 00036 { 00037 BDIGIT num; 00038 int offset = 0; 00039 00040 num = ds[len-1]; 00041 while (num) { 00042 num = SHORTDN(num); 00043 offset++; 00044 } 00045 return (len - 1)*sizeof(BDIGIT)/2 + offset; 00046 } 00047 #define SHORTLEN(x) shortlen((x),d) 00048 #endif 00049 00050 #define MARSHAL_MAJOR 4 00051 #define MARSHAL_MINOR 8 00052 00053 #define TYPE_NIL '0' 00054 #define TYPE_TRUE 'T' 00055 #define TYPE_FALSE 'F' 00056 #define TYPE_FIXNUM 'i' 00057 00058 #define TYPE_EXTENDED 'e' 00059 #define TYPE_UCLASS 'C' 00060 #define TYPE_OBJECT 'o' 00061 #define TYPE_DATA 'd' 00062 #define TYPE_USERDEF 'u' 00063 #define TYPE_USRMARSHAL 'U' 00064 #define TYPE_FLOAT 'f' 00065 #define TYPE_BIGNUM 'l' 00066 #define TYPE_STRING '"' 00067 #define TYPE_REGEXP '/' 00068 #define TYPE_ARRAY '[' 00069 #define TYPE_HASH '{' 00070 #define TYPE_HASH_DEF '}' 00071 #define TYPE_STRUCT 'S' 00072 #define TYPE_MODULE_OLD 'M' 00073 #define TYPE_CLASS 'c' 00074 #define TYPE_MODULE 'm' 00075 00076 #define TYPE_SYMBOL ':' 00077 #define TYPE_SYMLINK ';' 00078 00079 #define 
TYPE_IVAR 'I' 00080 #define TYPE_LINK '@' 00081 00082 static ID s_dump, s_load, s_mdump, s_mload; 00083 static ID s_dump_data, s_load_data, s_alloc, s_call; 00084 static ID s_getbyte, s_read, s_write, s_binmode; 00085 00086 typedef struct { 00087 VALUE newclass; 00088 VALUE oldclass; 00089 VALUE (*dumper)(VALUE); 00090 VALUE (*loader)(VALUE, VALUE); 00091 } marshal_compat_t; 00092 00093 static st_table *compat_allocator_tbl; 00094 static VALUE compat_allocator_tbl_wrapper; 00095 00096 static int 00097 mark_marshal_compat_i(st_data_t key, st_data_t value) 00098 { 00099 marshal_compat_t *p = (marshal_compat_t *)value; 00100 rb_gc_mark(p->newclass); 00101 rb_gc_mark(p->oldclass); 00102 return ST_CONTINUE; 00103 } 00104 00105 static void 00106 mark_marshal_compat_t(void *tbl) 00107 { 00108 if (!tbl) return; 00109 st_foreach(tbl, mark_marshal_compat_i, 0); 00110 } 00111 00112 void 00113 rb_marshal_define_compat(VALUE newclass, VALUE oldclass, VALUE (*dumper)(VALUE), VALUE (*loader)(VALUE, VALUE)) 00114 { 00115 marshal_compat_t *compat; 00116 rb_alloc_func_t allocator = rb_get_alloc_func(newclass); 00117 00118 if (!allocator) { 00119 rb_raise(rb_eTypeError, "no allocator"); 00120 } 00121 00122 compat = ALLOC(marshal_compat_t); 00123 compat->newclass = Qnil; 00124 compat->oldclass = Qnil; 00125 compat->newclass = newclass; 00126 compat->oldclass = oldclass; 00127 compat->dumper = dumper; 00128 compat->loader = loader; 00129 00130 st_insert(compat_allocator_tbl, (st_data_t)allocator, (st_data_t)compat); 00131 } 00132 00133 #define MARSHAL_INFECTION (FL_TAINT|FL_UNTRUSTED) 00134 typedef char ruby_check_marshal_viral_flags[MARSHAL_INFECTION == (int)MARSHAL_INFECTION ? 
1 : -1]; 00135 00136 struct dump_arg { 00137 VALUE str, dest; 00138 st_table *symbols; 00139 st_table *data; 00140 st_table *compat_tbl; 00141 st_table *encodings; 00142 int infection; 00143 }; 00144 00145 struct dump_call_arg { 00146 VALUE obj; 00147 struct dump_arg *arg; 00148 int limit; 00149 }; 00150 00151 static void 00152 check_dump_arg(struct dump_arg *arg, ID sym) 00153 { 00154 if (!arg->symbols) { 00155 rb_raise(rb_eRuntimeError, "Marshal.dump reentered at %s", 00156 rb_id2name(sym)); 00157 } 00158 } 00159 00160 static void clear_dump_arg(struct dump_arg *arg); 00161 00162 static void 00163 mark_dump_arg(void *ptr) 00164 { 00165 struct dump_arg *p = ptr; 00166 if (!p->symbols) 00167 return; 00168 rb_mark_set(p->data); 00169 rb_mark_hash(p->compat_tbl); 00170 rb_gc_mark(p->str); 00171 } 00172 00173 static void 00174 free_dump_arg(void *ptr) 00175 { 00176 clear_dump_arg(ptr); 00177 xfree(ptr); 00178 } 00179 00180 static size_t 00181 memsize_dump_arg(const void *ptr) 00182 { 00183 return ptr ? sizeof(struct dump_arg) : 0; 00184 } 00185 00186 static const rb_data_type_t dump_arg_data = { 00187 "dump_arg", 00188 {mark_dump_arg, free_dump_arg, memsize_dump_arg,}, 00189 }; 00190 00191 static const char * 00192 must_not_be_anonymous(const char *type, VALUE path) 00193 { 00194 char *n = RSTRING_PTR(path); 00195 00196 if (!rb_enc_asciicompat(rb_enc_get(path))) { 00197 /* cannot occur? */ 00198 rb_raise(rb_eTypeError, "can't dump non-ascii %s name", type); 00199 } 00200 if (n[0] == '#') { 00201 rb_raise(rb_eTypeError, "can't dump anonymous %s %.*s", type, 00202 (int)RSTRING_LEN(path), n); 00203 } 00204 return n; 00205 } 00206 00207 static VALUE 00208 class2path(VALUE klass) 00209 { 00210 VALUE path = rb_class_path(klass); 00211 const char *n; 00212 00213 n = must_not_be_anonymous((TYPE(klass) == T_CLASS ? 
"class" : "module"), path); 00214 if (rb_path_to_class(path) != rb_class_real(klass)) { 00215 rb_raise(rb_eTypeError, "%s can't be referred to", n); 00216 } 00217 return path; 00218 } 00219 00220 static void w_long(long, struct dump_arg*); 00221 static void w_encoding(VALUE obj, long num, struct dump_call_arg *arg); 00222 00223 static void 00224 w_nbyte(const char *s, long n, struct dump_arg *arg) 00225 { 00226 VALUE buf = arg->str; 00227 rb_str_buf_cat(buf, s, n); 00228 RBASIC(buf)->flags |= arg->infection; 00229 if (arg->dest && RSTRING_LEN(buf) >= BUFSIZ) { 00230 rb_io_write(arg->dest, buf); 00231 rb_str_resize(buf, 0); 00232 } 00233 } 00234 00235 static void 00236 w_byte(char c, struct dump_arg *arg) 00237 { 00238 w_nbyte(&c, 1, arg); 00239 } 00240 00241 static void 00242 w_bytes(const char *s, long n, struct dump_arg *arg) 00243 { 00244 w_long(n, arg); 00245 w_nbyte(s, n, arg); 00246 } 00247 00248 #define w_cstr(s, arg) w_bytes((s), strlen(s), (arg)) 00249 00250 static void 00251 w_short(int x, struct dump_arg *arg) 00252 { 00253 w_byte((char)((x >> 0) & 0xff), arg); 00254 w_byte((char)((x >> 8) & 0xff), arg); 00255 } 00256 00257 static void 00258 w_long(long x, struct dump_arg *arg) 00259 { 00260 char buf[sizeof(long)+1]; 00261 int i, len = 0; 00262 00263 #if SIZEOF_LONG > 4 00264 if (!(RSHIFT(x, 31) == 0 || RSHIFT(x, 31) == -1)) { 00265 /* big long does not fit in 4 bytes */ 00266 rb_raise(rb_eTypeError, "long too big to dump"); 00267 } 00268 #endif 00269 00270 if (x == 0) { 00271 w_byte(0, arg); 00272 return; 00273 } 00274 if (0 < x && x < 123) { 00275 w_byte((char)(x + 5), arg); 00276 return; 00277 } 00278 if (-124 < x && x < 0) { 00279 w_byte((char)((x - 5)&0xff), arg); 00280 return; 00281 } 00282 for (i=1;i<(int)sizeof(long)+1;i++) { 00283 buf[i] = (char)(x & 0xff); 00284 x = RSHIFT(x,8); 00285 if (x == 0) { 00286 buf[0] = i; 00287 break; 00288 } 00289 if (x == -1) { 00290 buf[0] = -i; 00291 break; 00292 } 00293 } 00294 len = i; 00295 for 
(i=0;i<=len;i++) { 00296 w_byte(buf[i], arg); 00297 } 00298 } 00299 00300 #ifdef DBL_MANT_DIG 00301 #define DECIMAL_MANT (53-16) /* from IEEE754 double precision */ 00302 00303 #if DBL_MANT_DIG > 32 00304 #define MANT_BITS 32 00305 #elif DBL_MANT_DIG > 24 00306 #define MANT_BITS 24 00307 #elif DBL_MANT_DIG > 16 00308 #define MANT_BITS 16 00309 #else 00310 #define MANT_BITS 8 00311 #endif 00312 00313 static double 00314 load_mantissa(double d, const char *buf, long len) 00315 { 00316 if (!len) return d; 00317 if (--len > 0 && !*buf++) { /* binary mantissa mark */ 00318 int e, s = d < 0, dig = 0; 00319 unsigned long m; 00320 00321 modf(ldexp(frexp(fabs(d), &e), DECIMAL_MANT), &d); 00322 do { 00323 m = 0; 00324 switch (len) { 00325 default: m = *buf++ & 0xff; 00326 #if MANT_BITS > 24 00327 case 3: m = (m << 8) | (*buf++ & 0xff); 00328 #endif 00329 #if MANT_BITS > 16 00330 case 2: m = (m << 8) | (*buf++ & 0xff); 00331 #endif 00332 #if MANT_BITS > 8 00333 case 1: m = (m << 8) | (*buf++ & 0xff); 00334 #endif 00335 } 00336 dig -= len < MANT_BITS / 8 ? 
8 * (unsigned)len : MANT_BITS; 00337 d += ldexp((double)m, dig); 00338 } while ((len -= MANT_BITS / 8) > 0); 00339 d = ldexp(d, e - DECIMAL_MANT); 00340 if (s) d = -d; 00341 } 00342 return d; 00343 } 00344 #else 00345 #define load_mantissa(d, buf, len) (d) 00346 #endif 00347 00348 #ifdef DBL_DIG 00349 #define FLOAT_DIG (DBL_DIG+2) 00350 #else 00351 #define FLOAT_DIG 17 00352 #endif 00353 00354 static void 00355 w_float(double d, struct dump_arg *arg) 00356 { 00357 char *ruby_dtoa(double d_, int mode, int ndigits, int *decpt, int *sign, char **rve); 00358 char buf[FLOAT_DIG + (DECIMAL_MANT + 7) / 8 + 10]; 00359 00360 if (isinf(d)) { 00361 if (d < 0) w_cstr("-inf", arg); 00362 else w_cstr("inf", arg); 00363 } 00364 else if (isnan(d)) { 00365 w_cstr("nan", arg); 00366 } 00367 else if (d == 0.0) { 00368 if (1.0/d < 0) w_cstr("-0", arg); 00369 else w_cstr("0", arg); 00370 } 00371 else { 00372 int decpt, sign, digs, len = 0; 00373 char *e, *p = ruby_dtoa(d, 0, 0, &decpt, &sign, &e); 00374 if (sign) buf[len++] = '-'; 00375 digs = (int)(e - p); 00376 if (decpt < -3 || decpt > digs) { 00377 buf[len++] = p[0]; 00378 if (--digs > 0) buf[len++] = '.'; 00379 memcpy(buf + len, p + 1, digs); 00380 len += digs; 00381 len += snprintf(buf + len, sizeof(buf) - len, "e%d", decpt - 1); 00382 } 00383 else if (decpt > 0) { 00384 memcpy(buf + len, p, decpt); 00385 len += decpt; 00386 if ((digs -= decpt) > 0) { 00387 buf[len++] = '.'; 00388 memcpy(buf + len, p + decpt, digs); 00389 len += digs; 00390 } 00391 } 00392 else { 00393 buf[len++] = '0'; 00394 buf[len++] = '.'; 00395 if (decpt) { 00396 memset(buf + len, '0', -decpt); 00397 len -= decpt; 00398 } 00399 memcpy(buf + len, p, digs); 00400 len += digs; 00401 } 00402 xfree(p); 00403 w_bytes(buf, len, arg); 00404 } 00405 } 00406 00407 static void 00408 w_symbol(ID id, struct dump_arg *arg) 00409 { 00410 VALUE sym; 00411 st_data_t num; 00412 int encidx = -1; 00413 00414 if (st_lookup(arg->symbols, id, &num)) { 00415 w_byte(TYPE_SYMLINK, 
arg); 00416 w_long((long)num, arg); 00417 } 00418 else { 00419 sym = rb_id2str(id); 00420 if (!sym) { 00421 rb_raise(rb_eTypeError, "can't dump anonymous ID %"PRIdVALUE, id); 00422 } 00423 encidx = rb_enc_get_index(sym); 00424 if (encidx == rb_usascii_encindex() || 00425 rb_enc_str_coderange(sym) == ENC_CODERANGE_7BIT) { 00426 encidx = -1; 00427 } 00428 else { 00429 w_byte(TYPE_IVAR, arg); 00430 } 00431 w_byte(TYPE_SYMBOL, arg); 00432 w_bytes(RSTRING_PTR(sym), RSTRING_LEN(sym), arg); 00433 st_add_direct(arg->symbols, id, arg->symbols->num_entries); 00434 if (encidx != -1) { 00435 struct dump_call_arg c_arg; 00436 c_arg.limit = 1; 00437 c_arg.arg = arg; 00438 w_encoding(sym, 0, &c_arg); 00439 } 00440 } 00441 } 00442 00443 static void 00444 w_unique(VALUE s, struct dump_arg *arg) 00445 { 00446 must_not_be_anonymous("class", s); 00447 w_symbol(rb_intern_str(s), arg); 00448 } 00449 00450 static void w_object(VALUE,struct dump_arg*,int); 00451 00452 static int 00453 hash_each(VALUE key, VALUE value, struct dump_call_arg *arg) 00454 { 00455 w_object(key, arg->arg, arg->limit); 00456 w_object(value, arg->arg, arg->limit); 00457 return ST_CONTINUE; 00458 } 00459 00460 static void 00461 w_extended(VALUE klass, struct dump_arg *arg, int check) 00462 { 00463 if (check && FL_TEST(klass, FL_SINGLETON)) { 00464 if (RCLASS_M_TBL(klass)->num_entries || 00465 (RCLASS_IV_TBL(klass) && RCLASS_IV_TBL(klass)->num_entries > 1)) { 00466 rb_raise(rb_eTypeError, "singleton can't be dumped"); 00467 } 00468 klass = RCLASS_SUPER(klass); 00469 } 00470 while (BUILTIN_TYPE(klass) == T_ICLASS) { 00471 VALUE path = rb_class_name(RBASIC(klass)->klass); 00472 w_byte(TYPE_EXTENDED, arg); 00473 w_unique(path, arg); 00474 klass = RCLASS_SUPER(klass); 00475 } 00476 } 00477 00478 static void 00479 w_class(char type, VALUE obj, struct dump_arg *arg, int check) 00480 { 00481 VALUE path; 00482 st_data_t real_obj; 00483 VALUE klass; 00484 00485 if (st_lookup(arg->compat_tbl, (st_data_t)obj, &real_obj)) { 
00486 obj = (VALUE)real_obj; 00487 } 00488 klass = CLASS_OF(obj); 00489 w_extended(klass, arg, check); 00490 w_byte(type, arg); 00491 path = class2path(rb_class_real(klass)); 00492 w_unique(path, arg); 00493 } 00494 00495 static void 00496 w_uclass(VALUE obj, VALUE super, struct dump_arg *arg) 00497 { 00498 VALUE klass = CLASS_OF(obj); 00499 00500 w_extended(klass, arg, TRUE); 00501 klass = rb_class_real(klass); 00502 if (klass != super) { 00503 w_byte(TYPE_UCLASS, arg); 00504 w_unique(class2path(klass), arg); 00505 } 00506 } 00507 00508 static int 00509 w_obj_each(ID id, VALUE value, struct dump_call_arg *arg) 00510 { 00511 if (id == rb_id_encoding()) return ST_CONTINUE; 00512 if (id == rb_intern("E")) return ST_CONTINUE; 00513 w_symbol(id, arg->arg); 00514 w_object(value, arg->arg, arg->limit); 00515 return ST_CONTINUE; 00516 } 00517 00518 static void 00519 w_encoding(VALUE obj, long num, struct dump_call_arg *arg) 00520 { 00521 int encidx = rb_enc_get_index(obj); 00522 rb_encoding *enc = 0; 00523 st_data_t name; 00524 00525 if (encidx <= 0 || !(enc = rb_enc_from_index(encidx))) { 00526 w_long(num, arg->arg); 00527 return; 00528 } 00529 w_long(num + 1, arg->arg); 00530 00531 /* special treatment for US-ASCII and UTF-8 */ 00532 if (encidx == rb_usascii_encindex()) { 00533 w_symbol(rb_intern("E"), arg->arg); 00534 w_object(Qfalse, arg->arg, arg->limit + 1); 00535 return; 00536 } 00537 else if (encidx == rb_utf8_encindex()) { 00538 w_symbol(rb_intern("E"), arg->arg); 00539 w_object(Qtrue, arg->arg, arg->limit + 1); 00540 return; 00541 } 00542 00543 w_symbol(rb_id_encoding(), arg->arg); 00544 do { 00545 if (!arg->arg->encodings) 00546 arg->arg->encodings = st_init_strcasetable(); 00547 else if (st_lookup(arg->arg->encodings, (st_data_t)rb_enc_name(enc), &name)) 00548 break; 00549 name = (st_data_t)rb_str_new2(rb_enc_name(enc)); 00550 st_insert(arg->arg->encodings, (st_data_t)rb_enc_name(enc), name); 00551 } while (0); 00552 w_object(name, arg->arg, arg->limit + 1); 
00553 } 00554 00555 static void 00556 w_ivar(VALUE obj, st_table *tbl, struct dump_call_arg *arg) 00557 { 00558 long num = tbl ? tbl->num_entries : 0; 00559 00560 w_encoding(obj, num, arg); 00561 if (tbl) { 00562 st_foreach_safe(tbl, w_obj_each, (st_data_t)arg); 00563 } 00564 } 00565 00566 static void 00567 w_objivar(VALUE obj, struct dump_call_arg *arg) 00568 { 00569 VALUE *ptr; 00570 long i, len, num; 00571 00572 len = ROBJECT_NUMIV(obj); 00573 ptr = ROBJECT_IVPTR(obj); 00574 num = 0; 00575 for (i = 0; i < len; i++) 00576 if (ptr[i] != Qundef) 00577 num += 1; 00578 00579 w_encoding(obj, num, arg); 00580 if (num != 0) { 00581 rb_ivar_foreach(obj, w_obj_each, (st_data_t)arg); 00582 } 00583 } 00584 00585 static void 00586 w_object(VALUE obj, struct dump_arg *arg, int limit) 00587 { 00588 struct dump_call_arg c_arg; 00589 st_table *ivtbl = 0; 00590 st_data_t num; 00591 int hasiv = 0; 00592 #define has_ivars(obj, ivtbl) (((ivtbl) = rb_generic_ivar_table(obj)) != 0 || \ 00593 (!SPECIAL_CONST_P(obj) && !ENCODING_IS_ASCII8BIT(obj))) 00594 00595 if (limit == 0) { 00596 rb_raise(rb_eArgError, "exceed depth limit"); 00597 } 00598 00599 limit--; 00600 c_arg.limit = limit; 00601 c_arg.arg = arg; 00602 00603 if (st_lookup(arg->data, obj, &num)) { 00604 w_byte(TYPE_LINK, arg); 00605 w_long((long)num, arg); 00606 return; 00607 } 00608 00609 if (obj == Qnil) { 00610 w_byte(TYPE_NIL, arg); 00611 } 00612 else if (obj == Qtrue) { 00613 w_byte(TYPE_TRUE, arg); 00614 } 00615 else if (obj == Qfalse) { 00616 w_byte(TYPE_FALSE, arg); 00617 } 00618 else if (FIXNUM_P(obj)) { 00619 #if SIZEOF_LONG <= 4 00620 w_byte(TYPE_FIXNUM, arg); 00621 w_long(FIX2INT(obj), arg); 00622 #else 00623 if (RSHIFT((long)obj, 31) == 0 || RSHIFT((long)obj, 31) == -1) { 00624 w_byte(TYPE_FIXNUM, arg); 00625 w_long(FIX2LONG(obj), arg); 00626 } 00627 else { 00628 w_object(rb_int2big(FIX2LONG(obj)), arg, limit); 00629 } 00630 #endif 00631 } 00632 else if (SYMBOL_P(obj)) { 00633 w_symbol(SYM2ID(obj), arg); 00634 } 
00635 else { 00636 arg->infection |= (int)FL_TEST(obj, MARSHAL_INFECTION); 00637 00638 if (rb_respond_to(obj, s_mdump)) { 00639 volatile VALUE v; 00640 00641 st_add_direct(arg->data, obj, arg->data->num_entries); 00642 00643 v = rb_funcall(obj, s_mdump, 0, 0); 00644 check_dump_arg(arg, s_mdump); 00645 hasiv = has_ivars(obj, ivtbl); 00646 if (hasiv) w_byte(TYPE_IVAR, arg); 00647 w_class(TYPE_USRMARSHAL, obj, arg, FALSE); 00648 w_object(v, arg, limit); 00649 if (hasiv) w_ivar(obj, ivtbl, &c_arg); 00650 return; 00651 } 00652 if (rb_respond_to(obj, s_dump)) { 00653 VALUE v; 00654 st_table *ivtbl2 = 0; 00655 int hasiv2; 00656 00657 v = rb_funcall(obj, s_dump, 1, INT2NUM(limit)); 00658 check_dump_arg(arg, s_dump); 00659 if (TYPE(v) != T_STRING) { 00660 rb_raise(rb_eTypeError, "_dump() must return string"); 00661 } 00662 hasiv = has_ivars(obj, ivtbl); 00663 if (hasiv) w_byte(TYPE_IVAR, arg); 00664 if ((hasiv2 = has_ivars(v, ivtbl2)) != 0 && !hasiv) { 00665 w_byte(TYPE_IVAR, arg); 00666 } 00667 w_class(TYPE_USERDEF, obj, arg, FALSE); 00668 w_bytes(RSTRING_PTR(v), RSTRING_LEN(v), arg); 00669 if (hasiv2) { 00670 w_ivar(v, ivtbl2, &c_arg); 00671 } 00672 else if (hasiv) { 00673 w_ivar(obj, ivtbl, &c_arg); 00674 } 00675 st_add_direct(arg->data, obj, arg->data->num_entries); 00676 return; 00677 } 00678 00679 st_add_direct(arg->data, obj, arg->data->num_entries); 00680 00681 hasiv = has_ivars(obj, ivtbl); 00682 { 00683 st_data_t compat_data; 00684 rb_alloc_func_t allocator = rb_get_alloc_func(RBASIC(obj)->klass); 00685 if (st_lookup(compat_allocator_tbl, 00686 (st_data_t)allocator, 00687 &compat_data)) { 00688 marshal_compat_t *compat = (marshal_compat_t*)compat_data; 00689 VALUE real_obj = obj; 00690 obj = compat->dumper(real_obj); 00691 st_insert(arg->compat_tbl, (st_data_t)obj, (st_data_t)real_obj); 00692 if (obj != real_obj && !ivtbl) hasiv = 0; 00693 } 00694 } 00695 if (hasiv) w_byte(TYPE_IVAR, arg); 00696 00697 switch (BUILTIN_TYPE(obj)) { 00698 case T_CLASS: 00699 if 
(FL_TEST(obj, FL_SINGLETON)) { 00700 rb_raise(rb_eTypeError, "singleton class can't be dumped"); 00701 } 00702 w_byte(TYPE_CLASS, arg); 00703 { 00704 volatile VALUE path = class2path(obj); 00705 w_bytes(RSTRING_PTR(path), RSTRING_LEN(path), arg); 00706 } 00707 break; 00708 00709 case T_MODULE: 00710 w_byte(TYPE_MODULE, arg); 00711 { 00712 VALUE path = class2path(obj); 00713 w_bytes(RSTRING_PTR(path), RSTRING_LEN(path), arg); 00714 } 00715 break; 00716 00717 case T_FLOAT: 00718 w_byte(TYPE_FLOAT, arg); 00719 w_float(RFLOAT_VALUE(obj), arg); 00720 break; 00721 00722 case T_BIGNUM: 00723 w_byte(TYPE_BIGNUM, arg); 00724 { 00725 char sign = RBIGNUM_SIGN(obj) ? '+' : '-'; 00726 long len = RBIGNUM_LEN(obj); 00727 BDIGIT *d = RBIGNUM_DIGITS(obj); 00728 00729 w_byte(sign, arg); 00730 w_long(SHORTLEN(len), arg); /* w_short? */ 00731 while (len--) { 00732 #if SIZEOF_BDIGITS > SIZEOF_SHORT 00733 BDIGIT num = *d; 00734 int i; 00735 00736 for (i=0; i<SIZEOF_BDIGITS; i+=SIZEOF_SHORT) { 00737 w_short(num & SHORTMASK, arg); 00738 num = SHORTDN(num); 00739 if (len == 0 && num == 0) break; 00740 } 00741 #else 00742 w_short(*d, arg); 00743 #endif 00744 d++; 00745 } 00746 } 00747 break; 00748 00749 case T_STRING: 00750 w_uclass(obj, rb_cString, arg); 00751 w_byte(TYPE_STRING, arg); 00752 w_bytes(RSTRING_PTR(obj), RSTRING_LEN(obj), arg); 00753 break; 00754 00755 case T_REGEXP: 00756 w_uclass(obj, rb_cRegexp, arg); 00757 w_byte(TYPE_REGEXP, arg); 00758 { 00759 int opts = rb_reg_options(obj); 00760 w_bytes(RREGEXP_SRC_PTR(obj), RREGEXP_SRC_LEN(obj), arg); 00761 w_byte((char)opts, arg); 00762 } 00763 break; 00764 00765 case T_ARRAY: 00766 w_uclass(obj, rb_cArray, arg); 00767 w_byte(TYPE_ARRAY, arg); 00768 { 00769 long i, len = RARRAY_LEN(obj); 00770 00771 w_long(len, arg); 00772 for (i=0; i<RARRAY_LEN(obj); i++) { 00773 w_object(RARRAY_PTR(obj)[i], arg, limit); 00774 if (len != RARRAY_LEN(obj)) { 00775 rb_raise(rb_eRuntimeError, "array modified during dump"); 00776 } 00777 } 00778 } 00779 
break; 00780 00781 case T_HASH: 00782 w_uclass(obj, rb_cHash, arg); 00783 if (NIL_P(RHASH_IFNONE(obj))) { 00784 w_byte(TYPE_HASH, arg); 00785 } 00786 else if (FL_TEST(obj, FL_USER2)) { 00787 /* FL_USER2 means HASH_PROC_DEFAULT (see hash.c) */ 00788 rb_raise(rb_eTypeError, "can't dump hash with default proc"); 00789 } 00790 else { 00791 w_byte(TYPE_HASH_DEF, arg); 00792 } 00793 w_long(RHASH_SIZE(obj), arg); 00794 rb_hash_foreach(obj, hash_each, (st_data_t)&c_arg); 00795 if (!NIL_P(RHASH_IFNONE(obj))) { 00796 w_object(RHASH_IFNONE(obj), arg, limit); 00797 } 00798 break; 00799 00800 case T_STRUCT: 00801 w_class(TYPE_STRUCT, obj, arg, TRUE); 00802 { 00803 long len = RSTRUCT_LEN(obj); 00804 VALUE mem; 00805 long i; 00806 00807 w_long(len, arg); 00808 mem = rb_struct_members(obj); 00809 for (i=0; i<len; i++) { 00810 w_symbol(SYM2ID(RARRAY_PTR(mem)[i]), arg); 00811 w_object(RSTRUCT_PTR(obj)[i], arg, limit); 00812 } 00813 } 00814 break; 00815 00816 case T_OBJECT: 00817 w_class(TYPE_OBJECT, obj, arg, TRUE); 00818 w_objivar(obj, &c_arg); 00819 break; 00820 00821 case T_DATA: 00822 { 00823 VALUE v; 00824 00825 if (!rb_respond_to(obj, s_dump_data)) { 00826 rb_raise(rb_eTypeError, 00827 "no _dump_data is defined for class %s", 00828 rb_obj_classname(obj)); 00829 } 00830 v = rb_funcall(obj, s_dump_data, 0); 00831 check_dump_arg(arg, s_dump_data); 00832 w_class(TYPE_DATA, obj, arg, TRUE); 00833 w_object(v, arg, limit); 00834 } 00835 break; 00836 00837 default: 00838 rb_raise(rb_eTypeError, "can't dump %s", 00839 rb_obj_classname(obj)); 00840 break; 00841 } 00842 } 00843 if (hasiv) { 00844 w_ivar(obj, ivtbl, &c_arg); 00845 } 00846 } 00847 00848 static void 00849 clear_dump_arg(struct dump_arg *arg) 00850 { 00851 if (!arg->symbols) return; 00852 st_free_table(arg->symbols); 00853 arg->symbols = 0; 00854 st_free_table(arg->data); 00855 arg->data = 0; 00856 st_free_table(arg->compat_tbl); 00857 arg->compat_tbl = 0; 00858 if (arg->encodings) { 00859 st_free_table(arg->encodings); 
00860 arg->encodings = 0; 00861 } 00862 } 00863 00864 /* 00865 * call-seq: 00866 * dump( obj [, anIO] , limit=-1 ) -> anIO 00867 * 00868 * Serializes obj and all descendant objects. If anIO is 00869 * specified, the serialized data will be written to it, otherwise the 00870 * data will be returned as a String. If limit is specified, the 00871 * traversal of subobjects will be limited to that depth. If limit is 00872 * negative, no checking of depth will be performed. 00873 * 00874 * class Klass 00875 * def initialize(str) 00876 * @str = str 00877 * end 00878 * def say_hello 00879 * @str 00880 * end 00881 * end 00882 * 00883 * (produces no output) 00884 * 00885 * o = Klass.new("hello\n") 00886 * data = Marshal.dump(o) 00887 * obj = Marshal.load(data) 00888 * obj.say_hello #=> "hello\n" 00889 * 00890 * Marshal can't dump following objects: 00891 * * anonymous Class/Module. 00892 * * objects which related to its system (ex: Dir, File::Stat, IO, File, Socket 00893 * and so on) 00894 * * an instance of MatchData, Data, Method, UnboundMethod, Proc, Thread, 00895 * ThreadGroup, Continuation 00896 * * objects which defines singleton methods 00897 */ 00898 static VALUE 00899 marshal_dump(int argc, VALUE *argv) 00900 { 00901 VALUE obj, port, a1, a2; 00902 int limit = -1; 00903 struct dump_arg *arg; 00904 volatile VALUE wrapper; 00905 00906 port = Qnil; 00907 rb_scan_args(argc, argv, "12", &obj, &a1, &a2); 00908 if (argc == 3) { 00909 if (!NIL_P(a2)) limit = NUM2INT(a2); 00910 if (NIL_P(a1)) goto type_error; 00911 port = a1; 00912 } 00913 else if (argc == 2) { 00914 if (FIXNUM_P(a1)) limit = FIX2INT(a1); 00915 else if (NIL_P(a1)) goto type_error; 00916 else port = a1; 00917 } 00918 wrapper = TypedData_Make_Struct(rb_cData, struct dump_arg, &dump_arg_data, arg); 00919 arg->dest = 0; 00920 arg->symbols = st_init_numtable(); 00921 arg->data = st_init_numtable(); 00922 arg->infection = 0; 00923 arg->compat_tbl = st_init_numtable(); 00924 arg->encodings = 0; 00925 arg->str = 
rb_str_buf_new(0); 00926 if (!NIL_P(port)) { 00927 if (!rb_respond_to(port, s_write)) { 00928 type_error: 00929 rb_raise(rb_eTypeError, "instance of IO needed"); 00930 } 00931 arg->dest = port; 00932 if (rb_respond_to(port, s_binmode)) { 00933 rb_funcall2(port, s_binmode, 0, 0); 00934 check_dump_arg(arg, s_binmode); 00935 } 00936 } 00937 else { 00938 port = arg->str; 00939 } 00940 00941 w_byte(MARSHAL_MAJOR, arg); 00942 w_byte(MARSHAL_MINOR, arg); 00943 00944 w_object(obj, arg, limit); 00945 if (arg->dest) { 00946 rb_io_write(arg->dest, arg->str); 00947 rb_str_resize(arg->str, 0); 00948 } 00949 clear_dump_arg(arg); 00950 RB_GC_GUARD(wrapper); 00951 00952 return port; 00953 } 00954 00955 struct load_arg { 00956 VALUE src; 00957 long offset; 00958 st_table *symbols; 00959 st_table *data; 00960 VALUE proc; 00961 st_table *compat_tbl; 00962 int infection; 00963 }; 00964 00965 static void 00966 check_load_arg(struct load_arg *arg, ID sym) 00967 { 00968 if (!arg->symbols) { 00969 rb_raise(rb_eRuntimeError, "Marshal.load reentered at %s", 00970 rb_id2name(sym)); 00971 } 00972 } 00973 00974 static void clear_load_arg(struct load_arg *arg); 00975 00976 static void 00977 mark_load_arg(void *ptr) 00978 { 00979 struct load_arg *p = ptr; 00980 if (!p->symbols) 00981 return; 00982 rb_mark_tbl(p->data); 00983 rb_mark_hash(p->compat_tbl); 00984 } 00985 00986 static void 00987 free_load_arg(void *ptr) 00988 { 00989 clear_load_arg(ptr); 00990 xfree(ptr); 00991 } 00992 00993 static size_t 00994 memsize_load_arg(const void *ptr) 00995 { 00996 return ptr ? 
sizeof(struct load_arg) : 0; 00997 } 00998 00999 static const rb_data_type_t load_arg_data = { 01000 "load_arg", 01001 {mark_load_arg, free_load_arg, memsize_load_arg,}, 01002 }; 01003 01004 #define r_entry(v, arg) r_entry0((v), (arg)->data->num_entries, (arg)) 01005 static VALUE r_entry0(VALUE v, st_index_t num, struct load_arg *arg); 01006 static VALUE r_object(struct load_arg *arg); 01007 static ID r_symbol(struct load_arg *arg); 01008 static VALUE path2class(VALUE path); 01009 01010 static st_index_t 01011 r_prepare(struct load_arg *arg) 01012 { 01013 st_index_t idx = arg->data->num_entries; 01014 01015 st_insert(arg->data, (st_data_t)idx, (st_data_t)Qundef); 01016 return idx; 01017 } 01018 01019 static int 01020 r_byte(struct load_arg *arg) 01021 { 01022 int c; 01023 01024 if (TYPE(arg->src) == T_STRING) { 01025 if (RSTRING_LEN(arg->src) > arg->offset) { 01026 c = (unsigned char)RSTRING_PTR(arg->src)[arg->offset++]; 01027 } 01028 else { 01029 rb_raise(rb_eArgError, "marshal data too short"); 01030 } 01031 } 01032 else { 01033 VALUE src = arg->src; 01034 VALUE v = rb_funcall2(src, s_getbyte, 0, 0); 01035 check_load_arg(arg, s_getbyte); 01036 if (NIL_P(v)) rb_eof_error(); 01037 c = (unsigned char)NUM2CHR(v); 01038 } 01039 return c; 01040 } 01041 01042 static void 01043 long_toobig(int size) 01044 { 01045 rb_raise(rb_eTypeError, "long too big for this architecture (size " 01046 STRINGIZE(SIZEOF_LONG)", given %d)", size); 01047 } 01048 01049 #undef SIGN_EXTEND_CHAR 01050 #if __STDC__ 01051 # define SIGN_EXTEND_CHAR(c) ((signed char)(c)) 01052 #else /* not __STDC__ */ 01053 /* As in Harbison and Steele. 
*/ 01054 # define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128) 01055 #endif 01056 01057 static long 01058 r_long(struct load_arg *arg) 01059 { 01060 register long x; 01061 int c = SIGN_EXTEND_CHAR(r_byte(arg)); 01062 long i; 01063 01064 if (c == 0) return 0; 01065 if (c > 0) { 01066 if (4 < c && c < 128) { 01067 return c - 5; 01068 } 01069 if (c > (int)sizeof(long)) long_toobig(c); 01070 x = 0; 01071 for (i=0;i<c;i++) { 01072 x |= (long)r_byte(arg) << (8*i); 01073 } 01074 } 01075 else { 01076 if (-129 < c && c < -4) { 01077 return c + 5; 01078 } 01079 c = -c; 01080 if (c > (int)sizeof(long)) long_toobig(c); 01081 x = -1; 01082 for (i=0;i<c;i++) { 01083 x &= ~((long)0xff << (8*i)); 01084 x |= (long)r_byte(arg) << (8*i); 01085 } 01086 } 01087 return x; 01088 } 01089 01090 #define r_bytes(arg) r_bytes0(r_long(arg), (arg)) 01091 01092 static VALUE 01093 r_bytes0(long len, struct load_arg *arg) 01094 { 01095 VALUE str; 01096 01097 if (len == 0) return rb_str_new(0, 0); 01098 if (TYPE(arg->src) == T_STRING) { 01099 if (RSTRING_LEN(arg->src) - arg->offset >= len) { 01100 str = rb_str_new(RSTRING_PTR(arg->src)+arg->offset, len); 01101 arg->offset += len; 01102 } 01103 else { 01104 too_short: 01105 rb_raise(rb_eArgError, "marshal data too short"); 01106 } 01107 } 01108 else { 01109 VALUE src = arg->src; 01110 VALUE n = LONG2NUM(len); 01111 str = rb_funcall2(src, s_read, 1, &n); 01112 check_load_arg(arg, s_read); 01113 if (NIL_P(str)) goto too_short; 01114 StringValue(str); 01115 if (RSTRING_LEN(str) != len) goto too_short; 01116 arg->infection |= (int)FL_TEST(str, MARSHAL_INFECTION); 01117 } 01118 return str; 01119 } 01120 01121 static int 01122 id2encidx(ID id, VALUE val) 01123 { 01124 if (id == rb_id_encoding()) { 01125 int idx = rb_enc_find_index(StringValueCStr(val)); 01126 return idx; 01127 } 01128 else if (id == rb_intern("E")) { 01129 if (val == Qfalse) return rb_usascii_encindex(); 01130 else if (val == Qtrue) return rb_utf8_encindex(); 01131 /* bogus 
ignore */ 01132 } 01133 return -1; 01134 } 01135 01136 static ID 01137 r_symlink(struct load_arg *arg) 01138 { 01139 st_data_t id; 01140 long num = r_long(arg); 01141 01142 if (st_lookup(arg->symbols, num, &id)) { 01143 return (ID)id; 01144 } 01145 rb_raise(rb_eArgError, "bad symbol"); 01146 } 01147 01148 static ID 01149 r_symreal(struct load_arg *arg, int ivar) 01150 { 01151 volatile VALUE s = r_bytes(arg); 01152 ID id; 01153 int idx = -1; 01154 st_index_t n = arg->symbols->num_entries; 01155 01156 st_insert(arg->symbols, (st_data_t)n, (st_data_t)0); 01157 if (ivar) { 01158 long num = r_long(arg); 01159 while (num-- > 0) { 01160 id = r_symbol(arg); 01161 idx = id2encidx(id, r_object(arg)); 01162 } 01163 } 01164 if (idx < 0) idx = rb_usascii_encindex(); 01165 rb_enc_associate_index(s, idx); 01166 id = rb_intern_str(s); 01167 st_insert(arg->symbols, (st_data_t)n, (st_data_t)id); 01168 01169 return id; 01170 } 01171 01172 static ID 01173 r_symbol(struct load_arg *arg) 01174 { 01175 int type, ivar = 0; 01176 01177 again: 01178 switch ((type = r_byte(arg))) { 01179 case TYPE_IVAR: 01180 ivar = 1; 01181 goto again; 01182 case TYPE_SYMBOL: 01183 return r_symreal(arg, ivar); 01184 case TYPE_SYMLINK: 01185 if (ivar) { 01186 rb_raise(rb_eArgError, "dump format error (symlink with encoding)"); 01187 } 01188 return r_symlink(arg); 01189 default: 01190 rb_raise(rb_eArgError, "dump format error for symbol(0x%x)", type); 01191 break; 01192 } 01193 } 01194 01195 static VALUE 01196 r_unique(struct load_arg *arg) 01197 { 01198 return rb_id2str(r_symbol(arg)); 01199 } 01200 01201 static VALUE 01202 r_string(struct load_arg *arg) 01203 { 01204 return r_bytes(arg); 01205 } 01206 01207 static VALUE 01208 r_entry0(VALUE v, st_index_t num, struct load_arg *arg) 01209 { 01210 st_data_t real_obj = (VALUE)Qundef; 01211 if (st_lookup(arg->compat_tbl, v, &real_obj)) { 01212 st_insert(arg->data, num, (st_data_t)real_obj); 01213 } 01214 else { 01215 st_insert(arg->data, num, (st_data_t)v); 
01216 } 01217 if (arg->infection) { 01218 FL_SET(v, arg->infection); 01219 if ((VALUE)real_obj != Qundef) 01220 FL_SET((VALUE)real_obj, arg->infection); 01221 } 01222 return v; 01223 } 01224 01225 static VALUE 01226 r_leave(VALUE v, struct load_arg *arg) 01227 { 01228 st_data_t data; 01229 if (st_lookup(arg->compat_tbl, v, &data)) { 01230 VALUE real_obj = (VALUE)data; 01231 rb_alloc_func_t allocator = rb_get_alloc_func(CLASS_OF(real_obj)); 01232 st_data_t key = v; 01233 if (st_lookup(compat_allocator_tbl, (st_data_t)allocator, &data)) { 01234 marshal_compat_t *compat = (marshal_compat_t*)data; 01235 compat->loader(real_obj, v); 01236 } 01237 st_delete(arg->compat_tbl, &key, 0); 01238 v = real_obj; 01239 } 01240 if (arg->proc) { 01241 v = rb_funcall(arg->proc, s_call, 1, v); 01242 check_load_arg(arg, s_call); 01243 } 01244 return v; 01245 } 01246 01247 static void 01248 r_ivar(VALUE obj, int *has_encoding, struct load_arg *arg) 01249 { 01250 long len; 01251 01252 len = r_long(arg); 01253 if (len > 0) { 01254 do { 01255 ID id = r_symbol(arg); 01256 VALUE val = r_object(arg); 01257 int idx = id2encidx(id, val); 01258 if (idx >= 0) { 01259 rb_enc_associate_index(obj, idx); 01260 if (has_encoding) *has_encoding = TRUE; 01261 } 01262 else { 01263 rb_ivar_set(obj, id, val); 01264 } 01265 } while (--len > 0); 01266 } 01267 } 01268 01269 static VALUE 01270 path2class(VALUE path) 01271 { 01272 VALUE v = rb_path_to_class(path); 01273 01274 if (TYPE(v) != T_CLASS) { 01275 rb_raise(rb_eArgError, "%.*s does not refer to class", 01276 (int)RSTRING_LEN(path), RSTRING_PTR(path)); 01277 } 01278 return v; 01279 } 01280 01281 static VALUE 01282 path2module(VALUE path) 01283 { 01284 VALUE v = rb_path_to_class(path); 01285 01286 if (TYPE(v) != T_MODULE) { 01287 rb_raise(rb_eArgError, "%.*s does not refer to module", 01288 (int)RSTRING_LEN(path), RSTRING_PTR(path)); 01289 } 01290 return v; 01291 } 01292 01293 static VALUE 01294 obj_alloc_by_path(VALUE path, struct load_arg *arg) 01295 { 
01296 VALUE klass; 01297 st_data_t data; 01298 rb_alloc_func_t allocator; 01299 01300 klass = path2class(path); 01301 01302 allocator = rb_get_alloc_func(klass); 01303 if (st_lookup(compat_allocator_tbl, (st_data_t)allocator, &data)) { 01304 marshal_compat_t *compat = (marshal_compat_t*)data; 01305 VALUE real_obj = rb_obj_alloc(klass); 01306 VALUE obj = rb_obj_alloc(compat->oldclass); 01307 st_insert(arg->compat_tbl, (st_data_t)obj, (st_data_t)real_obj); 01308 return obj; 01309 } 01310 01311 return rb_obj_alloc(klass); 01312 } 01313 01314 static VALUE 01315 r_object0(struct load_arg *arg, int *ivp, VALUE extmod) 01316 { 01317 VALUE v = Qnil; 01318 int type = r_byte(arg); 01319 long id; 01320 st_data_t link; 01321 01322 switch (type) { 01323 case TYPE_LINK: 01324 id = r_long(arg); 01325 if (!st_lookup(arg->data, (st_data_t)id, &link)) { 01326 rb_raise(rb_eArgError, "dump format error (unlinked)"); 01327 } 01328 v = (VALUE)link; 01329 if (arg->proc) { 01330 v = rb_funcall(arg->proc, s_call, 1, v); 01331 check_load_arg(arg, s_call); 01332 } 01333 break; 01334 01335 case TYPE_IVAR: 01336 { 01337 int ivar = TRUE; 01338 01339 v = r_object0(arg, &ivar, extmod); 01340 if (ivar) r_ivar(v, NULL, arg); 01341 } 01342 break; 01343 01344 case TYPE_EXTENDED: 01345 { 01346 VALUE m = path2module(r_unique(arg)); 01347 01348 if (NIL_P(extmod)) extmod = rb_ary_new2(0); 01349 rb_ary_push(extmod, m); 01350 01351 v = r_object0(arg, 0, extmod); 01352 while (RARRAY_LEN(extmod) > 0) { 01353 m = rb_ary_pop(extmod); 01354 rb_extend_object(v, m); 01355 } 01356 } 01357 break; 01358 01359 case TYPE_UCLASS: 01360 { 01361 VALUE c = path2class(r_unique(arg)); 01362 01363 if (FL_TEST(c, FL_SINGLETON)) { 01364 rb_raise(rb_eTypeError, "singleton can't be loaded"); 01365 } 01366 v = r_object0(arg, 0, extmod); 01367 if (rb_special_const_p(v) || TYPE(v) == T_OBJECT || TYPE(v) == T_CLASS) { 01368 format_error: 01369 rb_raise(rb_eArgError, "dump format error (user class)"); 01370 } 01371 if (TYPE(v) == 
T_MODULE || !RTEST(rb_class_inherited_p(c, RBASIC(v)->klass))) { 01372 VALUE tmp = rb_obj_alloc(c); 01373 01374 if (TYPE(v) != TYPE(tmp)) goto format_error; 01375 } 01376 RBASIC(v)->klass = c; 01377 } 01378 break; 01379 01380 case TYPE_NIL: 01381 v = Qnil; 01382 v = r_leave(v, arg); 01383 break; 01384 01385 case TYPE_TRUE: 01386 v = Qtrue; 01387 v = r_leave(v, arg); 01388 break; 01389 01390 case TYPE_FALSE: 01391 v = Qfalse; 01392 v = r_leave(v, arg); 01393 break; 01394 01395 case TYPE_FIXNUM: 01396 { 01397 long i = r_long(arg); 01398 v = LONG2FIX(i); 01399 } 01400 v = r_leave(v, arg); 01401 break; 01402 01403 case TYPE_FLOAT: 01404 { 01405 double d; 01406 VALUE str = r_bytes(arg); 01407 const char *ptr = RSTRING_PTR(str); 01408 01409 if (strcmp(ptr, "nan") == 0) { 01410 d = NAN; 01411 } 01412 else if (strcmp(ptr, "inf") == 0) { 01413 d = INFINITY; 01414 } 01415 else if (strcmp(ptr, "-inf") == 0) { 01416 d = -INFINITY; 01417 } 01418 else { 01419 char *e; 01420 d = strtod(ptr, &e); 01421 d = load_mantissa(d, e, RSTRING_LEN(str) - (e - ptr)); 01422 } 01423 v = DBL2NUM(d); 01424 v = r_entry(v, arg); 01425 v = r_leave(v, arg); 01426 } 01427 break; 01428 01429 case TYPE_BIGNUM: 01430 { 01431 long len; 01432 BDIGIT *digits; 01433 volatile VALUE data; 01434 01435 NEWOBJ(big, struct RBignum); 01436 OBJSETUP(big, rb_cBignum, T_BIGNUM); 01437 RBIGNUM_SET_SIGN(big, (r_byte(arg) == '+')); 01438 len = r_long(arg); 01439 data = r_bytes0(len * 2, arg); 01440 #if SIZEOF_BDIGITS == SIZEOF_SHORT 01441 rb_big_resize((VALUE)big, len); 01442 #else 01443 rb_big_resize((VALUE)big, (len + 1) * 2 / sizeof(BDIGIT)); 01444 #endif 01445 digits = RBIGNUM_DIGITS(big); 01446 MEMCPY(digits, RSTRING_PTR(data), char, len * 2); 01447 #if SIZEOF_BDIGITS > SIZEOF_SHORT 01448 MEMZERO((char *)digits + len * 2, char, 01449 RBIGNUM_LEN(big) * sizeof(BDIGIT) - len * 2); 01450 #endif 01451 len = RBIGNUM_LEN(big); 01452 while (len > 0) { 01453 unsigned char *p = (unsigned char *)digits; 01454 BDIGIT num = 0; 
01455 #if SIZEOF_BDIGITS > SIZEOF_SHORT 01456 int shift = 0; 01457 int i; 01458 01459 for (i=0; i<SIZEOF_BDIGITS; i++) { 01460 num |= (int)p[i] << shift; 01461 shift += 8; 01462 } 01463 #else 01464 num = p[0] | (p[1] << 8); 01465 #endif 01466 *digits++ = num; 01467 len--; 01468 } 01469 v = rb_big_norm((VALUE)big); 01470 v = r_entry(v, arg); 01471 v = r_leave(v, arg); 01472 } 01473 break; 01474 01475 case TYPE_STRING: 01476 v = r_entry(r_string(arg), arg); 01477 v = r_leave(v, arg); 01478 break; 01479 01480 case TYPE_REGEXP: 01481 { 01482 volatile VALUE str = r_bytes(arg); 01483 int options = r_byte(arg); 01484 int has_encoding = FALSE; 01485 st_index_t idx = r_prepare(arg); 01486 01487 if (ivp) { 01488 r_ivar(str, &has_encoding, arg); 01489 *ivp = FALSE; 01490 } 01491 if (!has_encoding) { 01492 /* 1.8 compatibility; remove escapes undefined in 1.8 */ 01493 char *ptr = RSTRING_PTR(str), *dst = ptr, *src = ptr; 01494 long len = RSTRING_LEN(str); 01495 long bs = 0; 01496 for (; len-- > 0; *dst++ = *src++) { 01497 switch (*src) { 01498 case '\\': bs++; break; 01499 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': 01500 case 'm': case 'o': case 'p': case 'q': case 'u': case 'y': 01501 case 'E': case 'F': case 'H': case 'I': case 'J': case 'K': 01502 case 'L': case 'N': case 'O': case 'P': case 'Q': case 'R': 01503 case 'S': case 'T': case 'U': case 'V': case 'X': case 'Y': 01504 if (bs & 1) --dst; 01505 default: bs = 0; break; 01506 } 01507 } 01508 rb_str_set_len(str, dst - ptr); 01509 } 01510 v = r_entry0(rb_reg_new_str(str, options), idx, arg); 01511 v = r_leave(v, arg); 01512 } 01513 break; 01514 01515 case TYPE_ARRAY: 01516 { 01517 volatile long len = r_long(arg); /* gcc 2.7.2.3 -O2 bug?? 
*/ 01518 01519 v = rb_ary_new2(len); 01520 v = r_entry(v, arg); 01521 while (len--) { 01522 rb_ary_push(v, r_object(arg)); 01523 } 01524 v = r_leave(v, arg); 01525 } 01526 break; 01527 01528 case TYPE_HASH: 01529 case TYPE_HASH_DEF: 01530 { 01531 long len = r_long(arg); 01532 01533 v = rb_hash_new(); 01534 v = r_entry(v, arg); 01535 while (len--) { 01536 VALUE key = r_object(arg); 01537 VALUE value = r_object(arg); 01538 rb_hash_aset(v, key, value); 01539 } 01540 if (type == TYPE_HASH_DEF) { 01541 RHASH_IFNONE(v) = r_object(arg); 01542 } 01543 v = r_leave(v, arg); 01544 } 01545 break; 01546 01547 case TYPE_STRUCT: 01548 { 01549 VALUE mem, values; 01550 volatile long i; /* gcc 2.7.2.3 -O2 bug?? */ 01551 ID slot; 01552 st_index_t idx = r_prepare(arg); 01553 VALUE klass = path2class(r_unique(arg)); 01554 long len = r_long(arg); 01555 01556 v = rb_obj_alloc(klass); 01557 if (TYPE(v) != T_STRUCT) { 01558 rb_raise(rb_eTypeError, "class %s not a struct", rb_class2name(klass)); 01559 } 01560 mem = rb_struct_s_members(klass); 01561 if (RARRAY_LEN(mem) != len) { 01562 rb_raise(rb_eTypeError, "struct %s not compatible (struct size differs)", 01563 rb_class2name(klass)); 01564 } 01565 01566 v = r_entry0(v, idx, arg); 01567 values = rb_ary_new2(len); 01568 for (i=0; i<len; i++) { 01569 slot = r_symbol(arg); 01570 01571 if (RARRAY_PTR(mem)[i] != ID2SYM(slot)) { 01572 rb_raise(rb_eTypeError, "struct %s not compatible (:%s for :%s)", 01573 rb_class2name(klass), 01574 rb_id2name(slot), 01575 rb_id2name(SYM2ID(RARRAY_PTR(mem)[i]))); 01576 } 01577 rb_ary_push(values, r_object(arg)); 01578 } 01579 rb_struct_initialize(v, values); 01580 v = r_leave(v, arg); 01581 } 01582 break; 01583 01584 case TYPE_USERDEF: 01585 { 01586 VALUE klass = path2class(r_unique(arg)); 01587 VALUE data; 01588 01589 if (!rb_respond_to(klass, s_load)) { 01590 rb_raise(rb_eTypeError, "class %s needs to have method `_load'", 01591 rb_class2name(klass)); 01592 } 01593 data = r_string(arg); 01594 if (ivp) { 01595 
r_ivar(data, NULL, arg); 01596 *ivp = FALSE; 01597 } 01598 v = rb_funcall(klass, s_load, 1, data); 01599 check_load_arg(arg, s_load); 01600 v = r_entry(v, arg); 01601 v = r_leave(v, arg); 01602 } 01603 break; 01604 01605 case TYPE_USRMARSHAL: 01606 { 01607 VALUE klass = path2class(r_unique(arg)); 01608 VALUE data; 01609 01610 v = rb_obj_alloc(klass); 01611 if (!NIL_P(extmod)) { 01612 while (RARRAY_LEN(extmod) > 0) { 01613 VALUE m = rb_ary_pop(extmod); 01614 rb_extend_object(v, m); 01615 } 01616 } 01617 if (!rb_respond_to(v, s_mload)) { 01618 rb_raise(rb_eTypeError, "instance of %s needs to have method `marshal_load'", 01619 rb_class2name(klass)); 01620 } 01621 v = r_entry(v, arg); 01622 data = r_object(arg); 01623 rb_funcall(v, s_mload, 1, data); 01624 check_load_arg(arg, s_mload); 01625 v = r_leave(v, arg); 01626 } 01627 break; 01628 01629 case TYPE_OBJECT: 01630 { 01631 st_index_t idx = r_prepare(arg); 01632 v = obj_alloc_by_path(r_unique(arg), arg); 01633 if (TYPE(v) != T_OBJECT) { 01634 rb_raise(rb_eArgError, "dump format error"); 01635 } 01636 v = r_entry0(v, idx, arg); 01637 r_ivar(v, NULL, arg); 01638 v = r_leave(v, arg); 01639 } 01640 break; 01641 01642 case TYPE_DATA: 01643 { 01644 VALUE klass = path2class(r_unique(arg)); 01645 if (rb_respond_to(klass, s_alloc)) { 01646 static int warn = TRUE; 01647 if (warn) { 01648 rb_warn("define `allocate' instead of `_alloc'"); 01649 warn = FALSE; 01650 } 01651 v = rb_funcall(klass, s_alloc, 0); 01652 check_load_arg(arg, s_alloc); 01653 } 01654 else { 01655 v = rb_obj_alloc(klass); 01656 } 01657 if (TYPE(v) != T_DATA) { 01658 rb_raise(rb_eArgError, "dump format error"); 01659 } 01660 v = r_entry(v, arg); 01661 if (!rb_respond_to(v, s_load_data)) { 01662 rb_raise(rb_eTypeError, 01663 "class %s needs to have instance method `_load_data'", 01664 rb_class2name(klass)); 01665 } 01666 rb_funcall(v, s_load_data, 1, r_object0(arg, 0, extmod)); 01667 check_load_arg(arg, s_load_data); 01668 v = r_leave(v, arg); 01669 } 01670 
break; 01671 01672 case TYPE_MODULE_OLD: 01673 { 01674 volatile VALUE str = r_bytes(arg); 01675 01676 v = rb_path_to_class(str); 01677 v = r_entry(v, arg); 01678 v = r_leave(v, arg); 01679 } 01680 break; 01681 01682 case TYPE_CLASS: 01683 { 01684 volatile VALUE str = r_bytes(arg); 01685 01686 v = path2class(str); 01687 v = r_entry(v, arg); 01688 v = r_leave(v, arg); 01689 } 01690 break; 01691 01692 case TYPE_MODULE: 01693 { 01694 volatile VALUE str = r_bytes(arg); 01695 01696 v = path2module(str); 01697 v = r_entry(v, arg); 01698 v = r_leave(v, arg); 01699 } 01700 break; 01701 01702 case TYPE_SYMBOL: 01703 if (ivp) { 01704 v = ID2SYM(r_symreal(arg, *ivp)); 01705 *ivp = FALSE; 01706 } 01707 else { 01708 v = ID2SYM(r_symreal(arg, 0)); 01709 } 01710 v = r_leave(v, arg); 01711 break; 01712 01713 case TYPE_SYMLINK: 01714 v = ID2SYM(r_symlink(arg)); 01715 break; 01716 01717 default: 01718 rb_raise(rb_eArgError, "dump format error(0x%x)", type); 01719 break; 01720 } 01721 return v; 01722 } 01723 01724 static VALUE 01725 r_object(struct load_arg *arg) 01726 { 01727 return r_object0(arg, 0, Qnil); 01728 } 01729 01730 static void 01731 clear_load_arg(struct load_arg *arg) 01732 { 01733 if (!arg->symbols) return; 01734 st_free_table(arg->symbols); 01735 arg->symbols = 0; 01736 st_free_table(arg->data); 01737 arg->data = 0; 01738 st_free_table(arg->compat_tbl); 01739 arg->compat_tbl = 0; 01740 } 01741 01742 /* 01743 * call-seq: 01744 * load( source [, proc] ) -> obj 01745 * restore( source [, proc] ) -> obj 01746 * 01747 * Returns the result of converting the serialized data in source into a 01748 * Ruby object (possibly with associated subordinate objects). source 01749 * may be either an instance of IO or an object that responds to 01750 * to_str. If proc is specified, it will be passed each object as it 01751 * is deserialized. 
01752 */ 01753 static VALUE 01754 marshal_load(int argc, VALUE *argv) 01755 { 01756 VALUE port, proc; 01757 int major, minor, infection = 0; 01758 VALUE v; 01759 volatile VALUE wrapper; 01760 struct load_arg *arg; 01761 01762 rb_scan_args(argc, argv, "11", &port, &proc); 01763 v = rb_check_string_type(port); 01764 if (!NIL_P(v)) { 01765 infection = (int)FL_TEST(port, MARSHAL_INFECTION); /* original taintedness */ 01766 port = v; 01767 } 01768 else if (rb_respond_to(port, s_getbyte) && rb_respond_to(port, s_read)) { 01769 if (rb_respond_to(port, s_binmode)) { 01770 rb_funcall2(port, s_binmode, 0, 0); 01771 } 01772 infection = (int)(FL_TAINT | FL_TEST(port, FL_UNTRUSTED)); 01773 } 01774 else { 01775 rb_raise(rb_eTypeError, "instance of IO needed"); 01776 } 01777 wrapper = TypedData_Make_Struct(rb_cData, struct load_arg, &load_arg_data, arg); 01778 arg->infection = infection; 01779 arg->src = port; 01780 arg->offset = 0; 01781 arg->symbols = st_init_numtable(); 01782 arg->data = st_init_numtable(); 01783 arg->compat_tbl = st_init_numtable(); 01784 arg->proc = 0; 01785 01786 major = r_byte(arg); 01787 minor = r_byte(arg); 01788 if (major != MARSHAL_MAJOR || minor > MARSHAL_MINOR) { 01789 clear_load_arg(arg); 01790 rb_raise(rb_eTypeError, "incompatible marshal file format (can't be read)\n\ 01791 \tformat version %d.%d required; %d.%d given", 01792 MARSHAL_MAJOR, MARSHAL_MINOR, major, minor); 01793 } 01794 if (RTEST(ruby_verbose) && minor != MARSHAL_MINOR) { 01795 rb_warn("incompatible marshal file format (can be read)\n\ 01796 \tformat version %d.%d required; %d.%d given", 01797 MARSHAL_MAJOR, MARSHAL_MINOR, major, minor); 01798 } 01799 01800 if (!NIL_P(proc)) arg->proc = proc; 01801 v = r_object(arg); 01802 clear_load_arg(arg); 01803 RB_GC_GUARD(wrapper); 01804 01805 return v; 01806 } 01807 01808 /* 01809 * The marshaling library converts collections of Ruby objects into a 01810 * byte stream, allowing them to be stored outside the currently 01811 * active script. 
 * This data may subsequently be read and the original
 * objects reconstituted.
 *
 * Marshaled data has major and minor version numbers stored along
 * with the object information. In normal use, marshaling can only
 * load data written with the same major version number and an equal
 * or lower minor version number. If Ruby's ``verbose'' flag is set
 * (normally using -d, -v, -w, or --verbose) a warning is issued
 * unless the minor version number also matches exactly; the data is
 * still loaded. Marshal versioning is independent of
 * Ruby's version numbers. You can extract the version by reading the
 * first two bytes of marshaled data.
 *
 *     str = Marshal.dump("thing")
 *     RUBY_VERSION   #=> "1.9.0"
 *     str[0].ord     #=> 4
 *     str[1].ord     #=> 8
 *
 * Some objects cannot be dumped: if the objects to be dumped include
 * bindings, procedure or method objects, instances of class IO, or
 * singleton objects, a TypeError will be raised.
 *
 * If your class has special serialization needs (for example, if you
 * want to serialize in some specific format), or if it contains
 * objects that would otherwise not be serializable, you can implement
 * your own serialization strategy.
 *
 * There are two methods of doing this, your object can define either
 * marshal_dump and marshal_load or _dump and _load. marshal_dump will take
 * precedence over _dump if both are defined. marshal_dump may result in
 * smaller Marshal strings.
 *
 * == marshal_dump and marshal_load
 *
 * When dumping an object the method marshal_dump will be called.
 * marshal_dump must return a result containing the information necessary for
 * marshal_load to reconstitute the object. The result can be any object.
 *
 * When loading an object dumped using marshal_dump the object is first
 * allocated then marshal_load is called with the result from marshal_dump.
 * marshal_load must recreate the object from the information in the result.
 *
 * Example:
 *
 *   class MyObj
 *     def initialize name, version, data
 *       @name = name
 *       @version = version
 *       @data = data
 *     end
 *
 *     def marshal_dump
 *       [@name, @version]
 *     end
 *
 *     def marshal_load array
 *       @name, @version = array
 *     end
 *   end
 *
 * == _dump and _load
 *
 * Use _dump and _load when you need to allocate the object you're restoring
 * yourself.
 *
 * When dumping an object the instance method _dump is called with an Integer
 * which indicates the maximum depth of objects to dump (a value of -1 implies
 * that you should disable depth checking). _dump must return a String
 * containing the information necessary to reconstitute the object.
 *
 * The class method _load should take a String and use it to return an object
 * of the same class.
 *
 * Example:
 *
 *   class MyObj
 *     def initialize name, version, data
 *       @name = name
 *       @version = version
 *       @data = data
 *     end
 *
 *     def _dump level
 *       [@name, @version].join ':'
 *     end
 *
 *     def self._load args
 *       new(*args.split(':'))
 *     end
 *   end
 *
 * Since Marshal.dump outputs a string you can have _dump return a Marshal
 * string which is Marshal.loaded in _load for complex objects.
01903 */ 01904 void 01905 Init_marshal(void) 01906 { 01907 #undef rb_intern 01908 #define rb_intern(str) rb_intern_const(str) 01909 01910 VALUE rb_mMarshal = rb_define_module("Marshal"); 01911 01912 s_dump = rb_intern("_dump"); 01913 s_load = rb_intern("_load"); 01914 s_mdump = rb_intern("marshal_dump"); 01915 s_mload = rb_intern("marshal_load"); 01916 s_dump_data = rb_intern("_dump_data"); 01917 s_load_data = rb_intern("_load_data"); 01918 s_alloc = rb_intern("_alloc"); 01919 s_call = rb_intern("call"); 01920 s_getbyte = rb_intern("getbyte"); 01921 s_read = rb_intern("read"); 01922 s_write = rb_intern("write"); 01923 s_binmode = rb_intern("binmode"); 01924 01925 rb_define_module_function(rb_mMarshal, "dump", marshal_dump, -1); 01926 rb_define_module_function(rb_mMarshal, "load", marshal_load, -1); 01927 rb_define_module_function(rb_mMarshal, "restore", marshal_load, -1); 01928 01929 rb_define_const(rb_mMarshal, "MAJOR_VERSION", INT2FIX(MARSHAL_MAJOR)); 01930 rb_define_const(rb_mMarshal, "MINOR_VERSION", INT2FIX(MARSHAL_MINOR)); 01931 01932 compat_allocator_tbl = st_init_numtable(); 01933 compat_allocator_tbl_wrapper = 01934 Data_Wrap_Struct(rb_cData, mark_marshal_compat_t, 0, compat_allocator_tbl); 01935 rb_gc_register_mark_object(compat_allocator_tbl_wrapper); 01936 } 01937 01938 VALUE 01939 rb_marshal_dump(VALUE obj, VALUE port) 01940 { 01941 int argc = 1; 01942 VALUE argv[2]; 01943 01944 argv[0] = obj; 01945 argv[1] = port; 01946 if (!NIL_P(port)) argc = 2; 01947 return marshal_dump(argc, argv); 01948 } 01949 01950 VALUE 01951 rb_marshal_load(VALUE port) 01952 { 01953 return marshal_load(1, &port); 01954 } 01955