Ruby 1.9.3p327 (2012-11-10 revision 37606)
|
00001 /********************************************************************** 00002 00003 pack.c - 00004 00005 $Author: naruse $ 00006 created at: Thu Feb 10 15:17:05 JST 1994 00007 00008 Copyright (C) 1993-2007 Yukihiro Matsumoto 00009 00010 **********************************************************************/ 00011 00012 #include "ruby/ruby.h" 00013 #include "ruby/encoding.h" 00014 #include <sys/types.h> 00015 #include <ctype.h> 00016 #include <errno.h> 00017 00018 #define GCC_VERSION_SINCE(major, minor, patchlevel) \ 00019 (defined(__GNUC__) && !defined(__INTEL_COMPILER) && \ 00020 ((__GNUC__ > (major)) || \ 00021 (__GNUC__ == (major) && __GNUC_MINOR__ > (minor)) || \ 00022 (__GNUC__ == (major) && __GNUC_MINOR__ == (minor) && __GNUC_PATCHLEVEL__ >= (patchlevel)))) 00023 #if SIZEOF_SHORT != 2 || SIZEOF_LONG != 4 00024 # define NATINT_PACK 00025 #endif 00026 00027 #ifdef DYNAMIC_ENDIAN 00028 /* for universal binary of NEXTSTEP and MacOS X */ 00029 /* useless since autoconf 2.63? */ 00030 static int 00031 is_bigendian(void) 00032 { 00033 static int init = 0; 00034 static int endian_value; 00035 char *p; 00036 00037 if (init) return endian_value; 00038 init = 1; 00039 p = (char*)&init; 00040 return endian_value = p[0]?0:1; 00041 } 00042 # define BIGENDIAN_P() (is_bigendian()) 00043 #elif defined(WORDS_BIGENDIAN) 00044 # define BIGENDIAN_P() 1 00045 #else 00046 # define BIGENDIAN_P() 0 00047 #endif 00048 00049 #ifdef NATINT_PACK 00050 # define NATINT_LEN(type,len) (natint?(int)sizeof(type):(int)(len)) 00051 #else 00052 # define NATINT_LEN(type,len) ((int)sizeof(type)) 00053 #endif 00054 00055 #if SIZEOF_LONG == 8 00056 # define INT64toNUM(x) LONG2NUM(x) 00057 # define UINT64toNUM(x) ULONG2NUM(x) 00058 #elif defined(HAVE_LONG_LONG) && SIZEOF_LONG_LONG == 8 00059 # define INT64toNUM(x) LL2NUM(x) 00060 # define UINT64toNUM(x) ULL2NUM(x) 00061 #endif 00062 00063 #define define_swapx(x, xtype) \ 00064 static xtype \ 00065 TOKEN_PASTE(swap,x)(xtype z) \ 00066 { \ 00067 
xtype r; \ 00068 xtype *zp; \ 00069 unsigned char *s, *t; \ 00070 int i; \ 00071 \ 00072 zp = xmalloc(sizeof(xtype)); \ 00073 *zp = z; \ 00074 s = (unsigned char*)zp; \ 00075 t = xmalloc(sizeof(xtype)); \ 00076 for (i=0; i<sizeof(xtype); i++) { \ 00077 t[sizeof(xtype)-i-1] = s[i]; \ 00078 } \ 00079 r = *(xtype *)t; \ 00080 xfree(t); \ 00081 xfree(zp); \ 00082 return r; \ 00083 } 00084 00085 #if GCC_VERSION_SINCE(4,3,0) 00086 # define swap32(x) __builtin_bswap32(x) 00087 # define swap64(x) __builtin_bswap64(x) 00088 #endif 00089 00090 #ifndef swap16 00091 # define swap16(x) ((((x)&0xFF)<<8) | (((x)>>8)&0xFF)) 00092 #endif 00093 00094 #ifndef swap32 00095 # define swap32(x) ((((x)&0xFF)<<24) \ 00096 |(((x)>>24)&0xFF) \ 00097 |(((x)&0x0000FF00)<<8) \ 00098 |(((x)&0x00FF0000)>>8) ) 00099 #endif 00100 00101 #ifndef swap64 00102 # ifdef HAVE_INT64_T 00103 # define byte_in_64bit(n) ((uint64_t)0xff << (n)) 00104 # define swap64(x) ((((x)&byte_in_64bit(0))<<56) \ 00105 |(((x)>>56)&0xFF) \ 00106 |(((x)&byte_in_64bit(8))<<40) \ 00107 |(((x)&byte_in_64bit(48))>>40) \ 00108 |(((x)&byte_in_64bit(16))<<24) \ 00109 |(((x)&byte_in_64bit(40))>>24) \ 00110 |(((x)&byte_in_64bit(24))<<8) \ 00111 |(((x)&byte_in_64bit(32))>>8)) 00112 # endif 00113 #endif 00114 00115 #if SIZEOF_SHORT == 2 00116 # define swaps(x) swap16(x) 00117 #elif SIZEOF_SHORT == 4 00118 # define swaps(x) swap32(x) 00119 #else 00120 define_swapx(s,short) 00121 #endif 00122 00123 #if SIZEOF_INT == 2 00124 # define swapi(x) swap16(x) 00125 #elif SIZEOF_INT == 4 00126 # define swapi(x) swap32(x) 00127 #else 00128 define_swapx(i,int) 00129 #endif 00130 00131 #if SIZEOF_LONG == 4 00132 # define swapl(x) swap32(x) 00133 #elif SIZEOF_LONG == 8 00134 # define swapl(x) swap64(x) 00135 #else 00136 define_swapx(l,long) 00137 #endif 00138 00139 #ifdef HAVE_LONG_LONG 00140 # if SIZEOF_LONG_LONG == 8 00141 # define swapll(x) swap64(x) 00142 # else 00143 define_swapx(ll,LONG_LONG) 00144 # endif 00145 #endif 00146 00147 #if 
SIZEOF_FLOAT == 4 && defined(HAVE_INT32_T) 00148 # define swapf(x) swap32(x) 00149 # define FLOAT_SWAPPER uint32_t 00150 #else 00151 define_swapx(f,float) 00152 #endif 00153 00154 #if SIZEOF_DOUBLE == 8 && defined(HAVE_INT64_T) 00155 # define swapd(x) swap64(x) 00156 # define DOUBLE_SWAPPER uint64_t 00157 #elif SIZEOF_DOUBLE == 8 && defined(HAVE_INT32_T) 00158 static double 00159 swapd(const double d) 00160 { 00161 double dtmp = d; 00162 uint32_t utmp[2]; 00163 uint32_t utmp0; 00164 00165 utmp[0] = 0; utmp[1] = 0; 00166 memcpy(utmp,&dtmp,sizeof(double)); 00167 utmp0 = utmp[0]; 00168 utmp[0] = swap32(utmp[1]); 00169 utmp[1] = swap32(utmp0); 00170 memcpy(&dtmp,utmp,sizeof(double)); 00171 return dtmp; 00172 } 00173 #else 00174 define_swapx(d, double) 00175 #endif 00176 00177 #undef define_swapx 00178 00179 #define rb_ntohf(x) (BIGENDIAN_P()?(x):swapf(x)) 00180 #define rb_ntohd(x) (BIGENDIAN_P()?(x):swapd(x)) 00181 #define rb_htonf(x) (BIGENDIAN_P()?(x):swapf(x)) 00182 #define rb_htond(x) (BIGENDIAN_P()?(x):swapd(x)) 00183 #define rb_htovf(x) (BIGENDIAN_P()?swapf(x):(x)) 00184 #define rb_htovd(x) (BIGENDIAN_P()?swapd(x):(x)) 00185 #define rb_vtohf(x) (BIGENDIAN_P()?swapf(x):(x)) 00186 #define rb_vtohd(x) (BIGENDIAN_P()?swapd(x):(x)) 00187 00188 #ifdef FLOAT_SWAPPER 00189 # define FLOAT_CONVWITH(y) FLOAT_SWAPPER y; 00190 # define HTONF(x,y) (memcpy(&(y),&(x),sizeof(float)), \ 00191 (y) = rb_htonf((FLOAT_SWAPPER)(y)), \ 00192 memcpy(&(x),&(y),sizeof(float)), \ 00193 (x)) 00194 # define HTOVF(x,y) (memcpy(&(y),&(x),sizeof(float)), \ 00195 (y) = rb_htovf((FLOAT_SWAPPER)(y)), \ 00196 memcpy(&(x),&(y),sizeof(float)), \ 00197 (x)) 00198 # define NTOHF(x,y) (memcpy(&(y),&(x),sizeof(float)), \ 00199 (y) = rb_ntohf((FLOAT_SWAPPER)(y)), \ 00200 memcpy(&(x),&(y),sizeof(float)), \ 00201 (x)) 00202 # define VTOHF(x,y) (memcpy(&(y),&(x),sizeof(float)), \ 00203 (y) = rb_vtohf((FLOAT_SWAPPER)(y)), \ 00204 memcpy(&(x),&(y),sizeof(float)), \ 00205 (x)) 00206 #else 00207 # define 
FLOAT_CONVWITH(y) 00208 # define HTONF(x,y) rb_htonf(x) 00209 # define HTOVF(x,y) rb_htovf(x) 00210 # define NTOHF(x,y) rb_ntohf(x) 00211 # define VTOHF(x,y) rb_vtohf(x) 00212 #endif 00213 00214 #ifdef DOUBLE_SWAPPER 00215 # define DOUBLE_CONVWITH(y) DOUBLE_SWAPPER y; 00216 # define HTOND(x,y) (memcpy(&(y),&(x),sizeof(double)), \ 00217 (y) = rb_htond((DOUBLE_SWAPPER)(y)), \ 00218 memcpy(&(x),&(y),sizeof(double)), \ 00219 (x)) 00220 # define HTOVD(x,y) (memcpy(&(y),&(x),sizeof(double)), \ 00221 (y) = rb_htovd((DOUBLE_SWAPPER)(y)), \ 00222 memcpy(&(x),&(y),sizeof(double)), \ 00223 (x)) 00224 # define NTOHD(x,y) (memcpy(&(y),&(x),sizeof(double)), \ 00225 (y) = rb_ntohd((DOUBLE_SWAPPER)(y)), \ 00226 memcpy(&(x),&(y),sizeof(double)), \ 00227 (x)) 00228 # define VTOHD(x,y) (memcpy(&(y),&(x),sizeof(double)), \ 00229 (y) = rb_vtohd((DOUBLE_SWAPPER)(y)), \ 00230 memcpy(&(x),&(y),sizeof(double)), \ 00231 (x)) 00232 #else 00233 # define DOUBLE_CONVWITH(y) 00234 # define HTOND(x,y) rb_htond(x) 00235 # define HTOVD(x,y) rb_htovd(x) 00236 # define NTOHD(x,y) rb_ntohd(x) 00237 # define VTOHD(x,y) rb_vtohd(x) 00238 #endif 00239 00240 static unsigned long 00241 num2i32(VALUE x) 00242 { 00243 x = rb_to_int(x); /* is nil OK? 
(should not) */ 00244 00245 if (FIXNUM_P(x)) return FIX2LONG(x); 00246 if (TYPE(x) == T_BIGNUM) { 00247 return rb_big2ulong_pack(x); 00248 } 00249 rb_raise(rb_eTypeError, "can't convert %s to `integer'", rb_obj_classname(x)); 00250 return 0; /* not reached */ 00251 } 00252 00253 #define MAX_INTEGER_PACK_SIZE 8 00254 /* #define FORCE_BIG_PACK */ 00255 00256 static const char toofew[] = "too few arguments"; 00257 00258 static void encodes(VALUE,const char*,long,int,int); 00259 static void qpencode(VALUE,VALUE,long); 00260 00261 static unsigned long utf8_to_uv(const char*,long*); 00262 00263 /* 00264 * call-seq: 00265 * arr.pack ( aTemplateString ) -> aBinaryString 00266 * 00267 * Packs the contents of <i>arr</i> into a binary sequence according to 00268 * the directives in <i>aTemplateString</i> (see the table below) 00269 * Directives ``A,'' ``a,'' and ``Z'' may be followed by a count, 00270 * which gives the width of the resulting field. The remaining 00271 * directives also may take a count, indicating the number of array 00272 * elements to convert. If the count is an asterisk 00273 * (``<code>*</code>''), all remaining array elements will be 00274 * converted. Any of the directives ``<code>sSiIlL</code>'' may be 00275 * followed by an underscore (``<code>_</code>'') or 00276 * exclamation mark (``<code>!</code>'') to use the underlying 00277 * platform's native size for the specified type; otherwise, they use a 00278 * platform-independent size. Spaces are ignored in the template 00279 * string. See also <code>String#unpack</code>. 00280 * 00281 * a = [ "a", "b", "c" ] 00282 * n = [ 65, 66, 67 ] 00283 * a.pack("A3A3A3") #=> "a b c " 00284 * a.pack("a3a3a3") #=> "a\000\000b\000\000c\000\000" 00285 * n.pack("ccc") #=> "ABC" 00286 * 00287 * Directives for +pack+. 
00288 * 00289 * Integer | Array | 00290 * Directive | Element | Meaning 00291 * --------------------------------------------------------------------------- 00292 * C | Integer | 8-bit unsigned (unsigned char) 00293 * S | Integer | 16-bit unsigned, native endian (uint16_t) 00294 * L | Integer | 32-bit unsigned, native endian (uint32_t) 00295 * Q | Integer | 64-bit unsigned, native endian (uint64_t) 00296 * | | 00297 * c | Integer | 8-bit signed (signed char) 00298 * s | Integer | 16-bit signed, native endian (int16_t) 00299 * l | Integer | 32-bit signed, native endian (int32_t) 00300 * q | Integer | 64-bit signed, native endian (int64_t) 00301 * | | 00302 * S_, S! | Integer | unsigned short, native endian 00303 * I, I_, I! | Integer | unsigned int, native endian 00304 * L_, L! | Integer | unsigned long, native endian 00305 * | | 00306 * s_, s! | Integer | signed short, native endian 00307 * i, i_, i! | Integer | signed int, native endian 00308 * l_, l! | Integer | signed long, native endian 00309 * | | 00310 * S> L> Q> | Integer | same as the directives without ">" except 00311 * s> l> q> | | big endian 00312 * S!> I!> | | (available since Ruby 1.9.3) 00313 * L!> | | "S>" is same as "n" 00314 * s!> i!> | | "L>" is same as "N" 00315 * l!> | | 00316 * | | 00317 * S< L< Q< | Integer | same as the directives without "<" except 00318 * s< l< q< | | little endian 00319 * S!< I!< | | (available since Ruby 1.9.3) 00320 * L!< | | "S<" is same as "v" 00321 * s!< i!< | | "L<" is same as "V" 00322 * l!< | | 00323 * | | 00324 * n | Integer | 16-bit unsigned, network (big-endian) byte order 00325 * N | Integer | 32-bit unsigned, network (big-endian) byte order 00326 * v | Integer | 16-bit unsigned, VAX (little-endian) byte order 00327 * V | Integer | 32-bit unsigned, VAX (little-endian) byte order 00328 * | | 00329 * U | Integer | UTF-8 character 00330 * w | Integer | BER-compressed integer 00331 * 00332 * Float | | 00333 * Directive | | Meaning 00334 * 
--------------------------------------------------------------------------- 00335 * D, d | Float | double-precision, native format 00336 * F, f | Float | single-precision, native format 00337 * E | Float | double-precision, little-endian byte order 00338 * e | Float | single-precision, little-endian byte order 00339 * G | Float | double-precision, network (big-endian) byte order 00340 * g | Float | single-precision, network (big-endian) byte order 00341 * 00342 * String | | 00343 * Directive | | Meaning 00344 * --------------------------------------------------------------------------- 00345 * A | String | arbitrary binary string (space padded, count is width) 00346 * a | String | arbitrary binary string (null padded, count is width) 00347 * Z | String | same as ``a'', except that null is added with * 00348 * B | String | bit string (MSB first) 00349 * b | String | bit string (LSB first) 00350 * H | String | hex string (high nibble first) 00351 * h | String | hex string (low nibble first) 00352 * u | String | UU-encoded string 00353 * M | String | quoted printable, MIME encoding (see RFC2045) 00354 * m | String | base64 encoded string (see RFC 2045, count is width) 00355 * | | (if count is 0, no line feed are added, see RFC 4648) 00356 * P | String | pointer to a structure (fixed-length string) 00357 * p | String | pointer to a null-terminated string 00358 * 00359 * Misc. 
| | 00360 * Directive | | Meaning 00361 * --------------------------------------------------------------------------- 00362 * @ | --- | moves to absolute position 00363 * X | --- | back up a byte 00364 * x | --- | null byte 00365 */ 00366 00367 static VALUE 00368 pack_pack(VALUE ary, VALUE fmt) 00369 { 00370 static const char nul10[] = "\0\0\0\0\0\0\0\0\0\0"; 00371 static const char spc10[] = " "; 00372 const char *p, *pend; 00373 VALUE res, from, associates = 0; 00374 char type; 00375 long items, len, idx, plen; 00376 const char *ptr; 00377 int enc_info = 1; /* 0 - BINARY, 1 - US-ASCII, 2 - UTF-8 */ 00378 #ifdef NATINT_PACK 00379 int natint; /* native integer */ 00380 #endif 00381 int signed_p, integer_size, bigendian_p; 00382 00383 StringValue(fmt); 00384 p = RSTRING_PTR(fmt); 00385 pend = p + RSTRING_LEN(fmt); 00386 res = rb_str_buf_new(0); 00387 00388 items = RARRAY_LEN(ary); 00389 idx = 0; 00390 00391 #define TOO_FEW (rb_raise(rb_eArgError, toofew), 0) 00392 #define THISFROM (items > 0 ? RARRAY_PTR(ary)[idx] : TOO_FEW) 00393 #define NEXTFROM (items-- > 0 ? 
RARRAY_PTR(ary)[idx++] : TOO_FEW) 00394 00395 while (p < pend) { 00396 int explicit_endian = 0; 00397 if (RSTRING_PTR(fmt) + RSTRING_LEN(fmt) != pend) { 00398 rb_raise(rb_eRuntimeError, "format string modified"); 00399 } 00400 type = *p++; /* get data type */ 00401 #ifdef NATINT_PACK 00402 natint = 0; 00403 #endif 00404 00405 if (ISSPACE(type)) continue; 00406 if (type == '#') { 00407 while ((p < pend) && (*p != '\n')) { 00408 p++; 00409 } 00410 continue; 00411 } 00412 00413 { 00414 static const char natstr[] = "sSiIlL"; 00415 static const char endstr[] = "sSiIlLqQ"; 00416 00417 modifiers: 00418 switch (*p) { 00419 case '_': 00420 case '!': 00421 if (strchr(natstr, type)) { 00422 #ifdef NATINT_PACK 00423 natint = 1; 00424 #endif 00425 p++; 00426 } 00427 else { 00428 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr); 00429 } 00430 goto modifiers; 00431 00432 case '<': 00433 case '>': 00434 if (!strchr(endstr, type)) { 00435 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr); 00436 } 00437 if (explicit_endian) { 00438 rb_raise(rb_eRangeError, "Can't use both '<' and '>'"); 00439 } 00440 explicit_endian = *p++; 00441 goto modifiers; 00442 } 00443 } 00444 00445 if (*p == '*') { /* set data length */ 00446 len = strchr("@Xxu", type) ? 0 00447 : strchr("PMm", type) ? 
1 00448 : items; 00449 p++; 00450 } 00451 else if (ISDIGIT(*p)) { 00452 errno = 0; 00453 len = STRTOUL(p, (char**)&p, 10); 00454 if (errno) { 00455 rb_raise(rb_eRangeError, "pack length too big"); 00456 } 00457 } 00458 else { 00459 len = 1; 00460 } 00461 00462 switch (type) { 00463 case 'U': 00464 /* if encoding is US-ASCII, upgrade to UTF-8 */ 00465 if (enc_info == 1) enc_info = 2; 00466 break; 00467 case 'm': case 'M': case 'u': 00468 /* keep US-ASCII (do nothing) */ 00469 break; 00470 default: 00471 /* fall back to BINARY */ 00472 enc_info = 0; 00473 break; 00474 } 00475 switch (type) { 00476 case 'A': case 'a': case 'Z': 00477 case 'B': case 'b': 00478 case 'H': case 'h': 00479 from = NEXTFROM; 00480 if (NIL_P(from)) { 00481 ptr = ""; 00482 plen = 0; 00483 } 00484 else { 00485 StringValue(from); 00486 ptr = RSTRING_PTR(from); 00487 plen = RSTRING_LEN(from); 00488 OBJ_INFECT(res, from); 00489 } 00490 00491 if (p[-1] == '*') 00492 len = plen; 00493 00494 switch (type) { 00495 case 'a': /* arbitrary binary string (null padded) */ 00496 case 'A': /* arbitrary binary string (ASCII space padded) */ 00497 case 'Z': /* null terminated string */ 00498 if (plen >= len) { 00499 rb_str_buf_cat(res, ptr, len); 00500 if (p[-1] == '*' && type == 'Z') 00501 rb_str_buf_cat(res, nul10, 1); 00502 } 00503 else { 00504 rb_str_buf_cat(res, ptr, plen); 00505 len -= plen; 00506 while (len >= 10) { 00507 rb_str_buf_cat(res, (type == 'A')?spc10:nul10, 10); 00508 len -= 10; 00509 } 00510 rb_str_buf_cat(res, (type == 'A')?spc10:nul10, len); 00511 } 00512 break; 00513 00514 case 'b': /* bit string (ascending) */ 00515 { 00516 int byte = 0; 00517 long i, j = 0; 00518 00519 if (len > plen) { 00520 j = (len - plen + 1)/2; 00521 len = plen; 00522 } 00523 for (i=0; i++ < len; ptr++) { 00524 if (*ptr & 1) 00525 byte |= 128; 00526 if (i & 7) 00527 byte >>= 1; 00528 else { 00529 char c = byte & 0xff; 00530 rb_str_buf_cat(res, &c, 1); 00531 byte = 0; 00532 } 00533 } 00534 if (len & 7) { 00535 char 
c; 00536 byte >>= 7 - (len & 7); 00537 c = byte & 0xff; 00538 rb_str_buf_cat(res, &c, 1); 00539 } 00540 len = j; 00541 goto grow; 00542 } 00543 break; 00544 00545 case 'B': /* bit string (descending) */ 00546 { 00547 int byte = 0; 00548 long i, j = 0; 00549 00550 if (len > plen) { 00551 j = (len - plen + 1)/2; 00552 len = plen; 00553 } 00554 for (i=0; i++ < len; ptr++) { 00555 byte |= *ptr & 1; 00556 if (i & 7) 00557 byte <<= 1; 00558 else { 00559 char c = byte & 0xff; 00560 rb_str_buf_cat(res, &c, 1); 00561 byte = 0; 00562 } 00563 } 00564 if (len & 7) { 00565 char c; 00566 byte <<= 7 - (len & 7); 00567 c = byte & 0xff; 00568 rb_str_buf_cat(res, &c, 1); 00569 } 00570 len = j; 00571 goto grow; 00572 } 00573 break; 00574 00575 case 'h': /* hex string (low nibble first) */ 00576 { 00577 int byte = 0; 00578 long i, j = 0; 00579 00580 if (len > plen) { 00581 j = (len + 1) / 2 - (plen + 1) / 2; 00582 len = plen; 00583 } 00584 for (i=0; i++ < len; ptr++) { 00585 if (ISALPHA(*ptr)) 00586 byte |= (((*ptr & 15) + 9) & 15) << 4; 00587 else 00588 byte |= (*ptr & 15) << 4; 00589 if (i & 1) 00590 byte >>= 4; 00591 else { 00592 char c = byte & 0xff; 00593 rb_str_buf_cat(res, &c, 1); 00594 byte = 0; 00595 } 00596 } 00597 if (len & 1) { 00598 char c = byte & 0xff; 00599 rb_str_buf_cat(res, &c, 1); 00600 } 00601 len = j; 00602 goto grow; 00603 } 00604 break; 00605 00606 case 'H': /* hex string (high nibble first) */ 00607 { 00608 int byte = 0; 00609 long i, j = 0; 00610 00611 if (len > plen) { 00612 j = (len + 1) / 2 - (plen + 1) / 2; 00613 len = plen; 00614 } 00615 for (i=0; i++ < len; ptr++) { 00616 if (ISALPHA(*ptr)) 00617 byte |= ((*ptr & 15) + 9) & 15; 00618 else 00619 byte |= *ptr & 15; 00620 if (i & 1) 00621 byte <<= 4; 00622 else { 00623 char c = byte & 0xff; 00624 rb_str_buf_cat(res, &c, 1); 00625 byte = 0; 00626 } 00627 } 00628 if (len & 1) { 00629 char c = byte & 0xff; 00630 rb_str_buf_cat(res, &c, 1); 00631 } 00632 len = j; 00633 goto grow; 00634 } 00635 break; 00636 } 
00637 break; 00638 00639 case 'c': /* signed char */ 00640 case 'C': /* unsigned char */ 00641 while (len-- > 0) { 00642 char c; 00643 00644 from = NEXTFROM; 00645 c = (char)num2i32(from); 00646 rb_str_buf_cat(res, &c, sizeof(char)); 00647 } 00648 break; 00649 00650 case 's': /* signed short */ 00651 signed_p = 1; 00652 integer_size = NATINT_LEN(short, 2); 00653 bigendian_p = BIGENDIAN_P(); 00654 goto pack_integer; 00655 00656 case 'S': /* unsigned short */ 00657 signed_p = 0; 00658 integer_size = NATINT_LEN(short, 2); 00659 bigendian_p = BIGENDIAN_P(); 00660 goto pack_integer; 00661 00662 case 'i': /* signed int */ 00663 signed_p = 1; 00664 integer_size = (int)sizeof(int); 00665 bigendian_p = BIGENDIAN_P(); 00666 goto pack_integer; 00667 00668 case 'I': /* unsigned int */ 00669 signed_p = 0; 00670 integer_size = (int)sizeof(int); 00671 bigendian_p = BIGENDIAN_P(); 00672 goto pack_integer; 00673 00674 case 'l': /* signed long */ 00675 signed_p = 1; 00676 integer_size = NATINT_LEN(long, 4); 00677 bigendian_p = BIGENDIAN_P(); 00678 goto pack_integer; 00679 00680 case 'L': /* unsigned long */ 00681 signed_p = 0; 00682 integer_size = NATINT_LEN(long, 4); 00683 bigendian_p = BIGENDIAN_P(); 00684 goto pack_integer; 00685 00686 case 'q': /* signed quad (64bit) int */ 00687 signed_p = 1; 00688 integer_size = 8; 00689 bigendian_p = BIGENDIAN_P(); 00690 goto pack_integer; 00691 00692 case 'Q': /* unsigned quad (64bit) int */ 00693 signed_p = 0; 00694 integer_size = 8; 00695 bigendian_p = BIGENDIAN_P(); 00696 goto pack_integer; 00697 00698 case 'n': /* unsigned short (network byte-order) */ 00699 signed_p = 0; 00700 integer_size = 2; 00701 bigendian_p = 1; 00702 goto pack_integer; 00703 00704 case 'N': /* unsigned long (network byte-order) */ 00705 signed_p = 0; 00706 integer_size = 4; 00707 bigendian_p = 1; 00708 goto pack_integer; 00709 00710 case 'v': /* unsigned short (VAX byte-order) */ 00711 signed_p = 0; 00712 integer_size = 2; 00713 bigendian_p = 0; 00714 goto 
pack_integer; 00715 00716 case 'V': /* unsigned long (VAX byte-order) */ 00717 signed_p = 0; 00718 integer_size = 4; 00719 bigendian_p = 0; 00720 goto pack_integer; 00721 00722 pack_integer: 00723 if (explicit_endian) { 00724 bigendian_p = explicit_endian == '>'; 00725 } 00726 00727 switch (integer_size) { 00728 #if defined(HAVE_INT16_T) && !defined(FORCE_BIG_PACK) 00729 case SIZEOF_INT16_T: 00730 while (len-- > 0) { 00731 union { 00732 int16_t i; 00733 char a[sizeof(int16_t)]; 00734 } v; 00735 00736 from = NEXTFROM; 00737 v.i = (int16_t)num2i32(from); 00738 if (bigendian_p != BIGENDIAN_P()) v.i = swap16(v.i); 00739 rb_str_buf_cat(res, v.a, sizeof(int16_t)); 00740 } 00741 break; 00742 #endif 00743 00744 #if defined(HAVE_INT32_T) && !defined(FORCE_BIG_PACK) 00745 case SIZEOF_INT32_T: 00746 while (len-- > 0) { 00747 union { 00748 int32_t i; 00749 char a[sizeof(int32_t)]; 00750 } v; 00751 00752 from = NEXTFROM; 00753 v.i = (int32_t)num2i32(from); 00754 if (bigendian_p != BIGENDIAN_P()) v.i = swap32(v.i); 00755 rb_str_buf_cat(res, v.a, sizeof(int32_t)); 00756 } 00757 break; 00758 #endif 00759 00760 #if defined(HAVE_INT64_T) && SIZEOF_LONG == SIZEOF_INT64_T && !defined(FORCE_BIG_PACK) 00761 case SIZEOF_INT64_T: 00762 while (len-- > 0) { 00763 union { 00764 int64_t i; 00765 char a[sizeof(int64_t)]; 00766 } v; 00767 00768 from = NEXTFROM; 00769 v.i = num2i32(from); /* can return 64bit value if SIZEOF_LONG == SIZEOF_INT64_T */ 00770 if (bigendian_p != BIGENDIAN_P()) v.i = swap64(v.i); 00771 rb_str_buf_cat(res, v.a, sizeof(int64_t)); 00772 } 00773 break; 00774 #endif 00775 00776 default: 00777 if (integer_size > MAX_INTEGER_PACK_SIZE) 00778 rb_bug("unexpected intger size for pack: %d", integer_size); 00779 while (len-- > 0) { 00780 union { 00781 unsigned long i[(MAX_INTEGER_PACK_SIZE+SIZEOF_LONG-1)/SIZEOF_LONG]; 00782 char a[(MAX_INTEGER_PACK_SIZE+SIZEOF_LONG-1)/SIZEOF_LONG*SIZEOF_LONG]; 00783 } v; 00784 int num_longs = (integer_size+SIZEOF_LONG-1)/SIZEOF_LONG; 00785 int i; 
00786 00787 from = NEXTFROM; 00788 rb_big_pack(from, v.i, num_longs); 00789 if (bigendian_p) { 00790 for (i = 0; i < num_longs/2; i++) { 00791 unsigned long t = v.i[i]; 00792 v.i[i] = v.i[num_longs-1-i]; 00793 v.i[num_longs-1-i] = t; 00794 } 00795 } 00796 if (bigendian_p != BIGENDIAN_P()) { 00797 for (i = 0; i < num_longs; i++) 00798 v.i[i] = swapl(v.i[i]); 00799 } 00800 rb_str_buf_cat(res, 00801 bigendian_p ? 00802 v.a + sizeof(long)*num_longs - integer_size : 00803 v.a, 00804 integer_size); 00805 } 00806 break; 00807 } 00808 break; 00809 00810 case 'f': /* single precision float in native format */ 00811 case 'F': /* ditto */ 00812 while (len-- > 0) { 00813 float f; 00814 00815 from = NEXTFROM; 00816 f = (float)RFLOAT_VALUE(rb_to_float(from)); 00817 rb_str_buf_cat(res, (char*)&f, sizeof(float)); 00818 } 00819 break; 00820 00821 case 'e': /* single precision float in VAX byte-order */ 00822 while (len-- > 0) { 00823 float f; 00824 FLOAT_CONVWITH(ftmp); 00825 00826 from = NEXTFROM; 00827 f = (float)RFLOAT_VALUE(rb_to_float(from)); 00828 f = HTOVF(f,ftmp); 00829 rb_str_buf_cat(res, (char*)&f, sizeof(float)); 00830 } 00831 break; 00832 00833 case 'E': /* double precision float in VAX byte-order */ 00834 while (len-- > 0) { 00835 double d; 00836 DOUBLE_CONVWITH(dtmp); 00837 00838 from = NEXTFROM; 00839 d = RFLOAT_VALUE(rb_to_float(from)); 00840 d = HTOVD(d,dtmp); 00841 rb_str_buf_cat(res, (char*)&d, sizeof(double)); 00842 } 00843 break; 00844 00845 case 'd': /* double precision float in native format */ 00846 case 'D': /* ditto */ 00847 while (len-- > 0) { 00848 double d; 00849 00850 from = NEXTFROM; 00851 d = RFLOAT_VALUE(rb_to_float(from)); 00852 rb_str_buf_cat(res, (char*)&d, sizeof(double)); 00853 } 00854 break; 00855 00856 case 'g': /* single precision float in network byte-order */ 00857 while (len-- > 0) { 00858 float f; 00859 FLOAT_CONVWITH(ftmp); 00860 00861 from = NEXTFROM; 00862 f = (float)RFLOAT_VALUE(rb_to_float(from)); 00863 f = HTONF(f,ftmp); 00864 
rb_str_buf_cat(res, (char*)&f, sizeof(float)); 00865 } 00866 break; 00867 00868 case 'G': /* double precision float in network byte-order */ 00869 while (len-- > 0) { 00870 double d; 00871 DOUBLE_CONVWITH(dtmp); 00872 00873 from = NEXTFROM; 00874 d = RFLOAT_VALUE(rb_to_float(from)); 00875 d = HTOND(d,dtmp); 00876 rb_str_buf_cat(res, (char*)&d, sizeof(double)); 00877 } 00878 break; 00879 00880 case 'x': /* null byte */ 00881 grow: 00882 while (len >= 10) { 00883 rb_str_buf_cat(res, nul10, 10); 00884 len -= 10; 00885 } 00886 rb_str_buf_cat(res, nul10, len); 00887 break; 00888 00889 case 'X': /* back up byte */ 00890 shrink: 00891 plen = RSTRING_LEN(res); 00892 if (plen < len) 00893 rb_raise(rb_eArgError, "X outside of string"); 00894 rb_str_set_len(res, plen - len); 00895 break; 00896 00897 case '@': /* null fill to absolute position */ 00898 len -= RSTRING_LEN(res); 00899 if (len > 0) goto grow; 00900 len = -len; 00901 if (len > 0) goto shrink; 00902 break; 00903 00904 case '%': 00905 rb_raise(rb_eArgError, "%% is not supported"); 00906 break; 00907 00908 case 'U': /* Unicode character */ 00909 while (len-- > 0) { 00910 SIGNED_VALUE l; 00911 char buf[8]; 00912 int le; 00913 00914 from = NEXTFROM; 00915 from = rb_to_int(from); 00916 l = NUM2LONG(from); 00917 if (l < 0) { 00918 rb_raise(rb_eRangeError, "pack(U): value out of range"); 00919 } 00920 le = rb_uv_to_utf8(buf, l); 00921 rb_str_buf_cat(res, (char*)buf, le); 00922 } 00923 break; 00924 00925 case 'u': /* uuencoded string */ 00926 case 'm': /* base64 encoded string */ 00927 from = NEXTFROM; 00928 StringValue(from); 00929 ptr = RSTRING_PTR(from); 00930 plen = RSTRING_LEN(from); 00931 00932 if (len == 0 && type == 'm') { 00933 encodes(res, ptr, plen, type, 0); 00934 ptr += plen; 00935 break; 00936 } 00937 if (len <= 2) 00938 len = 45; 00939 else 00940 len = len / 3 * 3; 00941 while (plen > 0) { 00942 long todo; 00943 00944 if (plen > len) 00945 todo = len; 00946 else 00947 todo = plen; 00948 encodes(res, ptr, 
todo, type, 1); 00949 plen -= todo; 00950 ptr += todo; 00951 } 00952 break; 00953 00954 case 'M': /* quoted-printable encoded string */ 00955 from = rb_obj_as_string(NEXTFROM); 00956 if (len <= 1) 00957 len = 72; 00958 qpencode(res, from, len); 00959 break; 00960 00961 case 'P': /* pointer to packed byte string */ 00962 from = THISFROM; 00963 if (!NIL_P(from)) { 00964 StringValue(from); 00965 if (RSTRING_LEN(from) < len) { 00966 rb_raise(rb_eArgError, "too short buffer for P(%ld for %ld)", 00967 RSTRING_LEN(from), len); 00968 } 00969 } 00970 len = 1; 00971 /* FALL THROUGH */ 00972 case 'p': /* pointer to string */ 00973 while (len-- > 0) { 00974 char *t; 00975 from = NEXTFROM; 00976 if (NIL_P(from)) { 00977 t = 0; 00978 } 00979 else { 00980 t = StringValuePtr(from); 00981 } 00982 if (!associates) { 00983 associates = rb_ary_new(); 00984 } 00985 rb_ary_push(associates, from); 00986 rb_obj_taint(from); 00987 rb_str_buf_cat(res, (char*)&t, sizeof(char*)); 00988 } 00989 break; 00990 00991 case 'w': /* BER compressed integer */ 00992 while (len-- > 0) { 00993 unsigned long ul; 00994 VALUE buf = rb_str_new(0, 0); 00995 char c, *bufs, *bufe; 00996 00997 from = NEXTFROM; 00998 if (TYPE(from) == T_BIGNUM) { 00999 VALUE big128 = rb_uint2big(128); 01000 while (TYPE(from) == T_BIGNUM) { 01001 from = rb_big_divmod(from, big128); 01002 c = NUM2INT(RARRAY_PTR(from)[1]) | 0x80; /* mod */ 01003 rb_str_buf_cat(buf, &c, sizeof(char)); 01004 from = RARRAY_PTR(from)[0]; /* div */ 01005 } 01006 } 01007 01008 { 01009 long l = NUM2LONG(from); 01010 if (l < 0) { 01011 rb_raise(rb_eArgError, "can't compress negative numbers"); 01012 } 01013 ul = l; 01014 } 01015 01016 while (ul) { 01017 c = (char)(ul & 0x7f) | 0x80; 01018 rb_str_buf_cat(buf, &c, sizeof(char)); 01019 ul >>= 7; 01020 } 01021 01022 if (RSTRING_LEN(buf)) { 01023 bufs = RSTRING_PTR(buf); 01024 bufe = bufs + RSTRING_LEN(buf) - 1; 01025 *bufs &= 0x7f; /* clear continue bit */ 01026 while (bufs < bufe) { /* reverse */ 01027 c = 
*bufs; 01028 *bufs++ = *bufe; 01029 *bufe-- = c; 01030 } 01031 rb_str_buf_cat(res, RSTRING_PTR(buf), RSTRING_LEN(buf)); 01032 } 01033 else { 01034 c = 0; 01035 rb_str_buf_cat(res, &c, sizeof(char)); 01036 } 01037 } 01038 break; 01039 01040 default: 01041 break; 01042 } 01043 } 01044 01045 if (associates) { 01046 rb_str_associate(res, associates); 01047 } 01048 OBJ_INFECT(res, fmt); 01049 switch (enc_info) { 01050 case 1: 01051 ENCODING_CODERANGE_SET(res, rb_usascii_encindex(), ENC_CODERANGE_7BIT); 01052 break; 01053 case 2: 01054 rb_enc_set_index(res, rb_utf8_encindex()); 01055 break; 01056 default: 01057 /* do nothing, keep ASCII-8BIT */ 01058 break; 01059 } 01060 return res; 01061 } 01062 01063 static const char uu_table[] = 01064 "`!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_"; 01065 static const char b64_table[] = 01066 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; 01067 01068 static void 01069 encodes(VALUE str, const char *s, long len, int type, int tail_lf) 01070 { 01071 char buff[4096]; 01072 long i = 0; 01073 const char *trans = type == 'u' ? 
uu_table : b64_table; 01074 int padding; 01075 01076 if (type == 'u') { 01077 buff[i++] = (char)len + ' '; 01078 padding = '`'; 01079 } 01080 else { 01081 padding = '='; 01082 } 01083 while (len >= 3) { 01084 while (len >= 3 && sizeof(buff)-i >= 4) { 01085 buff[i++] = trans[077 & (*s >> 2)]; 01086 buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))]; 01087 buff[i++] = trans[077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03))]; 01088 buff[i++] = trans[077 & s[2]]; 01089 s += 3; 01090 len -= 3; 01091 } 01092 if (sizeof(buff)-i < 4) { 01093 rb_str_buf_cat(str, buff, i); 01094 i = 0; 01095 } 01096 } 01097 01098 if (len == 2) { 01099 buff[i++] = trans[077 & (*s >> 2)]; 01100 buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))]; 01101 buff[i++] = trans[077 & (((s[1] << 2) & 074) | (('\0' >> 6) & 03))]; 01102 buff[i++] = padding; 01103 } 01104 else if (len == 1) { 01105 buff[i++] = trans[077 & (*s >> 2)]; 01106 buff[i++] = trans[077 & (((*s << 4) & 060) | (('\0' >> 4) & 017))]; 01107 buff[i++] = padding; 01108 buff[i++] = padding; 01109 } 01110 if (tail_lf) buff[i++] = '\n'; 01111 rb_str_buf_cat(str, buff, i); 01112 } 01113 01114 static const char hex_table[] = "0123456789ABCDEF"; 01115 01116 static void 01117 qpencode(VALUE str, VALUE from, long len) 01118 { 01119 char buff[1024]; 01120 long i = 0, n = 0, prev = EOF; 01121 unsigned char *s = (unsigned char*)RSTRING_PTR(from); 01122 unsigned char *send = s + RSTRING_LEN(from); 01123 01124 while (s < send) { 01125 if ((*s > 126) || 01126 (*s < 32 && *s != '\n' && *s != '\t') || 01127 (*s == '=')) { 01128 buff[i++] = '='; 01129 buff[i++] = hex_table[*s >> 4]; 01130 buff[i++] = hex_table[*s & 0x0f]; 01131 n += 3; 01132 prev = EOF; 01133 } 01134 else if (*s == '\n') { 01135 if (prev == ' ' || prev == '\t') { 01136 buff[i++] = '='; 01137 buff[i++] = *s; 01138 } 01139 buff[i++] = *s; 01140 n = 0; 01141 prev = *s; 01142 } 01143 else { 01144 buff[i++] = *s; 01145 n++; 01146 prev = *s; 01147 } 01148 if (n 
> len) { 01149 buff[i++] = '='; 01150 buff[i++] = '\n'; 01151 n = 0; 01152 prev = '\n'; 01153 } 01154 if (i > 1024 - 5) { 01155 rb_str_buf_cat(str, buff, i); 01156 i = 0; 01157 } 01158 s++; 01159 } 01160 if (n > 0) { 01161 buff[i++] = '='; 01162 buff[i++] = '\n'; 01163 } 01164 if (i > 0) { 01165 rb_str_buf_cat(str, buff, i); 01166 } 01167 } 01168 01169 static inline int 01170 hex2num(char c) 01171 { 01172 switch (c) { 01173 case '0': case '1': case '2': case '3': case '4': 01174 case '5': case '6': case '7': case '8': case '9': 01175 return c - '0'; 01176 case 'a': case 'b': case 'c': 01177 case 'd': case 'e': case 'f': 01178 return c - 'a' + 10; 01179 case 'A': case 'B': case 'C': 01180 case 'D': case 'E': case 'F': 01181 return c - 'A' + 10; 01182 default: 01183 return -1; 01184 } 01185 } 01186 01187 #define PACK_LENGTH_ADJUST_SIZE(sz) do { \ 01188 tmp_len = 0; \ 01189 if (len > (long)((send-s)/(sz))) { \ 01190 if (!star) { \ 01191 tmp_len = len-(send-s)/(sz); \ 01192 } \ 01193 len = (send-s)/(sz); \ 01194 } \ 01195 } while (0) 01196 01197 #define PACK_ITEM_ADJUST() do { \ 01198 if (tmp_len > 0 && !block_p) \ 01199 rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil); \ 01200 } while (0) 01201 01202 static VALUE 01203 infected_str_new(const char *ptr, long len, VALUE str) 01204 { 01205 VALUE s = rb_str_new(ptr, len); 01206 01207 OBJ_INFECT(s, str); 01208 return s; 01209 } 01210 01211 /* 01212 * call-seq: 01213 * str.unpack(format) -> anArray 01214 * 01215 * Decodes <i>str</i> (which may contain binary data) according to the 01216 * format string, returning an array of each value extracted. The 01217 * format string consists of a sequence of single-character directives, 01218 * summarized in the table at the end of this entry. 01219 * Each directive may be followed 01220 * by a number, indicating the number of times to repeat with this 01221 * directive. An asterisk (``<code>*</code>'') will use up all 01222 * remaining elements. 
 *  The directives <code>sSiIlL</code> may each be
 *  followed by an underscore (``<code>_</code>'') or
 *  exclamation mark (``<code>!</code>'') to use the underlying
 *  platform's native size for the specified type; otherwise, it uses a
 *  platform-independent consistent size. Spaces are ignored in the
 *  format string. See also <code>Array#pack</code>.
 *
 *     "abc \0\0abc \0\0".unpack('A6Z6')   #=> ["abc", "abc "]
 *     "abc \0\0".unpack('a3a3')           #=> ["abc", " \000\000"]
 *     "abc \0abc \0".unpack('Z*Z*')       #=> ["abc ", "abc "]
 *     "aa".unpack('b8B8')                 #=> ["10000110", "01100001"]
 *     "aaa".unpack('h2H2c')               #=> ["16", "61", 97]
 *     "\xfe\xff\xfe\xff".unpack('sS')     #=> [-2, 65534]
 *     "now=20is".unpack('M*')             #=> ["now is"]
 *     "whole".unpack('xax2aX2aX1aX2a')    #=> ["h", "e", "l", "l", "o"]
 *
 *  This table summarizes the various formats and the Ruby classes
 *  returned by each.
 *
 *   Integer      |         |
 *   Directive    | Returns | Meaning
 *   -----------------------------------------------------------------
 *      C         | Integer | 8-bit unsigned (unsigned char)
 *      S         | Integer | 16-bit unsigned, native endian (uint16_t)
 *      L         | Integer | 32-bit unsigned, native endian (uint32_t)
 *      Q         | Integer | 64-bit unsigned, native endian (uint64_t)
 *                |         |
 *      c         | Integer | 8-bit signed (signed char)
 *      s         | Integer | 16-bit signed, native endian (int16_t)
 *      l         | Integer | 32-bit signed, native endian (int32_t)
 *      q         | Integer | 64-bit signed, native endian (int64_t)
 *                |         |
 *      S_, S!    | Integer | unsigned short, native endian
 *      I, I_, I! | Integer | unsigned int, native endian
 *      L_, L!    | Integer | unsigned long, native endian
 *                |         |
 *      s_, s!    | Integer | signed short, native endian
 *      i, i_, i! | Integer | signed int, native endian
 *      l_, l!    | Integer | signed long, native endian
 *                |         |
 *      S> L> Q>  | Integer | same as the directives without ">" except
 *      s> l> q>  |         | big endian
 *      S!> I!>   |         | (available since Ruby 1.9.3)
 *      L!> Q!>   |         | "S>" is same as "n"
 *      s!> i!>   |         | "L>" is same as "N"
 *      l!> q!>   |         |
 *                |         |
 *      S< L< Q<  | Integer | same as the directives without "<" except
 *      s< l< q<  |         | little endian
 *      S!< I!<   |         | (available since Ruby 1.9.3)
 *      L!< Q!<   |         | "S<" is same as "v"
 *      s!< i!<   |         | "L<" is same as "V"
 *      l!< q!<   |         |
 *                |         |
 *      n         | Integer | 16-bit unsigned, network (big-endian) byte order
 *      N         | Integer | 32-bit unsigned, network (big-endian) byte order
 *      v         | Integer | 16-bit unsigned, VAX (little-endian) byte order
 *      V         | Integer | 32-bit unsigned, VAX (little-endian) byte order
 *                |         |
 *      U         | Integer | UTF-8 character
 *      w         | Integer | BER-compressed integer (see Array#pack)
 *
 *   Float        |         |
 *   Directive    | Returns | Meaning
 *   -----------------------------------------------------------------
 *      D, d      | Float   | double-precision, native format
 *      F, f      | Float   | single-precision, native format
 *      E         | Float   | double-precision, little-endian byte order
 *      e         | Float   | single-precision, little-endian byte order
 *      G         | Float   | double-precision, network (big-endian) byte order
 *      g         | Float   | single-precision, network (big-endian) byte order
 *
 *   String       |         |
 *   Directive    | Returns | Meaning
 *   -----------------------------------------------------------------
 *      A         | String  | arbitrary binary string (remove trailing nulls and ASCII spaces)
 *      a         | String  | arbitrary binary string
 *      Z         | String  | null-terminated string
 *      B         | String  | bit string (MSB first)
 *      b         | String  | bit string (LSB first)
 *      H         | String  | hex string (high nibble first)
 *      h         | String  | hex string (low nibble first)
01304 * u | String | UU-encoded string 01305 * M | String | quoted-printable, MIME encoding (see RFC2045) 01306 * m | String | base64 encoded string (RFC 2045) (default) 01307 * | | base64 encoded string (RFC 4648) if followed by 0 01308 * P | String | pointer to a structure (fixed-length string) 01309 * p | String | pointer to a null-terminated string 01310 * 01311 * Misc. | | 01312 * Directive | Returns | Meaning 01313 * ----------------------------------------------------------------- 01314 * @ | --- | skip to the offset given by the length argument 01315 * X | --- | skip backward one byte 01316 * x | --- | skip forward one byte 01317 */ 01318 01319 static VALUE 01320 pack_unpack(VALUE str, VALUE fmt) 01321 { 01322 static const char hexdigits[] = "0123456789abcdef"; 01323 char *s, *send; 01324 char *p, *pend; 01325 VALUE ary; 01326 char type; 01327 long len, tmp_len; 01328 int star; 01329 #ifdef NATINT_PACK 01330 int natint; /* native integer */ 01331 #endif 01332 int block_p = rb_block_given_p(); 01333 int signed_p, integer_size, bigendian_p; 01334 #define UNPACK_PUSH(item) do {\ 01335 VALUE item_val = (item);\ 01336 if (block_p) {\ 01337 rb_yield(item_val);\ 01338 }\ 01339 else {\ 01340 rb_ary_push(ary, item_val);\ 01341 }\ 01342 } while (0) 01343 01344 StringValue(str); 01345 StringValue(fmt); 01346 s = RSTRING_PTR(str); 01347 send = s + RSTRING_LEN(str); 01348 p = RSTRING_PTR(fmt); 01349 pend = p + RSTRING_LEN(fmt); 01350 01351 ary = block_p ? 
Qnil : rb_ary_new(); 01352 while (p < pend) { 01353 int explicit_endian = 0; 01354 type = *p++; 01355 #ifdef NATINT_PACK 01356 natint = 0; 01357 #endif 01358 01359 if (ISSPACE(type)) continue; 01360 if (type == '#') { 01361 while ((p < pend) && (*p != '\n')) { 01362 p++; 01363 } 01364 continue; 01365 } 01366 01367 star = 0; 01368 { 01369 static const char natstr[] = "sSiIlL"; 01370 static const char endstr[] = "sSiIlLqQ"; 01371 01372 modifiers: 01373 switch (*p) { 01374 case '_': 01375 case '!': 01376 01377 if (strchr(natstr, type)) { 01378 #ifdef NATINT_PACK 01379 natint = 1; 01380 #endif 01381 p++; 01382 } 01383 else { 01384 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr); 01385 } 01386 goto modifiers; 01387 01388 case '<': 01389 case '>': 01390 if (!strchr(endstr, type)) { 01391 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr); 01392 } 01393 if (explicit_endian) { 01394 rb_raise(rb_eRangeError, "Can't use both '<' and '>'"); 01395 } 01396 explicit_endian = *p++; 01397 goto modifiers; 01398 } 01399 } 01400 01401 if (p >= pend) 01402 len = 1; 01403 else if (*p == '*') { 01404 star = 1; 01405 len = send - s; 01406 p++; 01407 } 01408 else if (ISDIGIT(*p)) { 01409 errno = 0; 01410 len = STRTOUL(p, (char**)&p, 10); 01411 if (errno) { 01412 rb_raise(rb_eRangeError, "pack length too big"); 01413 } 01414 } 01415 else { 01416 len = (type != '@'); 01417 } 01418 01419 switch (type) { 01420 case '%': 01421 rb_raise(rb_eArgError, "%% is not supported"); 01422 break; 01423 01424 case 'A': 01425 if (len > send - s) len = send - s; 01426 { 01427 long end = len; 01428 char *t = s + len - 1; 01429 01430 while (t >= s) { 01431 if (*t != ' ' && *t != '\0') break; 01432 t--; len--; 01433 } 01434 UNPACK_PUSH(infected_str_new(s, len, str)); 01435 s += end; 01436 } 01437 break; 01438 01439 case 'Z': 01440 { 01441 char *t = s; 01442 01443 if (len > send-s) len = send-s; 01444 while (t < s+len && *t) t++; 01445 UNPACK_PUSH(infected_str_new(s, 
t-s, str)); 01446 if (t < send) t++; 01447 s = star ? t : s+len; 01448 } 01449 break; 01450 01451 case 'a': 01452 if (len > send - s) len = send - s; 01453 UNPACK_PUSH(infected_str_new(s, len, str)); 01454 s += len; 01455 break; 01456 01457 case 'b': 01458 { 01459 VALUE bitstr; 01460 char *t; 01461 int bits; 01462 long i; 01463 01464 if (p[-1] == '*' || len > (send - s) * 8) 01465 len = (send - s) * 8; 01466 bits = 0; 01467 UNPACK_PUSH(bitstr = rb_str_new(0, len)); 01468 t = RSTRING_PTR(bitstr); 01469 for (i=0; i<len; i++) { 01470 if (i & 7) bits >>= 1; 01471 else bits = *s++; 01472 *t++ = (bits & 1) ? '1' : '0'; 01473 } 01474 } 01475 break; 01476 01477 case 'B': 01478 { 01479 VALUE bitstr; 01480 char *t; 01481 int bits; 01482 long i; 01483 01484 if (p[-1] == '*' || len > (send - s) * 8) 01485 len = (send - s) * 8; 01486 bits = 0; 01487 UNPACK_PUSH(bitstr = rb_str_new(0, len)); 01488 t = RSTRING_PTR(bitstr); 01489 for (i=0; i<len; i++) { 01490 if (i & 7) bits <<= 1; 01491 else bits = *s++; 01492 *t++ = (bits & 128) ? 
'1' : '0'; 01493 } 01494 } 01495 break; 01496 01497 case 'h': 01498 { 01499 VALUE bitstr; 01500 char *t; 01501 int bits; 01502 long i; 01503 01504 if (p[-1] == '*' || len > (send - s) * 2) 01505 len = (send - s) * 2; 01506 bits = 0; 01507 UNPACK_PUSH(bitstr = rb_str_new(0, len)); 01508 t = RSTRING_PTR(bitstr); 01509 for (i=0; i<len; i++) { 01510 if (i & 1) 01511 bits >>= 4; 01512 else 01513 bits = *s++; 01514 *t++ = hexdigits[bits & 15]; 01515 } 01516 } 01517 break; 01518 01519 case 'H': 01520 { 01521 VALUE bitstr; 01522 char *t; 01523 int bits; 01524 long i; 01525 01526 if (p[-1] == '*' || len > (send - s) * 2) 01527 len = (send - s) * 2; 01528 bits = 0; 01529 UNPACK_PUSH(bitstr = rb_str_new(0, len)); 01530 t = RSTRING_PTR(bitstr); 01531 for (i=0; i<len; i++) { 01532 if (i & 1) 01533 bits <<= 4; 01534 else 01535 bits = *s++; 01536 *t++ = hexdigits[(bits >> 4) & 15]; 01537 } 01538 } 01539 break; 01540 01541 case 'c': 01542 PACK_LENGTH_ADJUST_SIZE(sizeof(char)); 01543 while (len-- > 0) { 01544 int c = *s++; 01545 if (c > (char)127) c-=256; 01546 UNPACK_PUSH(INT2FIX(c)); 01547 } 01548 PACK_ITEM_ADJUST(); 01549 break; 01550 01551 case 'C': 01552 PACK_LENGTH_ADJUST_SIZE(sizeof(unsigned char)); 01553 while (len-- > 0) { 01554 unsigned char c = *s++; 01555 UNPACK_PUSH(INT2FIX(c)); 01556 } 01557 PACK_ITEM_ADJUST(); 01558 break; 01559 01560 case 's': 01561 signed_p = 1; 01562 integer_size = NATINT_LEN(short, 2); 01563 bigendian_p = BIGENDIAN_P(); 01564 goto unpack_integer; 01565 01566 case 'S': 01567 signed_p = 0; 01568 integer_size = NATINT_LEN(short, 2); 01569 bigendian_p = BIGENDIAN_P(); 01570 goto unpack_integer; 01571 01572 case 'i': 01573 signed_p = 1; 01574 integer_size = (int)sizeof(int); 01575 bigendian_p = BIGENDIAN_P(); 01576 goto unpack_integer; 01577 01578 case 'I': 01579 signed_p = 0; 01580 integer_size = (int)sizeof(int); 01581 bigendian_p = BIGENDIAN_P(); 01582 goto unpack_integer; 01583 01584 case 'l': 01585 signed_p = 1; 01586 integer_size = 
NATINT_LEN(long, 4); 01587 bigendian_p = BIGENDIAN_P(); 01588 goto unpack_integer; 01589 01590 case 'L': 01591 signed_p = 0; 01592 integer_size = NATINT_LEN(long, 4); 01593 bigendian_p = BIGENDIAN_P(); 01594 goto unpack_integer; 01595 01596 case 'q': 01597 signed_p = 1; 01598 integer_size = 8; 01599 bigendian_p = BIGENDIAN_P(); 01600 goto unpack_integer; 01601 01602 case 'Q': 01603 signed_p = 0; 01604 integer_size = 8; 01605 bigendian_p = BIGENDIAN_P(); 01606 goto unpack_integer; 01607 01608 case 'n': 01609 signed_p = 0; 01610 integer_size = 2; 01611 bigendian_p = 1; 01612 goto unpack_integer; 01613 01614 case 'N': 01615 signed_p = 0; 01616 integer_size = 4; 01617 bigendian_p = 1; 01618 goto unpack_integer; 01619 01620 case 'v': 01621 signed_p = 0; 01622 integer_size = 2; 01623 bigendian_p = 0; 01624 goto unpack_integer; 01625 01626 case 'V': 01627 signed_p = 0; 01628 integer_size = 4; 01629 bigendian_p = 0; 01630 goto unpack_integer; 01631 01632 unpack_integer: 01633 if (explicit_endian) { 01634 bigendian_p = explicit_endian == '>'; 01635 } 01636 01637 switch (integer_size) { 01638 #if defined(HAVE_INT16_T) && !defined(FORCE_BIG_PACK) 01639 case SIZEOF_INT16_T: 01640 if (signed_p) { 01641 PACK_LENGTH_ADJUST_SIZE(sizeof(int16_t)); 01642 while (len-- > 0) { 01643 union { 01644 int16_t i; 01645 char a[sizeof(int16_t)]; 01646 } v; 01647 memcpy(v.a, s, sizeof(int16_t)); 01648 if (bigendian_p != BIGENDIAN_P()) v.i = swap16(v.i); 01649 s += sizeof(int16_t); 01650 UNPACK_PUSH(INT2FIX(v.i)); 01651 } 01652 PACK_ITEM_ADJUST(); 01653 } 01654 else { 01655 PACK_LENGTH_ADJUST_SIZE(sizeof(uint16_t)); 01656 while (len-- > 0) { 01657 union { 01658 uint16_t i; 01659 char a[sizeof(uint16_t)]; 01660 } v; 01661 memcpy(v.a, s, sizeof(uint16_t)); 01662 if (bigendian_p != BIGENDIAN_P()) v.i = swap16(v.i); 01663 s += sizeof(uint16_t); 01664 UNPACK_PUSH(INT2FIX(v.i)); 01665 } 01666 PACK_ITEM_ADJUST(); 01667 } 01668 break; 01669 #endif 01670 01671 #if defined(HAVE_INT32_T) && 
!defined(FORCE_BIG_PACK) 01672 case SIZEOF_INT32_T: 01673 if (signed_p) { 01674 PACK_LENGTH_ADJUST_SIZE(sizeof(int32_t)); 01675 while (len-- > 0) { 01676 union { 01677 int32_t i; 01678 char a[sizeof(int32_t)]; 01679 } v; 01680 memcpy(v.a, s, sizeof(int32_t)); 01681 if (bigendian_p != BIGENDIAN_P()) v.i = swap32(v.i); 01682 s += sizeof(int32_t); 01683 UNPACK_PUSH(INT2NUM(v.i)); 01684 } 01685 PACK_ITEM_ADJUST(); 01686 } 01687 else { 01688 PACK_LENGTH_ADJUST_SIZE(sizeof(uint32_t)); 01689 while (len-- > 0) { 01690 union { 01691 uint32_t i; 01692 char a[sizeof(uint32_t)]; 01693 } v; 01694 memcpy(v.a, s, sizeof(uint32_t)); 01695 if (bigendian_p != BIGENDIAN_P()) v.i = swap32(v.i); 01696 s += sizeof(uint32_t); 01697 UNPACK_PUSH(UINT2NUM(v.i)); 01698 } 01699 PACK_ITEM_ADJUST(); 01700 } 01701 break; 01702 #endif 01703 01704 #if defined(HAVE_INT64_T) && !defined(FORCE_BIG_PACK) 01705 case SIZEOF_INT64_T: 01706 if (signed_p) { 01707 PACK_LENGTH_ADJUST_SIZE(sizeof(int64_t)); 01708 while (len-- > 0) { 01709 union { 01710 int64_t i; 01711 char a[sizeof(int64_t)]; 01712 } v; 01713 memcpy(v.a, s, sizeof(int64_t)); 01714 if (bigendian_p != BIGENDIAN_P()) v.i = swap64(v.i); 01715 s += sizeof(int64_t); 01716 UNPACK_PUSH(INT64toNUM(v.i)); 01717 } 01718 PACK_ITEM_ADJUST(); 01719 } 01720 else { 01721 PACK_LENGTH_ADJUST_SIZE(sizeof(uint64_t)); 01722 while (len-- > 0) { 01723 union { 01724 uint64_t i; 01725 char a[sizeof(uint64_t)]; 01726 } v; 01727 memcpy(v.a, s, sizeof(uint64_t)); 01728 if (bigendian_p != BIGENDIAN_P()) v.i = swap64(v.i); 01729 s += sizeof(uint64_t); 01730 UNPACK_PUSH(UINT64toNUM(v.i)); 01731 } 01732 PACK_ITEM_ADJUST(); 01733 } 01734 break; 01735 #endif 01736 01737 default: 01738 if (integer_size > MAX_INTEGER_PACK_SIZE) 01739 rb_bug("unexpected intger size for pack: %d", integer_size); 01740 PACK_LENGTH_ADJUST_SIZE(integer_size); 01741 while (len-- > 0) { 01742 union { 01743 unsigned long i[(MAX_INTEGER_PACK_SIZE+SIZEOF_LONG)/SIZEOF_LONG]; 01744 char 
a[(MAX_INTEGER_PACK_SIZE+SIZEOF_LONG)/SIZEOF_LONG*SIZEOF_LONG]; 01745 } v; 01746 int num_longs = (integer_size+SIZEOF_LONG)/SIZEOF_LONG; 01747 int i; 01748 01749 if (signed_p && (signed char)s[bigendian_p ? 0 : (integer_size-1)] < 0) 01750 memset(v.a, 0xff, sizeof(long)*num_longs); 01751 else 01752 memset(v.a, 0, sizeof(long)*num_longs); 01753 if (bigendian_p) 01754 memcpy(v.a + sizeof(long)*num_longs - integer_size, s, integer_size); 01755 else 01756 memcpy(v.a, s, integer_size); 01757 if (bigendian_p) { 01758 for (i = 0; i < num_longs/2; i++) { 01759 unsigned long t = v.i[i]; 01760 v.i[i] = v.i[num_longs-1-i]; 01761 v.i[num_longs-1-i] = t; 01762 } 01763 } 01764 if (bigendian_p != BIGENDIAN_P()) { 01765 for (i = 0; i < num_longs; i++) 01766 v.i[i] = swapl(v.i[i]); 01767 } 01768 s += integer_size; 01769 UNPACK_PUSH(rb_big_unpack(v.i, num_longs)); 01770 } 01771 PACK_ITEM_ADJUST(); 01772 break; 01773 } 01774 break; 01775 01776 case 'f': 01777 case 'F': 01778 PACK_LENGTH_ADJUST_SIZE(sizeof(float)); 01779 while (len-- > 0) { 01780 float tmp; 01781 memcpy(&tmp, s, sizeof(float)); 01782 s += sizeof(float); 01783 UNPACK_PUSH(DBL2NUM((double)tmp)); 01784 } 01785 PACK_ITEM_ADJUST(); 01786 break; 01787 01788 case 'e': 01789 PACK_LENGTH_ADJUST_SIZE(sizeof(float)); 01790 while (len-- > 0) { 01791 float tmp; 01792 FLOAT_CONVWITH(ftmp); 01793 01794 memcpy(&tmp, s, sizeof(float)); 01795 s += sizeof(float); 01796 tmp = VTOHF(tmp,ftmp); 01797 UNPACK_PUSH(DBL2NUM((double)tmp)); 01798 } 01799 PACK_ITEM_ADJUST(); 01800 break; 01801 01802 case 'E': 01803 PACK_LENGTH_ADJUST_SIZE(sizeof(double)); 01804 while (len-- > 0) { 01805 double tmp; 01806 DOUBLE_CONVWITH(dtmp); 01807 01808 memcpy(&tmp, s, sizeof(double)); 01809 s += sizeof(double); 01810 tmp = VTOHD(tmp,dtmp); 01811 UNPACK_PUSH(DBL2NUM(tmp)); 01812 } 01813 PACK_ITEM_ADJUST(); 01814 break; 01815 01816 case 'D': 01817 case 'd': 01818 PACK_LENGTH_ADJUST_SIZE(sizeof(double)); 01819 while (len-- > 0) { 01820 double tmp; 01821 
memcpy(&tmp, s, sizeof(double)); 01822 s += sizeof(double); 01823 UNPACK_PUSH(DBL2NUM(tmp)); 01824 } 01825 PACK_ITEM_ADJUST(); 01826 break; 01827 01828 case 'g': 01829 PACK_LENGTH_ADJUST_SIZE(sizeof(float)); 01830 while (len-- > 0) { 01831 float tmp; 01832 FLOAT_CONVWITH(ftmp); 01833 01834 memcpy(&tmp, s, sizeof(float)); 01835 s += sizeof(float); 01836 tmp = NTOHF(tmp,ftmp); 01837 UNPACK_PUSH(DBL2NUM((double)tmp)); 01838 } 01839 PACK_ITEM_ADJUST(); 01840 break; 01841 01842 case 'G': 01843 PACK_LENGTH_ADJUST_SIZE(sizeof(double)); 01844 while (len-- > 0) { 01845 double tmp; 01846 DOUBLE_CONVWITH(dtmp); 01847 01848 memcpy(&tmp, s, sizeof(double)); 01849 s += sizeof(double); 01850 tmp = NTOHD(tmp,dtmp); 01851 UNPACK_PUSH(DBL2NUM(tmp)); 01852 } 01853 PACK_ITEM_ADJUST(); 01854 break; 01855 01856 case 'U': 01857 if (len > send - s) len = send - s; 01858 while (len > 0 && s < send) { 01859 long alen = send - s; 01860 unsigned long l; 01861 01862 l = utf8_to_uv(s, &alen); 01863 s += alen; len--; 01864 UNPACK_PUSH(ULONG2NUM(l)); 01865 } 01866 break; 01867 01868 case 'u': 01869 { 01870 VALUE buf = infected_str_new(0, (send - s)*3/4, str); 01871 char *ptr = RSTRING_PTR(buf); 01872 long total = 0; 01873 01874 while (s < send && *s > ' ' && *s < 'a') { 01875 long a,b,c,d; 01876 char hunk[4]; 01877 01878 hunk[3] = '\0'; 01879 len = (*s++ - ' ') & 077; 01880 total += len; 01881 if (total > RSTRING_LEN(buf)) { 01882 len -= total - RSTRING_LEN(buf); 01883 total = RSTRING_LEN(buf); 01884 } 01885 01886 while (len > 0) { 01887 long mlen = len > 3 ? 
3 : len; 01888 01889 if (s < send && *s >= ' ') 01890 a = (*s++ - ' ') & 077; 01891 else 01892 a = 0; 01893 if (s < send && *s >= ' ') 01894 b = (*s++ - ' ') & 077; 01895 else 01896 b = 0; 01897 if (s < send && *s >= ' ') 01898 c = (*s++ - ' ') & 077; 01899 else 01900 c = 0; 01901 if (s < send && *s >= ' ') 01902 d = (*s++ - ' ') & 077; 01903 else 01904 d = 0; 01905 hunk[0] = (char)(a << 2 | b >> 4); 01906 hunk[1] = (char)(b << 4 | c >> 2); 01907 hunk[2] = (char)(c << 6 | d); 01908 memcpy(ptr, hunk, mlen); 01909 ptr += mlen; 01910 len -= mlen; 01911 } 01912 if (*s == '\r') s++; 01913 if (*s == '\n') s++; 01914 else if (s < send && (s+1 == send || s[1] == '\n')) 01915 s += 2; /* possible checksum byte */ 01916 } 01917 01918 rb_str_set_len(buf, total); 01919 UNPACK_PUSH(buf); 01920 } 01921 break; 01922 01923 case 'm': 01924 { 01925 VALUE buf = infected_str_new(0, (send - s)*3/4, str); 01926 char *ptr = RSTRING_PTR(buf); 01927 int a = -1,b = -1,c = 0,d = 0; 01928 static signed char b64_xtable[256]; 01929 01930 if (b64_xtable['/'] <= 0) { 01931 int i; 01932 01933 for (i = 0; i < 256; i++) { 01934 b64_xtable[i] = -1; 01935 } 01936 for (i = 0; i < 64; i++) { 01937 b64_xtable[(unsigned char)b64_table[i]] = i; 01938 } 01939 } 01940 if (len == 0) { 01941 while (s < send) { 01942 a = b = c = d = -1; 01943 a = b64_xtable[(unsigned char)*s++]; 01944 if (s >= send || a == -1) rb_raise(rb_eArgError, "invalid base64"); 01945 b = b64_xtable[(unsigned char)*s++]; 01946 if (s >= send || b == -1) rb_raise(rb_eArgError, "invalid base64"); 01947 if (*s == '=') { 01948 if (s + 2 == send && *(s + 1) == '=') break; 01949 rb_raise(rb_eArgError, "invalid base64"); 01950 } 01951 c = b64_xtable[(unsigned char)*s++]; 01952 if (s >= send || c == -1) rb_raise(rb_eArgError, "invalid base64"); 01953 if (s + 1 == send && *s == '=') break; 01954 d = b64_xtable[(unsigned char)*s++]; 01955 if (d == -1) rb_raise(rb_eArgError, "invalid base64"); 01956 *ptr++ = a << 2 | b >> 4; 01957 *ptr++ = b << 4 | c 
>> 2; 01958 *ptr++ = c << 6 | d; 01959 } 01960 if (c == -1) { 01961 *ptr++ = a << 2 | b >> 4; 01962 if (b & 0xf) rb_raise(rb_eArgError, "invalid base64"); 01963 } 01964 else if (d == -1) { 01965 *ptr++ = a << 2 | b >> 4; 01966 *ptr++ = b << 4 | c >> 2; 01967 if (c & 0x3) rb_raise(rb_eArgError, "invalid base64"); 01968 } 01969 } 01970 else { 01971 while (s < send) { 01972 a = b = c = d = -1; 01973 while ((a = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;} 01974 if (s >= send) break; 01975 s++; 01976 while ((b = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;} 01977 if (s >= send) break; 01978 s++; 01979 while ((c = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;} 01980 if (*s == '=' || s >= send) break; 01981 s++; 01982 while ((d = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;} 01983 if (*s == '=' || s >= send) break; 01984 s++; 01985 *ptr++ = a << 2 | b >> 4; 01986 *ptr++ = b << 4 | c >> 2; 01987 *ptr++ = c << 6 | d; 01988 } 01989 if (a != -1 && b != -1) { 01990 if (c == -1 && *s == '=') 01991 *ptr++ = a << 2 | b >> 4; 01992 else if (c != -1 && *s == '=') { 01993 *ptr++ = a << 2 | b >> 4; 01994 *ptr++ = b << 4 | c >> 2; 01995 } 01996 } 01997 } 01998 rb_str_set_len(buf, ptr - RSTRING_PTR(buf)); 01999 UNPACK_PUSH(buf); 02000 } 02001 break; 02002 02003 case 'M': 02004 { 02005 VALUE buf = infected_str_new(0, send - s, str); 02006 char *ptr = RSTRING_PTR(buf), *ss = s; 02007 int c1, c2; 02008 02009 while (s < send) { 02010 if (*s == '=') { 02011 if (++s == send) break; 02012 if (s+1 < send && *s == '\r' && *(s+1) == '\n') 02013 s++; 02014 if (*s != '\n') { 02015 if ((c1 = hex2num(*s)) == -1) break; 02016 if (++s == send) break; 02017 if ((c2 = hex2num(*s)) == -1) break; 02018 *ptr++ = c1 << 4 | c2; 02019 } 02020 } 02021 else { 02022 *ptr++ = *s; 02023 } 02024 s++; 02025 ss = s; 02026 } 02027 rb_str_set_len(buf, ptr - RSTRING_PTR(buf)); 02028 rb_str_buf_cat(buf, ss, send-ss); 02029 
ENCODING_CODERANGE_SET(buf, rb_ascii8bit_encindex(), ENC_CODERANGE_VALID); 02030 UNPACK_PUSH(buf); 02031 } 02032 break; 02033 02034 case '@': 02035 if (len > RSTRING_LEN(str)) 02036 rb_raise(rb_eArgError, "@ outside of string"); 02037 s = RSTRING_PTR(str) + len; 02038 break; 02039 02040 case 'X': 02041 if (len > s - RSTRING_PTR(str)) 02042 rb_raise(rb_eArgError, "X outside of string"); 02043 s -= len; 02044 break; 02045 02046 case 'x': 02047 if (len > send - s) 02048 rb_raise(rb_eArgError, "x outside of string"); 02049 s += len; 02050 break; 02051 02052 case 'P': 02053 if (sizeof(char *) <= (size_t)(send - s)) { 02054 VALUE tmp = Qnil; 02055 char *t; 02056 02057 memcpy(&t, s, sizeof(char *)); 02058 s += sizeof(char *); 02059 02060 if (t) { 02061 VALUE a, *p, *pend; 02062 02063 if (!(a = rb_str_associated(str))) { 02064 rb_raise(rb_eArgError, "no associated pointer"); 02065 } 02066 p = RARRAY_PTR(a); 02067 pend = p + RARRAY_LEN(a); 02068 while (p < pend) { 02069 if (TYPE(*p) == T_STRING && RSTRING_PTR(*p) == t) { 02070 if (len < RSTRING_LEN(*p)) { 02071 tmp = rb_tainted_str_new(t, len); 02072 rb_str_associate(tmp, a); 02073 } 02074 else { 02075 tmp = *p; 02076 } 02077 break; 02078 } 02079 p++; 02080 } 02081 if (p == pend) { 02082 rb_raise(rb_eArgError, "non associated pointer"); 02083 } 02084 } 02085 UNPACK_PUSH(tmp); 02086 } 02087 break; 02088 02089 case 'p': 02090 if (len > (long)((send - s) / sizeof(char *))) 02091 len = (send - s) / sizeof(char *); 02092 while (len-- > 0) { 02093 if ((size_t)(send - s) < sizeof(char *)) 02094 break; 02095 else { 02096 VALUE tmp = Qnil; 02097 char *t; 02098 02099 memcpy(&t, s, sizeof(char *)); 02100 s += sizeof(char *); 02101 02102 if (t) { 02103 VALUE a, *p, *pend; 02104 02105 if (!(a = rb_str_associated(str))) { 02106 rb_raise(rb_eArgError, "no associated pointer"); 02107 } 02108 p = RARRAY_PTR(a); 02109 pend = p + RARRAY_LEN(a); 02110 while (p < pend) { 02111 if (TYPE(*p) == T_STRING && RSTRING_PTR(*p) == t) { 02112 tmp = *p; 
02113 break; 02114 } 02115 p++; 02116 } 02117 if (p == pend) { 02118 rb_raise(rb_eArgError, "non associated pointer"); 02119 } 02120 } 02121 UNPACK_PUSH(tmp); 02122 } 02123 } 02124 break; 02125 02126 case 'w': 02127 { 02128 unsigned long ul = 0; 02129 unsigned long ulmask = 0xfeUL << ((sizeof(unsigned long) - 1) * 8); 02130 02131 while (len > 0 && s < send) { 02132 ul <<= 7; 02133 ul |= (*s & 0x7f); 02134 if (!(*s++ & 0x80)) { 02135 UNPACK_PUSH(ULONG2NUM(ul)); 02136 len--; 02137 ul = 0; 02138 } 02139 else if (ul & ulmask) { 02140 VALUE big = rb_uint2big(ul); 02141 VALUE big128 = rb_uint2big(128); 02142 while (s < send) { 02143 big = rb_big_mul(big, big128); 02144 big = rb_big_plus(big, rb_uint2big(*s & 0x7f)); 02145 if (!(*s++ & 0x80)) { 02146 UNPACK_PUSH(big); 02147 len--; 02148 ul = 0; 02149 break; 02150 } 02151 } 02152 } 02153 } 02154 } 02155 break; 02156 02157 default: 02158 break; 02159 } 02160 } 02161 02162 return ary; 02163 } 02164 02165 #define BYTEWIDTH 8 02166 02167 int 02168 rb_uv_to_utf8(char buf[6], unsigned long uv) 02169 { 02170 if (uv <= 0x7f) { 02171 buf[0] = (char)uv; 02172 return 1; 02173 } 02174 if (uv <= 0x7ff) { 02175 buf[0] = (char)((uv>>6)&0xff)|0xc0; 02176 buf[1] = (char)(uv&0x3f)|0x80; 02177 return 2; 02178 } 02179 if (uv <= 0xffff) { 02180 buf[0] = (char)((uv>>12)&0xff)|0xe0; 02181 buf[1] = (char)((uv>>6)&0x3f)|0x80; 02182 buf[2] = (char)(uv&0x3f)|0x80; 02183 return 3; 02184 } 02185 if (uv <= 0x1fffff) { 02186 buf[0] = (char)((uv>>18)&0xff)|0xf0; 02187 buf[1] = (char)((uv>>12)&0x3f)|0x80; 02188 buf[2] = (char)((uv>>6)&0x3f)|0x80; 02189 buf[3] = (char)(uv&0x3f)|0x80; 02190 return 4; 02191 } 02192 if (uv <= 0x3ffffff) { 02193 buf[0] = (char)((uv>>24)&0xff)|0xf8; 02194 buf[1] = (char)((uv>>18)&0x3f)|0x80; 02195 buf[2] = (char)((uv>>12)&0x3f)|0x80; 02196 buf[3] = (char)((uv>>6)&0x3f)|0x80; 02197 buf[4] = (char)(uv&0x3f)|0x80; 02198 return 5; 02199 } 02200 if (uv <= 0x7fffffff) { 02201 buf[0] = (char)((uv>>30)&0xff)|0xfc; 02202 buf[1] = 
(char)((uv>>24)&0x3f)|0x80; 02203 buf[2] = (char)((uv>>18)&0x3f)|0x80; 02204 buf[3] = (char)((uv>>12)&0x3f)|0x80; 02205 buf[4] = (char)((uv>>6)&0x3f)|0x80; 02206 buf[5] = (char)(uv&0x3f)|0x80; 02207 return 6; 02208 } 02209 rb_raise(rb_eRangeError, "pack(U): value out of range"); 02210 } 02211 02212 static const unsigned long utf8_limits[] = { 02213 0x0, /* 1 */ 02214 0x80, /* 2 */ 02215 0x800, /* 3 */ 02216 0x10000, /* 4 */ 02217 0x200000, /* 5 */ 02218 0x4000000, /* 6 */ 02219 0x80000000, /* 7 */ 02220 }; 02221 02222 static unsigned long 02223 utf8_to_uv(const char *p, long *lenp) 02224 { 02225 int c = *p++ & 0xff; 02226 unsigned long uv = c; 02227 long n; 02228 02229 if (!(uv & 0x80)) { 02230 *lenp = 1; 02231 return uv; 02232 } 02233 if (!(uv & 0x40)) { 02234 *lenp = 1; 02235 rb_raise(rb_eArgError, "malformed UTF-8 character"); 02236 } 02237 02238 if (!(uv & 0x20)) { n = 2; uv &= 0x1f; } 02239 else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; } 02240 else if (!(uv & 0x08)) { n = 4; uv &= 0x07; } 02241 else if (!(uv & 0x04)) { n = 5; uv &= 0x03; } 02242 else if (!(uv & 0x02)) { n = 6; uv &= 0x01; } 02243 else { 02244 *lenp = 1; 02245 rb_raise(rb_eArgError, "malformed UTF-8 character"); 02246 } 02247 if (n > *lenp) { 02248 rb_raise(rb_eArgError, "malformed UTF-8 character (expected %ld bytes, given %ld bytes)", 02249 n, *lenp); 02250 } 02251 *lenp = n--; 02252 if (n != 0) { 02253 while (n--) { 02254 c = *p++ & 0xff; 02255 if ((c & 0xc0) != 0x80) { 02256 *lenp -= n + 1; 02257 rb_raise(rb_eArgError, "malformed UTF-8 character"); 02258 } 02259 else { 02260 c &= 0x3f; 02261 uv = uv << 6 | c; 02262 } 02263 } 02264 } 02265 n = *lenp - 1; 02266 if (uv < utf8_limits[n]) { 02267 rb_raise(rb_eArgError, "redundant UTF-8 sequence"); 02268 } 02269 return uv; 02270 } 02271 02272 void 02273 Init_pack(void) 02274 { 02275 rb_define_method(rb_cArray, "pack", pack_pack, 1); 02276 rb_define_method(rb_cString, "unpack", pack_unpack, 1); 02277 } 02278