Ruby 1.9.3p327(2012-11-10revision37606)
|
00001 /* 00002 date_strptime.c: Coded by Tadayoshi Funaba 2011,2012 00003 */ 00004 00005 #include "ruby.h" 00006 #include "ruby/encoding.h" 00007 #include "ruby/re.h" 00008 #include <ctype.h> 00009 00010 static const char *day_names[] = { 00011 "Sunday", "Monday", "Tuesday", "Wednesday", 00012 "Thursday", "Friday", "Saturday", 00013 "Sun", "Mon", "Tue", "Wed", 00014 "Thu", "Fri", "Sat" 00015 }; 00016 00017 static const char *month_names[] = { 00018 "January", "February", "March", "April", 00019 "May", "June", "July", "August", "September", 00020 "October", "November", "December", 00021 "Jan", "Feb", "Mar", "Apr", "May", "Jun", 00022 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" 00023 }; 00024 00025 static const char *merid_names[] = { 00026 "am", "pm", 00027 "a.m.", "p.m." 00028 }; 00029 00030 static const char *extz_pats[] = { 00031 ":z", 00032 "::z", 00033 ":::z" 00034 }; 00035 00036 #define sizeof_array(o) (sizeof o / sizeof o[0]) 00037 00038 #define f_negate(x) rb_funcall(x, rb_intern("-@"), 0) 00039 #define f_add(x,y) rb_funcall(x, '+', 1, y) 00040 #define f_sub(x,y) rb_funcall(x, '-', 1, y) 00041 #define f_mul(x,y) rb_funcall(x, '*', 1, y) 00042 #define f_div(x,y) rb_funcall(x, '/', 1, y) 00043 #define f_idiv(x,y) rb_funcall(x, rb_intern("div"), 1, y) 00044 #define f_mod(x,y) rb_funcall(x, '%', 1, y) 00045 #define f_expt(x,y) rb_funcall(x, rb_intern("**"), 1, y) 00046 00047 #define f_lt_p(x,y) rb_funcall(x, '<', 1, y) 00048 #define f_gt_p(x,y) rb_funcall(x, '>', 1, y) 00049 #define f_le_p(x,y) rb_funcall(x, rb_intern("<="), 1, y) 00050 #define f_ge_p(x,y) rb_funcall(x, rb_intern(">="), 1, y) 00051 00052 #define f_match(r,s) rb_funcall(r, rb_intern("match"), 1, s) 00053 #define f_aref(o,i) rb_funcall(o, rb_intern("[]"), 1, i) 00054 #define f_end(o,i) rb_funcall(o, rb_intern("end"), 1, i) 00055 00056 #define issign(c) ((c) == '-' || (c) == '+') 00057 00058 static int 00059 num_pattern_p(const char *s) 00060 { 00061 if (isdigit(*s)) 00062 return 1; 00063 if (*s == '%') { 00064 s++; 00065 if (*s == 'E' || *s == 'O') 00066 s++; 00067 if (*s && 00068 (strchr("CDdeFGgHIjkLlMmNQRrSsTUuVvWwXxYy", *s) || isdigit(*s))) 00069 return 1; 00070 } 00071 return 0; 00072 } 00073 00074 #define NUM_PATTERN_P() num_pattern_p(&fmt[fi + 1]) 00075 00076 static long 00077 read_digits(const char *s, VALUE *n, size_t width) 00078 { 00079 size_t l; 00080 00081 l = strspn(s, "0123456789"); 00082 00083 if (l == 0) 00084 return 0; 00085 00086 if (width < l) 00087 l = width; 00088 00089 if ((4 * l * sizeof(char)) <= (sizeof(long)*CHAR_BIT)) { 00090 const char *os = s; 00091 long v; 00092 00093 v = 0; 00094 while ((size_t)(s - os) < l) { 00095 v *= 10; 00096 v += *s - '0'; 00097 s++; 00098 } 00099 if (os == s) 00100 return 0; 00101 *n = LONG2NUM(v); 00102 return l; 00103 } 00104 else { 00105 char *s2 = ALLOCA_N(char, l + 1); 00106 memcpy(s2, s, l); 00107 s2[l] = '\0'; 00108 *n = rb_cstr_to_inum(s2, 10, 0); 00109 return l; 00110 } 00111 } 00112 00113 #define set_hash(k,v) rb_hash_aset(hash, ID2SYM(rb_intern(k)), v) 00114 #define ref_hash(k) rb_hash_aref(hash, ID2SYM(rb_intern(k))) 00115 #define del_hash(k) rb_hash_delete(hash, ID2SYM(rb_intern(k))) 00116 00117 #define fail() \ 00118 { \ 00119 set_hash("_fail", Qtrue); \ 00120 return 0; \ 00121 } 00122 00123 #define fail_p() (!NIL_P(ref_hash("_fail"))) 00124 00125 #define READ_DIGITS(n,w) \ 00126 { \ 00127 size_t l; \ 00128 l = read_digits(&str[si], &n, w); \ 00129 if (l == 0) \ 00130 fail(); \ 00131 si += l; \ 00132 } 00133 00134 #define READ_DIGITS_MAX(n) READ_DIGITS(n, LONG_MAX) 00135 00136 static int 00137 valid_range_p(VALUE v, int a, int b) 00138 { 00139 if (FIXNUM_P(v)) { 00140 int vi = FIX2INT(v); 00141 return !(vi < a || vi > b); 00142 } 00143 return !(f_lt_p(v, INT2NUM(a)) || f_gt_p(v, INT2NUM(b))); 00144 } 00145 00146 #define recur(fmt) \ 00147 { \ 00148 size_t l; \ 00149 l = date__strptime_internal(&str[si], slen - si, \ 00150 fmt, sizeof fmt - 1, hash); \ 00151 if (fail_p()) \ 00152 return 0; \ 00153 si += l; \ 00154 } 00155 00156 VALUE date_zone_to_diff(VALUE); 00157 00158 static size_t 00159 date__strptime_internal(const char *str, size_t slen, 00160 const char *fmt, size_t flen, VALUE hash) 00161 { 00162 size_t si, fi; 00163 int c; 00164 00165 si = fi = 0; 00166 00167 while (fi < flen) { 00168 00169 switch (fmt[fi]) { 00170 case '%': 00171 00172 again: 00173 fi++; 00174 c = fmt[fi]; 00175 00176 switch (c) { 00177 case 'E': 00178 if (fmt[fi + 1] && strchr("cCxXyY", fmt[fi + 1])) 00179 goto again; 00180 fi--; 00181 goto ordinal; 00182 case 'O': 00183 if (fmt[fi + 1] && strchr("deHImMSuUVwWy", fmt[fi + 1])) 00184 goto again; 00185 fi--; 00186 goto ordinal; 00187 case ':': 00188 { 00189 int i; 00190 00191 for (i = 0; i < (int)sizeof_array(extz_pats); i++) 00192 if (strncmp(extz_pats[i], &fmt[fi], 00193 strlen(extz_pats[i])) == 0) { 00194 fi += i; 00195 goto again; 00196 } 00197 fail(); 00198 } 00199 00200 case 'A': 00201 case 'a': 00202 { 00203 int i; 00204 00205 for (i = 0; i < (int)sizeof_array(day_names); i++) { 00206 size_t l = strlen(day_names[i]); 00207 if (strncasecmp(day_names[i], &str[si], l) == 0) { 00208 si += l; 00209 set_hash("wday", INT2FIX(i % 7)); 00210 goto matched; 00211 } 00212 } 00213 fail(); 00214 } 00215 case 'B': 00216 case 'b': 00217 case 'h': 00218 { 00219 int i; 00220 00221 for (i = 0; i < (int)sizeof_array(month_names); i++) { 00222 size_t l = strlen(month_names[i]); 00223 if (strncasecmp(month_names[i], &str[si], l) == 0) { 00224 si += l; 00225 set_hash("mon", INT2FIX((i % 12) + 1)); 00226 goto matched; 00227 } 00228 } 00229 fail(); 00230 } 00231 00232 case 'C': 00233 { 00234 VALUE n; 00235 00236 if (NUM_PATTERN_P()) 00237 READ_DIGITS(n, 2) 00238 else 00239 READ_DIGITS_MAX(n) 00240 set_hash("_cent", n); 00241 goto matched; 00242 } 00243 00244 case 'c': 00245 recur("%a %b %e %H:%M:%S %Y"); 00246 goto matched; 00247 00248 case 'D': 00249 recur("%m/%d/%y"); 00250 goto matched; 00251 00252 case 'd': 00253 case 'e': 00254 { 00255 VALUE n; 00256 00257 if (str[si] == ' ') { 00258 si++; 00259 READ_DIGITS(n, 1); 00260 } else { 00261 READ_DIGITS(n, 2); 00262 } 00263 if (!valid_range_p(n, 1, 31)) 00264 fail(); 00265 set_hash("mday", n); 00266 goto matched; 00267 } 00268 00269 case 'F': 00270 recur("%Y-%m-%d"); 00271 goto matched; 00272 00273 case 'G': 00274 { 00275 VALUE n; 00276 00277 if (NUM_PATTERN_P()) 00278 READ_DIGITS(n, 4) 00279 else 00280 READ_DIGITS_MAX(n) 00281 set_hash("cwyear", n); 00282 goto matched; 00283 } 00284 00285 case 'g': 00286 { 00287 VALUE n; 00288 00289 READ_DIGITS(n, 2); 00290 if (!valid_range_p(n, 0, 99)) 00291 fail(); 00292 set_hash("cwyear",n); 00293 set_hash("_cent", 00294 INT2FIX(f_ge_p(n, INT2FIX(69)) ? 19 : 20)); 00295 goto matched; 00296 } 00297 00298 case 'H': 00299 case 'k': 00300 { 00301 VALUE n; 00302 00303 if (str[si] == ' ') { 00304 si++; 00305 READ_DIGITS(n, 1); 00306 } else { 00307 READ_DIGITS(n, 2); 00308 } 00309 if (!valid_range_p(n, 0, 24)) 00310 fail(); 00311 set_hash("hour", n); 00312 goto matched; 00313 } 00314 00315 case 'I': 00316 case 'l': 00317 { 00318 VALUE n; 00319 00320 if (str[si] == ' ') { 00321 si++; 00322 READ_DIGITS(n, 1); 00323 } else { 00324 READ_DIGITS(n, 2); 00325 } 00326 if (!valid_range_p(n, 1, 12)) 00327 fail(); 00328 set_hash("hour", n); 00329 goto matched; 00330 } 00331 00332 case 'j': 00333 { 00334 VALUE n; 00335 00336 READ_DIGITS(n, 3); 00337 if (!valid_range_p(n, 1, 366)) 00338 fail(); 00339 set_hash("yday", n); 00340 goto matched; 00341 } 00342 00343 case 'L': 00344 case 'N': 00345 { 00346 VALUE n; 00347 int sign = 1; 00348 size_t osi; 00349 00350 if (issign(str[si])) { 00351 if (str[si] == '-') 00352 sign = -1; 00353 si++; 00354 } 00355 osi = si; 00356 if (NUM_PATTERN_P()) 00357 READ_DIGITS(n, c == 'L' ? 3 : 9) 00358 else 00359 READ_DIGITS_MAX(n) 00360 if (sign == -1) 00361 n = f_negate(n); 00362 set_hash("sec_fraction", 00363 rb_rational_new2(n, 00364 f_expt(INT2FIX(10), 00365 ULONG2NUM(si - osi)))); 00366 goto matched; 00367 } 00368 00369 case 'M': 00370 { 00371 VALUE n; 00372 00373 READ_DIGITS(n, 2); 00374 if (!valid_range_p(n, 0, 59)) 00375 fail(); 00376 set_hash("min", n); 00377 goto matched; 00378 } 00379 00380 case 'm': 00381 { 00382 VALUE n; 00383 00384 READ_DIGITS(n, 2); 00385 if (!valid_range_p(n, 1, 12)) 00386 fail(); 00387 set_hash("mon", n); 00388 goto matched; 00389 } 00390 00391 case 'n': 00392 case 't': 00393 recur(" "); 00394 goto matched; 00395 00396 case 'P': 00397 case 'p': 00398 { 00399 int i; 00400 00401 for (i = 0; i < 4; i++) { 00402 size_t l = strlen(merid_names[i]); 00403 if (strncasecmp(merid_names[i], &str[si], l) == 0) { 00404 si += l; 00405 set_hash("_merid", INT2FIX((i % 2) == 0 ? 0 : 12)); 00406 goto matched; 00407 } 00408 } 00409 fail(); 00410 } 00411 00412 case 'Q': 00413 { 00414 VALUE n; 00415 int sign = 1; 00416 00417 if (str[si] == '-') { 00418 sign = -1; 00419 si++; 00420 } 00421 READ_DIGITS_MAX(n); 00422 if (sign == -1) 00423 n = f_negate(n); 00424 set_hash("seconds", 00425 rb_rational_new2(n, 00426 f_expt(INT2FIX(10), 00427 INT2FIX(3)))); 00428 goto matched; 00429 } 00430 00431 case 'R': 00432 recur("%H:%M"); 00433 goto matched; 00434 00435 case 'r': 00436 recur("%I:%M:%S %p"); 00437 goto matched; 00438 00439 case 'S': 00440 { 00441 VALUE n; 00442 00443 READ_DIGITS(n, 2); 00444 if (!valid_range_p(n, 0, 60)) 00445 fail(); 00446 set_hash("sec", n); 00447 goto matched; 00448 } 00449 00450 case 's': 00451 { 00452 VALUE n; 00453 int sign = 1; 00454 00455 if (str[si] == '-') { 00456 sign = -1; 00457 si++; 00458 } 00459 READ_DIGITS_MAX(n); 00460 if (sign == -1) 00461 n = f_negate(n); 00462 set_hash("seconds", n); 00463 goto matched; 00464 } 00465 00466 case 'T': 00467 recur("%H:%M:%S"); 00468 goto matched; 00469 00470 case 'U': 00471 case 'W': 00472 { 00473 VALUE n; 00474 00475 READ_DIGITS(n, 2); 00476 if (!valid_range_p(n, 0, 53)) 00477 fail(); 00478 set_hash(c == 'U' ? "wnum0" : "wnum1", n); 00479 goto matched; 00480 } 00481 00482 case 'u': 00483 { 00484 VALUE n; 00485 00486 READ_DIGITS(n, 1); 00487 if (!valid_range_p(n, 1, 7)) 00488 fail(); 00489 set_hash("cwday", n); 00490 goto matched; 00491 } 00492 00493 case 'V': 00494 { 00495 VALUE n; 00496 00497 READ_DIGITS(n, 2); 00498 if (!valid_range_p(n, 1, 53)) 00499 fail(); 00500 set_hash("cweek", n); 00501 goto matched; 00502 } 00503 00504 case 'v': 00505 recur("%e-%b-%Y"); 00506 goto matched; 00507 00508 case 'w': 00509 { 00510 VALUE n; 00511 00512 READ_DIGITS(n, 1); 00513 if (!valid_range_p(n, 0, 6)) 00514 fail(); 00515 set_hash("wday", n); 00516 goto matched; 00517 } 00518 00519 case 'X': 00520 recur("%H:%M:%S"); 00521 goto matched; 00522 00523 case 'x': 00524 recur("%m/%d/%y"); 00525 goto matched; 00526 00527 case 'Y': 00528 { 00529 VALUE n; 00530 int sign = 1; 00531 00532 if (issign(str[si])) { 00533 if (str[si] == '-') 00534 sign = -1; 00535 si++; 00536 } 00537 if (NUM_PATTERN_P()) 00538 READ_DIGITS(n, 4) 00539 else 00540 READ_DIGITS_MAX(n) 00541 if (sign == -1) 00542 n = f_negate(n); 00543 set_hash("year", n); 00544 goto matched; 00545 } 00546 00547 case 'y': 00548 { 00549 VALUE n; 00550 int sign = 1; 00551 00552 READ_DIGITS(n, 2); 00553 if (!valid_range_p(n, 0, 99)) 00554 fail(); 00555 if (sign == -1) 00556 n = f_negate(n); 00557 set_hash("year", n); 00558 set_hash("_cent", 00559 INT2FIX(f_ge_p(n, INT2FIX(69)) ? 19 : 20)); 00560 goto matched; 00561 } 00562 00563 case 'Z': 00564 case 'z': 00565 { 00566 static const char pat_source[] = 00567 "\\A(" 00568 "(?:gmt|utc?)?[-+]\\d+(?:[,.:]\\d+(?::\\d+)?)?" 00569 "|[[:alpha:].\\s]+(?:standard|daylight)\\s+time\\b" 00570 "|[[:alpha:]]+(?:\\s+dst)?\\b" 00571 ")"; 00572 static VALUE pat = Qnil; 00573 VALUE m, b; 00574 00575 if (NIL_P(pat)) { 00576 pat = rb_reg_new(pat_source, sizeof pat_source - 1, 00577 ONIG_OPTION_IGNORECASE); 00578 rb_gc_register_mark_object(pat); 00579 } 00580 00581 b = rb_backref_get(); 00582 rb_match_busy(b); 00583 m = f_match(pat, rb_usascii_str_new2(&str[si])); 00584 00585 if (!NIL_P(m)) { 00586 VALUE s, l, o; 00587 00588 s = rb_reg_nth_match(1, m); 00589 l = f_end(m, INT2FIX(0)); 00590 o = date_zone_to_diff(s); 00591 si += NUM2LONG(l); 00592 set_hash("zone", s); 00593 set_hash("offset", o); 00594 rb_backref_set(b); 00595 goto matched; 00596 } 00597 rb_backref_set(b); 00598 fail(); 00599 } 00600 00601 case '%': 00602 if (str[si] != '%') 00603 fail(); 00604 si++; 00605 goto matched; 00606 00607 case '+': 00608 recur("%a %b %e %H:%M:%S %Z %Y"); 00609 goto matched; 00610 00611 default: 00612 if (str[si] != '%') 00613 fail(); 00614 si++; 00615 if (fi < flen) 00616 if (str[si] != fmt[fi]) 00617 fail(); 00618 si++; 00619 goto matched; 00620 } 00621 case ' ': 00622 case '\t': 00623 case '\n': 00624 case '\v': 00625 case '\f': 00626 case '\r': 00627 while (isspace(str[si])) 00628 si++; 00629 fi++; 00630 break; 00631 default: 00632 ordinal: 00633 if (str[si] != fmt[fi]) 00634 fail(); 00635 si++; 00636 fi++; 00637 break; 00638 matched: 00639 fi++; 00640 break; 00641 } 00642 } 00643 00644 return si; 00645 } 00646 00647 VALUE 00648 date__strptime(const char *str, size_t slen, 00649 const char *fmt, size_t flen, VALUE hash) 00650 { 00651 size_t si; 00652 VALUE cent, merid; 00653 00654 si = date__strptime_internal(str, slen, fmt, flen, hash); 00655 00656 if (slen > si) { 00657 VALUE s; 00658 00659 s = rb_usascii_str_new(&str[si], slen - si); 00660 set_hash("leftover", s); 00661 } 00662 00663 if (fail_p()) 00664 return Qnil; 00665 00666 cent = ref_hash("_cent"); 00667 if (!NIL_P(cent)) { 00668 VALUE year; 00669 00670 year = ref_hash("cwyear"); 00671 if (!NIL_P(year)) 00672 set_hash("cwyear", f_add(year, f_mul(cent, INT2FIX(100)))); 00673 year = ref_hash("year"); 00674 if (!NIL_P(year)) 00675 set_hash("year", f_add(year, f_mul(cent, INT2FIX(100)))); 00676 del_hash("_cent"); 00677 } 00678 00679 merid = ref_hash("_merid"); 00680 if (!NIL_P(merid)) { 00681 VALUE hour; 00682 00683 hour = ref_hash("hour"); 00684 if (!NIL_P(hour)) { 00685 hour = f_mod(hour, INT2FIX(12)); 00686 set_hash("hour", f_add(hour, merid)); 00687 } 00688 del_hash("_merid"); 00689 } 00690 00691 return hash; 00692 } 00693 00694 /* 00695 Local variables: 00696 c-file-style: "ruby" 00697 End: 00698 */ 00699