Ruby 1.9.3p327 (2012-11-10 revision 37606)
ext/date/date_strptime.c
Go to the documentation of this file.
00001 /*
00002   date_strptime.c: Coded by Tadayoshi Funaba 2011,2012
00003 */
00004 
00005 #include "ruby.h"
00006 #include "ruby/encoding.h"
00007 #include "ruby/re.h"
00008 #include <ctype.h>
00009 
/*
 * Name tables used by the directive matcher.
 *
 * Each table lists the long forms first and the abbreviated forms after
 * them, in the same order, so the matcher can map either spelling back to
 * its ordinal with a modulo (index % 7 for days, index % 12 for months,
 * index % 2 for meridian indicators).  Long forms must stay ahead of the
 * short ones: the tables are searched front to back with a prefix compare,
 * so "Sunday" has to be tried before "Sun".
 *
 * The arrays are fully const (pointers as well as characters); nothing in
 * this file writes to them.
 */
static const char *const day_names[] = {
    "Sunday", "Monday", "Tuesday", "Wednesday",
    "Thursday", "Friday", "Saturday",
    "Sun", "Mon", "Tue", "Wed",
    "Thu", "Fri", "Sat"
};

static const char *const month_names[] = {
    "January", "February", "March", "April",
    "May", "June", "July", "August", "September",
    "October", "November", "December",
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};

static const char *const merid_names[] = {
    "am", "pm",
    "a.m.", "p.m."
};

/*
 * Spellings accepted after '%' for the colon-prefixed zone directive.
 * Entry i contains i + 1 colons; the matcher skips i of them and lets its
 * normal "advance one character" step consume the last one.
 */
static const char *const extz_pats[] = {
    ":z",
    "::z",
    ":::z"
};
00035 
/*
 * Element count of a true array (not a pointer).  The argument is
 * parenthesised so that expressions such as `*p` are measured correctly.
 */
#define sizeof_array(o) (sizeof(o) / sizeof((o)[0]))

/*
 * Arithmetic on Ruby numeric VALUEs, dispatched through rb_funcall.
 * Single-character operators pass the character constant as the method
 * ID; multi-character names are looked up with rb_intern.
 */
#define f_negate(x) rb_funcall(x, rb_intern("-@"), 0)
#define f_add(x,y) rb_funcall(x, '+', 1, y)
#define f_sub(x,y) rb_funcall(x, '-', 1, y)
#define f_mul(x,y) rb_funcall(x, '*', 1, y)
#define f_div(x,y) rb_funcall(x, '/', 1, y)
#define f_idiv(x,y) rb_funcall(x, rb_intern("div"), 1, y)
#define f_mod(x,y) rb_funcall(x, '%', 1, y)
#define f_expt(x,y) rb_funcall(x, rb_intern("**"), 1, y)

/*
 * Comparisons.  These expand to rb_funcall and therefore yield a VALUE,
 * not a C boolean.
 */
#define f_lt_p(x,y) rb_funcall(x, '<', 1, y)
#define f_gt_p(x,y) rb_funcall(x, '>', 1, y)
#define f_le_p(x,y) rb_funcall(x, rb_intern("<="), 1, y)
#define f_ge_p(x,y) rb_funcall(x, rb_intern(">="), 1, y)

/* Regexp / MatchData helpers: r.match(s), o[i], o.end(i). */
#define f_match(r,s) rb_funcall(r, rb_intern("match"), 1, s)
#define f_aref(o,i) rb_funcall(o, rb_intern("[]"), 1, i)
#define f_end(o,i) rb_funcall(o, rb_intern("end"), 1, i)

/* True when c is an explicit sign character. */
#define issign(c) ((c) == '-' || (c) == '+')
00057 
/*
 * Reports whether the format text starting at `s` begins with something
 * that will consume digits from the input: either a literal digit, or a
 * '%' directive (optionally carrying an 'E' or 'O' modifier) whose
 * conversion character is one of the numeric ones listed below or is
 * itself a digit.
 *
 * The directive matcher uses this to decide whether a variable-width
 * numeric field has to be read with a fixed width because more digits
 * follow immediately.
 *
 * `s` must be NUL-terminated.  Returns 1 for "numeric follows", else 0.
 */
static int
num_pattern_p(const char *s)
{
    /* <ctype.h> functions are only defined for unsigned char values (and
     * EOF); cast so bytes >= 0x80 in the format cannot index out of range. */
    if (isdigit((unsigned char)*s))
        return 1;
    if (*s == '%') {
        s++;
        if (*s == 'E' || *s == 'O')
            s++;
        /* The explicit NUL test matters: strchr() would otherwise match
         * the terminator of the candidate list. */
        if (*s &&
            (strchr("CDdeFGgHIjkLlMmNQRrSsTUuVvWwXxYy", *s) ||
             isdigit((unsigned char)*s)))
            return 1;
    }
    return 0;
}

/* Looks at the format text right after the directive being processed;
 * expects `fmt` and `fi` in the expanding scope. */
#define NUM_PATTERN_P() num_pattern_p(&fmt[fi + 1])
00075 
00076 static long
00077 read_digits(const char *s, VALUE *n, size_t width)
00078 {
00079     size_t l;
00080 
00081     l = strspn(s, "0123456789");
00082 
00083     if (l == 0)
00084         return 0;
00085 
00086     if (width < l)
00087         l = width;
00088 
00089     if ((4 * l * sizeof(char)) <= (sizeof(long)*CHAR_BIT)) {
00090         const char *os = s;
00091         long v;
00092 
00093         v = 0;
00094         while ((size_t)(s - os) < l) {
00095             v *= 10;
00096             v += *s - '0';
00097             s++;
00098         }
00099         if (os == s)
00100             return 0;
00101         *n = LONG2NUM(v);
00102         return l;
00103     }
00104     else {
00105         char *s2 = ALLOCA_N(char, l + 1);
00106         memcpy(s2, s, l);
00107         s2[l] = '\0';
00108         *n = rb_cstr_to_inum(s2, 10, 0);
00109         return l;
00110     }
00111 }
00112 
/*
 * Accessors for the result hash.  They expect a VALUE called `hash` in the
 * expanding scope and address entries by the Symbol built from the C
 * string `k`.
 */
#define set_hash(k,v) rb_hash_aset(hash, ID2SYM(rb_intern(k)), v)
#define ref_hash(k) rb_hash_aref(hash, ID2SYM(rb_intern(k)))
#define del_hash(k) rb_hash_delete(hash, ID2SYM(rb_intern(k)))

/*
 * Flags the parse as failed (hash[:_fail] = true) and returns 0 from the
 * enclosing function.
 *
 * This is intentionally a bare brace block rather than do { } while (0):
 * READ_DIGITS below is built on the same shape and is written at some
 * call sites without a trailing semicolon directly in front of `else`.
 */
#define fail() \
{ \
    set_hash("_fail", Qtrue); \
    return 0; \
}

/* Non-zero once a failure has been recorded in `hash`. */
#define fail_p() (!NIL_P(ref_hash("_fail")))

/*
 * Reads up to `w` digits from str[si] into the VALUE variable `n` and
 * advances `si` past them; fails the parse when no digit is present.
 * Expects `str` and `si` in the expanding scope.
 */
#define READ_DIGITS(n,w) \
{ \
    size_t ndigits_ = read_digits(&str[si], &n, w); \
    if (ndigits_ == 0) \
        fail(); \
    si += ndigits_; \
}

/* READ_DIGITS with the width cap set to LONG_MAX. */
#define READ_DIGITS_MAX(n) READ_DIGITS(n, LONG_MAX)
00135 
00136 static int
00137 valid_range_p(VALUE v, int a, int b)
00138 {
00139     if (FIXNUM_P(v)) {
00140         int vi = FIX2INT(v);
00141         return !(vi < a || vi > b);
00142     }
00143     return !(f_lt_p(v, INT2NUM(a)) || f_gt_p(v, INT2NUM(b)));
00144 }
00145 
00146 #define recur(fmt) \
00147 { \
00148     size_t l; \
00149     l = date__strptime_internal(&str[si], slen - si, \
00150                                 fmt, sizeof fmt - 1, hash); \
00151     if (fail_p()) \
00152         return 0; \
00153     si += l; \
00154 }
00155 
00156 VALUE date_zone_to_diff(VALUE);
00157 
00158 static size_t
00159 date__strptime_internal(const char *str, size_t slen,
00160                         const char *fmt, size_t flen, VALUE hash)
00161 {
00162     size_t si, fi;
00163     int c;
00164 
00165     si = fi = 0;
00166 
00167     while (fi < flen) {
00168 
00169         switch (fmt[fi]) {
00170           case '%':
00171 
00172           again:
00173             fi++;
00174             c = fmt[fi];
00175 
00176             switch (c) {
00177               case 'E':
00178                 if (fmt[fi + 1] && strchr("cCxXyY", fmt[fi + 1]))
00179                     goto again;
00180                 fi--;
00181                 goto ordinal;
00182               case 'O':
00183                 if (fmt[fi + 1] && strchr("deHImMSuUVwWy", fmt[fi + 1]))
00184                     goto again;
00185                 fi--;
00186                 goto ordinal;
00187               case ':':
00188                 {
00189                     int i;
00190 
00191                     for (i = 0; i < (int)sizeof_array(extz_pats); i++)
00192                         if (strncmp(extz_pats[i], &fmt[fi],
00193                                         strlen(extz_pats[i])) == 0) {
00194                             fi += i;
00195                             goto again;
00196                         }
00197                     fail();
00198                 }
00199 
00200               case 'A':
00201               case 'a':
00202                 {
00203                     int i;
00204 
00205                     for (i = 0; i < (int)sizeof_array(day_names); i++) {
00206                         size_t l = strlen(day_names[i]);
00207                         if (strncasecmp(day_names[i], &str[si], l) == 0) {
00208                             si += l;
00209                             set_hash("wday", INT2FIX(i % 7));
00210                             goto matched;
00211                         }
00212                     }
00213                     fail();
00214                 }
00215               case 'B':
00216               case 'b':
00217               case 'h':
00218                 {
00219                     int i;
00220 
00221                     for (i = 0; i < (int)sizeof_array(month_names); i++) {
00222                         size_t l = strlen(month_names[i]);
00223                         if (strncasecmp(month_names[i], &str[si], l) == 0) {
00224                             si += l;
00225                             set_hash("mon", INT2FIX((i % 12) + 1));
00226                             goto matched;
00227                         }
00228                     }
00229                     fail();
00230                 }
00231 
00232               case 'C':
00233                 {
00234                     VALUE n;
00235 
00236                     if (NUM_PATTERN_P())
00237                         READ_DIGITS(n, 2)
00238                     else
00239                         READ_DIGITS_MAX(n)
00240                     set_hash("_cent", n);
00241                     goto matched;
00242                 }
00243 
00244               case 'c':
00245                 recur("%a %b %e %H:%M:%S %Y");
00246                 goto matched;
00247 
00248               case 'D':
00249                 recur("%m/%d/%y");
00250                 goto matched;
00251 
00252               case 'd':
00253               case 'e':
00254                 {
00255                     VALUE n;
00256 
00257                     if (str[si] == ' ') {
00258                         si++;
00259                         READ_DIGITS(n, 1);
00260                     } else {
00261                         READ_DIGITS(n, 2);
00262                     }
00263                     if (!valid_range_p(n, 1, 31))
00264                         fail();
00265                     set_hash("mday", n);
00266                     goto matched;
00267                 }
00268 
00269               case 'F':
00270                 recur("%Y-%m-%d");
00271                 goto matched;
00272 
00273               case 'G':
00274                 {
00275                     VALUE n;
00276 
00277                     if (NUM_PATTERN_P())
00278                         READ_DIGITS(n, 4)
00279                     else
00280                         READ_DIGITS_MAX(n)
00281                     set_hash("cwyear", n);
00282                     goto matched;
00283                 }
00284 
00285               case 'g':
00286                 {
00287                     VALUE n;
00288 
00289                     READ_DIGITS(n, 2);
00290                     if (!valid_range_p(n, 0, 99))
00291                         fail();
00292                     set_hash("cwyear",n);
00293                     set_hash("_cent",
00294                              INT2FIX(f_ge_p(n, INT2FIX(69)) ? 19 : 20));
00295                     goto matched;
00296                 }
00297 
00298               case 'H':
00299               case 'k':
00300                 {
00301                     VALUE n;
00302 
00303                     if (str[si] == ' ') {
00304                         si++;
00305                         READ_DIGITS(n, 1);
00306                     } else {
00307                         READ_DIGITS(n, 2);
00308                     }
00309                     if (!valid_range_p(n, 0, 24))
00310                         fail();
00311                     set_hash("hour", n);
00312                     goto matched;
00313                 }
00314 
00315               case 'I':
00316               case 'l':
00317                 {
00318                     VALUE n;
00319 
00320                     if (str[si] == ' ') {
00321                         si++;
00322                         READ_DIGITS(n, 1);
00323                     } else {
00324                         READ_DIGITS(n, 2);
00325                     }
00326                     if (!valid_range_p(n, 1, 12))
00327                         fail();
00328                     set_hash("hour", n);
00329                     goto matched;
00330                 }
00331 
00332               case 'j':
00333                 {
00334                     VALUE n;
00335 
00336                     READ_DIGITS(n, 3);
00337                     if (!valid_range_p(n, 1, 366))
00338                         fail();
00339                     set_hash("yday", n);
00340                     goto matched;
00341                 }
00342 
00343               case 'L':
00344               case 'N':
00345                 {
00346                     VALUE n;
00347                     int sign = 1;
00348                     size_t osi;
00349 
00350                     if (issign(str[si])) {
00351                         if (str[si] == '-')
00352                             sign = -1;
00353                         si++;
00354                     }
00355                     osi = si;
00356                     if (NUM_PATTERN_P())
00357                         READ_DIGITS(n, c == 'L' ? 3 : 9)
00358                     else
00359                         READ_DIGITS_MAX(n)
00360                     if (sign == -1)
00361                         n = f_negate(n);
00362                     set_hash("sec_fraction",
00363                              rb_rational_new2(n,
00364                                               f_expt(INT2FIX(10),
00365                                                      ULONG2NUM(si - osi))));
00366                     goto matched;
00367                 }
00368 
00369               case 'M':
00370                 {
00371                     VALUE n;
00372 
00373                     READ_DIGITS(n, 2);
00374                     if (!valid_range_p(n, 0, 59))
00375                         fail();
00376                     set_hash("min", n);
00377                     goto matched;
00378                 }
00379 
00380               case 'm':
00381                 {
00382                     VALUE n;
00383 
00384                     READ_DIGITS(n, 2);
00385                     if (!valid_range_p(n, 1, 12))
00386                         fail();
00387                     set_hash("mon", n);
00388                     goto matched;
00389                 }
00390 
00391               case 'n':
00392               case 't':
00393                 recur(" ");
00394                 goto matched;
00395 
00396               case 'P':
00397               case 'p':
00398                 {
00399                     int i;
00400 
00401                     for (i = 0; i < 4; i++) {
00402                         size_t l = strlen(merid_names[i]);
00403                         if (strncasecmp(merid_names[i], &str[si], l) == 0) {
00404                             si += l;
00405                             set_hash("_merid", INT2FIX((i % 2) == 0 ? 0 : 12));
00406                             goto matched;
00407                         }
00408                     }
00409                     fail();
00410                 }
00411 
00412               case 'Q':
00413                 {
00414                     VALUE n;
00415                     int sign = 1;
00416 
00417                     if (str[si] == '-') {
00418                         sign = -1;
00419                         si++;
00420                     }
00421                     READ_DIGITS_MAX(n);
00422                     if (sign == -1)
00423                         n = f_negate(n);
00424                     set_hash("seconds",
00425                              rb_rational_new2(n,
00426                                               f_expt(INT2FIX(10),
00427                                                      INT2FIX(3))));
00428                     goto matched;
00429                 }
00430 
00431               case 'R':
00432                 recur("%H:%M");
00433                 goto matched;
00434 
00435               case 'r':
00436                 recur("%I:%M:%S %p");
00437                 goto matched;
00438 
00439               case 'S':
00440                 {
00441                     VALUE n;
00442 
00443                     READ_DIGITS(n, 2);
00444                     if (!valid_range_p(n, 0, 60))
00445                         fail();
00446                     set_hash("sec", n);
00447                     goto matched;
00448                 }
00449 
00450               case 's':
00451                 {
00452                     VALUE n;
00453                     int sign = 1;
00454 
00455                     if (str[si] == '-') {
00456                         sign = -1;
00457                         si++;
00458                     }
00459                     READ_DIGITS_MAX(n);
00460                     if (sign == -1)
00461                         n = f_negate(n);
00462                     set_hash("seconds", n);
00463                     goto matched;
00464                 }
00465 
00466               case 'T':
00467                 recur("%H:%M:%S");
00468                 goto matched;
00469 
00470               case 'U':
00471               case 'W':
00472                 {
00473                     VALUE n;
00474 
00475                     READ_DIGITS(n, 2);
00476                     if (!valid_range_p(n, 0, 53))
00477                         fail();
00478                     set_hash(c == 'U' ? "wnum0" : "wnum1", n);
00479                     goto matched;
00480                 }
00481 
00482               case 'u':
00483                 {
00484                     VALUE n;
00485 
00486                     READ_DIGITS(n, 1);
00487                     if (!valid_range_p(n, 1, 7))
00488                         fail();
00489                     set_hash("cwday", n);
00490                     goto matched;
00491                 }
00492 
00493               case 'V':
00494                 {
00495                     VALUE n;
00496 
00497                     READ_DIGITS(n, 2);
00498                     if (!valid_range_p(n, 1, 53))
00499                         fail();
00500                     set_hash("cweek", n);
00501                     goto matched;
00502                 }
00503 
00504               case 'v':
00505                 recur("%e-%b-%Y");
00506                 goto matched;
00507 
00508               case 'w':
00509                 {
00510                     VALUE n;
00511 
00512                     READ_DIGITS(n, 1);
00513                     if (!valid_range_p(n, 0, 6))
00514                         fail();
00515                     set_hash("wday", n);
00516                     goto matched;
00517                 }
00518 
00519               case 'X':
00520                 recur("%H:%M:%S");
00521                 goto matched;
00522 
00523               case 'x':
00524                 recur("%m/%d/%y");
00525                 goto matched;
00526 
00527               case 'Y':
00528                   {
00529                       VALUE n;
00530                       int sign = 1;
00531 
00532                       if (issign(str[si])) {
00533                           if (str[si] == '-')
00534                               sign = -1;
00535                           si++;
00536                       }
00537                       if (NUM_PATTERN_P())
00538                           READ_DIGITS(n, 4)
00539                       else
00540                           READ_DIGITS_MAX(n)
00541                     if (sign == -1)
00542                         n = f_negate(n);
00543                       set_hash("year", n);
00544                       goto matched;
00545                   }
00546 
00547               case 'y':
00548                 {
00549                     VALUE n;
00550                     int sign = 1;
00551 
00552                     READ_DIGITS(n, 2);
00553                     if (!valid_range_p(n, 0, 99))
00554                         fail();
00555                     if (sign == -1)
00556                         n = f_negate(n);
00557                     set_hash("year", n);
00558                     set_hash("_cent",
00559                              INT2FIX(f_ge_p(n, INT2FIX(69)) ? 19 : 20));
00560                     goto matched;
00561                 }
00562 
00563               case 'Z':
00564               case 'z':
00565                 {
00566                     static const char pat_source[] =
00567                         "\\A("
00568                         "(?:gmt|utc?)?[-+]\\d+(?:[,.:]\\d+(?::\\d+)?)?"
00569                         "|[[:alpha:].\\s]+(?:standard|daylight)\\s+time\\b"
00570                         "|[[:alpha:]]+(?:\\s+dst)?\\b"
00571                         ")";
00572                     static VALUE pat = Qnil;
00573                     VALUE m, b;
00574 
00575                     if (NIL_P(pat)) {
00576                         pat = rb_reg_new(pat_source, sizeof pat_source - 1,
00577                                          ONIG_OPTION_IGNORECASE);
00578                         rb_gc_register_mark_object(pat);
00579                     }
00580 
00581                     b = rb_backref_get();
00582                     rb_match_busy(b);
00583                     m = f_match(pat, rb_usascii_str_new2(&str[si]));
00584 
00585                     if (!NIL_P(m)) {
00586                         VALUE s, l, o;
00587 
00588                         s = rb_reg_nth_match(1, m);
00589                         l = f_end(m, INT2FIX(0));
00590                         o = date_zone_to_diff(s);
00591                         si += NUM2LONG(l);
00592                         set_hash("zone", s);
00593                         set_hash("offset", o);
00594                         rb_backref_set(b);
00595                         goto matched;
00596                     }
00597                     rb_backref_set(b);
00598                     fail();
00599                 }
00600 
00601               case '%':
00602                 if (str[si] != '%')
00603                     fail();
00604                 si++;
00605                 goto matched;
00606 
00607               case '+':
00608                 recur("%a %b %e %H:%M:%S %Z %Y");
00609                 goto matched;
00610 
00611               default:
00612                 if (str[si] != '%')
00613                     fail();
00614                 si++;
00615                 if (fi < flen)
00616                     if (str[si] != fmt[fi])
00617                         fail();
00618                 si++;
00619                 goto matched;
00620             }
00621           case ' ':
00622           case '\t':
00623           case '\n':
00624           case '\v':
00625           case '\f':
00626           case '\r':
00627             while (isspace(str[si]))
00628                 si++;
00629             fi++;
00630             break;
00631           default:
00632           ordinal:
00633             if (str[si] != fmt[fi])
00634                 fail();
00635             si++;
00636             fi++;
00637             break;
00638           matched:
00639             fi++;
00640             break;
00641         }
00642     }
00643 
00644     return si;
00645 }
00646 
00647 VALUE
00648 date__strptime(const char *str, size_t slen,
00649                const char *fmt, size_t flen, VALUE hash)
00650 {
00651     size_t si;
00652     VALUE cent, merid;
00653 
00654     si = date__strptime_internal(str, slen, fmt, flen, hash);
00655 
00656     if (slen > si) {
00657         VALUE s;
00658 
00659         s = rb_usascii_str_new(&str[si], slen - si);
00660         set_hash("leftover", s);
00661     }
00662 
00663     if (fail_p())
00664         return Qnil;
00665 
00666     cent = ref_hash("_cent");
00667     if (!NIL_P(cent)) {
00668         VALUE year;
00669 
00670         year = ref_hash("cwyear");
00671         if (!NIL_P(year))
00672             set_hash("cwyear", f_add(year, f_mul(cent, INT2FIX(100))));
00673         year = ref_hash("year");
00674         if (!NIL_P(year))
00675             set_hash("year", f_add(year, f_mul(cent, INT2FIX(100))));
00676         del_hash("_cent");
00677     }
00678 
00679     merid = ref_hash("_merid");
00680     if (!NIL_P(merid)) {
00681         VALUE hour;
00682 
00683         hour = ref_hash("hour");
00684         if (!NIL_P(hour)) {
00685             hour = f_mod(hour, INT2FIX(12));
00686             set_hash("hour", f_add(hour, merid));
00687         }
00688         del_hash("_merid");
00689     }
00690 
00691     return hash;
00692 }
00693 
00694 /*
00695 Local variables:
00696 c-file-style: "ruby"
00697 End:
00698 */
00699