Ruby 1.9.3p327 (2012-11-10 revision 37606)
|
00001 /********************************************************************** 00002 regexec.c - Oniguruma (regular expression library) 00003 **********************************************************************/ 00004 /*- 00005 * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> 00006 * All rights reserved. 00007 * 00008 * Redistribution and use in source and binary forms, with or without 00009 * modification, are permitted provided that the following conditions 00010 * are met: 00011 * 1. Redistributions of source code must retain the above copyright 00012 * notice, this list of conditions and the following disclaimer. 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in the 00015 * documentation and/or other materials provided with the distribution. 00016 * 00017 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 00018 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00019 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 00020 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 00021 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 00022 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 00023 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 00024 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00025 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 00026 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 00027 * SUCH DAMAGE. 
00028 */ 00029 00030 #include "regint.h" 00031 00032 /* #define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */ 00033 00034 #ifdef USE_CRNL_AS_LINE_TERMINATOR 00035 #define ONIGENC_IS_MBC_CRNL(enc,p,end) \ 00036 (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \ 00037 ONIGENC_IS_MBC_NEWLINE(enc,(p+enclen(enc,p)),end)) 00038 #endif 00039 00040 #ifdef USE_CAPTURE_HISTORY 00041 static void history_tree_free(OnigCaptureTreeNode* node); 00042 00043 static void 00044 history_tree_clear(OnigCaptureTreeNode* node) 00045 { 00046 int i; 00047 00048 if (IS_NOT_NULL(node)) { 00049 for (i = 0; i < node->num_childs; i++) { 00050 if (IS_NOT_NULL(node->childs[i])) { 00051 history_tree_free(node->childs[i]); 00052 } 00053 } 00054 for (i = 0; i < node->allocated; i++) { 00055 node->childs[i] = (OnigCaptureTreeNode* )0; 00056 } 00057 node->num_childs = 0; 00058 node->beg = ONIG_REGION_NOTPOS; 00059 node->end = ONIG_REGION_NOTPOS; 00060 node->group = -1; 00061 } 00062 } 00063 00064 static void 00065 history_tree_free(OnigCaptureTreeNode* node) 00066 { 00067 history_tree_clear(node); 00068 xfree(node); 00069 } 00070 00071 static void 00072 history_root_free(OnigRegion* r) 00073 { 00074 if (IS_NOT_NULL(r->history_root)) { 00075 history_tree_free(r->history_root); 00076 r->history_root = (OnigCaptureTreeNode* )0; 00077 } 00078 } 00079 00080 static OnigCaptureTreeNode* 00081 history_node_new(void) 00082 { 00083 OnigCaptureTreeNode* node; 00084 00085 node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode)); 00086 CHECK_NULL_RETURN(node); 00087 node->childs = (OnigCaptureTreeNode** )0; 00088 node->allocated = 0; 00089 node->num_childs = 0; 00090 node->group = -1; 00091 node->beg = ONIG_REGION_NOTPOS; 00092 node->end = ONIG_REGION_NOTPOS; 00093 00094 return node; 00095 } 00096 00097 static int 00098 history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child) 00099 { 00100 #define HISTORY_TREE_INIT_ALLOC_SIZE 8 00101 00102 if (parent->num_childs >= 
parent->allocated) { 00103 int n, i; 00104 00105 if (IS_NULL(parent->childs)) { 00106 n = HISTORY_TREE_INIT_ALLOC_SIZE; 00107 parent->childs = 00108 (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n); 00109 } 00110 else { 00111 n = parent->allocated * 2; 00112 parent->childs = 00113 (OnigCaptureTreeNode** )xrealloc(parent->childs, 00114 sizeof(OnigCaptureTreeNode*) * n); 00115 } 00116 CHECK_NULL_RETURN_MEMERR(parent->childs); 00117 for (i = parent->allocated; i < n; i++) { 00118 parent->childs[i] = (OnigCaptureTreeNode* )0; 00119 } 00120 parent->allocated = n; 00121 } 00122 00123 parent->childs[parent->num_childs] = child; 00124 parent->num_childs++; 00125 return 0; 00126 } 00127 00128 static OnigCaptureTreeNode* 00129 history_tree_clone(OnigCaptureTreeNode* node) 00130 { 00131 int i; 00132 OnigCaptureTreeNode *clone, *child; 00133 00134 clone = history_node_new(); 00135 CHECK_NULL_RETURN(clone); 00136 00137 clone->beg = node->beg; 00138 clone->end = node->end; 00139 for (i = 0; i < node->num_childs; i++) { 00140 child = history_tree_clone(node->childs[i]); 00141 if (IS_NULL(child)) { 00142 history_tree_free(clone); 00143 return (OnigCaptureTreeNode* )0; 00144 } 00145 history_tree_add_child(clone, child); 00146 } 00147 00148 return clone; 00149 } 00150 00151 extern OnigCaptureTreeNode* 00152 onig_get_capture_tree(OnigRegion* region) 00153 { 00154 return region->history_root; 00155 } 00156 #endif /* USE_CAPTURE_HISTORY */ 00157 00158 extern void 00159 onig_region_clear(OnigRegion* region) 00160 { 00161 int i; 00162 00163 for (i = 0; i < region->num_regs; i++) { 00164 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; 00165 } 00166 #ifdef USE_CAPTURE_HISTORY 00167 history_root_free(region); 00168 #endif 00169 } 00170 00171 extern int 00172 onig_region_resize(OnigRegion* region, int n) 00173 { 00174 region->num_regs = n; 00175 00176 if (n < ONIG_NREGION) 00177 n = ONIG_NREGION; 00178 00179 if (region->allocated == 0) { 00180 region->beg = (int* 
)xmalloc(n * sizeof(int)); 00181 if (region->beg == 0) 00182 return ONIGERR_MEMORY; 00183 00184 region->end = (int* )xmalloc(n * sizeof(int)); 00185 if (region->end == 0) { 00186 xfree(region->beg); 00187 return ONIGERR_MEMORY; 00188 } 00189 00190 region->allocated = n; 00191 } 00192 else if (region->allocated < n) { 00193 int *tmp; 00194 00195 region->allocated = 0; 00196 tmp = (int* )xrealloc(region->beg, n * sizeof(int)); 00197 if (tmp == 0) { 00198 xfree(region->beg); 00199 xfree(region->end); 00200 return ONIGERR_MEMORY; 00201 } 00202 region->beg = tmp; 00203 tmp = (int* )xrealloc(region->end, n * sizeof(int)); 00204 if (tmp == 0) { 00205 xfree(region->beg); 00206 return ONIGERR_MEMORY; 00207 } 00208 region->end = tmp; 00209 00210 if (region->beg == 0 || region->end == 0) 00211 return ONIGERR_MEMORY; 00212 00213 region->allocated = n; 00214 } 00215 00216 return 0; 00217 } 00218 00219 static int 00220 onig_region_resize_clear(OnigRegion* region, int n) 00221 { 00222 int r; 00223 00224 r = onig_region_resize(region, n); 00225 if (r != 0) return r; 00226 onig_region_clear(region); 00227 return 0; 00228 } 00229 00230 extern int 00231 onig_region_set(OnigRegion* region, int at, int beg, int end) 00232 { 00233 if (at < 0) return ONIGERR_INVALID_ARGUMENT; 00234 00235 if (at >= region->allocated) { 00236 int r = onig_region_resize(region, at + 1); 00237 if (r < 0) return r; 00238 } 00239 00240 region->beg[at] = beg; 00241 region->end[at] = end; 00242 return 0; 00243 } 00244 00245 extern void 00246 onig_region_init(OnigRegion* region) 00247 { 00248 region->num_regs = 0; 00249 region->allocated = 0; 00250 region->beg = (int* )0; 00251 region->end = (int* )0; 00252 region->history_root = (OnigCaptureTreeNode* )0; 00253 } 00254 00255 extern OnigRegion* 00256 onig_region_new(void) 00257 { 00258 OnigRegion* r; 00259 00260 r = (OnigRegion* )xmalloc(sizeof(OnigRegion)); 00261 if (r) 00262 onig_region_init(r); 00263 return r; 00264 } 00265 00266 extern void 00267 
onig_region_free(OnigRegion* r, int free_self) 00268 { 00269 if (r) { 00270 if (r->allocated > 0) { 00271 if (r->beg) xfree(r->beg); 00272 if (r->end) xfree(r->end); 00273 r->allocated = 0; 00274 } 00275 #ifdef USE_CAPTURE_HISTORY 00276 history_root_free(r); 00277 #endif 00278 if (free_self) xfree(r); 00279 } 00280 } 00281 00282 extern void 00283 onig_region_copy(OnigRegion* to, OnigRegion* from) 00284 { 00285 #define RREGC_SIZE (sizeof(int) * from->num_regs) 00286 int i; 00287 00288 if (to == from) return; 00289 00290 onig_region_resize(to, from->num_regs); 00291 for (i = 0; i < from->num_regs; i++) { 00292 to->beg[i] = from->beg[i]; 00293 to->end[i] = from->end[i]; 00294 } 00295 to->num_regs = from->num_regs; 00296 00297 #ifdef USE_CAPTURE_HISTORY 00298 history_root_free(to); 00299 00300 if (IS_NOT_NULL(from->history_root)) { 00301 to->history_root = history_tree_clone(from->history_root); 00302 } 00303 #endif 00304 } 00305 00306 00308 #define INVALID_STACK_INDEX -1 00309 00310 /* stack type */ 00311 /* used by normal-POP */ 00312 #define STK_ALT 0x0001 00313 #define STK_LOOK_BEHIND_NOT 0x0002 00314 #define STK_POS_NOT 0x0003 00315 /* handled by normal-POP */ 00316 #define STK_MEM_START 0x0100 00317 #define STK_MEM_END 0x8200 00318 #define STK_REPEAT_INC 0x0300 00319 #define STK_STATE_CHECK_MARK 0x1000 00320 /* avoided by normal-POP */ 00321 #define STK_NULL_CHECK_START 0x3000 00322 #define STK_NULL_CHECK_END 0x5000 /* for recursive call */ 00323 #define STK_MEM_END_MARK 0x8400 00324 #define STK_POS 0x0500 /* used when POP-POS */ 00325 #define STK_STOP_BT 0x0600 /* mark for "(?>...)" */ 00326 #define STK_REPEAT 0x0700 00327 #define STK_CALL_FRAME 0x0800 00328 #define STK_RETURN 0x0900 00329 #define STK_VOID 0x0a00 /* for fill a blank */ 00330 00331 /* stack type check mask */ 00332 #define STK_MASK_POP_USED 0x00ff 00333 #define STK_MASK_TO_VOID_TARGET 0x10ff 00334 #define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */ 00335 00336 #ifdef 
USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE 00337 #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\ 00338 (msa).stack_p = (void* )0;\ 00339 (msa).options = (arg_option);\ 00340 (msa).region = (arg_region);\ 00341 (msa).start = (arg_start);\ 00342 (msa).best_len = ONIG_MISMATCH;\ 00343 } while(0) 00344 #else 00345 #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\ 00346 (msa).stack_p = (void* )0;\ 00347 (msa).options = (arg_option);\ 00348 (msa).region = (arg_region);\ 00349 (msa).start = (arg_start);\ 00350 } while(0) 00351 #endif 00352 00353 #ifdef USE_COMBINATION_EXPLOSION_CHECK 00354 00355 #define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16 00356 00357 #define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \ 00358 if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\ 00359 unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\ 00360 offset = ((offset) * (state_num)) >> 3;\ 00361 if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\ 00362 if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) {\ 00363 (msa).state_check_buff = (void* )xmalloc(size);\ 00364 CHECK_NULL_RETURN_MEMERR((msa).state_check_buff);\ 00365 }\ 00366 else \ 00367 (msa).state_check_buff = (void* )xalloca(size);\ 00368 xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \ 00369 (size_t )(size - (offset))); \ 00370 (msa).state_check_buff_size = size;\ 00371 }\ 00372 else {\ 00373 (msa).state_check_buff = (void* )0;\ 00374 (msa).state_check_buff_size = 0;\ 00375 }\ 00376 }\ 00377 else {\ 00378 (msa).state_check_buff = (void* )0;\ 00379 (msa).state_check_buff_size = 0;\ 00380 }\ 00381 } while(0) 00382 00383 #define MATCH_ARG_FREE(msa) do {\ 00384 if ((msa).stack_p) xfree((msa).stack_p);\ 00385 if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \ 00386 if ((msa).state_check_buff) xfree((msa).state_check_buff);\ 00387 }\ 00388 } while(0) 00389 #else 00390 #define 
MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p) 00391 #endif 00392 00393 00394 00395 #define STACK_INIT(alloc_addr, ptr_num, stack_num) do {\ 00396 if (msa->stack_p) {\ 00397 alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num));\ 00398 stk_alloc = (OnigStackType* )(msa->stack_p);\ 00399 stk_base = stk_alloc;\ 00400 stk = stk_base;\ 00401 stk_end = stk_base + msa->stack_n;\ 00402 }\ 00403 else {\ 00404 alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num)\ 00405 + sizeof(OnigStackType) * (stack_num));\ 00406 stk_alloc = (OnigStackType* )(alloc_addr + sizeof(char*) * (ptr_num));\ 00407 stk_base = stk_alloc;\ 00408 stk = stk_base;\ 00409 stk_end = stk_base + (stack_num);\ 00410 }\ 00411 } while(0) 00412 00413 #define STACK_SAVE do{\ 00414 if (stk_base != stk_alloc) {\ 00415 msa->stack_p = stk_base;\ 00416 msa->stack_n = stk_end - stk_base; /* TODO: check overflow */\ 00417 };\ 00418 } while(0) 00419 00420 static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE; 00421 00422 extern unsigned int 00423 onig_get_match_stack_limit_size(void) 00424 { 00425 return MatchStackLimitSize; 00426 } 00427 00428 extern int 00429 onig_set_match_stack_limit_size(unsigned int size) 00430 { 00431 MatchStackLimitSize = size; 00432 return 0; 00433 } 00434 00435 static int 00436 stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, 00437 OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa) 00438 { 00439 size_t n; 00440 OnigStackType *x, *stk_base, *stk_end, *stk; 00441 00442 stk_base = *arg_stk_base; 00443 stk_end = *arg_stk_end; 00444 stk = *arg_stk; 00445 00446 n = stk_end - stk_base; 00447 if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) { 00448 x = (OnigStackType* )xmalloc(sizeof(OnigStackType) * n * 2); 00449 if (IS_NULL(x)) { 00450 STACK_SAVE; 00451 return ONIGERR_MEMORY; 00452 } 00453 xmemcpy(x, stk_base, n * sizeof(OnigStackType)); 00454 n *= 2; 00455 } 00456 else { 00457 unsigned int limit_size = 
MatchStackLimitSize; 00458 n *= 2; 00459 if (limit_size != 0 && n > limit_size) { 00460 if ((unsigned int )(stk_end - stk_base) == limit_size) 00461 return ONIGERR_MATCH_STACK_LIMIT_OVER; 00462 else 00463 n = limit_size; 00464 } 00465 x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n); 00466 if (IS_NULL(x)) { 00467 STACK_SAVE; 00468 return ONIGERR_MEMORY; 00469 } 00470 } 00471 *arg_stk = x + (stk - stk_base); 00472 *arg_stk_base = x; 00473 *arg_stk_end = x + n; 00474 return 0; 00475 } 00476 00477 #define STACK_ENSURE(n) do {\ 00478 if (stk_end - stk < (n)) {\ 00479 int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\ 00480 if (r != 0) { STACK_SAVE; return r; } \ 00481 }\ 00482 } while(0) 00483 00484 #define STACK_AT(index) (stk_base + (index)) 00485 #define GET_STACK_INDEX(stk) ((stk) - stk_base) 00486 00487 #define STACK_PUSH_TYPE(stack_type) do {\ 00488 STACK_ENSURE(1);\ 00489 stk->type = (stack_type);\ 00490 STACK_INC;\ 00491 } while(0) 00492 00493 #define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0) 00494 00495 #ifdef USE_COMBINATION_EXPLOSION_CHECK 00496 #define STATE_CHECK_POS(s,snum) \ 00497 (((s) - str) * num_comb_exp_check + ((snum) - 1)) 00498 #define STATE_CHECK_VAL(v,snum) do {\ 00499 if (state_check_buff != NULL) {\ 00500 int x = STATE_CHECK_POS(s,snum);\ 00501 (v) = state_check_buff[x/8] & (1<<(x%8));\ 00502 }\ 00503 else (v) = 0;\ 00504 } while(0) 00505 00506 00507 #define ELSE_IF_STATE_CHECK_MARK(stk) \ 00508 else if ((stk)->type == STK_STATE_CHECK_MARK) { \ 00509 int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\ 00510 state_check_buff[x/8] |= (1<<(x%8)); \ 00511 } 00512 00513 #define STACK_PUSH(stack_type,pat,s,sprev) do {\ 00514 STACK_ENSURE(1);\ 00515 stk->type = (stack_type);\ 00516 stk->u.state.pcode = (pat);\ 00517 stk->u.state.pstr = (s);\ 00518 stk->u.state.pstr_prev = (sprev);\ 00519 stk->u.state.state_check = 0;\ 00520 STACK_INC;\ 00521 } while(0) 00522 00523 
#define STACK_PUSH_ENSURED(stack_type,pat) do {\ 00524 stk->type = (stack_type);\ 00525 stk->u.state.pcode = (pat);\ 00526 stk->u.state.state_check = 0;\ 00527 STACK_INC;\ 00528 } while(0) 00529 00530 #define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum) do {\ 00531 STACK_ENSURE(1);\ 00532 stk->type = STK_ALT;\ 00533 stk->u.state.pcode = (pat);\ 00534 stk->u.state.pstr = (s);\ 00535 stk->u.state.pstr_prev = (sprev);\ 00536 stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\ 00537 STACK_INC;\ 00538 } while(0) 00539 00540 #define STACK_PUSH_STATE_CHECK(s,snum) do {\ 00541 if (state_check_buff != NULL) {\ 00542 STACK_ENSURE(1);\ 00543 stk->type = STK_STATE_CHECK_MARK;\ 00544 stk->u.state.pstr = (s);\ 00545 stk->u.state.state_check = (snum);\ 00546 STACK_INC;\ 00547 }\ 00548 } while(0) 00549 00550 #else /* USE_COMBINATION_EXPLOSION_CHECK */ 00551 00552 #define ELSE_IF_STATE_CHECK_MARK(stk) 00553 00554 #define STACK_PUSH(stack_type,pat,s,sprev) do {\ 00555 STACK_ENSURE(1);\ 00556 stk->type = (stack_type);\ 00557 stk->u.state.pcode = (pat);\ 00558 stk->u.state.pstr = (s);\ 00559 stk->u.state.pstr_prev = (sprev);\ 00560 STACK_INC;\ 00561 } while(0) 00562 00563 #define STACK_PUSH_ENSURED(stack_type,pat) do {\ 00564 stk->type = (stack_type);\ 00565 stk->u.state.pcode = (pat);\ 00566 STACK_INC;\ 00567 } while(0) 00568 #endif /* USE_COMBINATION_EXPLOSION_CHECK */ 00569 00570 #define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev) 00571 #define STACK_PUSH_POS(s,sprev) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev) 00572 #define STACK_PUSH_POS_NOT(pat,s,sprev) STACK_PUSH(STK_POS_NOT,pat,s,sprev) 00573 #define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT) 00574 #define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev) \ 00575 STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev) 00576 00577 #define STACK_PUSH_REPEAT(id, pat) do {\ 00578 STACK_ENSURE(1);\ 00579 stk->type = STK_REPEAT;\ 00580 stk->u.repeat.num = (id);\ 00581 stk->u.repeat.pcode = (pat);\ 00582 
stk->u.repeat.count = 0;\ 00583 STACK_INC;\ 00584 } while(0) 00585 00586 #define STACK_PUSH_REPEAT_INC(sindex) do {\ 00587 STACK_ENSURE(1);\ 00588 stk->type = STK_REPEAT_INC;\ 00589 stk->u.repeat_inc.si = (sindex);\ 00590 STACK_INC;\ 00591 } while(0) 00592 00593 #define STACK_PUSH_MEM_START(mnum, s) do {\ 00594 STACK_ENSURE(1);\ 00595 stk->type = STK_MEM_START;\ 00596 stk->u.mem.num = (mnum);\ 00597 stk->u.mem.pstr = (s);\ 00598 stk->u.mem.start = mem_start_stk[mnum];\ 00599 stk->u.mem.end = mem_end_stk[mnum];\ 00600 mem_start_stk[mnum] = GET_STACK_INDEX(stk);\ 00601 mem_end_stk[mnum] = INVALID_STACK_INDEX;\ 00602 STACK_INC;\ 00603 } while(0) 00604 00605 #define STACK_PUSH_MEM_END(mnum, s) do {\ 00606 STACK_ENSURE(1);\ 00607 stk->type = STK_MEM_END;\ 00608 stk->u.mem.num = (mnum);\ 00609 stk->u.mem.pstr = (s);\ 00610 stk->u.mem.start = mem_start_stk[mnum];\ 00611 stk->u.mem.end = mem_end_stk[mnum];\ 00612 mem_end_stk[mnum] = GET_STACK_INDEX(stk);\ 00613 STACK_INC;\ 00614 } while(0) 00615 00616 #define STACK_PUSH_MEM_END_MARK(mnum) do {\ 00617 STACK_ENSURE(1);\ 00618 stk->type = STK_MEM_END_MARK;\ 00619 stk->u.mem.num = (mnum);\ 00620 STACK_INC;\ 00621 } while(0) 00622 00623 #define STACK_GET_MEM_START(mnum, k) do {\ 00624 int level = 0;\ 00625 k = stk;\ 00626 while (k > stk_base) {\ 00627 k--;\ 00628 if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \ 00629 && k->u.mem.num == (mnum)) {\ 00630 level++;\ 00631 }\ 00632 else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\ 00633 if (level == 0) break;\ 00634 level--;\ 00635 }\ 00636 }\ 00637 } while(0) 00638 00639 #define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\ 00640 int level = 0;\ 00641 while (k < stk) {\ 00642 if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\ 00643 if (level == 0) (start) = k->u.mem.pstr;\ 00644 level++;\ 00645 }\ 00646 else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\ 00647 level--;\ 00648 if (level == 0) {\ 00649 (end) = k->u.mem.pstr;\ 00650 break;\ 00651 }\ 
00652 }\ 00653 k++;\ 00654 }\ 00655 } while(0) 00656 00657 #define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\ 00658 STACK_ENSURE(1);\ 00659 stk->type = STK_NULL_CHECK_START;\ 00660 stk->u.null_check.num = (cnum);\ 00661 stk->u.null_check.pstr = (s);\ 00662 STACK_INC;\ 00663 } while(0) 00664 00665 #define STACK_PUSH_NULL_CHECK_END(cnum) do {\ 00666 STACK_ENSURE(1);\ 00667 stk->type = STK_NULL_CHECK_END;\ 00668 stk->u.null_check.num = (cnum);\ 00669 STACK_INC;\ 00670 } while(0) 00671 00672 #define STACK_PUSH_CALL_FRAME(pat) do {\ 00673 STACK_ENSURE(1);\ 00674 stk->type = STK_CALL_FRAME;\ 00675 stk->u.call_frame.ret_addr = (pat);\ 00676 STACK_INC;\ 00677 } while(0) 00678 00679 #define STACK_PUSH_RETURN do {\ 00680 STACK_ENSURE(1);\ 00681 stk->type = STK_RETURN;\ 00682 STACK_INC;\ 00683 } while(0) 00684 00685 00686 #ifdef ONIG_DEBUG 00687 #define STACK_BASE_CHECK(p, at) \ 00688 if ((p) < stk_base) {\ 00689 fprintf(stderr, "at %s\n", at);\ 00690 goto stack_error;\ 00691 } 00692 #else 00693 #define STACK_BASE_CHECK(p, at) 00694 #endif 00695 00696 #define STACK_POP_ONE do {\ 00697 stk--;\ 00698 STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \ 00699 } while(0) 00700 00701 #define STACK_POP do {\ 00702 switch (pop_level) {\ 00703 case STACK_POP_LEVEL_FREE:\ 00704 while (1) {\ 00705 stk--;\ 00706 STACK_BASE_CHECK(stk, "STACK_POP"); \ 00707 if ((stk->type & STK_MASK_POP_USED) != 0) break;\ 00708 ELSE_IF_STATE_CHECK_MARK(stk);\ 00709 }\ 00710 break;\ 00711 case STACK_POP_LEVEL_MEM_START:\ 00712 while (1) {\ 00713 stk--;\ 00714 STACK_BASE_CHECK(stk, "STACK_POP 2"); \ 00715 if ((stk->type & STK_MASK_POP_USED) != 0) break;\ 00716 else if (stk->type == STK_MEM_START) {\ 00717 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ 00718 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ 00719 }\ 00720 ELSE_IF_STATE_CHECK_MARK(stk);\ 00721 }\ 00722 break;\ 00723 default:\ 00724 while (1) {\ 00725 stk--;\ 00726 STACK_BASE_CHECK(stk, "STACK_POP 3"); \ 00727 if ((stk->type & STK_MASK_POP_USED) != 0) 
break;\ 00728 else if (stk->type == STK_MEM_START) {\ 00729 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ 00730 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ 00731 }\ 00732 else if (stk->type == STK_REPEAT_INC) {\ 00733 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ 00734 }\ 00735 else if (stk->type == STK_MEM_END) {\ 00736 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ 00737 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ 00738 }\ 00739 ELSE_IF_STATE_CHECK_MARK(stk);\ 00740 }\ 00741 break;\ 00742 }\ 00743 } while(0) 00744 00745 #define STACK_POP_TIL_POS_NOT do {\ 00746 while (1) {\ 00747 stk--;\ 00748 STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \ 00749 if (stk->type == STK_POS_NOT) break;\ 00750 else if (stk->type == STK_MEM_START) {\ 00751 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ 00752 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ 00753 }\ 00754 else if (stk->type == STK_REPEAT_INC) {\ 00755 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ 00756 }\ 00757 else if (stk->type == STK_MEM_END) {\ 00758 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ 00759 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ 00760 }\ 00761 ELSE_IF_STATE_CHECK_MARK(stk);\ 00762 }\ 00763 } while(0) 00764 00765 #define STACK_POP_TIL_LOOK_BEHIND_NOT do {\ 00766 while (1) {\ 00767 stk--;\ 00768 STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \ 00769 if (stk->type == STK_LOOK_BEHIND_NOT) break;\ 00770 else if (stk->type == STK_MEM_START) {\ 00771 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ 00772 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ 00773 }\ 00774 else if (stk->type == STK_REPEAT_INC) {\ 00775 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ 00776 }\ 00777 else if (stk->type == STK_MEM_END) {\ 00778 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ 00779 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ 00780 }\ 00781 ELSE_IF_STATE_CHECK_MARK(stk);\ 00782 }\ 00783 } while(0) 00784 00785 #define STACK_POS_END(k) do {\ 00786 k = stk;\ 00787 
while (1) {\ 00788 k--;\ 00789 STACK_BASE_CHECK(k, "STACK_POS_END"); \ 00790 if (IS_TO_VOID_TARGET(k)) {\ 00791 k->type = STK_VOID;\ 00792 }\ 00793 else if (k->type == STK_POS) {\ 00794 k->type = STK_VOID;\ 00795 break;\ 00796 }\ 00797 }\ 00798 } while(0) 00799 00800 #define STACK_STOP_BT_END do {\ 00801 OnigStackType *k = stk;\ 00802 while (1) {\ 00803 k--;\ 00804 STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \ 00805 if (IS_TO_VOID_TARGET(k)) {\ 00806 k->type = STK_VOID;\ 00807 }\ 00808 else if (k->type == STK_STOP_BT) {\ 00809 k->type = STK_VOID;\ 00810 break;\ 00811 }\ 00812 }\ 00813 } while(0) 00814 00815 #define STACK_NULL_CHECK(isnull,id,s) do {\ 00816 OnigStackType* k = stk;\ 00817 while (1) {\ 00818 k--;\ 00819 STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \ 00820 if (k->type == STK_NULL_CHECK_START) {\ 00821 if (k->u.null_check.num == (id)) {\ 00822 (isnull) = (k->u.null_check.pstr == (s));\ 00823 break;\ 00824 }\ 00825 }\ 00826 }\ 00827 } while(0) 00828 00829 #define STACK_NULL_CHECK_REC(isnull,id,s) do {\ 00830 int level = 0;\ 00831 OnigStackType* k = stk;\ 00832 while (1) {\ 00833 k--;\ 00834 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \ 00835 if (k->type == STK_NULL_CHECK_START) {\ 00836 if (k->u.null_check.num == (id)) {\ 00837 if (level == 0) {\ 00838 (isnull) = (k->u.null_check.pstr == (s));\ 00839 break;\ 00840 }\ 00841 else level--;\ 00842 }\ 00843 }\ 00844 else if (k->type == STK_NULL_CHECK_END) {\ 00845 level++;\ 00846 }\ 00847 }\ 00848 } while(0) 00849 00850 #define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\ 00851 OnigStackType* k = stk;\ 00852 while (1) {\ 00853 k--;\ 00854 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \ 00855 if (k->type == STK_NULL_CHECK_START) {\ 00856 if (k->u.null_check.num == (id)) {\ 00857 if (k->u.null_check.pstr != (s)) {\ 00858 (isnull) = 0;\ 00859 break;\ 00860 }\ 00861 else {\ 00862 UChar* endp;\ 00863 (isnull) = 1;\ 00864 while (k < stk) {\ 00865 if (k->type == STK_MEM_START) {\ 00866 if (k->u.mem.end == 
INVALID_STACK_INDEX) {\ 00867 (isnull) = 0; break;\ 00868 }\ 00869 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\ 00870 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\ 00871 else\ 00872 endp = (UChar* )k->u.mem.end;\ 00873 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\ 00874 (isnull) = 0; break;\ 00875 }\ 00876 else if (endp != s) {\ 00877 (isnull) = -1; /* empty, but position changed */ \ 00878 }\ 00879 }\ 00880 k++;\ 00881 }\ 00882 break;\ 00883 }\ 00884 }\ 00885 }\ 00886 }\ 00887 } while(0) 00888 00889 #define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\ 00890 int level = 0;\ 00891 OnigStackType* k = stk;\ 00892 while (1) {\ 00893 k--;\ 00894 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \ 00895 if (k->type == STK_NULL_CHECK_START) {\ 00896 if (k->u.null_check.num == (id)) {\ 00897 if (level == 0) {\ 00898 if (k->u.null_check.pstr != (s)) {\ 00899 (isnull) = 0;\ 00900 break;\ 00901 }\ 00902 else {\ 00903 UChar* endp;\ 00904 (isnull) = 1;\ 00905 while (k < stk) {\ 00906 if (k->type == STK_MEM_START) {\ 00907 if (k->u.mem.end == INVALID_STACK_INDEX) {\ 00908 (isnull) = 0; break;\ 00909 }\ 00910 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\ 00911 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\ 00912 else\ 00913 endp = (UChar* )k->u.mem.end;\ 00914 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\ 00915 (isnull) = 0; break;\ 00916 }\ 00917 else if (endp != s) {\ 00918 (isnull) = -1; /* empty, but position changed */ \ 00919 }\ 00920 }\ 00921 k++;\ 00922 }\ 00923 break;\ 00924 }\ 00925 }\ 00926 else {\ 00927 level--;\ 00928 }\ 00929 }\ 00930 }\ 00931 else if (k->type == STK_NULL_CHECK_END) {\ 00932 if (k->u.null_check.num == (id)) level++;\ 00933 }\ 00934 }\ 00935 } while(0) 00936 00937 #define STACK_GET_REPEAT(id, k) do {\ 00938 int level = 0;\ 00939 k = stk;\ 00940 while (1) {\ 00941 k--;\ 00942 STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \ 00943 if (k->type == STK_REPEAT) {\ 00944 if (level == 0) {\ 00945 if (k->u.repeat.num == (id)) {\ 00946 
break;\ 00947 }\ 00948 }\ 00949 }\ 00950 else if (k->type == STK_CALL_FRAME) level--;\ 00951 else if (k->type == STK_RETURN) level++;\ 00952 }\ 00953 } while(0) 00954 00955 #define STACK_RETURN(addr) do {\ 00956 int level = 0;\ 00957 OnigStackType* k = stk;\ 00958 while (1) {\ 00959 k--;\ 00960 STACK_BASE_CHECK(k, "STACK_RETURN"); \ 00961 if (k->type == STK_CALL_FRAME) {\ 00962 if (level == 0) {\ 00963 (addr) = k->u.call_frame.ret_addr;\ 00964 break;\ 00965 }\ 00966 else level--;\ 00967 }\ 00968 else if (k->type == STK_RETURN)\ 00969 level++;\ 00970 }\ 00971 } while(0) 00972 00973 00974 #define STRING_CMP(s1,s2,len) do {\ 00975 while (len-- > 0) {\ 00976 if (*s1++ != *s2++) goto fail;\ 00977 }\ 00978 } while(0) 00979 00980 #define STRING_CMP_IC(case_fold_flag,s1,ps2,len,text_end) do {\ 00981 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \ 00982 goto fail; \ 00983 } while(0) 00984 00985 static int string_cmp_ic(OnigEncoding enc, int case_fold_flag, 00986 UChar* s1, UChar** ps2, int mblen, const UChar* text_end) 00987 { 00988 UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN]; 00989 UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN]; 00990 UChar *p1, *p2, *end1, *s2; 00991 int len1, len2; 00992 00993 s2 = *ps2; 00994 end1 = s1 + mblen; 00995 while (s1 < end1) { 00996 len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, text_end, buf1); 00997 len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, text_end, buf2); 00998 if (len1 != len2) return 0; 00999 p1 = buf1; 01000 p2 = buf2; 01001 while (len1-- > 0) { 01002 if (*p1 != *p2) return 0; 01003 p1++; 01004 p2++; 01005 } 01006 } 01007 01008 *ps2 = s2; 01009 return 1; 01010 } 01011 01012 #define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\ 01013 is_fail = 0;\ 01014 while (len-- > 0) {\ 01015 if (*s1++ != *s2++) {\ 01016 is_fail = 1; break;\ 01017 }\ 01018 }\ 01019 } while(0) 01020 01021 #define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,text_end,is_fail) do {\ 01022 if (string_cmp_ic(encode, case_fold_flag, s1, 
ps2, len, text_end) == 0) \ 01023 is_fail = 1; \ 01024 else \ 01025 is_fail = 0; \ 01026 } while(0) 01027 01028 01029 #define IS_EMPTY_STR (str == end) 01030 #define ON_STR_BEGIN(s) ((s) == str) 01031 #define ON_STR_END(s) ((s) == end) 01032 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE 01033 #define DATA_ENSURE_CHECK1 (s < right_range) 01034 #define DATA_ENSURE_CHECK(n) (s + (n) <= right_range) 01035 #define DATA_ENSURE(n) if (s + (n) > right_range) goto fail 01036 #else 01037 #define DATA_ENSURE_CHECK1 (s < end) 01038 #define DATA_ENSURE_CHECK(n) (s + (n) <= end) 01039 #define DATA_ENSURE(n) if (s + (n) > end) goto fail 01040 #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */ 01041 01042 01043 #ifdef USE_CAPTURE_HISTORY 01044 static int 01045 make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp, 01046 OnigStackType* stk_top, UChar* str, regex_t* reg) 01047 { 01048 int n, r; 01049 OnigCaptureTreeNode* child; 01050 OnigStackType* k = *kp; 01051 01052 while (k < stk_top) { 01053 if (k->type == STK_MEM_START) { 01054 n = k->u.mem.num; 01055 if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP && 01056 BIT_STATUS_AT(reg->capture_history, n) != 0) { 01057 child = history_node_new(); 01058 CHECK_NULL_RETURN_MEMERR(child); 01059 child->group = n; 01060 child->beg = (int )(k->u.mem.pstr - str); 01061 r = history_tree_add_child(node, child); 01062 if (r != 0) return r; 01063 *kp = (k + 1); 01064 r = make_capture_history_tree(child, kp, stk_top, str, reg); 01065 if (r != 0) return r; 01066 01067 k = *kp; 01068 child->end = (int )(k->u.mem.pstr - str); 01069 } 01070 } 01071 else if (k->type == STK_MEM_END) { 01072 if (k->u.mem.num == node->group) { 01073 node->end = (int )(k->u.mem.pstr - str); 01074 *kp = k; 01075 return 0; 01076 } 01077 } 01078 k++; 01079 } 01080 01081 return 1; /* 1: root node ending. 
*/ 01082 } 01083 #endif 01084 01085 #ifdef USE_BACKREF_WITH_LEVEL 01086 static int mem_is_in_memp(int mem, int num, UChar* memp) 01087 { 01088 int i; 01089 MemNumType m; 01090 01091 for (i = 0; i < num; i++) { 01092 GET_MEMNUM_INC(m, memp); 01093 if (mem == (int )m) return 1; 01094 } 01095 return 0; 01096 } 01097 01098 static int backref_match_at_nested_level(regex_t* reg 01099 , OnigStackType* top, OnigStackType* stk_base 01100 , int ignore_case, int case_fold_flag 01101 , int nest, int mem_num, UChar* memp, UChar** s, const UChar* send) 01102 { 01103 UChar *ss, *p, *pstart, *pend = NULL_UCHARP; 01104 int level; 01105 OnigStackType* k; 01106 01107 level = 0; 01108 k = top; 01109 k--; 01110 while (k >= stk_base) { 01111 if (k->type == STK_CALL_FRAME) { 01112 level--; 01113 } 01114 else if (k->type == STK_RETURN) { 01115 level++; 01116 } 01117 else if (level == nest) { 01118 if (k->type == STK_MEM_START) { 01119 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { 01120 pstart = k->u.mem.pstr; 01121 if (pend != NULL_UCHARP) { 01122 if (pend - pstart > send - *s) return 0; /* or goto next_mem; */ 01123 p = pstart; 01124 ss = *s; 01125 01126 if (ignore_case != 0) { 01127 if (string_cmp_ic(reg->enc, case_fold_flag, 01128 pstart, &ss, (int )(pend - pstart), send) == 0) 01129 return 0; /* or goto next_mem; */ 01130 } 01131 else { 01132 while (p < pend) { 01133 if (*p++ != *ss++) return 0; /* or goto next_mem; */ 01134 } 01135 } 01136 01137 *s = ss; 01138 return 1; 01139 } 01140 } 01141 } 01142 else if (k->type == STK_MEM_END) { 01143 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { 01144 pend = k->u.mem.pstr; 01145 } 01146 } 01147 } 01148 k--; 01149 } 01150 01151 return 0; 01152 } 01153 #endif /* USE_BACKREF_WITH_LEVEL */ 01154 01155 01156 #ifdef ONIG_DEBUG_STATISTICS 01157 01158 #define USE_TIMEOFDAY 01159 01160 #ifdef USE_TIMEOFDAY 01161 #ifdef HAVE_SYS_TIME_H 01162 #include <sys/time.h> 01163 #endif 01164 #ifdef HAVE_UNISTD_H 01165 #include <unistd.h> 01166 #endif 
01167 static struct timeval ts, te; 01168 #define GETTIME(t) gettimeofday(&(t), (struct timezone* )0) 01169 #define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \ 01170 (((te).tv_sec - (ts).tv_sec)*1000000)) 01171 #else 01172 #ifdef HAVE_SYS_TIMES_H 01173 #include <sys/times.h> 01174 #endif 01175 static struct tms ts, te; 01176 #define GETTIME(t) times(&(t)) 01177 #define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime) 01178 #endif 01179 01180 static int OpCounter[256]; 01181 static int OpPrevCounter[256]; 01182 static unsigned long OpTime[256]; 01183 static int OpCurr = OP_FINISH; 01184 static int OpPrevTarget = OP_FAIL; 01185 static int MaxStackDepth = 0; 01186 01187 #define MOP_IN(opcode) do {\ 01188 if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\ 01189 OpCurr = opcode;\ 01190 OpCounter[opcode]++;\ 01191 GETTIME(ts);\ 01192 } while(0) 01193 01194 #define MOP_OUT do {\ 01195 GETTIME(te);\ 01196 OpTime[OpCurr] += TIMEDIFF(te, ts);\ 01197 } while(0) 01198 01199 extern void 01200 onig_statistics_init(void) 01201 { 01202 int i; 01203 for (i = 0; i < 256; i++) { 01204 OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0; 01205 } 01206 MaxStackDepth = 0; 01207 } 01208 01209 extern void 01210 onig_print_statistics(FILE* f) 01211 { 01212 int i; 01213 fprintf(f, " count prev time\n"); 01214 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { 01215 fprintf(f, "%8d: %8d: %10ld: %s\n", 01216 OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name); 01217 } 01218 fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth); 01219 } 01220 01221 #define STACK_INC do {\ 01222 stk++;\ 01223 if (stk - stk_base > MaxStackDepth) \ 01224 MaxStackDepth = stk - stk_base;\ 01225 } while(0) 01226 01227 #else 01228 #define STACK_INC stk++ 01229 01230 #define MOP_IN(opcode) 01231 #define MOP_OUT 01232 #endif 01233 01234 01235 /* matching region of POSIX API */ 01236 typedef int regoff_t; 01237 01238 typedef struct { 01239 regoff_t rm_so; 01240 regoff_t rm_eo; 01241 } posix_regmatch_t; 
01242 01243 void onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp, 01244 OnigEncoding enc); 01245 01246 /* match data(str - end) from position (sstart). */ 01247 /* if sstart == str then set sprev to NULL. */ 01248 static long 01249 match_at(regex_t* reg, const UChar* str, const UChar* end, 01250 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE 01251 const UChar* right_range, 01252 #endif 01253 const UChar* sstart, UChar* sprev, OnigMatchArg* msa) 01254 { 01255 static const UChar FinishCode[] = { OP_FINISH }; 01256 01257 int i, num_mem, best_len, pop_level; 01258 ptrdiff_t n; 01259 LengthType tlen, tlen2; 01260 MemNumType mem; 01261 RelAddrType addr; 01262 OnigOptionType option = reg->options; 01263 OnigEncoding encode = reg->enc; 01264 OnigCaseFoldType case_fold_flag = reg->case_fold_flag; 01265 UChar *s, *q, *sbegin; 01266 UChar *p = reg->p; 01267 char *alloca_base; 01268 OnigStackType *stk_alloc, *stk_base, *stk, *stk_end; 01269 OnigStackType *stkp; /* used as any purpose. 
*/ 01270 OnigStackIndex si; 01271 OnigStackIndex *repeat_stk; 01272 OnigStackIndex *mem_start_stk, *mem_end_stk; 01273 #ifdef USE_COMBINATION_EXPLOSION_CHECK 01274 int scv; 01275 unsigned char* state_check_buff = msa->state_check_buff; 01276 int num_comb_exp_check = reg->num_comb_exp_check; 01277 #endif 01278 n = reg->num_repeat + reg->num_mem * 2; 01279 01280 STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE); 01281 pop_level = reg->stack_pop_level; 01282 num_mem = reg->num_mem; 01283 repeat_stk = (OnigStackIndex* )alloca_base; 01284 01285 mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat); 01286 mem_end_stk = mem_start_stk + num_mem; 01287 mem_start_stk--; /* for index start from 1, 01288 mem_start_stk[1]..mem_start_stk[num_mem] */ 01289 mem_end_stk--; /* for index start from 1, 01290 mem_end_stk[1]..mem_end_stk[num_mem] */ 01291 for (i = 1; i <= num_mem; i++) { 01292 mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX; 01293 } 01294 01295 #ifdef ONIG_DEBUG_MATCH 01296 fprintf(stderr, "match_at: str: %"PRIdPTR", end: %"PRIdPTR", start: %"PRIdPTR", sprev: %"PRIdPTR"\n", 01297 (intptr_t)str, (intptr_t)end, (intptr_t)sstart, (intptr_t)sprev); 01298 fprintf(stderr, "size: %d, start offset: %d\n", 01299 (int )(end - str), (int )(sstart - str)); 01300 #endif 01301 01302 STACK_PUSH_ENSURED(STK_ALT, (UChar *)FinishCode); /* bottom stack */ 01303 best_len = ONIG_MISMATCH; 01304 s = (UChar* )sstart; 01305 while (1) { 01306 #ifdef ONIG_DEBUG_MATCH 01307 if (s) { 01308 UChar *q, *bp, buf[50]; 01309 int len; 01310 fprintf(stderr, "%4d> \"", (int )(s - str)); 01311 bp = buf; 01312 for (i = 0, q = s; i < 7 && q < end; i++) { 01313 len = enclen(encode, q, end); 01314 while (len-- > 0) *bp++ = *q++; 01315 } 01316 if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; } 01317 else { xmemcpy(bp, "\"", 1); bp += 1; } 01318 *bp = 0; 01319 fputs((char* )buf, stderr); 01320 for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); 01321 onig_print_compiled_byte_code(stderr, 
p, p + strlen((char *)p), NULL, encode); 01322 fprintf(stderr, "\n"); 01323 } 01324 #endif 01325 01326 sbegin = s; 01327 switch (*p++) { 01328 case OP_END: MOP_IN(OP_END); 01329 n = s - sstart; 01330 if (n > best_len) { 01331 OnigRegion* region; 01332 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE 01333 if (IS_FIND_LONGEST(option)) { 01334 if (n > msa->best_len) { 01335 msa->best_len = (int)n; 01336 msa->best_s = (UChar* )sstart; 01337 } 01338 else 01339 goto end_best_len; 01340 } 01341 #endif 01342 best_len = (int)n; 01343 region = msa->region; 01344 if (region) { 01345 #ifdef USE_POSIX_API_REGION_OPTION 01346 if (IS_POSIX_REGION(msa->options)) { 01347 posix_regmatch_t* rmt = (posix_regmatch_t* )region; 01348 01349 rmt[0].rm_so = sstart - str; 01350 rmt[0].rm_eo = s - str; 01351 for (i = 1; i <= num_mem; i++) { 01352 if (mem_end_stk[i] != INVALID_STACK_INDEX) { 01353 if (BIT_STATUS_AT(reg->bt_mem_start, i)) 01354 rmt[i].rm_so = STACK_AT(mem_start_stk[i])->u.mem.pstr - str; 01355 else 01356 rmt[i].rm_so = (UChar* )((void* )(mem_start_stk[i])) - str; 01357 01358 rmt[i].rm_eo = (BIT_STATUS_AT(reg->bt_mem_end, i) 01359 ? STACK_AT(mem_end_stk[i])->u.mem.pstr 01360 : (UChar* )((void* )mem_end_stk[i])) - str; 01361 } 01362 else { 01363 rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS; 01364 } 01365 } 01366 } 01367 else { 01368 #endif /* USE_POSIX_API_REGION_OPTION */ 01369 region->beg[0] = (int)(sstart - str); 01370 region->end[0] = (int)(s - str); 01371 for (i = 1; i <= num_mem; i++) { 01372 if (mem_end_stk[i] != INVALID_STACK_INDEX) { 01373 region->beg[i] = (int)((BIT_STATUS_AT(reg->bt_mem_start, i)) 01374 ? STACK_AT(mem_start_stk[i])->u.mem.pstr - str 01375 : (UChar* )((void* )mem_start_stk[i]) - str); 01376 region->end[i] = (int)(BIT_STATUS_AT(reg->bt_mem_end, i) 01377 ? 
STACK_AT(mem_end_stk[i])->u.mem.pstr - str 01378 : (UChar* )((void* )mem_end_stk[i]) - str); 01379 } 01380 else { 01381 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; 01382 } 01383 } 01384 01385 #ifdef USE_CAPTURE_HISTORY 01386 if (reg->capture_history != 0) { 01387 int r; 01388 OnigCaptureTreeNode* node; 01389 01390 if (IS_NULL(region->history_root)) { 01391 region->history_root = node = history_node_new(); 01392 CHECK_NULL_RETURN_MEMERR(node); 01393 } 01394 else { 01395 node = region->history_root; 01396 history_tree_clear(node); 01397 } 01398 01399 node->group = 0; 01400 node->beg = sstart - str; 01401 node->end = s - str; 01402 01403 stkp = stk_base; 01404 r = make_capture_history_tree(region->history_root, &stkp, 01405 stk, (UChar* )str, reg); 01406 if (r < 0) { 01407 best_len = r; /* error code */ 01408 goto finish; 01409 } 01410 } 01411 #endif /* USE_CAPTURE_HISTORY */ 01412 #ifdef USE_POSIX_API_REGION_OPTION 01413 } /* else IS_POSIX_REGION() */ 01414 #endif 01415 } /* if (region) */ 01416 } /* n > best_len */ 01417 01418 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE 01419 end_best_len: 01420 #endif 01421 MOP_OUT; 01422 01423 if (IS_FIND_CONDITION(option)) { 01424 if (IS_FIND_NOT_EMPTY(option) && s == sstart) { 01425 best_len = ONIG_MISMATCH; 01426 goto fail; /* for retry */ 01427 } 01428 if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) { 01429 goto fail; /* for retry */ 01430 } 01431 } 01432 01433 /* default behavior: return first-matching result. 
*/ 01434 goto finish; 01435 break; 01436 01437 case OP_EXACT1: MOP_IN(OP_EXACT1); 01438 #if 0 01439 DATA_ENSURE(1); 01440 if (*p != *s) goto fail; 01441 p++; s++; 01442 #endif 01443 if (*p != *s++) goto fail; 01444 DATA_ENSURE(0); 01445 p++; 01446 MOP_OUT; 01447 break; 01448 01449 case OP_EXACT1_IC: MOP_IN(OP_EXACT1_IC); 01450 { 01451 int len; 01452 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; 01453 01454 DATA_ENSURE(1); 01455 len = ONIGENC_MBC_CASE_FOLD(encode, 01456 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */ 01457 case_fold_flag, 01458 &s, end, lowbuf); 01459 DATA_ENSURE(0); 01460 q = lowbuf; 01461 while (len-- > 0) { 01462 if (*p != *q) { 01463 goto fail; 01464 } 01465 p++; q++; 01466 } 01467 } 01468 MOP_OUT; 01469 break; 01470 01471 case OP_EXACT2: MOP_IN(OP_EXACT2); 01472 DATA_ENSURE(2); 01473 if (*p != *s) goto fail; 01474 p++; s++; 01475 if (*p != *s) goto fail; 01476 sprev = s; 01477 p++; s++; 01478 MOP_OUT; 01479 continue; 01480 break; 01481 01482 case OP_EXACT3: MOP_IN(OP_EXACT3); 01483 DATA_ENSURE(3); 01484 if (*p != *s) goto fail; 01485 p++; s++; 01486 if (*p != *s) goto fail; 01487 p++; s++; 01488 if (*p != *s) goto fail; 01489 sprev = s; 01490 p++; s++; 01491 MOP_OUT; 01492 continue; 01493 break; 01494 01495 case OP_EXACT4: MOP_IN(OP_EXACT4); 01496 DATA_ENSURE(4); 01497 if (*p != *s) goto fail; 01498 p++; s++; 01499 if (*p != *s) goto fail; 01500 p++; s++; 01501 if (*p != *s) goto fail; 01502 p++; s++; 01503 if (*p != *s) goto fail; 01504 sprev = s; 01505 p++; s++; 01506 MOP_OUT; 01507 continue; 01508 break; 01509 01510 case OP_EXACT5: MOP_IN(OP_EXACT5); 01511 DATA_ENSURE(5); 01512 if (*p != *s) goto fail; 01513 p++; s++; 01514 if (*p != *s) goto fail; 01515 p++; s++; 01516 if (*p != *s) goto fail; 01517 p++; s++; 01518 if (*p != *s) goto fail; 01519 p++; s++; 01520 if (*p != *s) goto fail; 01521 sprev = s; 01522 p++; s++; 01523 MOP_OUT; 01524 continue; 01525 break; 01526 01527 case OP_EXACTN: MOP_IN(OP_EXACTN); 01528 GET_LENGTH_INC(tlen, 
p); 01529 DATA_ENSURE(tlen); 01530 while (tlen-- > 0) { 01531 if (*p++ != *s++) goto fail; 01532 } 01533 sprev = s - 1; 01534 MOP_OUT; 01535 continue; 01536 break; 01537 01538 case OP_EXACTN_IC: MOP_IN(OP_EXACTN_IC); 01539 { 01540 int len; 01541 UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; 01542 01543 GET_LENGTH_INC(tlen, p); 01544 endp = p + tlen; 01545 01546 while (p < endp) { 01547 sprev = s; 01548 DATA_ENSURE(1); 01549 len = ONIGENC_MBC_CASE_FOLD(encode, 01550 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */ 01551 case_fold_flag, 01552 &s, end, lowbuf); 01553 DATA_ENSURE(0); 01554 q = lowbuf; 01555 while (len-- > 0) { 01556 if (*p != *q) goto fail; 01557 p++; q++; 01558 } 01559 } 01560 } 01561 01562 MOP_OUT; 01563 continue; 01564 break; 01565 01566 case OP_EXACTMB2N1: MOP_IN(OP_EXACTMB2N1); 01567 DATA_ENSURE(2); 01568 if (*p != *s) goto fail; 01569 p++; s++; 01570 if (*p != *s) goto fail; 01571 p++; s++; 01572 MOP_OUT; 01573 break; 01574 01575 case OP_EXACTMB2N2: MOP_IN(OP_EXACTMB2N2); 01576 DATA_ENSURE(4); 01577 if (*p != *s) goto fail; 01578 p++; s++; 01579 if (*p != *s) goto fail; 01580 p++; s++; 01581 sprev = s; 01582 if (*p != *s) goto fail; 01583 p++; s++; 01584 if (*p != *s) goto fail; 01585 p++; s++; 01586 MOP_OUT; 01587 continue; 01588 break; 01589 01590 case OP_EXACTMB2N3: MOP_IN(OP_EXACTMB2N3); 01591 DATA_ENSURE(6); 01592 if (*p != *s) goto fail; 01593 p++; s++; 01594 if (*p != *s) goto fail; 01595 p++; s++; 01596 if (*p != *s) goto fail; 01597 p++; s++; 01598 if (*p != *s) goto fail; 01599 p++; s++; 01600 sprev = s; 01601 if (*p != *s) goto fail; 01602 p++; s++; 01603 if (*p != *s) goto fail; 01604 p++; s++; 01605 MOP_OUT; 01606 continue; 01607 break; 01608 01609 case OP_EXACTMB2N: MOP_IN(OP_EXACTMB2N); 01610 GET_LENGTH_INC(tlen, p); 01611 DATA_ENSURE(tlen * 2); 01612 while (tlen-- > 0) { 01613 if (*p != *s) goto fail; 01614 p++; s++; 01615 if (*p != *s) goto fail; 01616 p++; s++; 01617 } 01618 sprev = s - 2; 01619 MOP_OUT; 01620 
continue; 01621 break; 01622 01623 case OP_EXACTMB3N: MOP_IN(OP_EXACTMB3N); 01624 GET_LENGTH_INC(tlen, p); 01625 DATA_ENSURE(tlen * 3); 01626 while (tlen-- > 0) { 01627 if (*p != *s) goto fail; 01628 p++; s++; 01629 if (*p != *s) goto fail; 01630 p++; s++; 01631 if (*p != *s) goto fail; 01632 p++; s++; 01633 } 01634 sprev = s - 3; 01635 MOP_OUT; 01636 continue; 01637 break; 01638 01639 case OP_EXACTMBN: MOP_IN(OP_EXACTMBN); 01640 GET_LENGTH_INC(tlen, p); /* mb-len */ 01641 GET_LENGTH_INC(tlen2, p); /* string len */ 01642 tlen2 *= tlen; 01643 DATA_ENSURE(tlen2); 01644 while (tlen2-- > 0) { 01645 if (*p != *s) goto fail; 01646 p++; s++; 01647 } 01648 sprev = s - tlen; 01649 MOP_OUT; 01650 continue; 01651 break; 01652 01653 case OP_CCLASS: MOP_IN(OP_CCLASS); 01654 DATA_ENSURE(1); 01655 if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail; 01656 p += SIZE_BITSET; 01657 s += enclen(encode, s, end); /* OP_CCLASS can match mb-code. \D, \S */ 01658 MOP_OUT; 01659 break; 01660 01661 case OP_CCLASS_MB: MOP_IN(OP_CCLASS_MB); 01662 if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) goto fail; 01663 01664 cclass_mb: 01665 GET_LENGTH_INC(tlen, p); 01666 { 01667 OnigCodePoint code; 01668 UChar *ss; 01669 int mb_len; 01670 01671 DATA_ENSURE(1); 01672 mb_len = enclen(encode, s, end); 01673 DATA_ENSURE(mb_len); 01674 ss = s; 01675 s += mb_len; 01676 code = ONIGENC_MBC_TO_CODE(encode, ss, s); 01677 01678 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS 01679 if (! onig_is_in_code_range(p, code)) goto fail; 01680 #else 01681 q = p; 01682 ALIGNMENT_RIGHT(q); 01683 if (! 
onig_is_in_code_range(q, code)) goto fail; 01684 #endif 01685 } 01686 p += tlen; 01687 MOP_OUT; 01688 break; 01689 01690 case OP_CCLASS_MIX: MOP_IN(OP_CCLASS_MIX); 01691 DATA_ENSURE(1); 01692 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) { 01693 p += SIZE_BITSET; 01694 goto cclass_mb; 01695 } 01696 else { 01697 if (BITSET_AT(((BitSetRef )p), *s) == 0) 01698 goto fail; 01699 01700 p += SIZE_BITSET; 01701 GET_LENGTH_INC(tlen, p); 01702 p += tlen; 01703 s++; 01704 } 01705 MOP_OUT; 01706 break; 01707 01708 case OP_CCLASS_NOT: MOP_IN(OP_CCLASS_NOT); 01709 DATA_ENSURE(1); 01710 if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail; 01711 p += SIZE_BITSET; 01712 s += enclen(encode, s, end); 01713 MOP_OUT; 01714 break; 01715 01716 case OP_CCLASS_MB_NOT: MOP_IN(OP_CCLASS_MB_NOT); 01717 DATA_ENSURE(1); 01718 if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) { 01719 s++; 01720 GET_LENGTH_INC(tlen, p); 01721 p += tlen; 01722 goto cc_mb_not_success; 01723 } 01724 01725 cclass_mb_not: 01726 GET_LENGTH_INC(tlen, p); 01727 { 01728 OnigCodePoint code; 01729 UChar *ss; 01730 int mb_len = enclen(encode, s, end); 01731 01732 if (! 
DATA_ENSURE_CHECK(mb_len)) { 01733 DATA_ENSURE(1); 01734 s = (UChar* )end; 01735 p += tlen; 01736 goto cc_mb_not_success; 01737 } 01738 01739 ss = s; 01740 s += mb_len; 01741 code = ONIGENC_MBC_TO_CODE(encode, ss, s); 01742 01743 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS 01744 if (onig_is_in_code_range(p, code)) goto fail; 01745 #else 01746 q = p; 01747 ALIGNMENT_RIGHT(q); 01748 if (onig_is_in_code_range(q, code)) goto fail; 01749 #endif 01750 } 01751 p += tlen; 01752 01753 cc_mb_not_success: 01754 MOP_OUT; 01755 break; 01756 01757 case OP_CCLASS_MIX_NOT: MOP_IN(OP_CCLASS_MIX_NOT); 01758 DATA_ENSURE(1); 01759 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) { 01760 p += SIZE_BITSET; 01761 goto cclass_mb_not; 01762 } 01763 else { 01764 if (BITSET_AT(((BitSetRef )p), *s) != 0) 01765 goto fail; 01766 01767 p += SIZE_BITSET; 01768 GET_LENGTH_INC(tlen, p); 01769 p += tlen; 01770 s++; 01771 } 01772 MOP_OUT; 01773 break; 01774 01775 case OP_CCLASS_NODE: MOP_IN(OP_CCLASS_NODE); 01776 { 01777 OnigCodePoint code; 01778 void *node; 01779 int mb_len; 01780 UChar *ss; 01781 01782 DATA_ENSURE(1); 01783 GET_POINTER_INC(node, p); 01784 mb_len = enclen(encode, s, end); 01785 ss = s; 01786 s += mb_len; 01787 DATA_ENSURE(0); 01788 code = ONIGENC_MBC_TO_CODE(encode, ss, s); 01789 if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail; 01790 } 01791 MOP_OUT; 01792 break; 01793 01794 case OP_ANYCHAR: MOP_IN(OP_ANYCHAR); 01795 DATA_ENSURE(1); 01796 n = enclen(encode, s, end); 01797 DATA_ENSURE(n); 01798 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; 01799 s += n; 01800 MOP_OUT; 01801 break; 01802 01803 case OP_ANYCHAR_ML: MOP_IN(OP_ANYCHAR_ML); 01804 DATA_ENSURE(1); 01805 n = enclen(encode, s, end); 01806 DATA_ENSURE(n); 01807 s += n; 01808 MOP_OUT; 01809 break; 01810 01811 case OP_ANYCHAR_STAR: MOP_IN(OP_ANYCHAR_STAR); 01812 while (DATA_ENSURE_CHECK1) { 01813 STACK_PUSH_ALT(p, s, sprev); 01814 n = enclen(encode, s, end); 01815 DATA_ENSURE(n); 01816 if (ONIGENC_IS_MBC_NEWLINE(encode, 
s, end)) goto fail; 01817 sprev = s; 01818 s += n; 01819 } 01820 MOP_OUT; 01821 break; 01822 01823 case OP_ANYCHAR_ML_STAR: MOP_IN(OP_ANYCHAR_ML_STAR); 01824 while (DATA_ENSURE_CHECK1) { 01825 STACK_PUSH_ALT(p, s, sprev); 01826 n = enclen(encode, s, end); 01827 if (n > 1) { 01828 DATA_ENSURE(n); 01829 sprev = s; 01830 s += n; 01831 } 01832 else { 01833 sprev = s; 01834 s++; 01835 } 01836 } 01837 MOP_OUT; 01838 break; 01839 01840 case OP_ANYCHAR_STAR_PEEK_NEXT: MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT); 01841 while (DATA_ENSURE_CHECK1) { 01842 if (*p == *s) { 01843 STACK_PUSH_ALT(p + 1, s, sprev); 01844 } 01845 n = enclen(encode, s, end); 01846 DATA_ENSURE(n); 01847 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; 01848 sprev = s; 01849 s += n; 01850 } 01851 p++; 01852 MOP_OUT; 01853 break; 01854 01855 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT); 01856 while (DATA_ENSURE_CHECK1) { 01857 if (*p == *s) { 01858 STACK_PUSH_ALT(p + 1, s, sprev); 01859 } 01860 n = enclen(encode, s, end); 01861 if (n > 1) { 01862 DATA_ENSURE(n); 01863 sprev = s; 01864 s += n; 01865 } 01866 else { 01867 sprev = s; 01868 s++; 01869 } 01870 } 01871 p++; 01872 MOP_OUT; 01873 break; 01874 01875 #ifdef USE_COMBINATION_EXPLOSION_CHECK 01876 case OP_STATE_CHECK_ANYCHAR_STAR: MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR); 01877 GET_STATE_CHECK_NUM_INC(mem, p); 01878 while (DATA_ENSURE_CHECK1) { 01879 STATE_CHECK_VAL(scv, mem); 01880 if (scv) goto fail; 01881 01882 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem); 01883 n = enclen(encode, s, end); 01884 DATA_ENSURE(n); 01885 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; 01886 sprev = s; 01887 s += n; 01888 } 01889 MOP_OUT; 01890 break; 01891 01892 case OP_STATE_CHECK_ANYCHAR_ML_STAR: 01893 MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR); 01894 01895 GET_STATE_CHECK_NUM_INC(mem, p); 01896 while (DATA_ENSURE_CHECK1) { 01897 STATE_CHECK_VAL(scv, mem); 01898 if (scv) goto fail; 01899 01900 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, 
mem); 01901 n = enclen(encode, s, end); 01902 if (n > 1) { 01903 DATA_ENSURE(n); 01904 sprev = s; 01905 s += n; 01906 } 01907 else { 01908 sprev = s; 01909 s++; 01910 } 01911 } 01912 MOP_OUT; 01913 break; 01914 #endif /* USE_COMBINATION_EXPLOSION_CHECK */ 01915 01916 case OP_WORD: MOP_IN(OP_WORD); 01917 DATA_ENSURE(1); 01918 if (! ONIGENC_IS_MBC_WORD(encode, s, end)) 01919 goto fail; 01920 01921 s += enclen(encode, s, end); 01922 MOP_OUT; 01923 break; 01924 01925 case OP_NOT_WORD: MOP_IN(OP_NOT_WORD); 01926 DATA_ENSURE(1); 01927 if (ONIGENC_IS_MBC_WORD(encode, s, end)) 01928 goto fail; 01929 01930 s += enclen(encode, s, end); 01931 MOP_OUT; 01932 break; 01933 01934 case OP_WORD_BOUND: MOP_IN(OP_WORD_BOUND); 01935 if (ON_STR_BEGIN(s)) { 01936 DATA_ENSURE(1); 01937 if (! ONIGENC_IS_MBC_WORD(encode, s, end)) 01938 goto fail; 01939 } 01940 else if (ON_STR_END(s)) { 01941 if (! ONIGENC_IS_MBC_WORD(encode, sprev, end)) 01942 goto fail; 01943 } 01944 else { 01945 if (ONIGENC_IS_MBC_WORD(encode, s, end) 01946 == ONIGENC_IS_MBC_WORD(encode, sprev, end)) 01947 goto fail; 01948 } 01949 MOP_OUT; 01950 continue; 01951 break; 01952 01953 case OP_NOT_WORD_BOUND: MOP_IN(OP_NOT_WORD_BOUND); 01954 if (ON_STR_BEGIN(s)) { 01955 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) 01956 goto fail; 01957 } 01958 else if (ON_STR_END(s)) { 01959 if (ONIGENC_IS_MBC_WORD(encode, sprev, end)) 01960 goto fail; 01961 } 01962 else { 01963 if (ONIGENC_IS_MBC_WORD(encode, s, end) 01964 != ONIGENC_IS_MBC_WORD(encode, sprev, end)) 01965 goto fail; 01966 } 01967 MOP_OUT; 01968 continue; 01969 break; 01970 01971 #ifdef USE_WORD_BEGIN_END 01972 case OP_WORD_BEGIN: MOP_IN(OP_WORD_BEGIN); 01973 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) { 01974 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) { 01975 MOP_OUT; 01976 continue; 01977 } 01978 } 01979 goto fail; 01980 break; 01981 01982 case OP_WORD_END: MOP_IN(OP_WORD_END); 01983 if (!ON_STR_BEGIN(s) && 
ONIGENC_IS_MBC_WORD(encode, sprev, end)) { 01984 if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) { 01985 MOP_OUT; 01986 continue; 01987 } 01988 } 01989 goto fail; 01990 break; 01991 #endif 01992 01993 case OP_BEGIN_BUF: MOP_IN(OP_BEGIN_BUF); 01994 if (! ON_STR_BEGIN(s)) goto fail; 01995 01996 MOP_OUT; 01997 continue; 01998 break; 01999 02000 case OP_END_BUF: MOP_IN(OP_END_BUF); 02001 if (! ON_STR_END(s)) goto fail; 02002 02003 MOP_OUT; 02004 continue; 02005 break; 02006 02007 case OP_BEGIN_LINE: MOP_IN(OP_BEGIN_LINE); 02008 if (ON_STR_BEGIN(s)) { 02009 if (IS_NOTBOL(msa->options)) goto fail; 02010 MOP_OUT; 02011 continue; 02012 } 02013 else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) { 02014 MOP_OUT; 02015 continue; 02016 } 02017 goto fail; 02018 break; 02019 02020 case OP_END_LINE: MOP_IN(OP_END_LINE); 02021 if (ON_STR_END(s)) { 02022 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE 02023 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { 02024 #endif 02025 if (IS_NOTEOL(msa->options)) goto fail; 02026 MOP_OUT; 02027 continue; 02028 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE 02029 } 02030 #endif 02031 } 02032 else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) { 02033 MOP_OUT; 02034 continue; 02035 } 02036 #ifdef USE_CRNL_AS_LINE_TERMINATOR 02037 else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) { 02038 MOP_OUT; 02039 continue; 02040 } 02041 #endif 02042 goto fail; 02043 break; 02044 02045 case OP_SEMI_END_BUF: MOP_IN(OP_SEMI_END_BUF); 02046 if (ON_STR_END(s)) { 02047 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE 02048 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { 02049 #endif 02050 if (IS_NOTEOL(msa->options)) goto fail; 02051 MOP_OUT; 02052 continue; 02053 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE 02054 } 02055 #endif 02056 } 02057 else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) && 02058 ON_STR_END(s + enclen(encode, s, end))) { 02059 MOP_OUT; 02060 continue; 02061 } 02062 
#ifdef USE_CRNL_AS_LINE_TERMINATOR 02063 else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) { 02064 UChar* ss = s + enclen(encode, s); 02065 ss += enclen(encode, ss); 02066 if (ON_STR_END(ss)) { 02067 MOP_OUT; 02068 continue; 02069 } 02070 } 02071 #endif 02072 goto fail; 02073 break; 02074 02075 case OP_BEGIN_POSITION: MOP_IN(OP_BEGIN_POSITION); 02076 if (s != msa->start) 02077 goto fail; 02078 02079 MOP_OUT; 02080 continue; 02081 break; 02082 02083 case OP_MEMORY_START_PUSH: MOP_IN(OP_MEMORY_START_PUSH); 02084 GET_MEMNUM_INC(mem, p); 02085 STACK_PUSH_MEM_START(mem, s); 02086 MOP_OUT; 02087 continue; 02088 break; 02089 02090 case OP_MEMORY_START: MOP_IN(OP_MEMORY_START); 02091 GET_MEMNUM_INC(mem, p); 02092 mem_start_stk[mem] = (OnigStackIndex )((void* )s); 02093 MOP_OUT; 02094 continue; 02095 break; 02096 02097 case OP_MEMORY_END_PUSH: MOP_IN(OP_MEMORY_END_PUSH); 02098 GET_MEMNUM_INC(mem, p); 02099 STACK_PUSH_MEM_END(mem, s); 02100 MOP_OUT; 02101 continue; 02102 break; 02103 02104 case OP_MEMORY_END: MOP_IN(OP_MEMORY_END); 02105 GET_MEMNUM_INC(mem, p); 02106 mem_end_stk[mem] = (OnigStackIndex )((void* )s); 02107 MOP_OUT; 02108 continue; 02109 break; 02110 02111 #ifdef USE_SUBEXP_CALL 02112 case OP_MEMORY_END_PUSH_REC: MOP_IN(OP_MEMORY_END_PUSH_REC); 02113 GET_MEMNUM_INC(mem, p); 02114 STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. 
*/ 02115 STACK_PUSH_MEM_END(mem, s); 02116 mem_start_stk[mem] = GET_STACK_INDEX(stkp); 02117 MOP_OUT; 02118 continue; 02119 break; 02120 02121 case OP_MEMORY_END_REC: MOP_IN(OP_MEMORY_END_REC); 02122 GET_MEMNUM_INC(mem, p); 02123 mem_end_stk[mem] = (OnigStackIndex )((void* )s); 02124 STACK_GET_MEM_START(mem, stkp); 02125 02126 if (BIT_STATUS_AT(reg->bt_mem_start, mem)) 02127 mem_start_stk[mem] = GET_STACK_INDEX(stkp); 02128 else 02129 mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr); 02130 02131 STACK_PUSH_MEM_END_MARK(mem); 02132 MOP_OUT; 02133 continue; 02134 break; 02135 #endif 02136 02137 case OP_BACKREF1: MOP_IN(OP_BACKREF1); 02138 mem = 1; 02139 goto backref; 02140 break; 02141 02142 case OP_BACKREF2: MOP_IN(OP_BACKREF2); 02143 mem = 2; 02144 goto backref; 02145 break; 02146 02147 case OP_BACKREFN: MOP_IN(OP_BACKREFN); 02148 GET_MEMNUM_INC(mem, p); 02149 backref: 02150 { 02151 int len; 02152 UChar *pstart, *pend; 02153 02154 /* if you want to remove following line, 02155 you should check in parse and compile time. */ 02156 if (mem > num_mem) goto fail; 02157 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; 02158 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; 02159 02160 if (BIT_STATUS_AT(reg->bt_mem_start, mem)) 02161 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; 02162 else 02163 pstart = (UChar* )((void* )mem_start_stk[mem]); 02164 02165 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) 02166 ? 
STACK_AT(mem_end_stk[mem])->u.mem.pstr 02167 : (UChar* )((void* )mem_end_stk[mem])); 02168 n = pend - pstart; 02169 DATA_ENSURE(n); 02170 sprev = s; 02171 STRING_CMP(pstart, s, n); 02172 while (sprev + (len = enclen(encode, sprev, end)) < s) 02173 sprev += len; 02174 02175 MOP_OUT; 02176 continue; 02177 } 02178 break; 02179 02180 case OP_BACKREFN_IC: MOP_IN(OP_BACKREFN_IC); 02181 GET_MEMNUM_INC(mem, p); 02182 { 02183 int len; 02184 UChar *pstart, *pend; 02185 02186 /* if you want to remove following line, 02187 you should check in parse and compile time. */ 02188 if (mem > num_mem) goto fail; 02189 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; 02190 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; 02191 02192 if (BIT_STATUS_AT(reg->bt_mem_start, mem)) 02193 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; 02194 else 02195 pstart = (UChar* )((void* )mem_start_stk[mem]); 02196 02197 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) 02198 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr 02199 : (UChar* )((void* )mem_end_stk[mem])); 02200 n = pend - pstart; 02201 DATA_ENSURE(n); 02202 sprev = s; 02203 STRING_CMP_IC(case_fold_flag, pstart, &s, (int)n, end); 02204 while (sprev + (len = enclen(encode, sprev, end)) < s) 02205 sprev += len; 02206 02207 MOP_OUT; 02208 continue; 02209 } 02210 break; 02211 02212 case OP_BACKREF_MULTI: MOP_IN(OP_BACKREF_MULTI); 02213 { 02214 int len, is_fail; 02215 UChar *pstart, *pend, *swork; 02216 02217 GET_LENGTH_INC(tlen, p); 02218 for (i = 0; i < tlen; i++) { 02219 GET_MEMNUM_INC(mem, p); 02220 02221 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; 02222 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; 02223 02224 if (BIT_STATUS_AT(reg->bt_mem_start, mem)) 02225 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; 02226 else 02227 pstart = (UChar* )((void* )mem_start_stk[mem]); 02228 02229 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) 02230 ? 
STACK_AT(mem_end_stk[mem])->u.mem.pstr 02231 : (UChar* )((void* )mem_end_stk[mem])); 02232 n = pend - pstart; 02233 DATA_ENSURE(n); 02234 sprev = s; 02235 swork = s; 02236 STRING_CMP_VALUE(pstart, swork, n, is_fail); 02237 if (is_fail) continue; 02238 s = swork; 02239 while (sprev + (len = enclen(encode, sprev, end)) < s) 02240 sprev += len; 02241 02242 p += (SIZE_MEMNUM * (tlen - i - 1)); 02243 break; /* success */ 02244 } 02245 if (i == tlen) goto fail; 02246 MOP_OUT; 02247 continue; 02248 } 02249 break; 02250 02251 case OP_BACKREF_MULTI_IC: MOP_IN(OP_BACKREF_MULTI_IC); 02252 { 02253 int len, is_fail; 02254 UChar *pstart, *pend, *swork; 02255 02256 GET_LENGTH_INC(tlen, p); 02257 for (i = 0; i < tlen; i++) { 02258 GET_MEMNUM_INC(mem, p); 02259 02260 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; 02261 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; 02262 02263 if (BIT_STATUS_AT(reg->bt_mem_start, mem)) 02264 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; 02265 else 02266 pstart = (UChar* )((void* )mem_start_stk[mem]); 02267 02268 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) 02269 ? 
STACK_AT(mem_end_stk[mem])->u.mem.pstr 02270 : (UChar* )((void* )mem_end_stk[mem])); 02271 n = pend - pstart; 02272 DATA_ENSURE(n); 02273 sprev = s; 02274 swork = s; 02275 STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, (int)n, end, is_fail); 02276 if (is_fail) continue; 02277 s = swork; 02278 while (sprev + (len = enclen(encode, sprev, end)) < s) 02279 sprev += len; 02280 02281 p += (SIZE_MEMNUM * (tlen - i - 1)); 02282 break; /* success */ 02283 } 02284 if (i == tlen) goto fail; 02285 MOP_OUT; 02286 continue; 02287 } 02288 break; 02289 02290 #ifdef USE_BACKREF_WITH_LEVEL 02291 case OP_BACKREF_WITH_LEVEL: 02292 { 02293 int len; 02294 OnigOptionType ic; 02295 LengthType level; 02296 02297 GET_OPTION_INC(ic, p); 02298 GET_LENGTH_INC(level, p); 02299 GET_LENGTH_INC(tlen, p); 02300 02301 sprev = s; 02302 if (backref_match_at_nested_level(reg, stk, stk_base, ic 02303 , case_fold_flag, (int )level, (int )tlen, p, &s, end)) { 02304 while (sprev + (len = enclen(encode, sprev, end)) < s) 02305 sprev += len; 02306 02307 p += (SIZE_MEMNUM * tlen); 02308 } 02309 else 02310 goto fail; 02311 02312 MOP_OUT; 02313 continue; 02314 } 02315 02316 break; 02317 #endif 02318 02319 #if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */ 02320 case OP_SET_OPTION_PUSH: MOP_IN(OP_SET_OPTION_PUSH); 02321 GET_OPTION_INC(option, p); 02322 STACK_PUSH_ALT(p, s, sprev); 02323 p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL; 02324 MOP_OUT; 02325 continue; 02326 break; 02327 02328 case OP_SET_OPTION: MOP_IN(OP_SET_OPTION); 02329 GET_OPTION_INC(option, p); 02330 MOP_OUT; 02331 continue; 02332 break; 02333 #endif 02334 02335 case OP_NULL_CHECK_START: MOP_IN(OP_NULL_CHECK_START); 02336 GET_MEMNUM_INC(mem, p); /* mem: null check id */ 02337 STACK_PUSH_NULL_CHECK_START(mem, s); 02338 MOP_OUT; 02339 continue; 02340 break; 02341 02342 case OP_NULL_CHECK_END: MOP_IN(OP_NULL_CHECK_END); 02343 { 02344 int isnull; 02345 02346 GET_MEMNUM_INC(mem, p); /* mem: null check id */ 02347 STACK_NULL_CHECK(isnull, mem, s); 02348 if 
(isnull) { 02349 #ifdef ONIG_DEBUG_MATCH 02350 fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%"PRIdPTR"\n", 02351 (int )mem, (intptr_t )s); 02352 #endif 02353 null_check_found: 02354 /* empty loop founded, skip next instruction */ 02355 switch (*p++) { 02356 case OP_JUMP: 02357 case OP_PUSH: 02358 p += SIZE_RELADDR; 02359 break; 02360 case OP_REPEAT_INC: 02361 case OP_REPEAT_INC_NG: 02362 case OP_REPEAT_INC_SG: 02363 case OP_REPEAT_INC_NG_SG: 02364 p += SIZE_MEMNUM; 02365 break; 02366 default: 02367 goto unexpected_bytecode_error; 02368 break; 02369 } 02370 } 02371 } 02372 MOP_OUT; 02373 continue; 02374 break; 02375 02376 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT 02377 case OP_NULL_CHECK_END_MEMST: MOP_IN(OP_NULL_CHECK_END_MEMST); 02378 { 02379 int isnull; 02380 02381 GET_MEMNUM_INC(mem, p); /* mem: null check id */ 02382 STACK_NULL_CHECK_MEMST(isnull, mem, s, reg); 02383 if (isnull) { 02384 #ifdef ONIG_DEBUG_MATCH 02385 fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%"PRIdPTR"\n", 02386 (int )mem, (intptr_t )s); 02387 #endif 02388 if (isnull == -1) goto fail; 02389 goto null_check_found; 02390 } 02391 } 02392 MOP_OUT; 02393 continue; 02394 break; 02395 #endif 02396 02397 #ifdef USE_SUBEXP_CALL 02398 case OP_NULL_CHECK_END_MEMST_PUSH: 02399 MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH); 02400 { 02401 int isnull; 02402 02403 GET_MEMNUM_INC(mem, p); /* mem: null check id */ 02404 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT 02405 STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg); 02406 #else 02407 STACK_NULL_CHECK_REC(isnull, mem, s); 02408 #endif 02409 if (isnull) { 02410 #ifdef ONIG_DEBUG_MATCH 02411 fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%"PRIdPTR"\n", 02412 (int )mem, (intptr_t )s); 02413 #endif 02414 if (isnull == -1) goto fail; 02415 goto null_check_found; 02416 } 02417 else { 02418 STACK_PUSH_NULL_CHECK_END(mem); 02419 } 02420 } 02421 MOP_OUT; 02422 continue; 02423 break; 02424 #endif 02425 02426 case OP_JUMP: 
MOP_IN(OP_JUMP); 02427 GET_RELADDR_INC(addr, p); 02428 p += addr; 02429 MOP_OUT; 02430 CHECK_INTERRUPT_IN_MATCH_AT; 02431 continue; 02432 break; 02433 02434 case OP_PUSH: MOP_IN(OP_PUSH); 02435 GET_RELADDR_INC(addr, p); 02436 STACK_PUSH_ALT(p + addr, s, sprev); 02437 MOP_OUT; 02438 continue; 02439 break; 02440 02441 #ifdef USE_COMBINATION_EXPLOSION_CHECK 02442 case OP_STATE_CHECK_PUSH: MOP_IN(OP_STATE_CHECK_PUSH); 02443 GET_STATE_CHECK_NUM_INC(mem, p); 02444 STATE_CHECK_VAL(scv, mem); 02445 if (scv) goto fail; 02446 02447 GET_RELADDR_INC(addr, p); 02448 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem); 02449 MOP_OUT; 02450 continue; 02451 break; 02452 02453 case OP_STATE_CHECK_PUSH_OR_JUMP: MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP); 02454 GET_STATE_CHECK_NUM_INC(mem, p); 02455 GET_RELADDR_INC(addr, p); 02456 STATE_CHECK_VAL(scv, mem); 02457 if (scv) { 02458 p += addr; 02459 } 02460 else { 02461 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem); 02462 } 02463 MOP_OUT; 02464 continue; 02465 break; 02466 02467 case OP_STATE_CHECK: MOP_IN(OP_STATE_CHECK); 02468 GET_STATE_CHECK_NUM_INC(mem, p); 02469 STATE_CHECK_VAL(scv, mem); 02470 if (scv) goto fail; 02471 02472 STACK_PUSH_STATE_CHECK(s, mem); 02473 MOP_OUT; 02474 continue; 02475 break; 02476 #endif /* USE_COMBINATION_EXPLOSION_CHECK */ 02477 02478 case OP_POP: MOP_IN(OP_POP); 02479 STACK_POP_ONE; 02480 MOP_OUT; 02481 continue; 02482 break; 02483 02484 case OP_PUSH_OR_JUMP_EXACT1: MOP_IN(OP_PUSH_OR_JUMP_EXACT1); 02485 GET_RELADDR_INC(addr, p); 02486 if (*p == *s && DATA_ENSURE_CHECK1) { 02487 p++; 02488 STACK_PUSH_ALT(p + addr, s, sprev); 02489 MOP_OUT; 02490 continue; 02491 } 02492 p += (addr + 1); 02493 MOP_OUT; 02494 continue; 02495 break; 02496 02497 case OP_PUSH_IF_PEEK_NEXT: MOP_IN(OP_PUSH_IF_PEEK_NEXT); 02498 GET_RELADDR_INC(addr, p); 02499 if (*p == *s) { 02500 p++; 02501 STACK_PUSH_ALT(p + addr, s, sprev); 02502 MOP_OUT; 02503 continue; 02504 } 02505 p++; 02506 MOP_OUT; 02507 continue; 02508 break; 
02509 02510 case OP_REPEAT: MOP_IN(OP_REPEAT); 02511 { 02512 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ 02513 GET_RELADDR_INC(addr, p); 02514 02515 STACK_ENSURE(1); 02516 repeat_stk[mem] = GET_STACK_INDEX(stk); 02517 STACK_PUSH_REPEAT(mem, p); 02518 02519 if (reg->repeat_range[mem].lower == 0) { 02520 STACK_PUSH_ALT(p + addr, s, sprev); 02521 } 02522 } 02523 MOP_OUT; 02524 continue; 02525 break; 02526 02527 case OP_REPEAT_NG: MOP_IN(OP_REPEAT_NG); 02528 { 02529 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ 02530 GET_RELADDR_INC(addr, p); 02531 02532 STACK_ENSURE(1); 02533 repeat_stk[mem] = GET_STACK_INDEX(stk); 02534 STACK_PUSH_REPEAT(mem, p); 02535 02536 if (reg->repeat_range[mem].lower == 0) { 02537 STACK_PUSH_ALT(p, s, sprev); 02538 p += addr; 02539 } 02540 } 02541 MOP_OUT; 02542 continue; 02543 break; 02544 02545 case OP_REPEAT_INC: MOP_IN(OP_REPEAT_INC); 02546 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ 02547 si = repeat_stk[mem]; 02548 stkp = STACK_AT(si); 02549 02550 repeat_inc: 02551 stkp->u.repeat.count++; 02552 if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) { 02553 /* end of repeat. Nothing to do. */ 02554 } 02555 else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { 02556 STACK_PUSH_ALT(p, s, sprev); 02557 p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. 
*/ 02558 } 02559 else { 02560 p = stkp->u.repeat.pcode; 02561 } 02562 STACK_PUSH_REPEAT_INC(si); 02563 MOP_OUT; 02564 CHECK_INTERRUPT_IN_MATCH_AT; 02565 continue; 02566 break; 02567 02568 case OP_REPEAT_INC_SG: MOP_IN(OP_REPEAT_INC_SG); 02569 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ 02570 STACK_GET_REPEAT(mem, stkp); 02571 si = GET_STACK_INDEX(stkp); 02572 goto repeat_inc; 02573 break; 02574 02575 case OP_REPEAT_INC_NG: MOP_IN(OP_REPEAT_INC_NG); 02576 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ 02577 si = repeat_stk[mem]; 02578 stkp = STACK_AT(si); 02579 02580 repeat_inc_ng: 02581 stkp->u.repeat.count++; 02582 if (stkp->u.repeat.count < reg->repeat_range[mem].upper) { 02583 if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { 02584 UChar* pcode = stkp->u.repeat.pcode; 02585 02586 STACK_PUSH_REPEAT_INC(si); 02587 STACK_PUSH_ALT(pcode, s, sprev); 02588 } 02589 else { 02590 p = stkp->u.repeat.pcode; 02591 STACK_PUSH_REPEAT_INC(si); 02592 } 02593 } 02594 else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) { 02595 STACK_PUSH_REPEAT_INC(si); 02596 } 02597 MOP_OUT; 02598 CHECK_INTERRUPT_IN_MATCH_AT; 02599 continue; 02600 break; 02601 02602 case OP_REPEAT_INC_NG_SG: MOP_IN(OP_REPEAT_INC_NG_SG); 02603 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ 02604 STACK_GET_REPEAT(mem, stkp); 02605 si = GET_STACK_INDEX(stkp); 02606 goto repeat_inc_ng; 02607 break; 02608 02609 case OP_PUSH_POS: MOP_IN(OP_PUSH_POS); 02610 STACK_PUSH_POS(s, sprev); 02611 MOP_OUT; 02612 continue; 02613 break; 02614 02615 case OP_POP_POS: MOP_IN(OP_POP_POS); 02616 { 02617 STACK_POS_END(stkp); 02618 s = stkp->u.state.pstr; 02619 sprev = stkp->u.state.pstr_prev; 02620 } 02621 MOP_OUT; 02622 continue; 02623 break; 02624 02625 case OP_PUSH_POS_NOT: MOP_IN(OP_PUSH_POS_NOT); 02626 GET_RELADDR_INC(addr, p); 02627 STACK_PUSH_POS_NOT(p + addr, s, sprev); 02628 MOP_OUT; 02629 continue; 02630 break; 02631 02632 case OP_FAIL_POS: MOP_IN(OP_FAIL_POS); 02633 STACK_POP_TIL_POS_NOT; 02634 
goto fail; 02635 break; 02636 02637 case OP_PUSH_STOP_BT: MOP_IN(OP_PUSH_STOP_BT); 02638 STACK_PUSH_STOP_BT; 02639 MOP_OUT; 02640 continue; 02641 break; 02642 02643 case OP_POP_STOP_BT: MOP_IN(OP_POP_STOP_BT); 02644 STACK_STOP_BT_END; 02645 MOP_OUT; 02646 continue; 02647 break; 02648 02649 case OP_LOOK_BEHIND: MOP_IN(OP_LOOK_BEHIND); 02650 GET_LENGTH_INC(tlen, p); 02651 s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen); 02652 if (IS_NULL(s)) goto fail; 02653 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end); 02654 MOP_OUT; 02655 continue; 02656 break; 02657 02658 case OP_PUSH_LOOK_BEHIND_NOT: MOP_IN(OP_PUSH_LOOK_BEHIND_NOT); 02659 GET_RELADDR_INC(addr, p); 02660 GET_LENGTH_INC(tlen, p); 02661 q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen); 02662 if (IS_NULL(q)) { 02663 /* too short case -> success. ex. /(?<!XXX)a/.match("a") 02664 If you want to change to fail, replace following line. */ 02665 p += addr; 02666 /* goto fail; */ 02667 } 02668 else { 02669 STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev); 02670 s = q; 02671 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end); 02672 } 02673 MOP_OUT; 02674 continue; 02675 break; 02676 02677 case OP_FAIL_LOOK_BEHIND_NOT: MOP_IN(OP_FAIL_LOOK_BEHIND_NOT); 02678 STACK_POP_TIL_LOOK_BEHIND_NOT; 02679 goto fail; 02680 break; 02681 02682 #ifdef USE_SUBEXP_CALL 02683 case OP_CALL: MOP_IN(OP_CALL); 02684 GET_ABSADDR_INC(addr, p); 02685 STACK_PUSH_CALL_FRAME(p); 02686 p = reg->p + addr; 02687 MOP_OUT; 02688 continue; 02689 break; 02690 02691 case OP_RETURN: MOP_IN(OP_RETURN); 02692 STACK_RETURN(p); 02693 STACK_PUSH_RETURN; 02694 MOP_OUT; 02695 continue; 02696 break; 02697 #endif 02698 02699 case OP_FINISH: 02700 goto finish; 02701 break; 02702 02703 fail: 02704 MOP_OUT; 02705 /* fall */ 02706 case OP_FAIL: MOP_IN(OP_FAIL); 02707 STACK_POP; 02708 p = stk->u.state.pcode; 02709 s = stk->u.state.pstr; 02710 sprev = stk->u.state.pstr_prev; 02711 02712 #ifdef 
USE_COMBINATION_EXPLOSION_CHECK 02713 if (stk->u.state.state_check != 0) { 02714 stk->type = STK_STATE_CHECK_MARK; 02715 stk++; 02716 } 02717 #endif 02718 02719 MOP_OUT; 02720 continue; 02721 break; 02722 02723 default: 02724 goto bytecode_error; 02725 02726 } /* end of switch */ 02727 sprev = sbegin; 02728 } /* end of while(1) */ 02729 02730 finish: 02731 STACK_SAVE; 02732 return best_len; 02733 02734 #ifdef ONIG_DEBUG 02735 stack_error: 02736 STACK_SAVE; 02737 return ONIGERR_STACK_BUG; 02738 #endif 02739 02740 bytecode_error: 02741 STACK_SAVE; 02742 return ONIGERR_UNDEFINED_BYTECODE; 02743 02744 unexpected_bytecode_error: 02745 STACK_SAVE; 02746 return ONIGERR_UNEXPECTED_BYTECODE; 02747 } 02748 02749

/*
 * Forward search for the byte string [target, target_end) in the text.
 *
 * Candidate positions start at `text` and advance one character at a time
 * (a constant step when the encoding has min_enc_len == max_enc_len,
 * enclen() otherwise).  Candidates stop before
 * min(text_range, text_end - target_length + 1).
 *
 * Returns the first position whose bytes equal the target, or NULL.
 */
static UChar*
slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
            const UChar* text, const UChar* text_end, UChar* text_range)
{
  UChar *t, *p, *s, *end;

  /* A target longer than the remaining text cannot match.  Returning here
     keeps the subtraction below from forming a pointer in front of text;
     the result (NULL) is the same as before. */
  if (target_end - target > text_end - text)
    return (UChar* )NULL;

  /* one past the last position where the whole target still fits */
  end = (UChar* )text_end;
  end -= target_end - target - 1;
  if (end > text_range)
    end = text_range;

  s = (UChar* )text;

  if (enc->max_enc_len == enc->min_enc_len) {
    /* every character has the same byte length: step by that constant */
    int n = enc->max_enc_len;

    while (s < end) {
      if (*s == *target) {
        p = s + 1;
        t = target + 1;
        if (target_end == t || memcmp(t, p, target_end - t) == 0)
          return s;
      }
      s += n;
    }
    return (UChar* )NULL;
  }

  while (s < end) {
    if (*s == *target) {
      p = s + 1;
      t = target + 1;
      if (target_end == t || memcmp(t, p, target_end - t) == 0)
        return s;
    }
    s += enclen(enc, s, text_end);
  }

  return (UChar* )NULL;
}

/*
 * Compares the bytes [t, tend) with the case-folded form of the text at p.
 *
 * The text is folded one character at a time with ONIGENC_MBC_CASE_FOLD
 * into a local buffer; t is compared as given (it is not folded here).
 * p is handed to the fold macro by address -- presumably it is advanced
 * past the character that was folded.
 *
 * Returns 1 when every byte of [t, tend) was matched, 0 otherwise.
 */
static int
str_lower_case_match(OnigEncoding enc, int case_fold_flag,
                     const UChar* t, const UChar* tend,
                     const UChar* p, const UChar* end)
{
  int lowlen;
  UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];

  while (t < tend) {
    /* text exhausted while target bytes remain: mismatch, and do not let
       the fold macro read at or beyond end */
    if (p >= end) return 0;

    lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);
    q = lowbuf;
    while (lowlen > 0) {
      /* the folded character is longer than what is left of the target:
         mismatch, and do not read past tend */
      if (t >= tend) return 0;
      if (*t++ != *q++) return 0;
      lowlen--;
    }
  }

  return 1;
}

/*
 * Case-insensitive variant of slow_search(): each candidate position is
 * tested with str_lower_case_match() against [target, target_end).
 *
 * Candidates start at `text`, advance by enclen(), and stop before
 * min(text_range, text_end - target_length + 1).
 *
 * Returns the first accepted position, or NULL.
 */
static UChar*
slow_search_ic(OnigEncoding enc, int case_fold_flag,
               UChar* target, UChar* target_end,
               const UChar* text, const UChar* text_end, UChar* text_range)
{
  UChar *s, *end;

  end = (UChar* )text_end;
  end -= target_end - target - 1;
  if (end > text_range)
    end = text_range;

  s = (UChar* )text;

  while (s < end) {
    if (str_lower_case_match(enc, case_fold_flag, target, target_end,
                             s, text_end))
      return s;

    s += enclen(enc, s, text_end);
  }

  return (UChar* )NULL;
}

/*
 * Backward search for the byte string [target, target_end).
 *
 * The first candidate is text_start, unless the target would not fit
 * before text_end from there; in that case it is text_end - target_length,
 * left-adjusted to a character head relative to adjust_text.  Candidates
 * then move to the previous character head until they fall below `text`.
 *
 * Returns the first position (scanning backward) whose bytes equal the
 * target, or NULL.
 */
static UChar*
slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
                     const UChar* text, const UChar* adjust_text,
                     const UChar* text_end, const UChar* text_start)
{
  UChar *t, *p, *s;

  s = (UChar* )text_end;
  s -= (target_end - target);
  if (s > text_start)
    s = (UChar* )text_start;
  else
    s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);

  while (s >= text) {
    if (*s == *target) {
      p = s + 1;
      t = target + 1;
      while (t < target_end) {
        if (*t != *p++)
          break;
        t++;
      }
      if (t == target_end)
        return s;
    }
    s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
  }

  return (UChar* )NULL;
}

02867 static UChar* 02868 slow_search_backward_ic(OnigEncoding enc, int case_fold_flag, 02869 UChar* target, UChar* target_end, 02870 const UChar* text, const UChar* adjust_text, 02871 const UChar* text_end, const UChar* text_start) 02872 { 02873 UChar *s; 02874 02875 s = (UChar* )text_end; 02876 s -= (target_end - target); 02877 if (s > text_start) 02878 s = (UChar* )text_start; 02879 else 02880 s =
ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end); 02881 02882 while (s >= text) { 02883 if (str_lower_case_match(enc, case_fold_flag, 02884 target, target_end, s, text_end)) 02885 return s; 02886 02887 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end); 02888 } 02889 02890 return (UChar* )NULL; 02891 } 02892

/*
 * Skip-table forward search for [target, target_end) that only ever
 * advances by whole characters (the "bm" in the name presumably stands
 * for Boyer-Moore).
 *
 * At each position the target is compared from its last byte backwards.
 * On a mismatch the skip distance is looked up with the text byte under
 * the target's last byte -- in reg->map when reg->int_map is NULL, in
 * reg->int_map otherwise -- and the position is advanced with enclen()
 * until at least that many bytes have been passed or the limit is hit.
 *
 * Returns the position of the match, or NULL.
 */
static UChar*
bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
                 const UChar* text, const UChar* text_end,
                 const UChar* text_range)
{
  const UChar *pos, *limit, *window_last, *tp, *pp, *from;
  const UChar *last = target_end - 1;
  ptrdiff_t last_off = last - target;
  ptrdiff_t shift;
  const int use_byte_map = IS_NULL(reg->int_map);

#ifdef ONIG_DEBUG_SEARCH
  fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR", text_end: %"PRIuPTR", text_range: %"PRIuPTR"\n",
          text, text_end, text_range);
#endif

  /* candidates must leave room for the whole target before text_end */
  limit = text_range;
  if (limit + last_off > text_end)
    limit = text_end - last_off;

  for (pos = text; pos < limit; ) {
    window_last = pos + last_off;

    /* compare right-to-left; reaching the first target byte is a match */
    for (pp = window_last, tp = last; *pp == *tp; pp--, tp--) {
      if (tp == target) return (UChar* )pos;
    }

    shift = use_byte_map ? reg->map[*window_last] : reg->int_map[*window_last];

    /* advance character by character until `shift` bytes are covered */
    from = pos;
    do {
      pos += enclen(reg->enc, pos, limit);
    } while ((pos - from) < shift && pos < limit);
  }

  return (UChar* )NULL;
}

/*
 * Skip-table forward search for [target, target_end) that advances by raw
 * byte distances.
 *
 * The probe points at the text byte aligned with the target's last byte.
 * The target is compared from its last byte backwards; on a mismatch the
 * probe moves forward by the table entry for the byte under it
 * (reg->map when reg->int_map is NULL, reg->int_map otherwise).
 *
 * Returns the position of the match, or NULL.
 */
static UChar*
bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
          const UChar* text, const UChar* text_end, const UChar* text_range)
{
  const UChar *probe, *limit, *tp, *pp;
  const UChar *last = target_end - 1;
  const int use_byte_map = IS_NULL(reg->int_map);

#ifdef ONIG_DEBUG_SEARCH
  fprintf(stderr, "bm_search: text: %"PRIuPTR", text_end: %"PRIuPTR", text_range: %"PRIuPTR"\n",
          text, text_end, text_range);
#endif

  limit = text_range + (target_end - target) - 1;
  if (limit > text_end)
    limit = text_end;

  for (probe = text + (target_end - target) - 1; probe < limit; ) {
#ifdef ONIG_DEBUG_SEARCH
    /* the per-iteration trace is emitted on the byte-map path only */
    if (use_byte_map)
      fprintf(stderr, "bm_search_loop: pos: %d %s\n",
              (int)(probe - text), probe);
#endif
    for (pp = probe, tp = last; *pp == *tp; pp--, tp--) {
      if (tp == target) return (UChar* )pp;
    }
    probe += use_byte_map ? reg->map[*probe] : reg->int_map[*probe];
  }

  return (UChar* )NULL;
}

/*
 * Builds the skip table used by bm_search_backward() for the pattern
 * [s, end).
 *
 * *skip is allocated (ONIG_CHAR_TABLE_SIZE ints) when it is NULL and is
 * reused otherwise.  Every entry is first set to the pattern length; then,
 * for i from length-1 down to 1, the entry for byte s[i] is set to i, so
 * each byte ends up with the smallest index (>= 1) at which it occurs.
 *
 * Returns 0 on success, ONIGERR_MEMORY if the allocation fails.
 */
static int
set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
                     int** skip)
{
  int *table = *skip;
  int idx;
  const int len = (int)(end - s);

  if (IS_NULL(table)) {
    table = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
    if (IS_NULL(table)) return ONIGERR_MEMORY;
    *skip = table;
  }

  for (idx = 0; idx < ONIG_CHAR_TABLE_SIZE; idx++)
    table[idx] = len;

  idx = len;
  while (--idx > 0)
    table[s[idx]] = idx;

  return 0;
}

/*
 * Skip-table backward search for [target, target_end).
 *
 * The first candidate is text_start, unless the target would not fit
 * before text_end from there; in that case it is text_end - target_length,
 * left-adjusted to a character head.  At each candidate the target is
 * compared left-to-right; on a mismatch the candidate moves back by
 * reg->int_map_backward[first byte of the candidate] and is left-adjusted
 * again.  The scan ends when the candidate falls below `text`.
 *
 * Returns the position of the match, or NULL.
 */
static UChar*
bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end,
                   const UChar* text, const UChar* adjust_text,
                   const UChar* text_end, const UChar* text_start)
{
  const UChar *cand, *tp, *pp;

  cand = text_end - (target_end - target);
  if (cand <= text_start)
    cand = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, cand, text_end);
  else
    cand = text_start;

  for ( ; cand >= text;
        cand = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, cand, text_end)) {
    for (pp = cand, tp = target; tp < target_end && *pp == *tp; pp++, tp++)
      ;
    if (tp == target_end)
      return (UChar* )cand;

    cand -= reg->int_map_backward[*cand];
  }

  return (UChar* )NULL;
}

/*
 * Forward scan for the first character head in [text, text_range) whose
 * leading byte has a non-zero entry in map[].  Returns it, or NULL.
 */
static UChar*
map_search(OnigEncoding enc, UChar map[],
           const UChar* text, const UChar* text_range, const UChar* text_end)
{
  const UChar *cur;

  for (cur = text; cur < text_range; cur += enclen(enc, cur, text_end)) {
    if (map[*cur]) return (UChar* )cur;
  }
  return (UChar* )NULL;
}

/*
 * Backward scan from text_start down to `text`, stepping to the previous
 * character head each time, for the first position whose leading byte has
 * a non-zero entry in map[].  Returns it, or NULL.
 */
static UChar*
map_search_backward(OnigEncoding enc, UChar map[],
                    const UChar* text, const UChar* adjust_text,
                    const UChar* text_start, const UChar* text_end)
{
  const UChar *cur;

  for (cur = text_start; cur >= text;
       cur = onigenc_get_prev_char_head(enc, adjust_text, cur, text_end)) {
    if (map[*cur]) return (UChar* )cur;
  }
  return (UChar* )NULL;
}

03076 extern long 03077 onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region, 03078 OnigOptionType option) 03079 { 03080 long r; 03081 UChar *prev; 03082 OnigMatchArg msa; 03083 03084 #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) 03085 start: 03086 THREAD_ATOMIC_START; 03087 if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) { 03088 ONIG_STATE_INC(reg); 03089 if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) { 03090 onig_chain_reduce(reg); 03091 ONIG_STATE_INC(reg); 03092 } 03093 } 03094 else { 03095 int n; 03096 03097 THREAD_ATOMIC_END; 03098 n = 0; 03099 while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) { 03100 if (++n > THREAD_PASS_LIMIT_COUNT) 03101 return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; 03102 THREAD_PASS; 03103 } 03104 goto start; 03105 } 03106 THREAD_ATOMIC_END; 03107 #endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */ 03108 03109 MATCH_ARG_INIT(msa, option, region, at); 03110 #ifdef
USE_COMBINATION_EXPLOSION_CHECK 03111 { 03112 int offset = at - str; 03113 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check); 03114 } 03115 #endif 03116 03117 if (region 03118 #ifdef USE_POSIX_API_REGION_OPTION 03119 && !IS_POSIX_REGION(option) 03120 #endif 03121 ) { 03122 r = onig_region_resize_clear(region, reg->num_mem + 1); 03123 } 03124 else 03125 r = 0; 03126 03127 if (r == 0) { 03128 prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at, end); 03129 r = match_at(reg, str, end, 03130 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE 03131 end, 03132 #endif 03133 at, prev, &msa); 03134 } 03135 03136 MATCH_ARG_FREE(msa); 03137 ONIG_STATE_DEC_THREAD(reg); 03138 return r; 03139 } 03140

/*
 * Finds the next occurrence, at or after s and before `range`, of the
 * regex's optimization key (selected by reg->optimize: exact string,
 * case-folded exact string, skip-table search, or first-byte map) and
 * turns it into a window of positions to try.
 *
 * The scan starts reg->dmin bytes after s (rounded up to a character
 * boundary for multi-byte encodings).  A hit p is rejected, and the scan
 * resumed one character further, when p - reg->dmin lies before s or when
 * reg->sub_anchor (begin-line / end-line) does not hold at p.
 *
 * On success returns 1 and stores:
 *   *high      p - reg->dmin
 *   *low       p when reg->dmax == 0; derived from p - reg->dmax when
 *              dmax is finite; left untouched when dmax is
 *              ONIG_INFINITE_DISTANCE
 *   *low_prev  the character head before *low, if low_prev is non-NULL
 *              and *low was stored
 * Returns 0 when no acceptable hit exists before `range`.
 */
static int
forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
                     UChar* range, UChar** low, UChar** high, UChar** low_prev)
{
  UChar *p, *pprev = (UChar* )NULL;

#ifdef ONIG_DEBUG_SEARCH
  fprintf(stderr, "forward_search_range: str: %"PRIuPTR", end: %"PRIuPTR", s: %"PRIuPTR", range: %"PRIuPTR"\n",
          str, end, s, range);
#endif

  p = s;
  if (reg->dmin > 0) {
    if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
      p += reg->dmin;
    }
    else {
      /* step whole characters until at least dmin bytes are skipped */
      UChar *q = p + reg->dmin;
      while (p < q) p += enclen(reg->enc, p, end);
    }
  }

 retry:
  switch (reg->optimize) {
  case ONIG_OPTIMIZE_EXACT:
    p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
    break;
  case ONIG_OPTIMIZE_EXACT_IC:
    p = slow_search_ic(reg->enc, reg->case_fold_flag,
                       reg->exact, reg->exact_end, p, end, range);
    break;

  case ONIG_OPTIMIZE_EXACT_BM:
    p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);
    break;

  case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
    p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);
    break;

  case ONIG_OPTIMIZE_MAP:
    p = map_search(reg->enc, reg->map, p, range, end);
    break;
  }

  if (p && p < range) {
    if (p - reg->dmin < s) {
    retry_gate:
      /* reject this hit: remember it and search again one character on */
      pprev = p;
      p += enclen(reg->enc, p, end);
      goto retry;
    }

    if (reg->sub_anchor) {
      UChar* prev;

      switch (reg->sub_anchor) {
      case ANCHOR_BEGIN_LINE:
        if (!ON_STR_BEGIN(p)) {
          prev = onigenc_get_prev_char_head(reg->enc,
                                            (pprev ? pprev : str), p, end);
          if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
            goto retry_gate;
        }
        break;

      case ANCHOR_END_LINE:
        if (ON_STR_END(p)) {
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
          /* pass `end` like every other onigenc_get_prev_char_head() call
             in this file; the argument was missing here */
          prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
                                            (pprev ? pprev : str), p, end);
          if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
            goto retry_gate;
#endif
        }
        else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
#ifdef USE_CRNL_AS_LINE_TERMINATOR
              && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
#endif
                )
          goto retry_gate;
        break;
      }
    }

    if (reg->dmax == 0) {
      *low = p;
      if (low_prev) {
        if (*low > s)
          *low_prev = onigenc_get_prev_char_head(reg->enc, s, p, end);
        else
          *low_prev = onigenc_get_prev_char_head(reg->enc,
                                                 (pprev ? pprev : str), p, end);
      }
    }
    else {
      if (reg->dmax != ONIG_INFINITE_DISTANCE) {
        *low = p - reg->dmax;
        if (*low > s) {
          *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
                                                              *low, end, (const UChar** )low_prev);
          if (low_prev && IS_NULL(*low_prev))
            *low_prev = onigenc_get_prev_char_head(reg->enc,
                                                   (pprev ? pprev : s), *low, end);
        }
        else {
          if (low_prev)
            *low_prev = onigenc_get_prev_char_head(reg->enc,
                                               (pprev ? pprev : str), *low, end);
        }
      }
    }
    /* no need to adjust *high; it is used as a range check only */
    *high = p - reg->dmin;

#ifdef ONIG_DEBUG_SEARCH
    fprintf(stderr,
    "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n",
            (int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax);
#endif
    return 1; /* success */
  }

  return 0; /* fail */
}

static int set_bm_backward_skip P_((UChar* s, UChar* end, OnigEncoding enc,
                                    int** skip));

/* below this search distance (in bytes) the backward skip table is not
   built and the plain backward search is used instead */
#define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD   100

/*
 * Backward counterpart of forward_search_range(): searches from s down to
 * range + reg->dmin for the regex's optimization key (reg->optimize).
 *
 * For the skip-table modes the backward table reg->int_map_backward is
 * built on first use, unless the distance s - range is below
 * BM_BACKWARD_SEARCH_LENGTH_THRESHOLD, in which case the plain backward
 * search is used.  A hit that violates reg->sub_anchor causes the search
 * to be retried from an earlier position.
 *
 * Returns 1 on success; when reg->dmax is finite, *low = p - dmax and
 * *high = p - dmin (adjusted via onigenc_get_right_adjust_char_head) are
 * stored, otherwise *low / *high are left untouched.
 * Returns 0 when nothing is found, or the non-zero result of
 * set_bm_backward_skip() if building the table fails.
 */
static long
backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
                      UChar* s, const UChar* range, UChar* adjrange,
                      UChar** low, UChar** high)
{
  int r;
  UChar *p;

  range += reg->dmin;
  p = s;

 retry:
  switch (reg->optimize) {
  case ONIG_OPTIMIZE_EXACT:
  exact_method:
    p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
                             range, adjrange, end, p);
    break;

  case ONIG_OPTIMIZE_EXACT_IC:
    p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
                                reg->exact, reg->exact_end,
                                range, adjrange, end, p);
    break;

  case ONIG_OPTIMIZE_EXACT_BM:
  case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
    if (IS_NULL(reg->int_map_backward)) {
      if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
        goto exact_method;

      r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
                               &(reg->int_map_backward));
      if (r) return r;
    }
    p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
                           end, p);
    break;

  case ONIG_OPTIMIZE_MAP:
    p = map_search_backward(reg->enc, reg->map, range, adjrange, p, end);
    break;
  }

  if (p) {
    if (reg->sub_anchor) {
      UChar* prev;

      switch (reg->sub_anchor) {
      case ANCHOR_BEGIN_LINE:
        if (!ON_STR_BEGIN(p)) {
          prev = onigenc_get_prev_char_head(reg->enc, str, p, end);
          if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
            p = prev;
            goto retry;
          }
        }
        break;

      case ANCHOR_END_LINE:
        if (ON_STR_END(p)) {
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
          /* pass `end` like every other onigenc_get_prev_char_head() call
             in this file; the argument was missing here */
          prev = onigenc_get_prev_char_head(reg->enc, adjrange, p, end);
          if (IS_NULL(prev)) goto fail;
          if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
            p = prev;
            goto retry;
          }
#endif
        }
        else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
#ifdef USE_CRNL_AS_LINE_TERMINATOR
              && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
#endif
                ) {
          p = onigenc_get_prev_char_head(reg->enc, adjrange, p, end);
          if (IS_NULL(p)) goto fail;
          goto retry;
        }
        break;
      }
    }

    /* no need to adjust *high precisely; it is used as a range check only */
    if (reg->dmax != ONIG_INFINITE_DISTANCE) {
      *low  = p - reg->dmax;
      *high = p - reg->dmin;
      *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high, end);
    }

#ifdef ONIG_DEBUG_SEARCH
    fprintf(stderr, "backward_search_range: low: %d, high: %d\n",
            (int )(*low - str), (int )(*high - str));
#endif
    return 1; /* success */
  }

 fail:
#ifdef ONIG_DEBUG_SEARCH
  fprintf(stderr, "backward_search_range: fail.\n");
#endif
  return 0; /* fail */
}


03377 extern long 03378 onig_search(regex_t* reg, const UChar* str, const UChar* end, 03379 const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option) 03380 { 03381 long r; 03382 UChar *s, *prev; 03383 OnigMatchArg msa; 03384 const UChar *orig_start = start; 03385 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE 03386 const UChar *orig_range = range; 03387 #endif 03388 03389 #if defined(USE_RECOMPILE_API) &&
defined(USE_MULTI_THREAD_SYSTEM) 03390 start: 03391 THREAD_ATOMIC_START; 03392 if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) { 03393 ONIG_STATE_INC(reg); 03394 if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) { 03395 onig_chain_reduce(reg); 03396 ONIG_STATE_INC(reg); 03397 } 03398 } 03399 else { 03400 int n; 03401 03402 THREAD_ATOMIC_END; 03403 n = 0; 03404 while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) { 03405 if (++n > THREAD_PASS_LIMIT_COUNT) 03406 return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; 03407 THREAD_PASS; 03408 } 03409 goto start; 03410 } 03411 THREAD_ATOMIC_END; 03412 #endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */ 03413 03414 #ifdef ONIG_DEBUG_SEARCH 03415 fprintf(stderr, 03416 "onig_search (entry point): str: %"PRIuPTR", end: %"PRIuPTR", start: %"PRIuPTR", range: %"PRIuPTR"\n", 03417 str, end - str, start - str, range - str); 03418 #endif 03419 03420 if (region 03421 #ifdef USE_POSIX_API_REGION_OPTION 03422 && !IS_POSIX_REGION(option) 03423 #endif 03424 ) { 03425 r = onig_region_resize_clear(region, reg->num_mem + 1); 03426 if (r) goto finish_no_msa; 03427 } 03428 03429 if (start > end || start < str) goto mismatch_no_msa; 03430 03431 03432 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE 03433 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE 03434 #define MATCH_AND_RETURN_CHECK(upper_range) \ 03435 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ 03436 if (r != ONIG_MISMATCH) {\ 03437 if (r >= 0) {\ 03438 if (! 
IS_FIND_LONGEST(reg->options)) {\ 03439 goto match;\ 03440 }\ 03441 }\ 03442 else goto finish; /* error */ \ 03443 } 03444 #else 03445 #define MATCH_AND_RETURN_CHECK(upper_range) \ 03446 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ 03447 if (r != ONIG_MISMATCH) {\ 03448 if (r >= 0) {\ 03449 goto match;\ 03450 }\ 03451 else goto finish; /* error */ \ 03452 } 03453 #endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ 03454 #else 03455 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE 03456 #define MATCH_AND_RETURN_CHECK(none) \ 03457 r = match_at(reg, str, end, s, prev, &msa);\ 03458 if (r != ONIG_MISMATCH) {\ 03459 if (r >= 0) {\ 03460 if (! IS_FIND_LONGEST(reg->options)) {\ 03461 goto match;\ 03462 }\ 03463 }\ 03464 else goto finish; /* error */ \ 03465 } 03466 #else 03467 #define MATCH_AND_RETURN_CHECK(none) \ 03468 r = match_at(reg, str, end, s, prev, &msa);\ 03469 if (r != ONIG_MISMATCH) {\ 03470 if (r >= 0) {\ 03471 goto match;\ 03472 }\ 03473 else goto finish; /* error */ \ 03474 } 03475 #endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ 03476 #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */ 03477 03478 03479 /* anchor optimize: resume search range */ 03480 if (reg->anchor != 0 && str < end) { 03481 UChar *min_semi_end, *max_semi_end; 03482 03483 if (reg->anchor & ANCHOR_BEGIN_POSITION) { 03484 /* search start-position only */ 03485 begin_position: 03486 if (range > start) 03487 range = start + 1; 03488 else 03489 range = start; 03490 } 03491 else if (reg->anchor & ANCHOR_BEGIN_BUF) { 03492 /* search str-position only */ 03493 if (range > start) { 03494 if (start != str) goto mismatch_no_msa; 03495 range = str + 1; 03496 } 03497 else { 03498 if (range <= str) { 03499 start = str; 03500 range = str; 03501 } 03502 else 03503 goto mismatch_no_msa; 03504 } 03505 } 03506 else if (reg->anchor & ANCHOR_END_BUF) { 03507 min_semi_end = max_semi_end = (UChar* )end; 03508 03509 end_buf: 03510 if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin) 
03511 goto mismatch_no_msa; 03512 03513 if (range > start) { 03514 if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) { 03515 start = min_semi_end - reg->anchor_dmax; 03516 if (start < end) 03517 start = onigenc_get_right_adjust_char_head(reg->enc, str, start, end); 03518 else { /* match with empty at end */ 03519 start = onigenc_get_prev_char_head(reg->enc, str, end, end); 03520 } 03521 } 03522 if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) { 03523 range = max_semi_end - reg->anchor_dmin + 1; 03524 } 03525 03526 if (start >= range) goto mismatch_no_msa; 03527 } 03528 else { 03529 if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) { 03530 range = min_semi_end - reg->anchor_dmax; 03531 } 03532 if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) { 03533 start = max_semi_end - reg->anchor_dmin; 03534 start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start, end); 03535 } 03536 if (range > start) goto mismatch_no_msa; 03537 } 03538 } 03539 else if (reg->anchor & ANCHOR_SEMI_END_BUF) { 03540 UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, end, 1); 03541 03542 max_semi_end = (UChar* )end; 03543 if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) { 03544 min_semi_end = pre_end; 03545 03546 #ifdef USE_CRNL_AS_LINE_TERMINATOR 03547 pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, end, 1); 03548 if (IS_NOT_NULL(pre_end) && 03549 ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) { 03550 min_semi_end = pre_end; 03551 } 03552 #endif 03553 if (min_semi_end > str && start <= min_semi_end) { 03554 goto end_buf; 03555 } 03556 } 03557 else { 03558 min_semi_end = (UChar* )end; 03559 goto end_buf; 03560 } 03561 } 03562 else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) { 03563 goto begin_position; 03564 } 03565 } 03566 else if (str == end) { /* empty string */ 03567 static const UChar address_for_empty_string[] = ""; 03568 03569 #ifdef ONIG_DEBUG_SEARCH 03570 fprintf(stderr, "onig_search: empty string.\n"); 03571 #endif 03572 
03573 if (reg->threshold_len == 0) { 03574 start = end = str = address_for_empty_string; 03575 s = (UChar* )start; 03576 prev = (UChar* )NULL; 03577 03578 MATCH_ARG_INIT(msa, option, region, start); 03579 #ifdef USE_COMBINATION_EXPLOSION_CHECK 03580 msa.state_check_buff = (void* )0; 03581 msa.state_check_buff_size = 0; /* NO NEED, for valgrind */ 03582 #endif 03583 MATCH_AND_RETURN_CHECK(end); 03584 goto mismatch; 03585 } 03586 goto mismatch_no_msa; 03587 } 03588 03589 #ifdef ONIG_DEBUG_SEARCH 03590 fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n", 03591 (int )(end - str), (int )(start - str), (int )(range - str)); 03592 #endif 03593 03594 MATCH_ARG_INIT(msa, option, region, orig_start); 03595 #ifdef USE_COMBINATION_EXPLOSION_CHECK 03596 { 03597 int offset = (MIN(start, range) - str); 03598 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check); 03599 } 03600 #endif 03601 03602 s = (UChar* )start; 03603 if (range > start) { /* forward search */ 03604 if (s > str) 03605 prev = onigenc_get_prev_char_head(reg->enc, str, s, end); 03606 else 03607 prev = (UChar* )NULL; 03608 03609 if (reg->optimize != ONIG_OPTIMIZE_NONE) { 03610 UChar *sch_range, *low, *high, *low_prev; 03611 03612 sch_range = (UChar* )range; 03613 if (reg->dmax != 0) { 03614 if (reg->dmax == ONIG_INFINITE_DISTANCE) 03615 sch_range = (UChar* )end; 03616 else { 03617 sch_range += reg->dmax; 03618 if (sch_range > end) sch_range = (UChar* )end; 03619 } 03620 } 03621 03622 if ((end - start) < reg->threshold_len) 03623 goto mismatch; 03624 03625 if (reg->dmax != ONIG_INFINITE_DISTANCE) { 03626 do { 03627 if (! 
forward_search_range(reg, str, end, s, sch_range, 03628 &low, &high, &low_prev)) goto mismatch; 03629 if (s < low) { 03630 s = low; 03631 prev = low_prev; 03632 } 03633 while (s <= high) { 03634 MATCH_AND_RETURN_CHECK(orig_range); 03635 prev = s; 03636 s += enclen(reg->enc, s, end); 03637 } 03638 } while (s < range); 03639 goto mismatch; 03640 } 03641 else { /* check only. */ 03642 if (! forward_search_range(reg, str, end, s, sch_range, 03643 &low, &high, (UChar** )NULL)) goto mismatch; 03644 03645 if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) { 03646 do { 03647 MATCH_AND_RETURN_CHECK(orig_range); 03648 prev = s; 03649 s += enclen(reg->enc, s, end); 03650 } while (s < range); 03651 goto mismatch; 03652 } 03653 } 03654 } 03655 03656 do { 03657 MATCH_AND_RETURN_CHECK(orig_range); 03658 prev = s; 03659 s += enclen(reg->enc, s, end); 03660 } while (s < range); 03661 03662 if (s == range) { /* because empty match with /$/. */ 03663 MATCH_AND_RETURN_CHECK(orig_range); 03664 } 03665 } 03666 else { /* backward search */ 03667 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE 03668 if (orig_start < end) 03669 orig_start += enclen(reg->enc, orig_start, end); /* is upper range */ 03670 #endif 03671 03672 if (reg->optimize != ONIG_OPTIMIZE_NONE) { 03673 UChar *low, *high, *adjrange, *sch_start; 03674 03675 if (range < end) 03676 adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range, end); 03677 else 03678 adjrange = (UChar* )end; 03679 03680 if (reg->dmax != ONIG_INFINITE_DISTANCE && 03681 (end - range) >= reg->threshold_len) { 03682 do { 03683 sch_start = s + reg->dmax; 03684 if (sch_start > end) sch_start = (UChar* )end; 03685 if (backward_search_range(reg, str, end, sch_start, range, adjrange, 03686 &low, &high) <= 0) 03687 goto mismatch; 03688 03689 if (s > high) 03690 s = high; 03691 03692 while (s >= low) { 03693 prev = onigenc_get_prev_char_head(reg->enc, str, s, end); 03694 MATCH_AND_RETURN_CHECK(orig_start); 03695 s = prev; 03696 } 03697 } while (s >= 
range); 03698 goto mismatch; 03699 } 03700 else { /* check only. */ 03701 if ((end - range) < reg->threshold_len) goto mismatch; 03702 03703 sch_start = s; 03704 if (reg->dmax != 0) { 03705 if (reg->dmax == ONIG_INFINITE_DISTANCE) 03706 sch_start = (UChar* )end; 03707 else { 03708 sch_start += reg->dmax; 03709 if (sch_start > end) sch_start = (UChar* )end; 03710 else 03711 sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, 03712 start, sch_start, end); 03713 } 03714 } 03715 if (backward_search_range(reg, str, end, sch_start, range, adjrange, 03716 &low, &high) <= 0) goto mismatch; 03717 } 03718 } 03719 03720 do { 03721 prev = onigenc_get_prev_char_head(reg->enc, str, s, end); 03722 MATCH_AND_RETURN_CHECK(orig_start); 03723 s = prev; 03724 } while (s >= range); 03725 } 03726 03727 mismatch: 03728 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE 03729 if (IS_FIND_LONGEST(reg->options)) { 03730 if (msa.best_len >= 0) { 03731 s = msa.best_s; 03732 goto match; 03733 } 03734 } 03735 #endif 03736 r = ONIG_MISMATCH; 03737 03738 finish: 03739 MATCH_ARG_FREE(msa); 03740 ONIG_STATE_DEC_THREAD(reg); 03741 03742 /* If result is mismatch and no FIND_NOT_EMPTY option, 03743 then the region is not setted in match_at(). 
*/ 03744 if (IS_FIND_NOT_EMPTY(reg->options) && region 03745 #ifdef USE_POSIX_API_REGION_OPTION 03746 && !IS_POSIX_REGION(option) 03747 #endif 03748 ) { 03749 onig_region_clear(region); 03750 } 03751 03752 #ifdef ONIG_DEBUG 03753 if (r != ONIG_MISMATCH) 03754 fprintf(stderr, "onig_search: error %d\n", r); 03755 #endif 03756 return r; 03757 03758 mismatch_no_msa: 03759 r = ONIG_MISMATCH; 03760 finish_no_msa: 03761 ONIG_STATE_DEC_THREAD(reg); 03762 #ifdef ONIG_DEBUG 03763 if (r != ONIG_MISMATCH) 03764 fprintf(stderr, "onig_search: error %d\n", r); 03765 #endif 03766 return r; 03767 03768 match: 03769 ONIG_STATE_DEC_THREAD(reg); 03770 MATCH_ARG_FREE(msa); 03771 return s - str; 03772 } 03773 03774 extern OnigEncoding 03775 onig_get_encoding(regex_t* reg) 03776 { 03777 return reg->enc; 03778 } 03779 03780 extern OnigOptionType 03781 onig_get_options(regex_t* reg) 03782 { 03783 return reg->options; 03784 } 03785 03786 extern OnigCaseFoldType 03787 onig_get_case_fold_flag(regex_t* reg) 03788 { 03789 return reg->case_fold_flag; 03790 } 03791 03792 extern const OnigSyntaxType* 03793 onig_get_syntax(regex_t* reg) 03794 { 03795 return reg->syntax; 03796 } 03797 03798 extern int 03799 onig_number_of_captures(regex_t* reg) 03800 { 03801 return reg->num_mem; 03802 } 03803 03804 extern int 03805 onig_number_of_capture_histories(regex_t* reg) 03806 { 03807 #ifdef USE_CAPTURE_HISTORY 03808 int i, n; 03809 03810 n = 0; 03811 for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) { 03812 if (BIT_STATUS_AT(reg->capture_history, i) != 0) 03813 n++; 03814 } 03815 return n; 03816 #else 03817 return 0; 03818 #endif 03819 } 03820 03821 extern void 03822 onig_copy_encoding(OnigEncoding to, OnigEncoding from) 03823 { 03824 *to = *from; 03825 } 03826 03827