Ruby 1.9.3p327(2012-11-10revision37606)
|
00001 /********************************************************************** 00002 regcomp.c - Oniguruma (regular expression library) 00003 **********************************************************************/ 00004 /*- 00005 * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> 00006 * All rights reserved. 00007 * 00008 * Redistribution and use in source and binary forms, with or without 00009 * modification, are permitted provided that the following conditions 00010 * are met: 00011 * 1. Redistributions of source code must retain the above copyright 00012 * notice, this list of conditions and the following disclaimer. 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in the 00015 * documentation and/or other materials provided with the distribution. 00016 * 00017 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 00018 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00019 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 00020 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 00021 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 00022 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 00023 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 00024 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00025 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 00026 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 00027 * SUCH DAMAGE. 00028 */ 00029 00030 #include "regparse.h" 00031 00032 OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN; 00033 00034 extern OnigCaseFoldType 00035 onig_get_default_case_fold_flag(void) 00036 { 00037 return OnigDefaultCaseFoldFlag; 00038 } 00039 00040 extern int 00041 onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag) 00042 { 00043 OnigDefaultCaseFoldFlag = case_fold_flag; 00044 return 0; 00045 } 00046 00047 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS 00048 static unsigned char PadBuf[WORD_ALIGNMENT_SIZE]; 00049 #endif 00050 00051 static UChar* 00052 str_dup(UChar* s, UChar* end) 00053 { 00054 ptrdiff_t len = end - s; 00055 00056 if (len > 0) { 00057 UChar* r = (UChar* )xmalloc(len + 1); 00058 CHECK_NULL_RETURN(r); 00059 xmemcpy(r, s, len); 00060 r[len] = (UChar )0; 00061 return r; 00062 } 00063 else return NULL; 00064 } 00065 00066 static void 00067 swap_node(Node* a, Node* b) 00068 { 00069 Node c; 00070 c = *a; *a = *b; *b = c; 00071 00072 if (NTYPE(a) == NT_STR) { 00073 StrNode* sn = NSTR(a); 00074 if (sn->capa == 0) { 00075 size_t len = sn->end - sn->s; 00076 sn->s = sn->buf; 00077 sn->end = sn->s + len; 00078 } 00079 } 00080 00081 if (NTYPE(b) == NT_STR) { 00082 StrNode* sn = NSTR(b); 00083 if (sn->capa == 0) { 00084 size_t len = sn->end - sn->s; 00085 sn->s = sn->buf; 00086 sn->end = sn->s + len; 00087 } 00088 } 00089 } 00090 00091 static OnigDistance 00092 distance_add(OnigDistance d1, OnigDistance d2) 00093 { 00094 if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE) 00095 return ONIG_INFINITE_DISTANCE; 00096 else { 00097 if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2; 00098 else return ONIG_INFINITE_DISTANCE; 00099 } 00100 } 00101 00102 static OnigDistance 00103 distance_multiply(OnigDistance d, int m) 00104 { 00105 if (m == 0) return 0; 00106 00107 if (d < ONIG_INFINITE_DISTANCE / m) 00108 return d * m; 00109 else 00110 return ONIG_INFINITE_DISTANCE; 00111 } 00112 00113 static int 00114 bitset_is_empty(BitSetRef bs) 00115 { 00116 int i; 00117 for (i = 0; i < (int )BITSET_SIZE; i++) { 00118 if (bs[i] != 0) return 0; 00119 } 00120 return 1; 00121 } 00122 00123 #ifdef ONIG_DEBUG 00124 static int 00125 onig_is_prelude(void) 00126 { 00127 return !rb_const_defined(rb_cThread, rb_intern_const("MUTEX_FOR_THREAD_EXCLUSIVE")); 00128 } 00129 00130 static int 00131 bitset_on_num(BitSetRef bs) 00132 { 00133 int i, n; 00134 00135 n = 0; 00136 for (i = 0; i < SINGLE_BYTE_SIZE; i++) { 00137 if (BITSET_AT(bs, i)) n++; 00138 } 00139 return n; 00140 } 00141 #endif 00142 00143 extern int 00144 onig_bbuf_init(BBuf* buf, OnigDistance size) 00145 { 00146 if (size <= 0) { 00147 size = 0; 00148 buf->p = NULL; 00149 } 00150 else { 00151 buf->p = (UChar* )xmalloc(size); 00152 if (IS_NULL(buf->p)) return(ONIGERR_MEMORY); 00153 } 00154 00155 buf->alloc = (unsigned int)size; 00156 buf->used = 0; 00157 return 0; 00158 } 00159 00160 00161 #ifdef USE_SUBEXP_CALL 00162 00163 static int 00164 unset_addr_list_init(UnsetAddrList* uslist, int size) 00165 { 00166 UnsetAddr* p; 00167 00168 p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size); 00169 CHECK_NULL_RETURN_MEMERR(p); 00170 uslist->num = 0; 00171 uslist->alloc = size; 00172 uslist->us = p; 00173 return 0; 00174 } 00175 00176 static void 00177 unset_addr_list_end(UnsetAddrList* uslist) 00178 { 00179 if (IS_NOT_NULL(uslist->us)) 00180 xfree(uslist->us); 00181 } 00182 00183 static int 00184 unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node) 00185 { 00186 UnsetAddr* p; 00187 int size; 00188 00189 if (uslist->num >= uslist->alloc) { 00190 size = uslist->alloc * 2; 00191 p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size); 00192 CHECK_NULL_RETURN_MEMERR(p); 00193 uslist->alloc = size; 00194 uslist->us = p; 00195 } 00196 00197 uslist->us[uslist->num].offset = offset; 00198 uslist->us[uslist->num].target = node; 00199 uslist->num++; 00200 return 0; 00201 } 00202 #endif /* USE_SUBEXP_CALL */ 00203 00204 00205 static int 00206 add_opcode(regex_t* reg, int opcode) 00207 { 00208 BBUF_ADD1(reg, opcode); 00209 return 0; 00210 } 00211 00212 #ifdef USE_COMBINATION_EXPLOSION_CHECK 00213 static int 00214 add_state_check_num(regex_t* reg, int num) 00215 { 00216 StateCheckNumType n = (StateCheckNumType )num; 00217 00218 BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM); 00219 return 0; 00220 } 00221 #endif 00222 00223 static int 00224 add_rel_addr(regex_t* reg, int addr) 00225 { 00226 RelAddrType ra = (RelAddrType )addr; 00227 00228 BBUF_ADD(reg, &ra, SIZE_RELADDR); 00229 return 0; 00230 } 00231 00232 static int 00233 add_abs_addr(regex_t* reg, int addr) 00234 { 00235 AbsAddrType ra = (AbsAddrType )addr; 00236 00237 BBUF_ADD(reg, &ra, SIZE_ABSADDR); 00238 return 0; 00239 } 00240 00241 static int 00242 add_length(regex_t* reg, OnigDistance len) 00243 { 00244 LengthType l = (LengthType )len; 00245 00246 BBUF_ADD(reg, &l, SIZE_LENGTH); 00247 return 0; 00248 } 00249 00250 static int 00251 add_mem_num(regex_t* reg, int num) 00252 { 00253 MemNumType n = (MemNumType )num; 00254 00255 BBUF_ADD(reg, &n, SIZE_MEMNUM); 00256 return 0; 00257 } 00258 00259 static int 00260 add_pointer(regex_t* reg, void* addr) 00261 { 00262 PointerType ptr = (PointerType )addr; 00263 00264 BBUF_ADD(reg, &ptr, SIZE_POINTER); 00265 return 0; 00266 } 00267 00268 static int 00269 add_option(regex_t* reg, OnigOptionType option) 00270 { 00271 BBUF_ADD(reg, &option, SIZE_OPTION); 00272 return 0; 00273 } 00274 00275 static int 00276 add_opcode_rel_addr(regex_t* reg, int opcode, int addr) 00277 { 00278 int r; 00279 00280 r = add_opcode(reg, opcode); 00281 if (r) return r; 00282 r = add_rel_addr(reg, addr); 00283 return r; 00284 } 00285 00286 static int 00287 add_bytes(regex_t* reg, UChar* bytes, OnigDistance len) 00288 { 00289 BBUF_ADD(reg, bytes, len); 00290 return 0; 00291 } 00292 00293 static int 00294 add_bitset(regex_t* reg, BitSetRef bs) 00295 { 00296 BBUF_ADD(reg, bs, SIZE_BITSET); 00297 return 0; 00298 } 00299 00300 static int 00301 add_opcode_option(regex_t* reg, int opcode, OnigOptionType option) 00302 { 00303 int r; 00304 00305 r = add_opcode(reg, opcode); 00306 if (r) return r; 00307 r = add_option(reg, option); 00308 return r; 00309 } 00310 00311 static int compile_length_tree(Node* node, regex_t* reg); 00312 static int compile_tree(Node* node, regex_t* reg); 00313 00314 00315 #define IS_NEED_STR_LEN_OP_EXACT(op) \ 00316 ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\ 00317 (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC) 00318 00319 static int 00320 select_str_opcode(int mb_len, OnigDistance str_len, int ignore_case) 00321 { 00322 int op; 00323 00324 if (ignore_case) { 00325 switch (str_len) { 00326 case 1: op = OP_EXACT1_IC; break; 00327 default: op = OP_EXACTN_IC; break; 00328 } 00329 } 00330 else { 00331 switch (mb_len) { 00332 case 1: 00333 switch (str_len) { 00334 case 1: op = OP_EXACT1; break; 00335 case 2: op = OP_EXACT2; break; 00336 case 3: op = OP_EXACT3; break; 00337 case 4: op = OP_EXACT4; break; 00338 case 5: op = OP_EXACT5; break; 00339 default: op = OP_EXACTN; break; 00340 } 00341 break; 00342 00343 case 2: 00344 switch (str_len) { 00345 case 1: op = OP_EXACTMB2N1; break; 00346 case 2: op = OP_EXACTMB2N2; break; 00347 case 3: op = OP_EXACTMB2N3; break; 00348 default: op = OP_EXACTMB2N; break; 00349 } 00350 break; 00351 00352 case 3: 00353 op = OP_EXACTMB3N; 00354 break; 00355 00356 default: 00357 op = OP_EXACTMBN; 00358 break; 00359 } 00360 } 00361 return op; 00362 } 00363 00364 static int 00365 compile_tree_empty_check(Node* node, regex_t* reg, int empty_info) 00366 { 00367 int r; 00368 int saved_num_null_check = reg->num_null_check; 00369 00370 if (empty_info != 0) { 00371 r = add_opcode(reg, OP_NULL_CHECK_START); 00372 if (r) return r; 00373 r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */ 00374 if (r) return r; 00375 reg->num_null_check++; 00376 } 00377 00378 r = compile_tree(node, reg); 00379 if (r) return r; 00380 00381 if (empty_info != 0) { 00382 if (empty_info == NQ_TARGET_IS_EMPTY) 00383 r = add_opcode(reg, OP_NULL_CHECK_END); 00384 else if (empty_info == NQ_TARGET_IS_EMPTY_MEM) 00385 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST); 00386 else if (empty_info == NQ_TARGET_IS_EMPTY_REC) 00387 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH); 00388 00389 if (r) return r; 00390 r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */ 00391 } 00392 return r; 00393 } 00394 00395 #ifdef USE_SUBEXP_CALL 00396 static int 00397 compile_call(CallNode* node, regex_t* reg) 00398 { 00399 int r; 00400 00401 r = add_opcode(reg, OP_CALL); 00402 if (r) return r; 00403 r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg), 00404 node->target); 00405 if (r) return r; 00406 r = add_abs_addr(reg, 0 /*dummy addr.*/); 00407 return r; 00408 } 00409 #endif 00410 00411 static int 00412 compile_tree_n_times(Node* node, int n, regex_t* reg) 00413 { 00414 int i, r; 00415 00416 for (i = 0; i < n; i++) { 00417 r = compile_tree(node, reg); 00418 if (r) return r; 00419 } 00420 return 0; 00421 } 00422 00423 static int 00424 add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance str_len, 00425 regex_t* reg ARG_UNUSED, int ignore_case) 00426 { 00427 int len; 00428 int op = select_str_opcode(mb_len, str_len, ignore_case); 00429 00430 len = SIZE_OPCODE; 00431 00432 if (op == OP_EXACTMBN) len += SIZE_LENGTH; 00433 if (IS_NEED_STR_LEN_OP_EXACT(op)) 00434 len += SIZE_LENGTH; 00435 00436 len += mb_len * str_len; 00437 return len; 00438 } 00439 00440 static int 00441 add_compile_string(UChar* s, int mb_len, OnigDistance str_len, 00442 regex_t* reg, int ignore_case) 00443 { 00444 int op = select_str_opcode(mb_len, str_len, ignore_case); 00445 add_opcode(reg, op); 00446 00447 if (op == OP_EXACTMBN) 00448 add_length(reg, mb_len); 00449 00450 if (IS_NEED_STR_LEN_OP_EXACT(op)) { 00451 if (op == OP_EXACTN_IC) 00452 add_length(reg, mb_len * str_len); 00453 else 00454 add_length(reg, str_len); 00455 } 00456 00457 add_bytes(reg, s, mb_len * str_len); 00458 return 0; 00459 } 00460 00461 00462 static int 00463 compile_length_string_node(Node* node, regex_t* reg) 00464 { 00465 int rlen, r, len, prev_len, slen, ambig; 00466 OnigEncoding enc = reg->enc; 00467 UChar *p, *prev; 00468 StrNode* sn; 00469 00470 sn = NSTR(node); 00471 if (sn->end <= sn->s) 00472 return 0; 00473 00474 ambig = NSTRING_IS_AMBIG(node); 00475 00476 p = prev = sn->s; 00477 prev_len = enclen(enc, p, sn->end); 00478 p += prev_len; 00479 slen = 1; 00480 rlen = 0; 00481 00482 for (; p < sn->end; ) { 00483 len = enclen(enc, p, sn->end); 00484 if (len == prev_len) { 00485 slen++; 00486 } 00487 else { 00488 r = add_compile_string_length(prev, prev_len, slen, reg, ambig); 00489 rlen += r; 00490 prev = p; 00491 slen = 1; 00492 prev_len = len; 00493 } 00494 p += len; 00495 } 00496 r = add_compile_string_length(prev, prev_len, slen, reg, ambig); 00497 rlen += r; 00498 return rlen; 00499 } 00500 00501 static int 00502 compile_length_string_raw_node(StrNode* sn, regex_t* reg) 00503 { 00504 if (sn->end <= sn->s) 00505 return 0; 00506 00507 return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0); 00508 } 00509 00510 static int 00511 compile_string_node(Node* node, regex_t* reg) 00512 { 00513 int r, len, prev_len, slen, ambig; 00514 OnigEncoding enc = reg->enc; 00515 UChar *p, *prev, *end; 00516 StrNode* sn; 00517 00518 sn = NSTR(node); 00519 if (sn->end <= sn->s) 00520 return 0; 00521 00522 end = sn->end; 00523 ambig = NSTRING_IS_AMBIG(node); 00524 00525 p = prev = sn->s; 00526 prev_len = enclen(enc, p, end); 00527 p += prev_len; 00528 slen = 1; 00529 00530 for (; p < end; ) { 00531 len = enclen(enc, p, end); 00532 if (len == prev_len) { 00533 slen++; 00534 } 00535 else { 00536 r = add_compile_string(prev, prev_len, slen, reg, ambig); 00537 if (r) return r; 00538 00539 prev = p; 00540 slen = 1; 00541 prev_len = len; 00542 } 00543 00544 p += len; 00545 } 00546 return add_compile_string(prev, prev_len, slen, reg, ambig); 00547 } 00548 00549 static int 00550 compile_string_raw_node(StrNode* sn, regex_t* reg) 00551 { 00552 if (sn->end <= sn->s) 00553 return 0; 00554 00555 return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0); 00556 } 00557 00558 static int 00559 add_multi_byte_cclass(BBuf* mbuf, regex_t* reg) 00560 { 00561 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS 00562 add_length(reg, mbuf->used); 00563 return add_bytes(reg, mbuf->p, mbuf->used); 00564 #else 00565 int r, pad_size; 00566 UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH; 00567 00568 GET_ALIGNMENT_PAD_SIZE(p, pad_size); 00569 add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1)); 00570 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size); 00571 00572 r = add_bytes(reg, mbuf->p, mbuf->used); 00573 00574 /* padding for return value from compile_length_cclass_node() to be fix. */ 00575 pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size; 00576 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size); 00577 return r; 00578 #endif 00579 } 00580 00581 static int 00582 compile_length_cclass_node(CClassNode* cc, regex_t* reg) 00583 { 00584 int len; 00585 00586 if (IS_NCCLASS_SHARE(cc)) { 00587 len = SIZE_OPCODE + SIZE_POINTER; 00588 return len; 00589 } 00590 00591 if (IS_NULL(cc->mbuf)) { 00592 len = SIZE_OPCODE + SIZE_BITSET; 00593 } 00594 else { 00595 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) { 00596 len = SIZE_OPCODE; 00597 } 00598 else { 00599 len = SIZE_OPCODE + SIZE_BITSET; 00600 } 00601 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS 00602 len += SIZE_LENGTH + cc->mbuf->used; 00603 #else 00604 len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1); 00605 #endif 00606 } 00607 00608 return len; 00609 } 00610 00611 static int 00612 compile_cclass_node(CClassNode* cc, regex_t* reg) 00613 { 00614 int r; 00615 00616 if (IS_NCCLASS_SHARE(cc)) { 00617 add_opcode(reg, OP_CCLASS_NODE); 00618 r = add_pointer(reg, cc); 00619 return r; 00620 } 00621 00622 if (IS_NULL(cc->mbuf)) { 00623 if (IS_NCCLASS_NOT(cc)) 00624 add_opcode(reg, OP_CCLASS_NOT); 00625 else 00626 add_opcode(reg, OP_CCLASS); 00627 00628 r = add_bitset(reg, cc->bs); 00629 } 00630 else { 00631 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) { 00632 if (IS_NCCLASS_NOT(cc)) 00633 add_opcode(reg, OP_CCLASS_MB_NOT); 00634 else 00635 add_opcode(reg, OP_CCLASS_MB); 00636 00637 r = add_multi_byte_cclass(cc->mbuf, reg); 00638 } 00639 else { 00640 if (IS_NCCLASS_NOT(cc)) 00641 add_opcode(reg, OP_CCLASS_MIX_NOT); 00642 else 00643 add_opcode(reg, OP_CCLASS_MIX); 00644 00645 r = add_bitset(reg, cc->bs); 00646 if (r) return r; 00647 r = add_multi_byte_cclass(cc->mbuf, reg); 00648 } 00649 } 00650 00651 return r; 00652 } 00653 00654 static int 00655 entry_repeat_range(regex_t* reg, int id, int lower, int upper) 00656 { 00657 #define REPEAT_RANGE_ALLOC 4 00658 00659 OnigRepeatRange* p; 00660 00661 if (reg->repeat_range_alloc == 0) { 00662 p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC); 00663 CHECK_NULL_RETURN_MEMERR(p); 00664 reg->repeat_range = p; 00665 reg->repeat_range_alloc = REPEAT_RANGE_ALLOC; 00666 } 00667 else if (reg->repeat_range_alloc <= id) { 00668 int n; 00669 n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC; 00670 p = (OnigRepeatRange* )xrealloc(reg->repeat_range, 00671 sizeof(OnigRepeatRange) * n); 00672 CHECK_NULL_RETURN_MEMERR(p); 00673 reg->repeat_range = p; 00674 reg->repeat_range_alloc = n; 00675 } 00676 else { 00677 p = reg->repeat_range; 00678 } 00679 00680 p[id].lower = lower; 00681 p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper); 00682 return 0; 00683 } 00684 00685 static int 00686 compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info, 00687 regex_t* reg) 00688 { 00689 int r; 00690 int num_repeat = reg->num_repeat; 00691 00692 r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG); 00693 if (r) return r; 00694 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */ 00695 reg->num_repeat++; 00696 if (r) return r; 00697 r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC); 00698 if (r) return r; 00699 00700 r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper); 00701 if (r) return r; 00702 00703 r = compile_tree_empty_check(qn->target, reg, empty_info); 00704 if (r) return r; 00705 00706 if ( 00707 #ifdef USE_SUBEXP_CALL 00708 reg->num_call > 0 || 00709 #endif 00710 IS_QUANTIFIER_IN_REPEAT(qn)) { 00711 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG); 00712 } 00713 else { 00714 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG); 00715 } 00716 if (r) return r; 00717 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */ 00718 return r; 00719 } 00720 00721 static int 00722 is_anychar_star_quantifier(QtfrNode* qn) 00723 { 00724 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) && 00725 NTYPE(qn->target) == NT_CANY) 00726 return 1; 00727 else 00728 return 0; 00729 } 00730 00731 #define QUANTIFIER_EXPAND_LIMIT_SIZE 50 00732 #define CKN_ON (ckn > 0) 00733 00734 #ifdef USE_COMBINATION_EXPLOSION_CHECK 00735 00736 static int 00737 compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) 00738 { 00739 int len, mod_tlen, cklen; 00740 int ckn; 00741 int infinite = IS_REPEAT_INFINITE(qn->upper); 00742 int empty_info = qn->target_empty_info; 00743 int tlen = compile_length_tree(qn->target, reg); 00744 00745 if (tlen < 0) return tlen; 00746 00747 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0); 00748 00749 cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0); 00750 00751 /* anychar repeat */ 00752 if (NTYPE(qn->target) == NT_CANY) { 00753 if (qn->greedy && infinite) { 00754 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) 00755 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen; 00756 else 00757 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen; 00758 } 00759 } 00760 00761 if (empty_info != 0) 00762 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); 00763 else 00764 mod_tlen = tlen; 00765 00766 if (infinite && qn->lower <= 1) { 00767 if (qn->greedy) { 00768 if (qn->lower == 1) 00769 len = SIZE_OP_JUMP; 00770 else 00771 len = 0; 00772 00773 len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP; 00774 } 00775 else { 00776 if (qn->lower == 0) 00777 len = SIZE_OP_JUMP; 00778 else 00779 len = 0; 00780 00781 len += mod_tlen + SIZE_OP_PUSH + cklen; 00782 } 00783 } 00784 else if (qn->upper == 0) { 00785 if (qn->is_refered != 0) /* /(?<n>..){0}/ */ 00786 len = SIZE_OP_JUMP + tlen; 00787 else 00788 len = 0; 00789 } 00790 else if (qn->upper == 1 && qn->greedy) { 00791 if (qn->lower == 0) { 00792 if (CKN_ON) { 00793 len = SIZE_OP_STATE_CHECK_PUSH + tlen; 00794 } 00795 else { 00796 len = SIZE_OP_PUSH + tlen; 00797 } 00798 } 00799 else { 00800 len = tlen; 00801 } 00802 } 00803 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ 00804 len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen; 00805 } 00806 else { 00807 len = SIZE_OP_REPEAT_INC 00808 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM; 00809 if (CKN_ON) 00810 len += SIZE_OP_STATE_CHECK; 00811 } 00812 00813 return len; 00814 } 00815 00816 static int 00817 compile_quantifier_node(QtfrNode* qn, regex_t* reg) 00818 { 00819 int r, mod_tlen; 00820 int ckn; 00821 int infinite = IS_REPEAT_INFINITE(qn->upper); 00822 int empty_info = qn->target_empty_info; 00823 int tlen = compile_length_tree(qn->target, reg); 00824 00825 if (tlen < 0) return tlen; 00826 00827 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0); 00828 00829 if (is_anychar_star_quantifier(qn)) { 00830 r = compile_tree_n_times(qn->target, qn->lower, reg); 00831 if (r) return r; 00832 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) { 00833 if (IS_MULTILINE(reg->options)) 00834 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT); 00835 else 00836 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); 00837 if (r) return r; 00838 if (CKN_ON) { 00839 r = add_state_check_num(reg, ckn); 00840 if (r) return r; 00841 } 00842 00843 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); 00844 } 00845 else { 00846 if (IS_MULTILINE(reg->options)) { 00847 r = add_opcode(reg, (CKN_ON ? 00848 OP_STATE_CHECK_ANYCHAR_ML_STAR 00849 : OP_ANYCHAR_ML_STAR)); 00850 } 00851 else { 00852 r = add_opcode(reg, (CKN_ON ? 00853 OP_STATE_CHECK_ANYCHAR_STAR 00854 : OP_ANYCHAR_STAR)); 00855 } 00856 if (r) return r; 00857 if (CKN_ON) 00858 r = add_state_check_num(reg, ckn); 00859 00860 return r; 00861 } 00862 } 00863 00864 if (empty_info != 0) 00865 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); 00866 else 00867 mod_tlen = tlen; 00868 00869 if (infinite && qn->lower <= 1) { 00870 if (qn->greedy) { 00871 if (qn->lower == 1) { 00872 r = add_opcode_rel_addr(reg, OP_JUMP, 00873 (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)); 00874 if (r) return r; 00875 } 00876 00877 if (CKN_ON) { 00878 r = add_opcode(reg, OP_STATE_CHECK_PUSH); 00879 if (r) return r; 00880 r = add_state_check_num(reg, ckn); 00881 if (r) return r; 00882 r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP); 00883 } 00884 else { 00885 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP); 00886 } 00887 if (r) return r; 00888 r = compile_tree_empty_check(qn->target, reg, empty_info); 00889 if (r) return r; 00890 r = add_opcode_rel_addr(reg, OP_JUMP, 00891 -(mod_tlen + (int )SIZE_OP_JUMP 00892 + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH))); 00893 } 00894 else { 00895 if (qn->lower == 0) { 00896 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen); 00897 if (r) return r; 00898 } 00899 r = compile_tree_empty_check(qn->target, reg, empty_info); 00900 if (r) return r; 00901 if (CKN_ON) { 00902 r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP); 00903 if (r) return r; 00904 r = add_state_check_num(reg, ckn); 00905 if (r) return r; 00906 r = add_rel_addr(reg, 00907 -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP)); 00908 } 00909 else 00910 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH)); 00911 } 00912 } 00913 else if (qn->upper == 0) { 00914 if (qn->is_refered != 0) { /* /(?<n>..){0}/ */ 00915 r = add_opcode_rel_addr(reg, OP_JUMP, tlen); 00916 if (r) return r; 00917 r = compile_tree(qn->target, reg); 00918 } 00919 else 00920 r = 0; 00921 } 00922 else if (qn->upper == 1 && qn->greedy) { 00923 if (qn->lower == 0) { 00924 if (CKN_ON) { 00925 r = add_opcode(reg, OP_STATE_CHECK_PUSH); 00926 if (r) return r; 00927 r = add_state_check_num(reg, ckn); 00928 if (r) return r; 00929 r = add_rel_addr(reg, tlen); 00930 } 00931 else { 00932 r = add_opcode_rel_addr(reg, OP_PUSH, tlen); 00933 } 00934 if (r) return r; 00935 } 00936 00937 r = compile_tree(qn->target, reg); 00938 } 00939 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ 00940 if (CKN_ON) { 00941 r = add_opcode(reg, OP_STATE_CHECK_PUSH); 00942 if (r) return r; 00943 r = add_state_check_num(reg, ckn); 00944 if (r) return r; 00945 r = add_rel_addr(reg, SIZE_OP_JUMP); 00946 } 00947 else { 00948 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP); 00949 } 00950 00951 if (r) return r; 00952 r = add_opcode_rel_addr(reg, OP_JUMP, tlen); 00953 if (r) return r; 00954 r = compile_tree(qn->target, reg); 00955 } 00956 else { 00957 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg); 00958 if (CKN_ON) { 00959 if (r) return r; 00960 r = add_opcode(reg, OP_STATE_CHECK); 00961 if (r) return r; 00962 r = add_state_check_num(reg, ckn); 00963 } 00964 } 00965 return r; 00966 } 00967 00968 #else /* USE_COMBINATION_EXPLOSION_CHECK */ 00969 00970 static int 00971 compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) 00972 { 00973 int len, mod_tlen; 00974 int infinite = IS_REPEAT_INFINITE(qn->upper); 00975 int empty_info = qn->target_empty_info; 00976 int tlen = compile_length_tree(qn->target, reg); 00977 00978 if (tlen < 0) return tlen; 00979 00980 /* anychar repeat */ 00981 if (NTYPE(qn->target) == NT_CANY) { 00982 if (qn->greedy && infinite) { 00983 if (IS_NOT_NULL(qn->next_head_exact)) 00984 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower; 00985 else 00986 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower; 00987 } 00988 } 00989 00990 if (empty_info != 0) 00991 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); 00992 else 00993 mod_tlen = tlen; 00994 00995 if (infinite && 00996 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { 00997 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { 00998 len = SIZE_OP_JUMP; 00999 } 01000 else { 01001 len = tlen * qn->lower; 01002 } 01003 01004 if (qn->greedy) { 01005 if (IS_NOT_NULL(qn->head_exact)) 01006 len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP; 01007 else if (IS_NOT_NULL(qn->next_head_exact)) 01008 len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP; 01009 else 01010 len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP; 01011 } 01012 else 01013 len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH; 01014 } 01015 else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */ 01016 len = SIZE_OP_JUMP + tlen; 01017 } 01018 else if (!infinite && qn->greedy && 01019 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper 01020 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { 01021 len = tlen * qn->lower; 01022 len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower); 01023 } 01024 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ 01025 len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen; 01026 } 01027 else { 01028 len = SIZE_OP_REPEAT_INC 01029 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM; 01030 } 01031 01032 return len; 01033 } 01034 01035 static int 01036 compile_quantifier_node(QtfrNode* qn, regex_t* reg) 01037 { 01038 int i, r, mod_tlen; 01039 int infinite = IS_REPEAT_INFINITE(qn->upper); 01040 int empty_info = qn->target_empty_info; 01041 int tlen = compile_length_tree(qn->target, reg); 01042 01043 if (tlen < 0) return tlen; 01044 01045 if (is_anychar_star_quantifier(qn)) { 01046 r = compile_tree_n_times(qn->target, qn->lower, reg); 01047 if (r) return r; 01048 if (IS_NOT_NULL(qn->next_head_exact)) { 01049 if (IS_MULTILINE(reg->options)) 01050 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT); 01051 else 01052 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); 01053 if (r) return r; 01054 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); 01055 } 01056 else { 01057 if (IS_MULTILINE(reg->options)) 01058 return add_opcode(reg, OP_ANYCHAR_ML_STAR); 01059 else 01060 return add_opcode(reg, OP_ANYCHAR_STAR); 01061 } 01062 } 01063 01064 if (empty_info != 0) 01065 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); 01066 else 01067 mod_tlen = tlen; 01068 01069 if (infinite && 01070 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { 01071 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { 01072 if (qn->greedy) { 01073 if (IS_NOT_NULL(qn->head_exact)) 01074 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1); 01075 else if (IS_NOT_NULL(qn->next_head_exact)) 01076 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT); 01077 else 01078 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH); 01079 } 01080 else { 01081 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP); 01082 } 01083 if (r) return r; 01084 } 01085 else { 01086 r = compile_tree_n_times(qn->target, qn->lower, reg); 01087 if (r) return r; 01088 } 01089 01090 if (qn->greedy) { 01091 if (IS_NOT_NULL(qn->head_exact)) { 01092 r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1, 01093 mod_tlen + SIZE_OP_JUMP); 01094 if (r) return r; 01095 add_bytes(reg, NSTR(qn->head_exact)->s, 1); 01096 r = compile_tree_empty_check(qn->target, reg, empty_info); 01097 if (r) return r; 01098 r = add_opcode_rel_addr(reg, OP_JUMP, 01099 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1)); 01100 } 01101 else if (IS_NOT_NULL(qn->next_head_exact)) { 01102 r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT, 01103 mod_tlen + SIZE_OP_JUMP); 01104 if (r) return r; 01105 add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); 01106 r = compile_tree_empty_check(qn->target, reg, empty_info); 01107 if (r) return r; 01108 r = add_opcode_rel_addr(reg, OP_JUMP, 01109 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT)); 01110 } 01111 else { 01112 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP); 01113 if (r) return r; 01114 r = compile_tree_empty_check(qn->target, reg, empty_info); 01115 if (r) return r; 01116 r = add_opcode_rel_addr(reg, OP_JUMP, 01117 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH)); 01118 } 01119 } 01120 else { 01121 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen); 01122 if (r) return r; 01123 r = compile_tree_empty_check(qn->target, reg, empty_info); 01124 if (r) return r; 01125 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH)); 01126 } 01127 } 01128 else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */ 01129 r = add_opcode_rel_addr(reg, OP_JUMP, tlen); 01130 if (r) return r; 01131 r = compile_tree(qn->target, reg); 01132 } 01133 else if (!infinite && qn->greedy && 01134 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper 01135 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { 01136 int n = qn->upper - qn->lower; 01137 01138 r = compile_tree_n_times(qn->target, qn->lower, reg); 01139 if (r) return r; 01140 01141 for (i = 0; i < n; i++) { 01142 r = add_opcode_rel_addr(reg, OP_PUSH, 01143 (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH); 01144 if (r) return r; 01145 r = compile_tree(qn->target, reg); 01146 if (r) return r; 01147 } 01148 } 01149 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ 01150 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP); 01151 if (r) return r; 01152 r = add_opcode_rel_addr(reg, OP_JUMP, tlen); 01153 if (r) return r; 01154 r = compile_tree(qn->target, reg); 01155 } 01156 else { 01157 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg); 01158 } 01159 return r; 01160 } 01161 #endif /* USE_COMBINATION_EXPLOSION_CHECK */ 01162 01163 static int 01164 compile_length_option_node(EncloseNode* node, regex_t* reg) 01165 { 01166 int tlen; 01167 OnigOptionType prev = reg->options; 01168 01169 reg->options = node->option; 01170 tlen = compile_length_tree(node->target, reg); 01171 reg->options = prev; 01172 01173 if (tlen < 0) return tlen; 01174 01175 if (IS_DYNAMIC_OPTION(prev ^ node->option)) { 01176 return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL 01177 + tlen + SIZE_OP_SET_OPTION; 01178 } 01179 else 01180 return tlen; 01181 } 01182 01183 static int 01184 compile_option_node(EncloseNode* node, regex_t* reg) 01185 { 01186 int r; 01187 OnigOptionType prev = reg->options; 01188 01189 if (IS_DYNAMIC_OPTION(prev ^ node->option)) { 01190 r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option); 01191 if (r) return r; 01192 r = add_opcode_option(reg, OP_SET_OPTION, prev); 01193 if (r) return r; 01194 r = add_opcode(reg, OP_FAIL); 01195 if (r) return r; 01196 } 01197 01198 reg->options = node->option; 01199 r = compile_tree(node->target, reg); 01200 reg->options = prev; 01201 01202 if (IS_DYNAMIC_OPTION(prev ^ node->option)) { 01203 if (r) return r; 01204 r = add_opcode_option(reg, OP_SET_OPTION, prev); 01205 } 01206 return r; 01207 } 01208 01209 static int 01210 compile_length_enclose_node(EncloseNode* node, regex_t* reg) 01211 { 01212 int len; 01213 int tlen; 01214 01215 if (node->type == ENCLOSE_OPTION) 01216 return compile_length_option_node(node, reg); 01217 01218 if (node->target) { 01219 tlen = compile_length_tree(node->target, reg); 01220 if (tlen < 0) return tlen; 01221 } 01222 else 01223 tlen = 0; 01224 01225 switch (node->type) { 01226 case ENCLOSE_MEMORY: 01227 #ifdef USE_SUBEXP_CALL 01228 if (IS_ENCLOSE_CALLED(node)) { 01229 len = SIZE_OP_MEMORY_START_PUSH + tlen 01230 + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN; 01231 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) 01232 len += (IS_ENCLOSE_RECURSION(node) 01233 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); 01234 else 01235 len += (IS_ENCLOSE_RECURSION(node) 01236 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); 01237 } 01238 else 01239 #endif 01240 { 01241 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum)) 01242 len = SIZE_OP_MEMORY_START_PUSH; 01243 else 01244 len = SIZE_OP_MEMORY_START; 01245 01246 len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum) 01247 ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END); 01248 } 01249 break; 01250 01251 case ENCLOSE_STOP_BACKTRACK: 01252 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) { 01253 QtfrNode* qn = NQTFR(node->target); 01254 tlen = compile_length_tree(qn->target, reg); 01255 if (tlen < 0) return tlen; 01256 01257 len = tlen * qn->lower 01258 + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP; 01259 } 01260 else { 01261 len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT; 01262 } 01263 break; 01264 01265 default: 01266 return ONIGERR_TYPE_BUG; 01267 break; 01268 } 01269 01270 return len; 01271 } 01272 01273 static int get_char_length_tree(Node* node, regex_t* reg, int* len); 01274 01275 static int 01276 compile_enclose_node(EncloseNode* node, regex_t* reg) 01277 { 01278 int r, len; 01279 01280 if (node->type == ENCLOSE_OPTION) 01281 return compile_option_node(node, reg); 01282 01283 switch (node->type) { 01284 case ENCLOSE_MEMORY: 01285 #ifdef USE_SUBEXP_CALL 01286 if (IS_ENCLOSE_CALLED(node)) { 01287 r = add_opcode(reg, OP_CALL); 01288 if (r) return r; 01289 node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP; 01290 node->state |= NST_ADDR_FIXED; 01291 r = add_abs_addr(reg, (int )node->call_addr); 01292 if (r) return r; 01293 len = compile_length_tree(node->target, reg); 01294 len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN); 01295 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) 01296 len += (IS_ENCLOSE_RECURSION(node) 01297 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); 01298 else 01299 len += (IS_ENCLOSE_RECURSION(node) 01300 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); 01301 01302 r = add_opcode_rel_addr(reg, OP_JUMP, len); 01303 if (r) return r; 01304 } 01305 #endif 01306 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum)) 01307 r = add_opcode(reg, OP_MEMORY_START_PUSH); 01308 else 01309 r = add_opcode(reg, OP_MEMORY_START); 01310 if (r) return r; 01311 r = add_mem_num(reg, node->regnum); 01312 if (r) return r; 01313 r = compile_tree(node->target, reg); 01314 if (r) return r; 01315 #ifdef USE_SUBEXP_CALL 01316 if (IS_ENCLOSE_CALLED(node)) { 01317 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) 01318 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node) 01319 ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH)); 01320 else 01321 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node) 01322 ? OP_MEMORY_END_REC : OP_MEMORY_END)); 01323 01324 if (r) return r; 01325 r = add_mem_num(reg, node->regnum); 01326 if (r) return r; 01327 r = add_opcode(reg, OP_RETURN); 01328 } 01329 else 01330 #endif 01331 { 01332 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) 01333 r = add_opcode(reg, OP_MEMORY_END_PUSH); 01334 else 01335 r = add_opcode(reg, OP_MEMORY_END); 01336 if (r) return r; 01337 r = add_mem_num(reg, node->regnum); 01338 } 01339 break; 01340 01341 case ENCLOSE_STOP_BACKTRACK: 01342 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) { 01343 QtfrNode* qn = NQTFR(node->target); 01344 r = compile_tree_n_times(qn->target, qn->lower, reg); 01345 if (r) return r; 01346 01347 len = compile_length_tree(qn->target, reg); 01348 if (len < 0) return len; 01349 01350 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP); 01351 if (r) return r; 01352 r = compile_tree(qn->target, reg); 01353 if (r) return r; 01354 r = add_opcode(reg, OP_POP); 01355 if (r) return r; 01356 r = add_opcode_rel_addr(reg, OP_JUMP, 01357 -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP)); 01358 } 01359 else { 01360 r = add_opcode(reg, OP_PUSH_STOP_BT); 01361 if (r) return r; 01362 r = compile_tree(node->target, reg); 01363 if (r) return r; 01364 r = add_opcode(reg, OP_POP_STOP_BT); 01365 } 01366 break; 01367 01368 default: 01369 return ONIGERR_TYPE_BUG; 01370 break; 01371 } 01372 01373 return r; 01374 } 01375 01376 static int 01377 compile_length_anchor_node(AnchorNode* node, regex_t* reg) 01378 { 01379 int len; 01380 int tlen = 0; 01381 01382 if (node->target) { 01383 tlen = compile_length_tree(node->target, reg); 01384 if (tlen < 0) return tlen; 01385 } 01386 01387 switch (node->type) { 01388 case ANCHOR_PREC_READ: 01389 len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS; 01390 break; 01391 case ANCHOR_PREC_READ_NOT: 01392 len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS; 01393 break; 01394 case ANCHOR_LOOK_BEHIND: 01395 len = SIZE_OP_LOOK_BEHIND + tlen; 01396 break; 01397 case ANCHOR_LOOK_BEHIND_NOT: 01398 len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT; 01399 break; 01400 01401 default: 01402 len = SIZE_OPCODE; 01403 break; 01404 } 01405 01406 return len; 01407 } 01408 01409 static int 01410 compile_anchor_node(AnchorNode* node, regex_t* reg) 01411 { 01412 int r, len; 01413 01414 switch (node->type) { 01415 case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break; 01416 case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break; 01417 case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break; 01418 case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break; 01419 case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break; 01420 case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break; 01421 01422 case ANCHOR_WORD_BOUND: r = add_opcode(reg, OP_WORD_BOUND); break; 01423 case ANCHOR_NOT_WORD_BOUND: r = add_opcode(reg, OP_NOT_WORD_BOUND); break; 01424 #ifdef USE_WORD_BEGIN_END 01425 case ANCHOR_WORD_BEGIN: r = add_opcode(reg, OP_WORD_BEGIN); break; 01426 case ANCHOR_WORD_END: r = add_opcode(reg, OP_WORD_END); break; 01427 #endif 01428 01429 case ANCHOR_PREC_READ: 01430 r = add_opcode(reg, OP_PUSH_POS); 01431 if (r) return r; 01432 r = compile_tree(node->target, reg); 01433 if (r) return r; 01434 r = add_opcode(reg, OP_POP_POS); 01435 break; 01436 01437 case ANCHOR_PREC_READ_NOT: 01438 len = compile_length_tree(node->target, reg); 01439 if (len < 0) return len; 01440 r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS); 01441 if (r) return r; 01442 r = compile_tree(node->target, reg); 01443 if (r) return r; 01444 r = add_opcode(reg, OP_FAIL_POS); 01445 break; 01446 01447 case ANCHOR_LOOK_BEHIND: 01448 { 01449 int n; 01450 r = add_opcode(reg, OP_LOOK_BEHIND); 01451 if (r) return r; 01452 if (node->char_len < 0) { 01453 r = get_char_length_tree(node->target, reg, &n); 01454 if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; 01455 } 01456 else 01457 n = node->char_len; 01458 r = add_length(reg, n); 01459 if (r) return r; 01460 r = compile_tree(node->target, reg); 01461 } 01462 break; 01463 01464 case ANCHOR_LOOK_BEHIND_NOT: 01465 { 01466 int n; 01467 len = compile_length_tree(node->target, reg); 01468 r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT, 01469 len + SIZE_OP_FAIL_LOOK_BEHIND_NOT); 01470 if (r) return r; 01471 if (node->char_len < 0) { 01472 r = get_char_length_tree(node->target, reg, &n); 01473 if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; 01474 } 01475 else 01476 n = node->char_len; 01477 r = add_length(reg, n); 01478 if (r) return r; 01479 r = compile_tree(node->target, reg); 01480 if (r) return r; 01481 r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT); 01482 } 01483 break; 01484 01485 default: 01486 return ONIGERR_TYPE_BUG; 01487 break; 01488 } 01489 01490 return r; 01491 } 01492 01493 static int 01494 compile_length_tree(Node* node, regex_t* reg) 01495 { 01496 int len, type, r; 01497 01498 type = NTYPE(node); 01499 switch (type) { 01500 case NT_LIST: 01501 len = 0; 01502 do { 01503 r = compile_length_tree(NCAR(node), reg); 01504 if (r < 0) return r; 01505 len += r; 01506 } while (IS_NOT_NULL(node = NCDR(node))); 01507 r = len; 01508 break; 01509 01510 case NT_ALT: 01511 { 01512 int n; 01513 01514 n = r = 0; 01515 do { 01516 r += compile_length_tree(NCAR(node), reg); 01517 n++; 01518 } while (IS_NOT_NULL(node = NCDR(node))); 01519 r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1); 01520 } 01521 break; 01522 01523 case NT_STR: 01524 if (NSTRING_IS_RAW(node)) 01525 r = compile_length_string_raw_node(NSTR(node), reg); 01526 else 01527 r = compile_length_string_node(node, reg); 01528 break; 01529 01530 case NT_CCLASS: 01531 r = compile_length_cclass_node(NCCLASS(node), reg); 01532 break; 01533 01534 case NT_CTYPE: 01535 case NT_CANY: 01536 r = SIZE_OPCODE; 01537 break; 01538 01539 case NT_BREF: 01540 { 01541 BRefNode* br = NBREF(node); 01542 01543 #ifdef USE_BACKREF_WITH_LEVEL 01544 if (IS_BACKREF_NEST_LEVEL(br)) { 01545 r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH + 01546 SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); 01547 } 01548 else 01549 #endif 01550 if (br->back_num == 1) { 01551 r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2) 01552 ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM)); 01553 } 01554 else { 01555 r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); 01556 } 01557 } 01558 break; 01559 01560 #ifdef USE_SUBEXP_CALL 01561 case NT_CALL: 01562 r = SIZE_OP_CALL; 01563 break; 01564 #endif 01565 01566 case NT_QTFR: 01567 r = compile_length_quantifier_node(NQTFR(node), reg); 01568 break; 01569 01570 case NT_ENCLOSE: 01571 r = compile_length_enclose_node(NENCLOSE(node), reg); 01572 break; 01573 01574 case NT_ANCHOR: 01575 r = compile_length_anchor_node(NANCHOR(node), reg); 01576 break; 01577 01578 default: 01579 return ONIGERR_TYPE_BUG; 01580 break; 01581 } 01582 01583 return r; 01584 } 01585 01586 static int 01587 compile_tree(Node* node, regex_t* reg) 01588 { 01589 int n, type, len, pos, r = 0; 01590 01591 type = NTYPE(node); 01592 switch (type) { 01593 case NT_LIST: 01594 do { 01595 r = compile_tree(NCAR(node), reg); 01596 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 01597 break; 01598 01599 case NT_ALT: 01600 { 01601 Node* x = node; 01602 len = 0; 01603 do { 01604 len += compile_length_tree(NCAR(x), reg); 01605 if (NCDR(x) != NULL) { 01606 len += SIZE_OP_PUSH + SIZE_OP_JUMP; 01607 } 01608 } while (IS_NOT_NULL(x = NCDR(x))); 01609 pos = reg->used + len; /* goal position */ 01610 01611 do { 01612 len = compile_length_tree(NCAR(node), reg); 01613 if (IS_NOT_NULL(NCDR(node))) { 01614 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP); 01615 if (r) break; 01616 } 01617 r = compile_tree(NCAR(node), reg); 01618 if (r) break; 01619 if (IS_NOT_NULL(NCDR(node))) { 01620 len = pos - (reg->used + SIZE_OP_JUMP); 01621 r = add_opcode_rel_addr(reg, OP_JUMP, len); 01622 if (r) break; 01623 } 01624 } while (IS_NOT_NULL(node = NCDR(node))); 01625 } 01626 break; 01627 01628 case NT_STR: 01629 if (NSTRING_IS_RAW(node)) 01630 r = compile_string_raw_node(NSTR(node), reg); 01631 else 01632 r = compile_string_node(node, reg); 01633 break; 01634 01635 case NT_CCLASS: 01636 r = compile_cclass_node(NCCLASS(node), reg); 01637 break; 01638 01639 case NT_CTYPE: 01640 { 01641 int op; 01642 01643 switch (NCTYPE(node)->ctype) { 01644 case ONIGENC_CTYPE_WORD: 01645 if (NCTYPE(node)->not != 0) op = OP_NOT_WORD; 01646 else op = OP_WORD; 01647 break; 01648 default: 01649 return ONIGERR_TYPE_BUG; 01650 break; 01651 } 01652 r = add_opcode(reg, op); 01653 } 01654 break; 01655 01656 case NT_CANY: 01657 if (IS_MULTILINE(reg->options)) 01658 r = add_opcode(reg, OP_ANYCHAR_ML); 01659 else 01660 r = add_opcode(reg, OP_ANYCHAR); 01661 break; 01662 01663 case NT_BREF: 01664 { 01665 BRefNode* br = NBREF(node); 01666 01667 #ifdef USE_BACKREF_WITH_LEVEL 01668 if (IS_BACKREF_NEST_LEVEL(br)) { 01669 r = add_opcode(reg, OP_BACKREF_WITH_LEVEL); 01670 if (r) return r; 01671 r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE)); 01672 if (r) return r; 01673 r = add_length(reg, br->nest_level); 01674 if (r) return r; 01675 01676 goto add_bacref_mems; 01677 } 01678 else 01679 #endif 01680 if (br->back_num == 1) { 01681 n = br->back_static[0]; 01682 if (IS_IGNORECASE(reg->options)) { 01683 r = add_opcode(reg, OP_BACKREFN_IC); 01684 if (r) return r; 01685 r = add_mem_num(reg, n); 01686 } 01687 else { 01688 switch (n) { 01689 case 1: r = add_opcode(reg, OP_BACKREF1); break; 01690 case 2: r = add_opcode(reg, OP_BACKREF2); break; 01691 default: 01692 r = add_opcode(reg, OP_BACKREFN); 01693 if (r) return r; 01694 r = add_mem_num(reg, n); 01695 break; 01696 } 01697 } 01698 } 01699 else { 01700 int i; 01701 int* p; 01702 01703 if (IS_IGNORECASE(reg->options)) { 01704 r = add_opcode(reg, OP_BACKREF_MULTI_IC); 01705 } 01706 else { 01707 r = add_opcode(reg, OP_BACKREF_MULTI); 01708 } 01709 if (r) return r; 01710 01711 #ifdef USE_BACKREF_WITH_LEVEL 01712 add_bacref_mems: 01713 #endif 01714 r = add_length(reg, br->back_num); 01715 if (r) return r; 01716 p = BACKREFS_P(br); 01717 for (i = br->back_num - 1; i >= 0; i--) { 01718 r = add_mem_num(reg, p[i]); 01719 if (r) return r; 01720 } 01721 } 01722 } 01723 break; 01724 01725 #ifdef USE_SUBEXP_CALL 01726 case NT_CALL: 01727 r = compile_call(NCALL(node), reg); 01728 break; 01729 #endif 01730 01731 case NT_QTFR: 01732 r = compile_quantifier_node(NQTFR(node), reg); 01733 break; 01734 01735 case NT_ENCLOSE: 01736 r = compile_enclose_node(NENCLOSE(node), reg); 01737 break; 01738 01739 case NT_ANCHOR: 01740 r = compile_anchor_node(NANCHOR(node), reg); 01741 break; 01742 01743 default: 01744 #ifdef ONIG_DEBUG 01745 fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node)); 01746 #endif 01747 break; 01748 } 01749 01750 return r; 01751 } 01752 01753 #ifdef USE_NAMED_GROUP 01754 01755 static int 01756 noname_disable_map(Node** plink, GroupNumRemap* map, int* counter) 01757 { 01758 int r = 0; 01759 Node* node = *plink; 01760 01761 switch (NTYPE(node)) { 01762 case NT_LIST: 01763 case NT_ALT: 01764 do { 01765 r = noname_disable_map(&(NCAR(node)), map, counter); 01766 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 01767 break; 01768 01769 case NT_QTFR: 01770 { 01771 Node** ptarget = &(NQTFR(node)->target); 01772 Node* old = *ptarget; 01773 r = noname_disable_map(ptarget, map, counter); 01774 if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) { 01775 onig_reduce_nested_quantifier(node, *ptarget); 01776 } 01777 } 01778 break; 01779 01780 case NT_ENCLOSE: 01781 { 01782 EncloseNode* en = NENCLOSE(node); 01783 if (en->type == ENCLOSE_MEMORY) { 01784 if (IS_ENCLOSE_NAMED_GROUP(en)) { 01785 (*counter)++; 01786 map[en->regnum].new_val = *counter; 01787 en->regnum = *counter; 01788 r = noname_disable_map(&(en->target), map, counter); 01789 } 01790 else { 01791 *plink = en->target; 01792 en->target = NULL_NODE; 01793 onig_node_free(node); 01794 r = noname_disable_map(plink, map, counter); 01795 } 01796 } 01797 else 01798 r = noname_disable_map(&(en->target), map, counter); 01799 } 01800 break; 01801 01802 case NT_ANCHOR: 01803 { 01804 AnchorNode* an = NANCHOR(node); 01805 switch (an->type) { 01806 case ANCHOR_PREC_READ: 01807 case ANCHOR_PREC_READ_NOT: 01808 case ANCHOR_LOOK_BEHIND: 01809 case ANCHOR_LOOK_BEHIND_NOT: 01810 r = noname_disable_map(&(an->target), map, counter); 01811 break; 01812 } 01813 } 01814 break; 01815 01816 default: 01817 break; 01818 } 01819 01820 return r; 01821 } 01822 01823 static int 01824 renumber_node_backref(Node* node, GroupNumRemap* map) 01825 { 01826 int i, pos, n, old_num; 01827 int *backs; 01828 BRefNode* bn = NBREF(node); 01829 01830 if (! IS_BACKREF_NAME_REF(bn)) 01831 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; 01832 01833 old_num = bn->back_num; 01834 if (IS_NULL(bn->back_dynamic)) 01835 backs = bn->back_static; 01836 else 01837 backs = bn->back_dynamic; 01838 01839 for (i = 0, pos = 0; i < old_num; i++) { 01840 n = map[backs[i]].new_val; 01841 if (n > 0) { 01842 backs[pos] = n; 01843 pos++; 01844 } 01845 } 01846 01847 bn->back_num = pos; 01848 return 0; 01849 } 01850 01851 static int 01852 renumber_by_map(Node* node, GroupNumRemap* map) 01853 { 01854 int r = 0; 01855 01856 switch (NTYPE(node)) { 01857 case NT_LIST: 01858 case NT_ALT: 01859 do { 01860 r = renumber_by_map(NCAR(node), map); 01861 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 01862 break; 01863 case NT_QTFR: 01864 r = renumber_by_map(NQTFR(node)->target, map); 01865 break; 01866 case NT_ENCLOSE: 01867 r = renumber_by_map(NENCLOSE(node)->target, map); 01868 break; 01869 01870 case NT_BREF: 01871 r = renumber_node_backref(node, map); 01872 break; 01873 01874 case NT_ANCHOR: 01875 { 01876 AnchorNode* an = NANCHOR(node); 01877 switch (an->type) { 01878 case ANCHOR_PREC_READ: 01879 case ANCHOR_PREC_READ_NOT: 01880 case ANCHOR_LOOK_BEHIND: 01881 case ANCHOR_LOOK_BEHIND_NOT: 01882 r = renumber_by_map(an->target, map); 01883 break; 01884 } 01885 } 01886 break; 01887 01888 default: 01889 break; 01890 } 01891 01892 return r; 01893 } 01894 01895 static int 01896 numbered_ref_check(Node* node) 01897 { 01898 int r = 0; 01899 01900 switch (NTYPE(node)) { 01901 case NT_LIST: 01902 case NT_ALT: 01903 do { 01904 r = numbered_ref_check(NCAR(node)); 01905 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 01906 break; 01907 case NT_QTFR: 01908 r = numbered_ref_check(NQTFR(node)->target); 01909 break; 01910 case NT_ENCLOSE: 01911 r = numbered_ref_check(NENCLOSE(node)->target); 01912 break; 01913 01914 case NT_BREF: 01915 if (! IS_BACKREF_NAME_REF(NBREF(node))) 01916 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; 01917 break; 01918 01919 default: 01920 break; 01921 } 01922 01923 return r; 01924 } 01925 01926 static int 01927 disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env) 01928 { 01929 int r, i, pos, counter; 01930 BitStatusType loc; 01931 GroupNumRemap* map; 01932 01933 map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1)); 01934 CHECK_NULL_RETURN_MEMERR(map); 01935 for (i = 1; i <= env->num_mem; i++) { 01936 map[i].new_val = 0; 01937 } 01938 counter = 0; 01939 r = noname_disable_map(root, map, &counter); 01940 if (r != 0) return r; 01941 01942 r = renumber_by_map(*root, map); 01943 if (r != 0) return r; 01944 01945 for (i = 1, pos = 1; i <= env->num_mem; i++) { 01946 if (map[i].new_val > 0) { 01947 SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i]; 01948 pos++; 01949 } 01950 } 01951 01952 loc = env->capture_history; 01953 BIT_STATUS_CLEAR(env->capture_history); 01954 for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) { 01955 if (BIT_STATUS_AT(loc, i)) { 01956 BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val); 01957 } 01958 } 01959 01960 env->num_mem = env->num_named; 01961 reg->num_mem = env->num_named; 01962 01963 return onig_renumber_name_table(reg, map); 01964 } 01965 #endif /* USE_NAMED_GROUP */ 01966 01967 #ifdef USE_SUBEXP_CALL 01968 static int 01969 unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg) 01970 { 01971 int i, offset; 01972 EncloseNode* en; 01973 AbsAddrType addr; 01974 01975 for (i = 0; i < uslist->num; i++) { 01976 en = NENCLOSE(uslist->us[i].target); 01977 if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG; 01978 addr = en->call_addr; 01979 offset = uslist->us[i].offset; 01980 01981 BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR); 01982 } 01983 return 0; 01984 } 01985 #endif 01986 01987 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT 01988 static int 01989 quantifiers_memory_node_info(Node* node) 01990 { 01991 int r = 0; 01992 01993 switch (NTYPE(node)) { 01994 case NT_LIST: 01995 case NT_ALT: 01996 { 01997 int v; 01998 do { 01999 v = quantifiers_memory_node_info(NCAR(node)); 02000 if (v > r) r = v; 02001 } while (v >= 0 && IS_NOT_NULL(node = NCDR(node))); 02002 } 02003 break; 02004 02005 #ifdef USE_SUBEXP_CALL 02006 case NT_CALL: 02007 if (IS_CALL_RECURSION(NCALL(node))) { 02008 return NQ_TARGET_IS_EMPTY_REC; /* tiny version */ 02009 } 02010 else 02011 r = quantifiers_memory_node_info(NCALL(node)->target); 02012 break; 02013 #endif 02014 02015 case NT_QTFR: 02016 { 02017 QtfrNode* qn = NQTFR(node); 02018 if (qn->upper != 0) { 02019 r = quantifiers_memory_node_info(qn->target); 02020 } 02021 } 02022 break; 02023 02024 case NT_ENCLOSE: 02025 { 02026 EncloseNode* en = NENCLOSE(node); 02027 switch (en->type) { 02028 case ENCLOSE_MEMORY: 02029 return NQ_TARGET_IS_EMPTY_MEM; 02030 break; 02031 02032 case ENCLOSE_OPTION: 02033 case ENCLOSE_STOP_BACKTRACK: 02034 r = quantifiers_memory_node_info(en->target); 02035 break; 02036 default: 02037 break; 02038 } 02039 } 02040 break; 02041 02042 case NT_BREF: 02043 case NT_STR: 02044 case NT_CTYPE: 02045 case NT_CCLASS: 02046 case NT_CANY: 02047 case NT_ANCHOR: 02048 default: 02049 break; 02050 } 02051 02052 return r; 02053 } 02054 #endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */ 02055 02056 static int 02057 get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env) 02058 { 02059 OnigDistance tmin; 02060 int r = 0; 02061 02062 *min = 0; 02063 switch (NTYPE(node)) { 02064 case NT_BREF: 02065 { 02066 int i; 02067 int* backs; 02068 Node** nodes = SCANENV_MEM_NODES(env); 02069 BRefNode* br = NBREF(node); 02070 if (br->state & NST_RECURSION) break; 02071 02072 backs = BACKREFS_P(br); 02073 if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF; 02074 r = get_min_match_length(nodes[backs[0]], min, env); 02075 if (r != 0) break; 02076 for (i = 1; i < br->back_num; i++) { 02077 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; 02078 r = get_min_match_length(nodes[backs[i]], &tmin, env); 02079 if (r != 0) break; 02080 if (*min > tmin) *min = tmin; 02081 } 02082 } 02083 break; 02084 02085 #ifdef USE_SUBEXP_CALL 02086 case NT_CALL: 02087 if (IS_CALL_RECURSION(NCALL(node))) { 02088 EncloseNode* en = NENCLOSE(NCALL(node)->target); 02089 if (IS_ENCLOSE_MIN_FIXED(en)) 02090 *min = en->min_len; 02091 } 02092 else 02093 r = get_min_match_length(NCALL(node)->target, min, env); 02094 break; 02095 #endif 02096 02097 case NT_LIST: 02098 do { 02099 r = get_min_match_length(NCAR(node), &tmin, env); 02100 if (r == 0) *min += tmin; 02101 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 02102 break; 02103 02104 case NT_ALT: 02105 { 02106 Node *x, *y; 02107 y = node; 02108 do { 02109 x = NCAR(y); 02110 r = get_min_match_length(x, &tmin, env); 02111 if (r != 0) break; 02112 if (y == node) *min = tmin; 02113 else if (*min > tmin) *min = tmin; 02114 } while (r == 0 && IS_NOT_NULL(y = NCDR(y))); 02115 } 02116 break; 02117 02118 case NT_STR: 02119 { 02120 StrNode* sn = NSTR(node); 02121 *min = sn->end - sn->s; 02122 } 02123 break; 02124 02125 case NT_CTYPE: 02126 *min = 1; 02127 break; 02128 02129 case NT_CCLASS: 02130 case NT_CANY: 02131 *min = 1; 02132 break; 02133 02134 case NT_QTFR: 02135 { 02136 QtfrNode* qn = NQTFR(node); 02137 02138 if (qn->lower > 0) { 02139 r = get_min_match_length(qn->target, min, env); 02140 if (r == 0) 02141 *min = distance_multiply(*min, qn->lower); 02142 } 02143 } 02144 break; 02145 02146 case NT_ENCLOSE: 02147 { 02148 EncloseNode* en = NENCLOSE(node); 02149 switch (en->type) { 02150 case ENCLOSE_MEMORY: 02151 #ifdef USE_SUBEXP_CALL 02152 if (IS_ENCLOSE_MIN_FIXED(en)) 02153 *min = en->min_len; 02154 else { 02155 r = get_min_match_length(en->target, min, env); 02156 if (r == 0) { 02157 en->min_len = *min; 02158 SET_ENCLOSE_STATUS(node, NST_MIN_FIXED); 02159 } 02160 } 02161 break; 02162 #endif 02163 case ENCLOSE_OPTION: 02164 case ENCLOSE_STOP_BACKTRACK: 02165 r = get_min_match_length(en->target, min, env); 02166 break; 02167 } 02168 } 02169 break; 02170 02171 case NT_ANCHOR: 02172 default: 02173 break; 02174 } 02175 02176 return r; 02177 } 02178 02179 static int 02180 get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env) 02181 { 02182 OnigDistance tmax; 02183 int r = 0; 02184 02185 *max = 0; 02186 switch (NTYPE(node)) { 02187 case NT_LIST: 02188 do { 02189 r = get_max_match_length(NCAR(node), &tmax, env); 02190 if (r == 0) 02191 *max = distance_add(*max, tmax); 02192 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 02193 break; 02194 02195 case NT_ALT: 02196 do { 02197 r = get_max_match_length(NCAR(node), &tmax, env); 02198 if (r == 0 && *max < tmax) *max = tmax; 02199 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 02200 break; 02201 02202 case NT_STR: 02203 { 02204 StrNode* sn = NSTR(node); 02205 *max = sn->end - sn->s; 02206 } 02207 break; 02208 02209 case NT_CTYPE: 02210 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc); 02211 break; 02212 02213 case NT_CCLASS: 02214 case NT_CANY: 02215 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc); 02216 break; 02217 02218 case NT_BREF: 02219 { 02220 int i; 02221 int* backs; 02222 Node** nodes = SCANENV_MEM_NODES(env); 02223 BRefNode* br = NBREF(node); 02224 if (br->state & NST_RECURSION) { 02225 *max = ONIG_INFINITE_DISTANCE; 02226 break; 02227 } 02228 backs = BACKREFS_P(br); 02229 for (i = 0; i < br->back_num; i++) { 02230 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; 02231 r = get_max_match_length(nodes[backs[i]], &tmax, env); 02232 if (r != 0) break; 02233 if (*max < tmax) *max = tmax; 02234 } 02235 } 02236 break; 02237 02238 #ifdef USE_SUBEXP_CALL 02239 case NT_CALL: 02240 if (! IS_CALL_RECURSION(NCALL(node))) 02241 r = get_max_match_length(NCALL(node)->target, max, env); 02242 else 02243 *max = ONIG_INFINITE_DISTANCE; 02244 break; 02245 #endif 02246 02247 case NT_QTFR: 02248 { 02249 QtfrNode* qn = NQTFR(node); 02250 02251 if (qn->upper != 0) { 02252 r = get_max_match_length(qn->target, max, env); 02253 if (r == 0 && *max != 0) { 02254 if (! IS_REPEAT_INFINITE(qn->upper)) 02255 *max = distance_multiply(*max, qn->upper); 02256 else 02257 *max = ONIG_INFINITE_DISTANCE; 02258 } 02259 } 02260 } 02261 break; 02262 02263 case NT_ENCLOSE: 02264 { 02265 EncloseNode* en = NENCLOSE(node); 02266 switch (en->type) { 02267 case ENCLOSE_MEMORY: 02268 #ifdef USE_SUBEXP_CALL 02269 if (IS_ENCLOSE_MAX_FIXED(en)) 02270 *max = en->max_len; 02271 else { 02272 r = get_max_match_length(en->target, max, env); 02273 if (r == 0) { 02274 en->max_len = *max; 02275 SET_ENCLOSE_STATUS(node, NST_MAX_FIXED); 02276 } 02277 } 02278 break; 02279 #endif 02280 case ENCLOSE_OPTION: 02281 case ENCLOSE_STOP_BACKTRACK: 02282 r = get_max_match_length(en->target, max, env); 02283 break; 02284 } 02285 } 02286 break; 02287 02288 case NT_ANCHOR: 02289 default: 02290 break; 02291 } 02292 02293 return r; 02294 } 02295 02296 #define GET_CHAR_LEN_VARLEN -1 02297 #define GET_CHAR_LEN_TOP_ALT_VARLEN -2 02298 02299 /* fixed size pattern node only */ 02300 static int 02301 get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) 02302 { 02303 int tlen; 02304 int r = 0; 02305 02306 level++; 02307 *len = 0; 02308 switch (NTYPE(node)) { 02309 case NT_LIST: 02310 do { 02311 r = get_char_length_tree1(NCAR(node), reg, &tlen, level); 02312 if (r == 0) 02313 *len = (int)distance_add(*len, tlen); 02314 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 02315 break; 02316 02317 case NT_ALT: 02318 { 02319 int tlen2; 02320 int varlen = 0; 02321 02322 r = get_char_length_tree1(NCAR(node), reg, &tlen, level); 02323 while (r == 0 && IS_NOT_NULL(node = NCDR(node))) { 02324 r = get_char_length_tree1(NCAR(node), reg, &tlen2, level); 02325 if (r == 0) { 02326 if (tlen != tlen2) 02327 varlen = 1; 02328 } 02329 } 02330 if (r == 0) { 02331 if (varlen != 0) { 02332 if (level == 1) 02333 r = GET_CHAR_LEN_TOP_ALT_VARLEN; 02334 else 02335 r = GET_CHAR_LEN_VARLEN; 02336 } 02337 else 02338 *len = tlen; 02339 } 02340 } 02341 break; 02342 02343 case NT_STR: 02344 { 02345 StrNode* sn = NSTR(node); 02346 UChar *s = sn->s; 02347 while (s < sn->end) { 02348 s += enclen(reg->enc, s, sn->end); 02349 (*len)++; 02350 } 02351 } 02352 break; 02353 02354 case NT_QTFR: 02355 { 02356 QtfrNode* qn = NQTFR(node); 02357 if (qn->lower == qn->upper) { 02358 r = get_char_length_tree1(qn->target, reg, &tlen, level); 02359 if (r == 0) 02360 *len = (int)distance_multiply(tlen, qn->lower); 02361 } 02362 else 02363 r = GET_CHAR_LEN_VARLEN; 02364 } 02365 break; 02366 02367 #ifdef USE_SUBEXP_CALL 02368 case NT_CALL: 02369 if (! IS_CALL_RECURSION(NCALL(node))) 02370 r = get_char_length_tree1(NCALL(node)->target, reg, len, level); 02371 else 02372 r = GET_CHAR_LEN_VARLEN; 02373 break; 02374 #endif 02375 02376 case NT_CTYPE: 02377 *len = 1; 02378 break; 02379 02380 case NT_CCLASS: 02381 case NT_CANY: 02382 *len = 1; 02383 break; 02384 02385 case NT_ENCLOSE: 02386 { 02387 EncloseNode* en = NENCLOSE(node); 02388 switch (en->type) { 02389 case ENCLOSE_MEMORY: 02390 #ifdef USE_SUBEXP_CALL 02391 if (IS_ENCLOSE_CLEN_FIXED(en)) 02392 *len = en->char_len; 02393 else { 02394 r = get_char_length_tree1(en->target, reg, len, level); 02395 if (r == 0) { 02396 en->char_len = *len; 02397 SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED); 02398 } 02399 } 02400 break; 02401 #endif 02402 case ENCLOSE_OPTION: 02403 case ENCLOSE_STOP_BACKTRACK: 02404 r = get_char_length_tree1(en->target, reg, len, level); 02405 break; 02406 default: 02407 break; 02408 } 02409 } 02410 break; 02411 02412 case NT_ANCHOR: 02413 break; 02414 02415 default: 02416 r = GET_CHAR_LEN_VARLEN; 02417 break; 02418 } 02419 02420 return r; 02421 } 02422 02423 static int 02424 get_char_length_tree(Node* node, regex_t* reg, int* len) 02425 { 02426 return get_char_length_tree1(node, reg, len, 0); 02427 } 02428 02429 /* x is not included y ==> 1 : 0 */ 02430 static int 02431 is_not_included(Node* x, Node* y, regex_t* reg) 02432 { 02433 int i; 02434 OnigDistance len; 02435 OnigCodePoint code; 02436 UChar *p, c; 02437 int ytype; 02438 02439 retry: 02440 ytype = NTYPE(y); 02441 switch (NTYPE(x)) { 02442 case NT_CTYPE: 02443 { 02444 switch (ytype) { 02445 case NT_CTYPE: 02446 if (NCTYPE(y)->ctype == NCTYPE(x)->ctype && 02447 NCTYPE(y)->not != NCTYPE(x)->not) 02448 return 1; 02449 else 02450 return 0; 02451 break; 02452 02453 case NT_CCLASS: 02454 swap: 02455 { 02456 Node* tmp; 02457 tmp = x; x = y; y = tmp; 02458 goto retry; 02459 } 02460 break; 02461 02462 case NT_STR: 02463 goto swap; 02464 break; 02465 02466 default: 02467 break; 02468 } 02469 } 02470 break; 02471 02472 case NT_CCLASS: 02473 { 02474 CClassNode* xc = NCCLASS(x); 02475 switch (ytype) { 02476 case NT_CTYPE: 02477 switch (NCTYPE(y)->ctype) { 02478 case ONIGENC_CTYPE_WORD: 02479 if (NCTYPE(y)->not == 0) { 02480 if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) { 02481 for (i = 0; i < SINGLE_BYTE_SIZE; i++) { 02482 if (BITSET_AT(xc->bs, i)) { 02483 if (IS_CODE_SB_WORD(reg->enc, i)) return 0; 02484 } 02485 } 02486 return 1; 02487 } 02488 return 0; 02489 } 02490 else { 02491 for (i = 0; i < SINGLE_BYTE_SIZE; i++) { 02492 if (! IS_CODE_SB_WORD(reg->enc, i)) { 02493 if (!IS_NCCLASS_NOT(xc)) { 02494 if (BITSET_AT(xc->bs, i)) 02495 return 0; 02496 } 02497 else { 02498 if (! BITSET_AT(xc->bs, i)) 02499 return 0; 02500 } 02501 } 02502 } 02503 return 1; 02504 } 02505 break; 02506 02507 default: 02508 break; 02509 } 02510 break; 02511 02512 case NT_CCLASS: 02513 { 02514 int v; 02515 CClassNode* yc = NCCLASS(y); 02516 02517 for (i = 0; i < SINGLE_BYTE_SIZE; i++) { 02518 v = BITSET_AT(xc->bs, i); 02519 if ((v != 0 && !IS_NCCLASS_NOT(xc)) || 02520 (v == 0 && IS_NCCLASS_NOT(xc))) { 02521 v = BITSET_AT(yc->bs, i); 02522 if ((v != 0 && !IS_NCCLASS_NOT(yc)) || 02523 (v == 0 && IS_NCCLASS_NOT(yc))) 02524 return 0; 02525 } 02526 } 02527 if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) || 02528 (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc))) 02529 return 1; 02530 return 0; 02531 } 02532 break; 02533 02534 case NT_STR: 02535 goto swap; 02536 break; 02537 02538 default: 02539 break; 02540 } 02541 } 02542 break; 02543 02544 case NT_STR: 02545 { 02546 StrNode* xs = NSTR(x); 02547 if (NSTRING_LEN(x) == 0) 02548 break; 02549 02550 c = *(xs->s); 02551 switch (ytype) { 02552 case NT_CTYPE: 02553 switch (NCTYPE(y)->ctype) { 02554 case ONIGENC_CTYPE_WORD: 02555 if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end)) 02556 return NCTYPE(y)->not; 02557 else 02558 return !(NCTYPE(y)->not); 02559 break; 02560 default: 02561 break; 02562 } 02563 break; 02564 02565 case NT_CCLASS: 02566 { 02567 CClassNode* cc = NCCLASS(y); 02568 02569 code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s, 02570 xs->s + ONIGENC_MBC_MAXLEN(reg->enc)); 02571 return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1); 02572 } 02573 break; 02574 02575 case NT_STR: 02576 { 02577 UChar *q; 02578 StrNode* ys = NSTR(y); 02579 len = NSTRING_LEN(x); 02580 if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y); 02581 if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) { 02582 /* tiny version */ 02583 return 0; 02584 } 02585 else { 02586 for (i = 0, p = ys->s, q = xs->s; (OnigDistance)i < len; i++, p++, q++) { 02587 if (*p != *q) return 1; 02588 } 02589 } 02590 } 02591 break; 02592 02593 default: 02594 break; 02595 } 02596 } 02597 break; 02598 02599 default: 02600 break; 02601 } 02602 02603 return 0; 02604 } 02605 02606 static Node* 02607 get_head_value_node(Node* node, int exact, regex_t* reg) 02608 { 02609 Node* n = NULL_NODE; 02610 02611 switch (NTYPE(node)) { 02612 case NT_BREF: 02613 case NT_ALT: 02614 case NT_CANY: 02615 #ifdef USE_SUBEXP_CALL 02616 case NT_CALL: 02617 #endif 02618 break; 02619 02620 case NT_CTYPE: 02621 case NT_CCLASS: 02622 if (exact == 0) { 02623 n = node; 02624 } 02625 break; 02626 02627 case NT_LIST: 02628 n = get_head_value_node(NCAR(node), exact, reg); 02629 break; 02630 02631 case NT_STR: 02632 { 02633 StrNode* sn = NSTR(node); 02634 02635 if (sn->end <= sn->s) 02636 break; 02637 02638 if (exact != 0 && 02639 !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) { 02640 } 02641 else { 02642 n = node; 02643 } 02644 } 02645 break; 02646 02647 case NT_QTFR: 02648 { 02649 QtfrNode* qn = NQTFR(node); 02650 if (qn->lower > 0) { 02651 if (IS_NOT_NULL(qn->head_exact)) 02652 n = qn->head_exact; 02653 else 02654 n = get_head_value_node(qn->target, exact, reg); 02655 } 02656 } 02657 break; 02658 02659 case NT_ENCLOSE: 02660 { 02661 EncloseNode* en = NENCLOSE(node); 02662 switch (en->type) { 02663 case ENCLOSE_OPTION: 02664 { 02665 OnigOptionType options = reg->options; 02666 02667 reg->options = NENCLOSE(node)->option; 02668 n = get_head_value_node(NENCLOSE(node)->target, exact, reg); 02669 reg->options = options; 02670 } 02671 break; 02672 02673 case ENCLOSE_MEMORY: 02674 case ENCLOSE_STOP_BACKTRACK: 02675 n = get_head_value_node(en->target, exact, reg); 02676 break; 02677 } 02678 } 02679 break; 02680 02681 case NT_ANCHOR: 02682 if (NANCHOR(node)->type == ANCHOR_PREC_READ) 02683 n = get_head_value_node(NANCHOR(node)->target, exact, reg); 02684 break; 02685 02686 default: 02687 break; 02688 } 02689 02690 return n; 02691 } 02692 02693 static int 02694 check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask) 02695 { 02696 int type, r = 0; 02697 02698 type = NTYPE(node); 02699 if ((NTYPE2BIT(type) & type_mask) == 0) 02700 return 1; 02701 02702 switch (type) { 02703 case NT_LIST: 02704 case NT_ALT: 02705 do { 02706 r = check_type_tree(NCAR(node), type_mask, enclose_mask, 02707 anchor_mask); 02708 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 02709 break; 02710 02711 case NT_QTFR: 02712 r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask, 02713 anchor_mask); 02714 break; 02715 02716 case NT_ENCLOSE: 02717 { 02718 EncloseNode* en = NENCLOSE(node); 02719 if ((en->type & enclose_mask) == 0) 02720 return 1; 02721 02722 r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask); 02723 } 02724 break; 02725 02726 case NT_ANCHOR: 02727 type = NANCHOR(node)->type; 02728 if ((type & anchor_mask) == 0) 02729 return 1; 02730 02731 if (NANCHOR(node)->target) 02732 r = check_type_tree(NANCHOR(node)->target, 02733 type_mask, enclose_mask, anchor_mask); 02734 break; 02735 02736 default: 02737 break; 02738 } 02739 return r; 02740 } 02741 02742 #ifdef USE_SUBEXP_CALL 02743 02744 #define RECURSION_EXIST 1 02745 #define RECURSION_INFINITE 2 02746 02747 static int 02748 subexp_inf_recursive_check(Node* node, ScanEnv* env, int head) 02749 { 02750 int type; 02751 int r = 0; 02752 02753 type = NTYPE(node); 02754 switch (type) { 02755 case NT_LIST: 02756 { 02757 Node *x; 02758 OnigDistance min; 02759 int ret; 02760 02761 x = node; 02762 do { 02763 ret = subexp_inf_recursive_check(NCAR(x), env, head); 02764 if (ret < 0 || ret == RECURSION_INFINITE) return ret; 02765 r |= ret; 02766 if (head) { 02767 ret = get_min_match_length(NCAR(x), &min, env); 02768 if (ret != 0) return ret; 02769 if (min != 0) head = 0; 02770 } 02771 } while (IS_NOT_NULL(x = NCDR(x))); 02772 } 02773 break; 02774 02775 case NT_ALT: 02776 { 02777 int ret; 02778 r = RECURSION_EXIST; 02779 do { 02780 ret = subexp_inf_recursive_check(NCAR(node), env, head); 02781 if (ret < 0 || ret == RECURSION_INFINITE) return ret; 02782 r &= ret; 02783 } while (IS_NOT_NULL(node = NCDR(node))); 02784 } 02785 break; 02786 02787 case NT_QTFR: 02788 r = subexp_inf_recursive_check(NQTFR(node)->target, env, head); 02789 if (r == RECURSION_EXIST) { 02790 if (NQTFR(node)->lower == 0) r = 0; 02791 } 02792 break; 02793 02794 case NT_ANCHOR: 02795 { 02796 AnchorNode* an = NANCHOR(node); 02797 switch (an->type) { 02798 case ANCHOR_PREC_READ: 02799 case ANCHOR_PREC_READ_NOT: 02800 case ANCHOR_LOOK_BEHIND: 02801 case ANCHOR_LOOK_BEHIND_NOT: 02802 r = subexp_inf_recursive_check(an->target, env, head); 02803 break; 02804 } 02805 } 02806 break; 02807 02808 case NT_CALL: 02809 r = subexp_inf_recursive_check(NCALL(node)->target, env, head); 02810 break; 02811 02812 case NT_ENCLOSE: 02813 if (IS_ENCLOSE_MARK2(NENCLOSE(node))) 02814 return 0; 02815 else if (IS_ENCLOSE_MARK1(NENCLOSE(node))) 02816 return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE); 02817 else { 02818 SET_ENCLOSE_STATUS(node, NST_MARK2); 02819 r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head); 02820 CLEAR_ENCLOSE_STATUS(node, NST_MARK2); 02821 } 02822 break; 02823 02824 default: 02825 break; 02826 } 02827 02828 return r; 02829 } 02830 02831 static int 02832 subexp_inf_recursive_check_trav(Node* node, ScanEnv* env) 02833 { 02834 int type; 02835 int r = 0; 02836 02837 type = NTYPE(node); 02838 switch (type) { 02839 case NT_LIST: 02840 case NT_ALT: 02841 do { 02842 r = subexp_inf_recursive_check_trav(NCAR(node), env); 02843 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 02844 break; 02845 02846 case NT_QTFR: 02847 r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env); 02848 break; 02849 02850 case NT_ANCHOR: 02851 { 02852 AnchorNode* an = NANCHOR(node); 02853 switch (an->type) { 02854 case ANCHOR_PREC_READ: 02855 case ANCHOR_PREC_READ_NOT: 02856 case ANCHOR_LOOK_BEHIND: 02857 case ANCHOR_LOOK_BEHIND_NOT: 02858 r = subexp_inf_recursive_check_trav(an->target, env); 02859 break; 02860 } 02861 } 02862 break; 02863 02864 case NT_ENCLOSE: 02865 { 02866 EncloseNode* en = NENCLOSE(node); 02867 02868 if (IS_ENCLOSE_RECURSION(en)) { 02869 SET_ENCLOSE_STATUS(node, NST_MARK1); 02870 r = subexp_inf_recursive_check(en->target, env, 1); 02871 if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION; 02872 CLEAR_ENCLOSE_STATUS(node, NST_MARK1); 02873 } 02874 r = subexp_inf_recursive_check_trav(en->target, env); 02875 } 02876 02877 break; 02878 02879 default: 02880 break; 02881 } 02882 02883 return r; 02884 } 02885 02886 static int 02887 subexp_recursive_check(Node* node) 02888 { 02889 int r = 0; 02890 02891 switch (NTYPE(node)) { 02892 case NT_LIST: 02893 case NT_ALT: 02894 do { 02895 r |= subexp_recursive_check(NCAR(node)); 02896 } while (IS_NOT_NULL(node = NCDR(node))); 02897 break; 02898 02899 case NT_QTFR: 02900 r = subexp_recursive_check(NQTFR(node)->target); 02901 break; 02902 02903 case NT_ANCHOR: 02904 { 02905 AnchorNode* an = NANCHOR(node); 02906 switch (an->type) { 02907 case ANCHOR_PREC_READ: 02908 case ANCHOR_PREC_READ_NOT: 02909 case ANCHOR_LOOK_BEHIND: 02910 case ANCHOR_LOOK_BEHIND_NOT: 02911 r = subexp_recursive_check(an->target); 02912 break; 02913 } 02914 } 02915 break; 02916 02917 case NT_CALL: 02918 r = subexp_recursive_check(NCALL(node)->target); 02919 if (r != 0) SET_CALL_RECURSION(node); 02920 break; 02921 02922 case NT_ENCLOSE: 02923 if (IS_ENCLOSE_MARK2(NENCLOSE(node))) 02924 return 0; 02925 else if (IS_ENCLOSE_MARK1(NENCLOSE(node))) 02926 return 1; /* recursion */ 02927 else { 02928 SET_ENCLOSE_STATUS(node, NST_MARK2); 02929 r = subexp_recursive_check(NENCLOSE(node)->target); 02930 CLEAR_ENCLOSE_STATUS(node, NST_MARK2); 02931 } 02932 break; 02933 02934 default: 02935 break; 02936 } 02937 02938 return r; 02939 } 02940 02941 02942 static int 02943 subexp_recursive_check_trav(Node* node, ScanEnv* env) 02944 { 02945 #define FOUND_CALLED_NODE 1 02946 02947 int type; 02948 int r = 0; 02949 02950 type = NTYPE(node); 02951 switch (type) { 02952 case NT_LIST: 02953 case NT_ALT: 02954 { 02955 int ret; 02956 do { 02957 ret = subexp_recursive_check_trav(NCAR(node), env); 02958 if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE; 02959 else if (ret < 0) return ret; 02960 } while (IS_NOT_NULL(node = NCDR(node))); 02961 } 02962 break; 02963 02964 case NT_QTFR: 02965 r = subexp_recursive_check_trav(NQTFR(node)->target, env); 02966 if (NQTFR(node)->upper == 0) { 02967 if (r == FOUND_CALLED_NODE) 02968 NQTFR(node)->is_refered = 1; 02969 } 02970 break; 02971 02972 case NT_ANCHOR: 02973 { 02974 AnchorNode* an = NANCHOR(node); 02975 switch (an->type) { 02976 case ANCHOR_PREC_READ: 02977 case ANCHOR_PREC_READ_NOT: 02978 case ANCHOR_LOOK_BEHIND: 02979 case ANCHOR_LOOK_BEHIND_NOT: 02980 r = subexp_recursive_check_trav(an->target, env); 02981 break; 02982 } 02983 } 02984 break; 02985 02986 case NT_ENCLOSE: 02987 { 02988 EncloseNode* en = NENCLOSE(node); 02989 02990 if (! IS_ENCLOSE_RECURSION(en)) { 02991 if (IS_ENCLOSE_CALLED(en)) { 02992 SET_ENCLOSE_STATUS(node, NST_MARK1); 02993 r = subexp_recursive_check(en->target); 02994 if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION); 02995 CLEAR_ENCLOSE_STATUS(node, NST_MARK1); 02996 } 02997 } 02998 r = subexp_recursive_check_trav(en->target, env); 02999 if (IS_ENCLOSE_CALLED(en)) 03000 r |= FOUND_CALLED_NODE; 03001 } 03002 break; 03003 03004 default: 03005 break; 03006 } 03007 03008 return r; 03009 } 03010 03011 static int 03012 setup_subexp_call(Node* node, ScanEnv* env) 03013 { 03014 int type; 03015 int r = 0; 03016 03017 type = NTYPE(node); 03018 switch (type) { 03019 case NT_LIST: 03020 do { 03021 r = setup_subexp_call(NCAR(node), env); 03022 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 03023 break; 03024 03025 case NT_ALT: 03026 do { 03027 r = setup_subexp_call(NCAR(node), env); 03028 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 03029 break; 03030 03031 case NT_QTFR: 03032 r = setup_subexp_call(NQTFR(node)->target, env); 03033 break; 03034 case NT_ENCLOSE: 03035 r = setup_subexp_call(NENCLOSE(node)->target, env); 03036 break; 03037 03038 case NT_CALL: 03039 { 03040 CallNode* cn = NCALL(node); 03041 Node** nodes = SCANENV_MEM_NODES(env); 03042 03043 if (cn->group_num != 0) { 03044 int gnum = cn->group_num; 03045 03046 #ifdef USE_NAMED_GROUP 03047 if (env->num_named > 0 && 03048 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && 03049 !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) { 03050 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; 03051 } 03052 #endif 03053 if (gnum > env->num_mem) { 03054 onig_scan_env_set_error_string(env, 03055 ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end); 03056 return ONIGERR_UNDEFINED_GROUP_REFERENCE; 03057 } 03058 03059 #ifdef USE_NAMED_GROUP 03060 set_call_attr: 03061 #endif 03062 cn->target = nodes[cn->group_num]; 03063 if (IS_NULL(cn->target)) { 03064 onig_scan_env_set_error_string(env, 03065 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); 03066 return ONIGERR_UNDEFINED_NAME_REFERENCE; 03067 } 03068 SET_ENCLOSE_STATUS(cn->target, NST_CALLED); 03069 BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num); 03070 cn->unset_addr_list = env->unset_addr_list; 03071 } 03072 #ifdef USE_NAMED_GROUP 03073 else { 03074 int *refs; 03075 03076 int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, 03077 &refs); 03078 if (n <= 0) { 03079 onig_scan_env_set_error_string(env, 03080 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); 03081 return ONIGERR_UNDEFINED_NAME_REFERENCE; 03082 } 03083 else if (n > 1) { 03084 onig_scan_env_set_error_string(env, 03085 ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end); 03086 return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL; 03087 } 03088 else { 03089 cn->group_num = refs[0]; 03090 goto set_call_attr; 03091 } 03092 } 03093 #endif 03094 } 03095 break; 03096 03097 case NT_ANCHOR: 03098 { 03099 AnchorNode* an = NANCHOR(node); 03100 03101 switch (an->type) { 03102 case ANCHOR_PREC_READ: 03103 case ANCHOR_PREC_READ_NOT: 03104 case ANCHOR_LOOK_BEHIND: 03105 case ANCHOR_LOOK_BEHIND_NOT: 03106 r = setup_subexp_call(an->target, env); 03107 break; 03108 } 03109 } 03110 break; 03111 03112 default: 03113 break; 03114 } 03115 03116 return r; 03117 } 03118 #endif 03119 03120 /* divide different length alternatives in look-behind. 03121 (?<=A|B) ==> (?<=A)|(?<=B) 03122 (?<!A|B) ==> (?<!A)(?<!B) 03123 */ 03124 static int 03125 divide_look_behind_alternatives(Node* node) 03126 { 03127 Node *head, *np, *insert_node; 03128 AnchorNode* an = NANCHOR(node); 03129 int anc_type = an->type; 03130 03131 head = an->target; 03132 np = NCAR(head); 03133 swap_node(node, head); 03134 NCAR(node) = head; 03135 NANCHOR(head)->target = np; 03136 03137 np = node; 03138 while ((np = NCDR(np)) != NULL_NODE) { 03139 insert_node = onig_node_new_anchor(anc_type); 03140 CHECK_NULL_RETURN_MEMERR(insert_node); 03141 NANCHOR(insert_node)->target = NCAR(np); 03142 NCAR(np) = insert_node; 03143 } 03144 03145 if (anc_type == ANCHOR_LOOK_BEHIND_NOT) { 03146 np = node; 03147 do { 03148 SET_NTYPE(np, NT_LIST); /* alt -> list */ 03149 } while ((np = NCDR(np)) != NULL_NODE); 03150 } 03151 return 0; 03152 } 03153 03154 static int 03155 setup_look_behind(Node* node, regex_t* reg, ScanEnv* env) 03156 { 03157 int r, len; 03158 AnchorNode* an = NANCHOR(node); 03159 03160 r = get_char_length_tree(an->target, reg, &len); 03161 if (r == 0) 03162 an->char_len = len; 03163 else if (r == GET_CHAR_LEN_VARLEN) 03164 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN; 03165 else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) { 03166 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND)) 03167 r = divide_look_behind_alternatives(node); 03168 else 03169 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN; 03170 } 03171 03172 return r; 03173 } 03174 03175 static int 03176 next_setup(Node* node, Node* next_node, regex_t* reg) 03177 { 03178 int type; 03179 03180 retry: 03181 type = NTYPE(node); 03182 if (type == NT_QTFR) { 03183 QtfrNode* qn = NQTFR(node); 03184 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) { 03185 #ifdef USE_QTFR_PEEK_NEXT 03186 Node* n = get_head_value_node(next_node, 1, reg); 03187 /* '\0': for UTF-16BE etc... */ 03188 if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') { 03189 qn->next_head_exact = n; 03190 } 03191 #endif 03192 /* automatic posseivation a*b ==> (?>a*)b */ 03193 if (qn->lower <= 1) { 03194 int ttype = NTYPE(qn->target); 03195 if (IS_NODE_TYPE_SIMPLE(ttype)) { 03196 Node *x, *y; 03197 x = get_head_value_node(qn->target, 0, reg); 03198 if (IS_NOT_NULL(x)) { 03199 y = get_head_value_node(next_node, 0, reg); 03200 if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) { 03201 Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK); 03202 CHECK_NULL_RETURN_MEMERR(en); 03203 SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT); 03204 swap_node(node, en); 03205 NENCLOSE(node)->target = en; 03206 } 03207 } 03208 } 03209 } 03210 } 03211 } 03212 else if (type == NT_ENCLOSE) { 03213 EncloseNode* en = NENCLOSE(node); 03214 if (en->type == ENCLOSE_MEMORY) { 03215 node = en->target; 03216 goto retry; 03217 } 03218 } 03219 return 0; 03220 } 03221 03222 03223 static int 03224 update_string_node_case_fold(regex_t* reg, Node *node) 03225 { 03226 UChar *p, *q, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; 03227 UChar *sbuf, *ebuf, *sp; 03228 int r, i, len; 03229 OnigDistance sbuf_size; 03230 StrNode* sn = NSTR(node); 03231 03232 end = sn->end; 03233 sbuf_size = (end - sn->s) * 2; 03234 sbuf = (UChar* )xmalloc(sbuf_size); 03235 CHECK_NULL_RETURN_MEMERR(sbuf); 03236 ebuf = sbuf + sbuf_size; 03237 03238 sp = sbuf; 03239 p = sn->s; 03240 while (p < end) { 03241 len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf); 03242 q = buf; 03243 for (i = 0; i < len; i++) { 03244 if (sp >= ebuf) { 03245 sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2); 03246 CHECK_NULL_RETURN_MEMERR(sbuf); 03247 sp = sbuf + sbuf_size; 03248 sbuf_size *= 2; 03249 ebuf = sbuf + sbuf_size; 03250 } 03251 03252 *sp++ = buf[i]; 03253 } 03254 } 03255 03256 r = onig_node_str_set(node, sbuf, sp); 03257 if (r != 0) { 03258 xfree(sbuf); 03259 return r; 03260 } 03261 03262 xfree(sbuf); 03263 return 0; 03264 } 03265 03266 static int 03267 expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end, 03268 regex_t* reg) 03269 { 03270 int r; 03271 Node *node; 03272 03273 node = onig_node_new_str(s, end); 03274 if (IS_NULL(node)) return ONIGERR_MEMORY; 03275 03276 r = update_string_node_case_fold(reg, node); 03277 if (r != 0) { 03278 onig_node_free(node); 03279 return r; 03280 } 03281 03282 NSTRING_SET_AMBIG(node); 03283 NSTRING_SET_DONT_GET_OPT_INFO(node); 03284 *rnode = node; 03285 return 0; 03286 } 03287 03288 static int 03289 expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], 03290 UChar *p, int slen, UChar *end, 03291 regex_t* reg, Node **rnode) 03292 { 03293 int r, i, j, len, varlen; 03294 Node *anode, *var_anode, *snode, *xnode, *an; 03295 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; 03296 03297 *rnode = var_anode = NULL_NODE; 03298 03299 varlen = 0; 03300 for (i = 0; i < item_num; i++) { 03301 if (items[i].byte_len != slen) { 03302 varlen = 1; 03303 break; 03304 } 03305 } 03306 03307 if (varlen != 0) { 03308 *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE); 03309 if (IS_NULL(var_anode)) return ONIGERR_MEMORY; 03310 03311 xnode = onig_node_new_list(NULL, NULL); 03312 if (IS_NULL(xnode)) goto mem_err; 03313 NCAR(var_anode) = xnode; 03314 03315 anode = onig_node_new_alt(NULL_NODE, NULL_NODE); 03316 if (IS_NULL(anode)) goto mem_err; 03317 NCAR(xnode) = anode; 03318 } 03319 else { 03320 *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE); 03321 if (IS_NULL(anode)) return ONIGERR_MEMORY; 03322 } 03323 03324 snode = onig_node_new_str(p, p + slen); 03325 if (IS_NULL(snode)) goto mem_err; 03326 03327 NCAR(anode) = snode; 03328 03329 for (i = 0; i < item_num; i++) { 03330 snode = onig_node_new_str(NULL, NULL); 03331 if (IS_NULL(snode)) goto mem_err; 03332 03333 for (j = 0; j < items[i].code_len; j++) { 03334 len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf); 03335 if (len < 0) { 03336 r = len; 03337 goto mem_err2; 03338 } 03339 03340 r = onig_node_str_cat(snode, buf, buf + len); 03341 if (r != 0) goto mem_err2; 03342 } 03343 03344 an = onig_node_new_alt(NULL_NODE, NULL_NODE); 03345 if (IS_NULL(an)) { 03346 goto mem_err2; 03347 } 03348 03349 if (items[i].byte_len != slen) { 03350 Node *rem; 03351 UChar *q = p + items[i].byte_len; 03352 03353 if (q < end) { 03354 r = expand_case_fold_make_rem_string(&rem, q, end, reg); 03355 if (r != 0) { 03356 onig_node_free(an); 03357 goto mem_err2; 03358 } 03359 03360 xnode = onig_node_list_add(NULL_NODE, snode); 03361 if (IS_NULL(xnode)) { 03362 onig_node_free(an); 03363 onig_node_free(rem); 03364 goto mem_err2; 03365 } 03366 if (IS_NULL(onig_node_list_add(xnode, rem))) { 03367 onig_node_free(an); 03368 onig_node_free(xnode); 03369 onig_node_free(rem); 03370 goto mem_err; 03371 } 03372 03373 NCAR(an) = xnode; 03374 } 03375 else { 03376 NCAR(an) = snode; 03377 } 03378 03379 NCDR(var_anode) = an; 03380 var_anode = an; 03381 } 03382 else { 03383 NCAR(an) = snode; 03384 NCDR(anode) = an; 03385 anode = an; 03386 } 03387 } 03388 03389 return varlen; 03390 03391 mem_err2: 03392 onig_node_free(snode); 03393 03394 mem_err: 03395 onig_node_free(*rnode); 03396 03397 return ONIGERR_MEMORY; 03398 } 03399 03400 static int 03401 expand_case_fold_string(Node* node, regex_t* reg) 03402 { 03403 #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8 03404 03405 int r, n, len, alt_num; 03406 UChar *start, *end, *p; 03407 Node *top_root, *root, *snode, *prev_node; 03408 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; 03409 StrNode* sn = NSTR(node); 03410 03411 if (NSTRING_IS_AMBIG(node)) return 0; 03412 03413 start = sn->s; 03414 end = sn->end; 03415 if (start >= end) return 0; 03416 03417 r = 0; 03418 top_root = root = prev_node = snode = NULL_NODE; 03419 alt_num = 1; 03420 p = start; 03421 while (p < end) { 03422 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag, 03423 p, end, items); 03424 if (n < 0) { 03425 r = n; 03426 goto err; 03427 } 03428 03429 len = enclen(reg->enc, p, end); 03430 03431 if (n == 0) { 03432 if (IS_NULL(snode)) { 03433 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) { 03434 top_root = root = onig_node_list_add(NULL_NODE, prev_node); 03435 if (IS_NULL(root)) { 03436 onig_node_free(prev_node); 03437 goto mem_err; 03438 } 03439 } 03440 03441 prev_node = snode = onig_node_new_str(NULL, NULL); 03442 if (IS_NULL(snode)) goto mem_err; 03443 if (IS_NOT_NULL(root)) { 03444 if (IS_NULL(onig_node_list_add(root, snode))) { 03445 onig_node_free(snode); 03446 goto mem_err; 03447 } 03448 } 03449 } 03450 03451 r = onig_node_str_cat(snode, p, p + len); 03452 if (r != 0) goto err; 03453 } 03454 else { 03455 alt_num *= (n + 1); 03456 if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break; 03457 03458 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) { 03459 top_root = root = onig_node_list_add(NULL_NODE, prev_node); 03460 if (IS_NULL(root)) { 03461 onig_node_free(prev_node); 03462 goto mem_err; 03463 } 03464 } 03465 03466 r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node); 03467 if (r < 0) goto mem_err; 03468 if (r == 1) { 03469 if (IS_NULL(root)) { 03470 top_root = prev_node; 03471 } 03472 else { 03473 if (IS_NULL(onig_node_list_add(root, prev_node))) { 03474 onig_node_free(prev_node); 03475 goto mem_err; 03476 } 03477 } 03478 03479 root = NCAR(prev_node); 03480 } 03481 else { /* r == 0 */ 03482 if (IS_NOT_NULL(root)) { 03483 if (IS_NULL(onig_node_list_add(root, prev_node))) { 03484 onig_node_free(prev_node); 03485 goto mem_err; 03486 } 03487 } 03488 } 03489 03490 snode = NULL_NODE; 03491 } 03492 03493 p += len; 03494 } 03495 03496 if (p < end) { 03497 Node *srem; 03498 03499 r = expand_case_fold_make_rem_string(&srem, p, end, reg); 03500 if (r != 0) goto mem_err; 03501 03502 if (IS_NOT_NULL(prev_node) && IS_NULL(root)) { 03503 top_root = root = onig_node_list_add(NULL_NODE, prev_node); 03504 if (IS_NULL(root)) { 03505 onig_node_free(srem); 03506 onig_node_free(prev_node); 03507 goto mem_err; 03508 } 03509 } 03510 03511 if (IS_NULL(root)) { 03512 prev_node = srem; 03513 } 03514 else { 03515 if (IS_NULL(onig_node_list_add(root, srem))) { 03516 onig_node_free(srem); 03517 goto mem_err; 03518 } 03519 } 03520 } 03521 03522 /* ending */ 03523 top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node); 03524 swap_node(node, top_root); 03525 onig_node_free(top_root); 03526 return 0; 03527 03528 mem_err: 03529 r = ONIGERR_MEMORY; 03530 03531 err: 03532 onig_node_free(top_root); 03533 return r; 03534 } 03535 03536 03537 #ifdef USE_COMBINATION_EXPLOSION_CHECK 03538 03539 #define CEC_THRES_NUM_BIG_REPEAT 512 03540 #define CEC_INFINITE_NUM 0x7fffffff 03541 03542 #define CEC_IN_INFINITE_REPEAT (1<<0) 03543 #define CEC_IN_FINITE_REPEAT (1<<1) 03544 #define CEC_CONT_BIG_REPEAT (1<<2) 03545 03546 static int 03547 setup_comb_exp_check(Node* node, int state, ScanEnv* env) 03548 { 03549 int type; 03550 int r = state; 03551 03552 type = NTYPE(node); 03553 switch (type) { 03554 case NT_LIST: 03555 { 03556 Node* prev = NULL_NODE; 03557 do { 03558 r = setup_comb_exp_check(NCAR(node), r, env); 03559 prev = NCAR(node); 03560 } while (r >= 0 && IS_NOT_NULL(node = NCDR(node))); 03561 } 03562 break; 03563 03564 case NT_ALT: 03565 { 03566 int ret; 03567 do { 03568 ret = setup_comb_exp_check(NCAR(node), state, env); 03569 r |= ret; 03570 } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node))); 03571 } 03572 break; 03573 03574 case NT_QTFR: 03575 { 03576 int child_state = state; 03577 int add_state = 0; 03578 QtfrNode* qn = NQTFR(node); 03579 Node* target = qn->target; 03580 int var_num; 03581 03582 if (! IS_REPEAT_INFINITE(qn->upper)) { 03583 if (qn->upper > 1) { 03584 /* {0,1}, {1,1} are allowed */ 03585 child_state |= CEC_IN_FINITE_REPEAT; 03586 03587 /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */ 03588 if (env->backrefed_mem == 0) { 03589 if (NTYPE(qn->target) == NT_ENCLOSE) { 03590 EncloseNode* en = NENCLOSE(qn->target); 03591 if (en->type == ENCLOSE_MEMORY) { 03592 if (NTYPE(en->target) == NT_QTFR) { 03593 QtfrNode* q = NQTFR(en->target); 03594 if (IS_REPEAT_INFINITE(q->upper) 03595 && q->greedy == qn->greedy) { 03596 qn->upper = (qn->lower == 0 ? 1 : qn->lower); 03597 if (qn->upper == 1) 03598 child_state = state; 03599 } 03600 } 03601 } 03602 } 03603 } 03604 } 03605 } 03606 03607 if (state & CEC_IN_FINITE_REPEAT) { 03608 qn->comb_exp_check_num = -1; 03609 } 03610 else { 03611 if (IS_REPEAT_INFINITE(qn->upper)) { 03612 var_num = CEC_INFINITE_NUM; 03613 child_state |= CEC_IN_INFINITE_REPEAT; 03614 } 03615 else { 03616 var_num = qn->upper - qn->lower; 03617 } 03618 03619 if (var_num >= CEC_THRES_NUM_BIG_REPEAT) 03620 add_state |= CEC_CONT_BIG_REPEAT; 03621 03622 if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) || 03623 ((state & CEC_CONT_BIG_REPEAT) != 0 && 03624 var_num >= CEC_THRES_NUM_BIG_REPEAT)) { 03625 if (qn->comb_exp_check_num == 0) { 03626 env->num_comb_exp_check++; 03627 qn->comb_exp_check_num = env->num_comb_exp_check; 03628 if (env->curr_max_regnum > env->comb_exp_max_regnum) 03629 env->comb_exp_max_regnum = env->curr_max_regnum; 03630 } 03631 } 03632 } 03633 03634 r = setup_comb_exp_check(target, child_state, env); 03635 r |= add_state; 03636 } 03637 break; 03638 03639 case NT_ENCLOSE: 03640 { 03641 EncloseNode* en = NENCLOSE(node); 03642 03643 switch (en->type) { 03644 case ENCLOSE_MEMORY: 03645 { 03646 if (env->curr_max_regnum < en->regnum) 03647 env->curr_max_regnum = en->regnum; 03648 03649 r = setup_comb_exp_check(en->target, state, env); 03650 } 03651 break; 03652 03653 default: 03654 r = setup_comb_exp_check(en->target, state, env); 03655 break; 03656 } 03657 } 03658 break; 03659 03660 #ifdef USE_SUBEXP_CALL 03661 case NT_CALL: 03662 if (IS_CALL_RECURSION(NCALL(node))) 03663 env->has_recursion = 1; 03664 else 03665 r = setup_comb_exp_check(NCALL(node)->target, state, env); 03666 break; 03667 #endif 03668 03669 default: 03670 break; 03671 } 03672 03673 return r; 03674 } 03675 #endif 03676 03677 #define IN_ALT (1<<0) 03678 #define IN_NOT (1<<1) 03679 #define IN_REPEAT (1<<2) 03680 #define IN_VAR_REPEAT (1<<3) 03681 03682 /* setup_tree does the following work. 03683 1. check empty loop. (set qn->target_empty_info) 03684 2. expand ignore-case in char class. 03685 3. set memory status bit flags. (reg->mem_stats) 03686 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact]. 03687 5. find invalid patterns in look-behind. 03688 6. expand repeated string. 03689 */ 03690 static int 03691 setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) 03692 { 03693 int type; 03694 int r = 0; 03695 03696 restart: 03697 type = NTYPE(node); 03698 switch (type) { 03699 case NT_LIST: 03700 { 03701 Node* prev = NULL_NODE; 03702 do { 03703 r = setup_tree(NCAR(node), reg, state, env); 03704 if (IS_NOT_NULL(prev) && r == 0) { 03705 r = next_setup(prev, NCAR(node), reg); 03706 } 03707 prev = NCAR(node); 03708 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 03709 } 03710 break; 03711 03712 case NT_ALT: 03713 do { 03714 r = setup_tree(NCAR(node), reg, (state | IN_ALT), env); 03715 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 03716 break; 03717 03718 case NT_CCLASS: 03719 break; 03720 03721 case NT_STR: 03722 if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) { 03723 r = expand_case_fold_string(node, reg); 03724 } 03725 break; 03726 03727 case NT_CTYPE: 03728 case NT_CANY: 03729 break; 03730 03731 #ifdef USE_SUBEXP_CALL 03732 case NT_CALL: 03733 break; 03734 #endif 03735 03736 case NT_BREF: 03737 { 03738 int i; 03739 int* p; 03740 Node** nodes = SCANENV_MEM_NODES(env); 03741 BRefNode* br = NBREF(node); 03742 p = BACKREFS_P(br); 03743 for (i = 0; i < br->back_num; i++) { 03744 if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; 03745 BIT_STATUS_ON_AT(env->backrefed_mem, p[i]); 03746 BIT_STATUS_ON_AT(env->bt_mem_start, p[i]); 03747 #ifdef USE_BACKREF_WITH_LEVEL 03748 if (IS_BACKREF_NEST_LEVEL(br)) { 03749 BIT_STATUS_ON_AT(env->bt_mem_end, p[i]); 03750 } 03751 #endif 03752 SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED); 03753 } 03754 } 03755 break; 03756 03757 case NT_QTFR: 03758 { 03759 OnigDistance d; 03760 QtfrNode* qn = NQTFR(node); 03761 Node* target = qn->target; 03762 03763 if ((state & IN_REPEAT) != 0) { 03764 qn->state |= NST_IN_REPEAT; 03765 } 03766 03767 if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) { 03768 r = get_min_match_length(target, &d, env); 03769 if (r) break; 03770 if (d == 0) { 03771 qn->target_empty_info = NQ_TARGET_IS_EMPTY; 03772 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT 03773 r = quantifiers_memory_node_info(target); 03774 if (r < 0) break; 03775 if (r > 0) { 03776 qn->target_empty_info = r; 03777 } 03778 #endif 03779 #if 0 03780 r = get_max_match_length(target, &d, env); 03781 if (r == 0 && d == 0) { 03782 /* ()* ==> ()?, ()+ ==> () */ 03783 qn->upper = 1; 03784 if (qn->lower > 1) qn->lower = 1; 03785 if (NTYPE(target) == NT_STR) { 03786 qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */ 03787 } 03788 } 03789 #endif 03790 } 03791 } 03792 03793 state |= IN_REPEAT; 03794 if (qn->lower != qn->upper) 03795 state |= IN_VAR_REPEAT; 03796 r = setup_tree(target, reg, state, env); 03797 if (r) break; 03798 03799 /* expand string */ 03800 #define EXPAND_STRING_MAX_LENGTH 100 03801 if (NTYPE(target) == NT_STR) { 03802 if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper && 03803 qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) { 03804 OnigDistance len = NSTRING_LEN(target); 03805 StrNode* sn = NSTR(target); 03806 03807 if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) { 03808 int i, n = qn->lower; 03809 onig_node_conv_to_str_node(node, NSTR(target)->flag); 03810 for (i = 0; i < n; i++) { 03811 r = onig_node_str_cat(node, sn->s, sn->end); 03812 if (r) break; 03813 } 03814 onig_node_free(target); 03815 break; /* break case NT_QTFR: */ 03816 } 03817 } 03818 } 03819 03820 #ifdef USE_OP_PUSH_OR_JUMP_EXACT 03821 if (qn->greedy && (qn->target_empty_info != 0)) { 03822 if (NTYPE(target) == NT_QTFR) { 03823 QtfrNode* tqn = NQTFR(target); 03824 if (IS_NOT_NULL(tqn->head_exact)) { 03825 qn->head_exact = tqn->head_exact; 03826 tqn->head_exact = NULL; 03827 } 03828 } 03829 else { 03830 qn->head_exact = get_head_value_node(qn->target, 1, reg); 03831 } 03832 } 03833 #endif 03834 } 03835 break; 03836 03837 case NT_ENCLOSE: 03838 { 03839 EncloseNode* en = NENCLOSE(node); 03840 03841 switch (en->type) { 03842 case ENCLOSE_OPTION: 03843 { 03844 OnigOptionType options = reg->options; 03845 reg->options = NENCLOSE(node)->option; 03846 r = setup_tree(NENCLOSE(node)->target, reg, state, env); 03847 reg->options = options; 03848 } 03849 break; 03850 03851 case ENCLOSE_MEMORY: 03852 if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) { 03853 BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum); 03854 /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */ 03855 } 03856 r = setup_tree(en->target, reg, state, env); 03857 break; 03858 03859 case ENCLOSE_STOP_BACKTRACK: 03860 { 03861 Node* target = en->target; 03862 r = setup_tree(target, reg, state, env); 03863 if (NTYPE(target) == NT_QTFR) { 03864 QtfrNode* tqn = NQTFR(target); 03865 if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 && 03866 tqn->greedy != 0) { /* (?>a*), a*+ etc... */ 03867 int qtype = NTYPE(tqn->target); 03868 if (IS_NODE_TYPE_SIMPLE(qtype)) 03869 SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT); 03870 } 03871 } 03872 } 03873 break; 03874 } 03875 } 03876 break; 03877 03878 case NT_ANCHOR: 03879 { 03880 AnchorNode* an = NANCHOR(node); 03881 03882 switch (an->type) { 03883 case ANCHOR_PREC_READ: 03884 r = setup_tree(an->target, reg, state, env); 03885 break; 03886 case ANCHOR_PREC_READ_NOT: 03887 r = setup_tree(an->target, reg, (state | IN_NOT), env); 03888 break; 03889 03890 /* allowed node types in look-behind */ 03891 #define ALLOWED_TYPE_IN_LB \ 03892 ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \ 03893 BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL ) 03894 03895 #define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY ) 03896 #define ALLOWED_ENCLOSE_IN_LB_NOT 0 03897 03898 #define ALLOWED_ANCHOR_IN_LB \ 03899 ( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION ) 03900 #define ALLOWED_ANCHOR_IN_LB_NOT \ 03901 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION ) 03902 03903 case ANCHOR_LOOK_BEHIND: 03904 { 03905 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB, 03906 ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB); 03907 if (r < 0) return r; 03908 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; 03909 r = setup_look_behind(node, reg, env); 03910 if (r != 0) return r; 03911 if (NTYPE(node) != NT_ANCHOR) goto restart; 03912 r = setup_tree(an->target, reg, state, env); 03913 } 03914 break; 03915 03916 case ANCHOR_LOOK_BEHIND_NOT: 03917 { 03918 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB, 03919 ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT); 03920 if (r < 0) return r; 03921 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; 03922 r = setup_look_behind(node, reg, env); 03923 if (r != 0) return r; 03924 if (NTYPE(node) != NT_ANCHOR) goto restart; 03925 r = setup_tree(an->target, reg, (state | IN_NOT), env); 03926 } 03927 break; 03928 } 03929 } 03930 break; 03931 03932 default: 03933 break; 03934 } 03935 03936 return r; 03937 } 03938 03939 /* set skip map for Boyer-Moor search */ 03940 static int 03941 set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, 03942 UChar skip[], int** int_skip) 03943 { 03944 OnigDistance i, len; 03945 03946 len = end - s; 03947 if (len < ONIG_CHAR_TABLE_SIZE) { 03948 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = len; 03949 03950 for (i = 0; i < len - 1; i++) 03951 skip[s[i]] = len - 1 - i; 03952 } 03953 else { 03954 if (IS_NULL(*int_skip)) { 03955 *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE); 03956 if (IS_NULL(*int_skip)) return ONIGERR_MEMORY; 03957 } 03958 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int)len; 03959 03960 for (i = 0; i < len - 1; i++) 03961 (*int_skip)[s[i]] = (int)(len - 1 - i); 03962 } 03963 return 0; 03964 } 03965 03966 #define OPT_EXACT_MAXLEN 24 03967 03968 typedef struct { 03969 OnigDistance min; /* min byte length */ 03970 OnigDistance max; /* max byte length */ 03971 } MinMaxLen; 03972 03973 typedef struct { 03974 MinMaxLen mmd; 03975 OnigEncoding enc; 03976 OnigOptionType options; 03977 OnigCaseFoldType case_fold_flag; 03978 ScanEnv* scan_env; 03979 } OptEnv; 03980 03981 typedef struct { 03982 int left_anchor; 03983 int right_anchor; 03984 } OptAncInfo; 03985 03986 typedef struct { 03987 MinMaxLen mmd; /* info position */ 03988 OptAncInfo anc; 03989 03990 int reach_end; 03991 int ignore_case; 03992 int len; 03993 UChar s[OPT_EXACT_MAXLEN]; 03994 } OptExactInfo; 03995 03996 typedef struct { 03997 MinMaxLen mmd; /* info position */ 03998 OptAncInfo anc; 03999 04000 int value; /* weighted value */ 04001 UChar map[ONIG_CHAR_TABLE_SIZE]; 04002 } OptMapInfo; 04003 04004 typedef struct { 04005 MinMaxLen len; 04006 04007 OptAncInfo anc; 04008 OptExactInfo exb; /* boundary */ 04009 OptExactInfo exm; /* middle */ 04010 OptExactInfo expr; /* prec read (?=...) */ 04011 04012 OptMapInfo map; /* boundary */ 04013 } NodeOptInfo; 04014 04015 04016 static int 04017 map_position_value(OnigEncoding enc, int i) 04018 { 04019 static const short int ByteValTable[] = { 04020 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1, 04021 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 04022 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 04023 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 04024 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 04025 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5, 04026 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 04027 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1 04028 }; 04029 04030 if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) { 04031 if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1) 04032 return 20; 04033 else 04034 return (int )ByteValTable[i]; 04035 } 04036 else 04037 return 4; /* Take it easy. */ 04038 } 04039 04040 static int 04041 distance_value(MinMaxLen* mm) 04042 { 04043 /* 1000 / (min-max-dist + 1) */ 04044 static const short int dist_vals[] = { 04045 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100, 04046 91, 83, 77, 71, 67, 63, 59, 56, 53, 50, 04047 48, 45, 43, 42, 40, 38, 37, 36, 34, 33, 04048 32, 31, 30, 29, 29, 28, 27, 26, 26, 25, 04049 24, 24, 23, 23, 22, 22, 21, 21, 20, 20, 04050 20, 19, 19, 19, 18, 18, 18, 17, 17, 17, 04051 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, 04052 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, 04053 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, 04054 11, 11, 11, 11, 11, 10, 10, 10, 10, 10 04055 }; 04056 04057 OnigDistance d; 04058 04059 if (mm->max == ONIG_INFINITE_DISTANCE) return 0; 04060 04061 d = mm->max - mm->min; 04062 if (d < sizeof(dist_vals)/sizeof(dist_vals[0])) 04063 /* return dist_vals[d] * 16 / (mm->min + 12); */ 04064 return (int )dist_vals[d]; 04065 else 04066 return 1; 04067 } 04068 04069 static int 04070 comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2) 04071 { 04072 if (v2 <= 0) return -1; 04073 if (v1 <= 0) return 1; 04074 04075 v1 *= distance_value(d1); 04076 v2 *= distance_value(d2); 04077 04078 if (v2 > v1) return 1; 04079 if (v2 < v1) return -1; 04080 04081 if (d2->min < d1->min) return 1; 04082 if (d2->min > d1->min) return -1; 04083 return 0; 04084 } 04085 04086 static int 04087 is_equal_mml(MinMaxLen* a, MinMaxLen* b) 04088 { 04089 return (a->min == b->min && a->max == b->max) ? 1 : 0; 04090 } 04091 04092 04093 static void 04094 set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max) 04095 { 04096 mml->min = min; 04097 mml->max = max; 04098 } 04099 04100 static void 04101 clear_mml(MinMaxLen* mml) 04102 { 04103 mml->min = mml->max = 0; 04104 } 04105 04106 static void 04107 copy_mml(MinMaxLen* to, MinMaxLen* from) 04108 { 04109 to->min = from->min; 04110 to->max = from->max; 04111 } 04112 04113 static void 04114 add_mml(MinMaxLen* to, MinMaxLen* from) 04115 { 04116 to->min = distance_add(to->min, from->min); 04117 to->max = distance_add(to->max, from->max); 04118 } 04119 04120 #if 0 04121 static void 04122 add_len_mml(MinMaxLen* to, OnigDistance len) 04123 { 04124 to->min = distance_add(to->min, len); 04125 to->max = distance_add(to->max, len); 04126 } 04127 #endif 04128 04129 static void 04130 alt_merge_mml(MinMaxLen* to, MinMaxLen* from) 04131 { 04132 if (to->min > from->min) to->min = from->min; 04133 if (to->max < from->max) to->max = from->max; 04134 } 04135 04136 static void 04137 copy_opt_env(OptEnv* to, OptEnv* from) 04138 { 04139 *to = *from; 04140 } 04141 04142 static void 04143 clear_opt_anc_info(OptAncInfo* anc) 04144 { 04145 anc->left_anchor = 0; 04146 anc->right_anchor = 0; 04147 } 04148 04149 static void 04150 copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from) 04151 { 04152 *to = *from; 04153 } 04154 04155 static void 04156 concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right, 04157 OnigDistance left_len, OnigDistance right_len) 04158 { 04159 clear_opt_anc_info(to); 04160 04161 to->left_anchor = left->left_anchor; 04162 if (left_len == 0) { 04163 to->left_anchor |= right->left_anchor; 04164 } 04165 04166 to->right_anchor = right->right_anchor; 04167 if (right_len == 0) { 04168 to->right_anchor |= left->right_anchor; 04169 } 04170 } 04171 04172 static int 04173 is_left_anchor(int anc) 04174 { 04175 if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF || 04176 anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ || 04177 anc == ANCHOR_PREC_READ_NOT) 04178 return 0; 04179 04180 return 1; 04181 } 04182 04183 static int 04184 is_set_opt_anc_info(OptAncInfo* to, int anc) 04185 { 04186 if ((to->left_anchor & anc) != 0) return 1; 04187 04188 return ((to->right_anchor & anc) != 0 ? 1 : 0); 04189 } 04190 04191 static void 04192 add_opt_anc_info(OptAncInfo* to, int anc) 04193 { 04194 if (is_left_anchor(anc)) 04195 to->left_anchor |= anc; 04196 else 04197 to->right_anchor |= anc; 04198 } 04199 04200 static void 04201 remove_opt_anc_info(OptAncInfo* to, int anc) 04202 { 04203 if (is_left_anchor(anc)) 04204 to->left_anchor &= ~anc; 04205 else 04206 to->right_anchor &= ~anc; 04207 } 04208 04209 static void 04210 alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add) 04211 { 04212 to->left_anchor &= add->left_anchor; 04213 to->right_anchor &= add->right_anchor; 04214 } 04215 04216 static int 04217 is_full_opt_exact_info(OptExactInfo* ex) 04218 { 04219 return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0); 04220 } 04221 04222 static void 04223 clear_opt_exact_info(OptExactInfo* ex) 04224 { 04225 clear_mml(&ex->mmd); 04226 clear_opt_anc_info(&ex->anc); 04227 ex->reach_end = 0; 04228 ex->ignore_case = 0; 04229 ex->len = 0; 04230 ex->s[0] = '\0'; 04231 } 04232 04233 static void 04234 copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from) 04235 { 04236 *to = *from; 04237 } 04238 04239 static void 04240 concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc) 04241 { 04242 int i, j, len; 04243 UChar *p, *end; 04244 OptAncInfo tanc; 04245 04246 if (! to->ignore_case && add->ignore_case) { 04247 if (to->len >= add->len) return ; /* avoid */ 04248 04249 to->ignore_case = 1; 04250 } 04251 04252 p = add->s; 04253 end = p + add->len; 04254 for (i = to->len; p < end; ) { 04255 len = enclen(enc, p, end); 04256 if (i + len > OPT_EXACT_MAXLEN) break; 04257 for (j = 0; j < len && p < end; j++) 04258 to->s[i++] = *p++; 04259 } 04260 04261 to->len = i; 04262 to->reach_end = (p == end ? add->reach_end : 0); 04263 04264 concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1); 04265 if (! to->reach_end) tanc.right_anchor = 0; 04266 copy_opt_anc_info(&to->anc, &tanc); 04267 } 04268 04269 static void 04270 concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end, 04271 int raw ARG_UNUSED, OnigEncoding enc) 04272 { 04273 int i, j, len; 04274 UChar *p; 04275 04276 for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) { 04277 len = enclen(enc, p, end); 04278 if (i + len > OPT_EXACT_MAXLEN) break; 04279 for (j = 0; j < len && p < end; j++) 04280 to->s[i++] = *p++; 04281 } 04282 04283 to->len = i; 04284 } 04285 04286 static void 04287 alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env) 04288 { 04289 int i, j, len; 04290 04291 if (add->len == 0 || to->len == 0) { 04292 clear_opt_exact_info(to); 04293 return ; 04294 } 04295 04296 if (! is_equal_mml(&to->mmd, &add->mmd)) { 04297 clear_opt_exact_info(to); 04298 return ; 04299 } 04300 04301 for (i = 0; i < to->len && i < add->len; ) { 04302 if (to->s[i] != add->s[i]) break; 04303 len = enclen(env->enc, to->s + i, to->s + to->len); 04304 04305 for (j = 1; j < len; j++) { 04306 if (to->s[i+j] != add->s[i+j]) break; 04307 } 04308 if (j < len) break; 04309 i += len; 04310 } 04311 04312 if (! add->reach_end || i < add->len || i < to->len) { 04313 to->reach_end = 0; 04314 } 04315 to->len = i; 04316 to->ignore_case |= add->ignore_case; 04317 04318 alt_merge_opt_anc_info(&to->anc, &add->anc); 04319 if (! to->reach_end) to->anc.right_anchor = 0; 04320 } 04321 04322 static void 04323 select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt) 04324 { 04325 int v1, v2; 04326 04327 v1 = now->len; 04328 v2 = alt->len; 04329 04330 if (v2 == 0) { 04331 return ; 04332 } 04333 else if (v1 == 0) { 04334 copy_opt_exact_info(now, alt); 04335 return ; 04336 } 04337 else if (v1 <= 2 && v2 <= 2) { 04338 /* ByteValTable[x] is big value --> low price */ 04339 v2 = map_position_value(enc, now->s[0]); 04340 v1 = map_position_value(enc, alt->s[0]); 04341 04342 if (now->len > 1) v1 += 5; 04343 if (alt->len > 1) v2 += 5; 04344 } 04345 04346 if (now->ignore_case == 0) v1 *= 2; 04347 if (alt->ignore_case == 0) v2 *= 2; 04348 04349 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0) 04350 copy_opt_exact_info(now, alt); 04351 } 04352 04353 static void 04354 clear_opt_map_info(OptMapInfo* map) 04355 { 04356 static const OptMapInfo clean_info = { 04357 {0, 0}, {0, 0}, 0, 04358 { 04359 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04360 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04361 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04362 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04363 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04364 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04365 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04366 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04367 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04368 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04369 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04370 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04371 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04372 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04373 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04374 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 04375 } 04376 }; 04377 04378 xmemcpy(map, &clean_info, sizeof(OptMapInfo)); 04379 } 04380 04381 static void 04382 copy_opt_map_info(OptMapInfo* to, OptMapInfo* from) 04383 { 04384 *to = *from; 04385 } 04386 04387 static void 04388 add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc) 04389 { 04390 if (map->map[c] == 0) { 04391 map->map[c] = 1; 04392 map->value += map_position_value(enc, c); 04393 } 04394 } 04395 04396 static int 04397 add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end, 04398 OnigEncoding enc, OnigCaseFoldType case_fold_flag) 04399 { 04400 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; 04401 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; 04402 int i, n; 04403 04404 add_char_opt_map_info(map, p[0], enc); 04405 04406 case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag); 04407 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items); 04408 if (n < 0) return n; 04409 04410 for (i = 0; i < n; i++) { 04411 ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf); 04412 add_char_opt_map_info(map, buf[0], enc); 04413 } 04414 04415 return 0; 04416 } 04417 04418 static void 04419 select_opt_map_info(OptMapInfo* now, OptMapInfo* alt) 04420 { 04421 const int z = 1<<15; /* 32768: something big value */ 04422 04423 int v1, v2; 04424 04425 if (alt->value == 0) return ; 04426 if (now->value == 0) { 04427 copy_opt_map_info(now, alt); 04428 return ; 04429 } 04430 04431 v1 = z / now->value; 04432 v2 = z / alt->value; 04433 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0) 04434 copy_opt_map_info(now, alt); 04435 } 04436 04437 static int 04438 comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m) 04439 { 04440 #define COMP_EM_BASE 20 04441 int ve, vm; 04442 04443 if (m->value <= 0) return -1; 04444 04445 ve = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2); 04446 vm = COMP_EM_BASE * 5 * 2 / m->value; 04447 return comp_distance_value(&e->mmd, &m->mmd, ve, vm); 04448 } 04449 04450 static void 04451 alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add) 04452 { 04453 int i, val; 04454 04455 /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */ 04456 if (to->value == 0) return ; 04457 if (add->value == 0 || to->mmd.max < add->mmd.min) { 04458 clear_opt_map_info(to); 04459 return ; 04460 } 04461 04462 alt_merge_mml(&to->mmd, &add->mmd); 04463 04464 val = 0; 04465 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) { 04466 if (add->map[i]) 04467 to->map[i] = 1; 04468 04469 if (to->map[i]) 04470 val += map_position_value(enc, i); 04471 } 04472 to->value = val; 04473 04474 alt_merge_opt_anc_info(&to->anc, &add->anc); 04475 } 04476 04477 static void 04478 set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd) 04479 { 04480 copy_mml(&(opt->exb.mmd), mmd); 04481 copy_mml(&(opt->expr.mmd), mmd); 04482 copy_mml(&(opt->map.mmd), mmd); 04483 } 04484 04485 static void 04486 clear_node_opt_info(NodeOptInfo* opt) 04487 { 04488 clear_mml(&opt->len); 04489 clear_opt_anc_info(&opt->anc); 04490 clear_opt_exact_info(&opt->exb); 04491 clear_opt_exact_info(&opt->exm); 04492 clear_opt_exact_info(&opt->expr); 04493 clear_opt_map_info(&opt->map); 04494 } 04495 04496 static void 04497 copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from) 04498 { 04499 *to = *from; 04500 } 04501 04502 static void 04503 concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add) 04504 { 04505 int exb_reach, exm_reach; 04506 OptAncInfo tanc; 04507 04508 concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max); 04509 copy_opt_anc_info(&to->anc, &tanc); 04510 04511 if (add->exb.len > 0 && to->len.max == 0) { 04512 concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc, 04513 to->len.max, add->len.max); 04514 copy_opt_anc_info(&add->exb.anc, &tanc); 04515 } 04516 04517 if (add->map.value > 0 && to->len.max == 0) { 04518 if (add->map.mmd.max == 0) 04519 add->map.anc.left_anchor |= to->anc.left_anchor; 04520 } 04521 04522 exb_reach = to->exb.reach_end; 04523 exm_reach = to->exm.reach_end; 04524 04525 if (add->len.max != 0) 04526 to->exb.reach_end = to->exm.reach_end = 0; 04527 04528 if (add->exb.len > 0) { 04529 if (exb_reach) { 04530 concat_opt_exact_info(&to->exb, &add->exb, enc); 04531 clear_opt_exact_info(&add->exb); 04532 } 04533 else if (exm_reach) { 04534 concat_opt_exact_info(&to->exm, &add->exb, enc); 04535 clear_opt_exact_info(&add->exb); 04536 } 04537 } 04538 select_opt_exact_info(enc, &to->exm, &add->exb); 04539 select_opt_exact_info(enc, &to->exm, &add->exm); 04540 04541 if (to->expr.len > 0) { 04542 if (add->len.max > 0) { 04543 if (to->expr.len > (int )add->len.max) 04544 to->expr.len = (int)add->len.max; 04545 04546 if (to->expr.mmd.max == 0) 04547 select_opt_exact_info(enc, &to->exb, &to->expr); 04548 else 04549 select_opt_exact_info(enc, &to->exm, &to->expr); 04550 } 04551 } 04552 else if (add->expr.len > 0) { 04553 copy_opt_exact_info(&to->expr, &add->expr); 04554 } 04555 04556 select_opt_map_info(&to->map, &add->map); 04557 04558 add_mml(&to->len, &add->len); 04559 } 04560 04561 static void 04562 alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env) 04563 { 04564 alt_merge_opt_anc_info (&to->anc, &add->anc); 04565 alt_merge_opt_exact_info(&to->exb, &add->exb, env); 04566 alt_merge_opt_exact_info(&to->exm, &add->exm, env); 04567 alt_merge_opt_exact_info(&to->expr, &add->expr, env); 04568 alt_merge_opt_map_info(env->enc, &to->map, &add->map); 04569 04570 alt_merge_mml(&to->len, &add->len); 04571 } 04572 04573 04574 #define MAX_NODE_OPT_INFO_REF_COUNT 5 04575 04576 static int 04577 optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) 04578 { 04579 int type; 04580 int r = 0; 04581 04582 clear_node_opt_info(opt); 04583 set_bound_node_opt_info(opt, &env->mmd); 04584 04585 type = NTYPE(node); 04586 switch (type) { 04587 case NT_LIST: 04588 { 04589 OptEnv nenv; 04590 NodeOptInfo nopt; 04591 Node* nd = node; 04592 04593 copy_opt_env(&nenv, env); 04594 do { 04595 r = optimize_node_left(NCAR(nd), &nopt, &nenv); 04596 if (r == 0) { 04597 add_mml(&nenv.mmd, &nopt.len); 04598 concat_left_node_opt_info(env->enc, opt, &nopt); 04599 } 04600 } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd))); 04601 } 04602 break; 04603 04604 case NT_ALT: 04605 { 04606 NodeOptInfo nopt; 04607 Node* nd = node; 04608 04609 do { 04610 r = optimize_node_left(NCAR(nd), &nopt, env); 04611 if (r == 0) { 04612 if (nd == node) copy_node_opt_info(opt, &nopt); 04613 else alt_merge_node_opt_info(opt, &nopt, env); 04614 } 04615 } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd))); 04616 } 04617 break; 04618 04619 case NT_STR: 04620 { 04621 StrNode* sn = NSTR(node); 04622 OnigDistance slen = sn->end - sn->s; 04623 int is_raw = NSTRING_IS_RAW(node); 04624 04625 if (! NSTRING_IS_AMBIG(node)) { 04626 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, 04627 NSTRING_IS_RAW(node), env->enc); 04628 if (slen > 0) { 04629 add_char_opt_map_info(&opt->map, *(sn->s), env->enc); 04630 } 04631 set_mml(&opt->len, slen, slen); 04632 } 04633 else { 04634 OnigDistance max; 04635 04636 if (NSTRING_IS_DONT_GET_OPT_INFO(node)) { 04637 int n = onigenc_strlen(env->enc, sn->s, sn->end); 04638 max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n; 04639 } 04640 else { 04641 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, 04642 is_raw, env->enc); 04643 opt->exb.ignore_case = 1; 04644 04645 if (slen > 0) { 04646 r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end, 04647 env->enc, env->case_fold_flag); 04648 if (r != 0) break; 04649 } 04650 04651 max = slen; 04652 } 04653 04654 set_mml(&opt->len, slen, max); 04655 } 04656 04657 if ((OnigDistance)opt->exb.len == slen) 04658 opt->exb.reach_end = 1; 04659 } 04660 break; 04661 04662 case NT_CCLASS: 04663 { 04664 int i, z; 04665 CClassNode* cc = NCCLASS(node); 04666 04667 /* no need to check ignore case. (setted in setup_tree()) */ 04668 04669 if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) { 04670 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc); 04671 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc); 04672 04673 set_mml(&opt->len, min, max); 04674 } 04675 else { 04676 for (i = 0; i < SINGLE_BYTE_SIZE; i++) { 04677 z = BITSET_AT(cc->bs, i); 04678 if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) { 04679 add_char_opt_map_info(&opt->map, (UChar )i, env->enc); 04680 } 04681 } 04682 set_mml(&opt->len, 1, 1); 04683 } 04684 } 04685 break; 04686 04687 case NT_CTYPE: 04688 { 04689 int i, min, max; 04690 04691 max = ONIGENC_MBC_MAXLEN_DIST(env->enc); 04692 04693 if (max == 1) { 04694 min = 1; 04695 04696 switch (NCTYPE(node)->ctype) { 04697 case ONIGENC_CTYPE_WORD: 04698 if (NCTYPE(node)->not != 0) { 04699 for (i = 0; i < SINGLE_BYTE_SIZE; i++) { 04700 if (! ONIGENC_IS_CODE_WORD(env->enc, i)) { 04701 add_char_opt_map_info(&opt->map, (UChar )i, env->enc); 04702 } 04703 } 04704 } 04705 else { 04706 for (i = 0; i < SINGLE_BYTE_SIZE; i++) { 04707 if (ONIGENC_IS_CODE_WORD(env->enc, i)) { 04708 add_char_opt_map_info(&opt->map, (UChar )i, env->enc); 04709 } 04710 } 04711 } 04712 break; 04713 } 04714 } 04715 else { 04716 min = ONIGENC_MBC_MINLEN(env->enc); 04717 } 04718 set_mml(&opt->len, min, max); 04719 } 04720 break; 04721 04722 case NT_CANY: 04723 { 04724 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc); 04725 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc); 04726 set_mml(&opt->len, min, max); 04727 } 04728 break; 04729 04730 case NT_ANCHOR: 04731 switch (NANCHOR(node)->type) { 04732 case ANCHOR_BEGIN_BUF: 04733 case ANCHOR_BEGIN_POSITION: 04734 case ANCHOR_BEGIN_LINE: 04735 case ANCHOR_END_BUF: 04736 case ANCHOR_SEMI_END_BUF: 04737 case ANCHOR_END_LINE: 04738 add_opt_anc_info(&opt->anc, NANCHOR(node)->type); 04739 break; 04740 04741 case ANCHOR_PREC_READ: 04742 { 04743 NodeOptInfo nopt; 04744 04745 r = optimize_node_left(NANCHOR(node)->target, &nopt, env); 04746 if (r == 0) { 04747 if (nopt.exb.len > 0) 04748 copy_opt_exact_info(&opt->expr, &nopt.exb); 04749 else if (nopt.exm.len > 0) 04750 copy_opt_exact_info(&opt->expr, &nopt.exm); 04751 04752 opt->expr.reach_end = 0; 04753 04754 if (nopt.map.value > 0) 04755 copy_opt_map_info(&opt->map, &nopt.map); 04756 } 04757 } 04758 break; 04759 04760 case ANCHOR_PREC_READ_NOT: 04761 case ANCHOR_LOOK_BEHIND: /* Sorry, I can't make use of it. */ 04762 case ANCHOR_LOOK_BEHIND_NOT: 04763 break; 04764 } 04765 break; 04766 04767 case NT_BREF: 04768 { 04769 int i; 04770 int* backs; 04771 OnigDistance min, max, tmin, tmax; 04772 Node** nodes = SCANENV_MEM_NODES(env->scan_env); 04773 BRefNode* br = NBREF(node); 04774 04775 if (br->state & NST_RECURSION) { 04776 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); 04777 break; 04778 } 04779 backs = BACKREFS_P(br); 04780 r = get_min_match_length(nodes[backs[0]], &min, env->scan_env); 04781 if (r != 0) break; 04782 r = get_max_match_length(nodes[backs[0]], &max, env->scan_env); 04783 if (r != 0) break; 04784 for (i = 1; i < br->back_num; i++) { 04785 r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env); 04786 if (r != 0) break; 04787 r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env); 04788 if (r != 0) break; 04789 if (min > tmin) min = tmin; 04790 if (max < tmax) max = tmax; 04791 } 04792 if (r == 0) set_mml(&opt->len, min, max); 04793 } 04794 break; 04795 04796 #ifdef USE_SUBEXP_CALL 04797 case NT_CALL: 04798 if (IS_CALL_RECURSION(NCALL(node))) 04799 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); 04800 else { 04801 OnigOptionType save = env->options; 04802 env->options = NENCLOSE(NCALL(node)->target)->option; 04803 r = optimize_node_left(NCALL(node)->target, opt, env); 04804 env->options = save; 04805 } 04806 break; 04807 #endif 04808 04809 case NT_QTFR: 04810 { 04811 int i; 04812 OnigDistance min, max; 04813 NodeOptInfo nopt; 04814 QtfrNode* qn = NQTFR(node); 04815 04816 r = optimize_node_left(qn->target, &nopt, env); 04817 if (r) break; 04818 04819 if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) { 04820 if (env->mmd.max == 0 && 04821 NTYPE(qn->target) == NT_CANY && qn->greedy) { 04822 if (IS_MULTILINE(env->options)) 04823 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML); 04824 else 04825 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR); 04826 } 04827 } 04828 else { 04829 if (qn->lower > 0) { 04830 copy_node_opt_info(opt, &nopt); 04831 if (nopt.exb.len > 0) { 04832 if (nopt.exb.reach_end) { 04833 for (i = 2; i <= qn->lower && 04834 ! is_full_opt_exact_info(&opt->exb); i++) { 04835 concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc); 04836 } 04837 if (i < qn->lower) { 04838 opt->exb.reach_end = 0; 04839 } 04840 } 04841 } 04842 04843 if (qn->lower != qn->upper) { 04844 opt->exb.reach_end = 0; 04845 opt->exm.reach_end = 0; 04846 } 04847 if (qn->lower > 1) 04848 opt->exm.reach_end = 0; 04849 } 04850 } 04851 04852 min = distance_multiply(nopt.len.min, qn->lower); 04853 if (IS_REPEAT_INFINITE(qn->upper)) 04854 max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0); 04855 else 04856 max = distance_multiply(nopt.len.max, qn->upper); 04857 04858 set_mml(&opt->len, min, max); 04859 } 04860 break; 04861 04862 case NT_ENCLOSE: 04863 { 04864 EncloseNode* en = NENCLOSE(node); 04865 04866 switch (en->type) { 04867 case ENCLOSE_OPTION: 04868 { 04869 OnigOptionType save = env->options; 04870 04871 env->options = en->option; 04872 r = optimize_node_left(en->target, opt, env); 04873 env->options = save; 04874 } 04875 break; 04876 04877 case ENCLOSE_MEMORY: 04878 #ifdef USE_SUBEXP_CALL 04879 en->opt_count++; 04880 if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) { 04881 OnigDistance min, max; 04882 04883 min = 0; 04884 max = ONIG_INFINITE_DISTANCE; 04885 if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len; 04886 if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len; 04887 set_mml(&opt->len, min, max); 04888 } 04889 else 04890 #endif 04891 { 04892 r = optimize_node_left(en->target, opt, env); 04893 04894 if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) { 04895 if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum)) 04896 remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK); 04897 } 04898 } 04899 break; 04900 04901 case ENCLOSE_STOP_BACKTRACK: 04902 r = optimize_node_left(en->target, opt, env); 04903 break; 04904 } 04905 } 04906 break; 04907 04908 default: 04909 #ifdef ONIG_DEBUG 04910 if (!onig_is_prelude()) fprintf(stderr, "optimize_node_left: undefined node type %d\n", 04911 NTYPE(node)); 04912 #endif 04913 r = ONIGERR_TYPE_BUG; 04914 break; 04915 } 04916 04917 return r; 04918 } 04919 04920 static int 04921 set_optimize_exact_info(regex_t* reg, OptExactInfo* e) 04922 { 04923 int r; 04924 04925 if (e->len == 0) return 0; 04926 04927 if (e->ignore_case) { 04928 reg->exact = (UChar* )xmalloc(e->len); 04929 CHECK_NULL_RETURN_MEMERR(reg->exact); 04930 xmemcpy(reg->exact, e->s, e->len); 04931 reg->exact_end = reg->exact + e->len; 04932 reg->optimize = ONIG_OPTIMIZE_EXACT_IC; 04933 } 04934 else { 04935 int allow_reverse; 04936 04937 reg->exact = str_dup(e->s, e->s + e->len); 04938 CHECK_NULL_RETURN_MEMERR(reg->exact); 04939 reg->exact_end = reg->exact + e->len; 04940 04941 allow_reverse = 04942 ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end); 04943 04944 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) { 04945 r = set_bm_skip(reg->exact, reg->exact_end, reg->enc, 04946 reg->map, &(reg->int_map)); 04947 if (r) return r; 04948 04949 reg->optimize = (allow_reverse != 0 04950 ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV); 04951 } 04952 else { 04953 reg->optimize = ONIG_OPTIMIZE_EXACT; 04954 } 04955 } 04956 04957 reg->dmin = e->mmd.min; 04958 reg->dmax = e->mmd.max; 04959 04960 if (reg->dmin != ONIG_INFINITE_DISTANCE) { 04961 reg->threshold_len = (int)(reg->dmin + (reg->exact_end - reg->exact)); 04962 } 04963 04964 return 0; 04965 } 04966 04967 static void 04968 set_optimize_map_info(regex_t* reg, OptMapInfo* m) 04969 { 04970 int i; 04971 04972 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) 04973 reg->map[i] = m->map[i]; 04974 04975 reg->optimize = ONIG_OPTIMIZE_MAP; 04976 reg->dmin = m->mmd.min; 04977 reg->dmax = m->mmd.max; 04978 04979 if (reg->dmin != ONIG_INFINITE_DISTANCE) { 04980 reg->threshold_len = (int)(reg->dmin + 1); 04981 } 04982 } 04983 04984 static void 04985 set_sub_anchor(regex_t* reg, OptAncInfo* anc) 04986 { 04987 reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE; 04988 reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE; 04989 } 04990 04991 #ifdef ONIG_DEBUG 04992 static void print_optimize_info(FILE* f, regex_t* reg); 04993 #endif 04994 04995 static int 04996 set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) 04997 { 04998 04999 int r; 05000 NodeOptInfo opt; 05001 OptEnv env; 05002 05003 env.enc = reg->enc; 05004 env.options = reg->options; 05005 env.case_fold_flag = reg->case_fold_flag; 05006 env.scan_env = scan_env; 05007 clear_mml(&env.mmd); 05008 05009 r = optimize_node_left(node, &opt, &env); 05010 if (r) return r; 05011 05012 reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF | 05013 ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML); 05014 05015 reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF); 05016 05017 if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) { 05018 reg->anchor_dmin = opt.len.min; 05019 reg->anchor_dmax = opt.len.max; 05020 } 05021 05022 if (opt.exb.len > 0 || opt.exm.len > 0) { 05023 select_opt_exact_info(reg->enc, &opt.exb, &opt.exm); 05024 if (opt.map.value > 0 && 05025 comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) { 05026 goto set_map; 05027 } 05028 else { 05029 r = set_optimize_exact_info(reg, &opt.exb); 05030 set_sub_anchor(reg, &opt.exb.anc); 05031 } 05032 } 05033 else if (opt.map.value > 0) { 05034 set_map: 05035 set_optimize_map_info(reg, &opt.map); 05036 set_sub_anchor(reg, &opt.map.anc); 05037 } 05038 else { 05039 reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE; 05040 if (opt.len.max == 0) 05041 reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE; 05042 } 05043 05044 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH) 05045 if (!onig_is_prelude()) print_optimize_info(stderr, reg); 05046 #endif 05047 return r; 05048 } 05049 05050 static void 05051 clear_optimize_info(regex_t* reg) 05052 { 05053 reg->optimize = ONIG_OPTIMIZE_NONE; 05054 reg->anchor = 0; 05055 reg->anchor_dmin = 0; 05056 reg->anchor_dmax = 0; 05057 reg->sub_anchor = 0; 05058 reg->exact_end = (UChar* )NULL; 05059 reg->threshold_len = 0; 05060 if (IS_NOT_NULL(reg->exact)) { 05061 xfree(reg->exact); 05062 reg->exact = (UChar* )NULL; 05063 } 05064 } 05065 05066 #ifdef ONIG_DEBUG 05067 05068 static void print_enc_string(FILE* fp, OnigEncoding enc, 05069 const UChar *s, const UChar *end) 05070 { 05071 fprintf(fp, "\nPATTERN: /"); 05072 05073 if (ONIGENC_MBC_MINLEN(enc) > 1) { 05074 const UChar *p; 05075 OnigCodePoint code; 05076 05077 p = s; 05078 while (p < end) { 05079 code = ONIGENC_MBC_TO_CODE(enc, p, end); 05080 if (code >= 0x80) { 05081 fprintf(fp, " 0x%04x ", (int )code); 05082 } 05083 else { 05084 fputc((int )code, fp); 05085 } 05086 05087 p += enclen(enc, p, end); 05088 } 05089 } 05090 else { 05091 while (s < end) { 05092 fputc((int )*s, fp); 05093 s++; 05094 } 05095 } 05096 05097 fprintf(fp, "/ (%s)\n", enc->name); 05098 } 05099 05100 static void 05101 print_distance_range(FILE* f, OnigDistance a, OnigDistance b) 05102 { 05103 if (a == ONIG_INFINITE_DISTANCE) 05104 fputs("inf", f); 05105 else 05106 fprintf(f, "(%"PRIuSIZE")", a); 05107 05108 fputs("-", f); 05109 05110 if (b == ONIG_INFINITE_DISTANCE) 05111 fputs("inf", f); 05112 else 05113 fprintf(f, "(%"PRIuSIZE")", b); 05114 } 05115 05116 static void 05117 print_anchor(FILE* f, int anchor) 05118 { 05119 int q = 0; 05120 05121 fprintf(f, "["); 05122 05123 if (anchor & ANCHOR_BEGIN_BUF) { 05124 fprintf(f, "begin-buf"); 05125 q = 1; 05126 } 05127 if (anchor & ANCHOR_BEGIN_LINE) { 05128 if (q) fprintf(f, ", "); 05129 q = 1; 05130 fprintf(f, "begin-line"); 05131 } 05132 if (anchor & ANCHOR_BEGIN_POSITION) { 05133 if (q) fprintf(f, ", "); 05134 q = 1; 05135 fprintf(f, "begin-pos"); 05136 } 05137 if (anchor & ANCHOR_END_BUF) { 05138 if (q) fprintf(f, ", "); 05139 q = 1; 05140 fprintf(f, "end-buf"); 05141 } 05142 if (anchor & ANCHOR_SEMI_END_BUF) { 05143 if (q) fprintf(f, ", "); 05144 q = 1; 05145 fprintf(f, "semi-end-buf"); 05146 } 05147 if (anchor & ANCHOR_END_LINE) { 05148 if (q) fprintf(f, ", "); 05149 q = 1; 05150 fprintf(f, "end-line"); 05151 } 05152 if (anchor & ANCHOR_ANYCHAR_STAR) { 05153 if (q) fprintf(f, ", "); 05154 q = 1; 05155 fprintf(f, "anychar-star"); 05156 } 05157 if (anchor & ANCHOR_ANYCHAR_STAR_ML) { 05158 if (q) fprintf(f, ", "); 05159 fprintf(f, "anychar-star-pl"); 05160 } 05161 05162 fprintf(f, "]"); 05163 } 05164 05165 static void 05166 print_optimize_info(FILE* f, regex_t* reg) 05167 { 05168 static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV", 05169 "EXACT_IC", "MAP" }; 05170 05171 fprintf(f, "optimize: %s\n", on[reg->optimize]); 05172 fprintf(f, " anchor: "); print_anchor(f, reg->anchor); 05173 if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0) 05174 print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax); 05175 fprintf(f, "\n"); 05176 05177 if (reg->optimize) { 05178 fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor); 05179 fprintf(f, "\n"); 05180 } 05181 fprintf(f, "\n"); 05182 05183 if (reg->exact) { 05184 UChar *p; 05185 fprintf(f, "exact: ["); 05186 for (p = reg->exact; p < reg->exact_end; p++) { 05187 fputc(*p, f); 05188 } 05189 fprintf(f, "]: length: %ld\n", (reg->exact_end - reg->exact)); 05190 } 05191 else if (reg->optimize & ONIG_OPTIMIZE_MAP) { 05192 int c, i, n = 0; 05193 05194 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) 05195 if (reg->map[i]) n++; 05196 05197 fprintf(f, "map: n=%d\n", n); 05198 if (n > 0) { 05199 c = 0; 05200 fputc('[', f); 05201 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) { 05202 if (reg->map[i] != 0) { 05203 if (c > 0) fputs(", ", f); 05204 c++; 05205 if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 && 05206 ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i)) 05207 fputc(i, f); 05208 else 05209 fprintf(f, "%d", i); 05210 } 05211 } 05212 fprintf(f, "]\n"); 05213 } 05214 } 05215 } 05216 #endif /* ONIG_DEBUG */ 05217 05218 05219 extern void 05220 onig_free_body(regex_t* reg) 05221 { 05222 if (IS_NOT_NULL(reg)) { 05223 if (IS_NOT_NULL(reg->p)) xfree(reg->p); 05224 if (IS_NOT_NULL(reg->exact)) xfree(reg->exact); 05225 if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map); 05226 if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward); 05227 if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range); 05228 if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain); 05229 05230 #ifdef USE_NAMED_GROUP 05231 onig_names_free(reg); 05232 #endif 05233 } 05234 } 05235 05236 extern void 05237 onig_free(regex_t* reg) 05238 { 05239 if (IS_NOT_NULL(reg)) { 05240 onig_free_body(reg); 05241 xfree(reg); 05242 } 05243 } 05244 05245 size_t 05246 onig_memsize(const regex_t *reg) 05247 { 05248 size_t size = sizeof(regex_t); 05249 if (IS_NOT_NULL(reg->p)) size += reg->alloc; 05250 if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact; 05251 if (IS_NOT_NULL(reg->int_map)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE; 05252 if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE; 05253 if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc * sizeof(OnigRepeatRange); 05254 if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain); 05255 05256 return size; 05257 } 05258 05259 #define REGEX_TRANSFER(to,from) do {\ 05260 (to)->state = ONIG_STATE_MODIFY;\ 05261 onig_free_body(to);\ 05262 xmemcpy(to, from, sizeof(regex_t));\ 05263 xfree(from);\ 05264 } while (0) 05265 05266 extern void 05267 onig_transfer(regex_t* to, regex_t* from) 05268 { 05269 THREAD_ATOMIC_START; 05270 REGEX_TRANSFER(to, from); 05271 THREAD_ATOMIC_END; 05272 } 05273 05274 #define REGEX_CHAIN_HEAD(reg) do {\ 05275 while (IS_NOT_NULL((reg)->chain)) {\ 05276 (reg) = (reg)->chain;\ 05277 }\ 05278 } while (0) 05279 05280 extern void 05281 onig_chain_link_add(regex_t* to, regex_t* add) 05282 { 05283 THREAD_ATOMIC_START; 05284 REGEX_CHAIN_HEAD(to); 05285 to->chain = add; 05286 THREAD_ATOMIC_END; 05287 } 05288 05289 extern void 05290 onig_chain_reduce(regex_t* reg) 05291 { 05292 regex_t *head, *prev; 05293 05294 prev = reg; 05295 head = prev->chain; 05296 if (IS_NOT_NULL(head)) { 05297 reg->state = ONIG_STATE_MODIFY; 05298 while (IS_NOT_NULL(head->chain)) { 05299 prev = head; 05300 head = head->chain; 05301 } 05302 prev->chain = (regex_t* )NULL; 05303 REGEX_TRANSFER(reg, head); 05304 } 05305 } 05306 05307 #ifdef ONIG_DEBUG 05308 static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg)); 05309 #endif 05310 #ifdef ONIG_DEBUG_PARSE_TREE 05311 static void print_tree P_((FILE* f, Node* node)); 05312 #endif 05313 05314 extern int 05315 onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, 05316 OnigErrorInfo* einfo, const char *sourcefile, int sourceline) 05317 { 05318 #define COMPILE_INIT_SIZE 20 05319 05320 int r; 05321 OnigDistance init_size; 05322 Node* root; 05323 ScanEnv scan_env = {0}; 05324 #ifdef USE_SUBEXP_CALL 05325 UnsetAddrList uslist; 05326 #endif 05327 05328 if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL; 05329 05330 scan_env.sourcefile = sourcefile; 05331 scan_env.sourceline = sourceline; 05332 reg->state = ONIG_STATE_COMPILING; 05333 05334 #ifdef ONIG_DEBUG 05335 if (!onig_is_prelude()) print_enc_string(stderr, reg->enc, pattern, pattern_end); 05336 #endif 05337 05338 if (reg->alloc == 0) { 05339 init_size = (pattern_end - pattern) * 2; 05340 if (init_size <= 0) init_size = COMPILE_INIT_SIZE; 05341 r = BBUF_INIT(reg, init_size); 05342 if (r != 0) goto end; 05343 } 05344 else 05345 reg->used = 0; 05346 05347 reg->num_mem = 0; 05348 reg->num_repeat = 0; 05349 reg->num_null_check = 0; 05350 reg->repeat_range_alloc = 0; 05351 reg->repeat_range = (OnigRepeatRange* )NULL; 05352 #ifdef USE_COMBINATION_EXPLOSION_CHECK 05353 reg->num_comb_exp_check = 0; 05354 #endif 05355 05356 r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env); 05357 if (r != 0) goto err; 05358 05359 #ifdef ONIG_DEBUG_PARSE_TREE 05360 # if 0 05361 fprintf(stderr, "ORIGINAL PARSE TREE:\n"); 05362 if (!onig_is_prelude()) { 05363 print_tree(stderr, root); 05364 } 05365 # endif 05366 #endif 05367 05368 #ifdef USE_NAMED_GROUP 05369 /* mixed use named group and no-named group */ 05370 if (scan_env.num_named > 0 && 05371 IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && 05372 !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) { 05373 if (scan_env.num_named != scan_env.num_mem) 05374 r = disable_noname_group_capture(&root, reg, &scan_env); 05375 else 05376 r = numbered_ref_check(root); 05377 05378 if (r != 0) goto err; 05379 } 05380 #endif 05381 05382 #ifdef USE_SUBEXP_CALL 05383 if (scan_env.num_call > 0) { 05384 r = unset_addr_list_init(&uslist, scan_env.num_call); 05385 if (r != 0) goto err; 05386 scan_env.unset_addr_list = &uslist; 05387 r = setup_subexp_call(root, &scan_env); 05388 if (r != 0) goto err_unset; 05389 r = subexp_recursive_check_trav(root, &scan_env); 05390 if (r < 0) goto err_unset; 05391 r = subexp_inf_recursive_check_trav(root, &scan_env); 05392 if (r != 0) goto err_unset; 05393 05394 reg->num_call = scan_env.num_call; 05395 } 05396 else 05397 reg->num_call = 0; 05398 #endif 05399 05400 r = setup_tree(root, reg, 0, &scan_env); 05401 if (r != 0) goto err_unset; 05402 05403 #ifdef ONIG_DEBUG_PARSE_TREE 05404 if (!onig_is_prelude()) print_tree(stderr, root); 05405 #endif 05406 05407 reg->capture_history = scan_env.capture_history; 05408 reg->bt_mem_start = scan_env.bt_mem_start; 05409 reg->bt_mem_start |= reg->capture_history; 05410 if (IS_FIND_CONDITION(reg->options)) 05411 BIT_STATUS_ON_ALL(reg->bt_mem_end); 05412 else { 05413 reg->bt_mem_end = scan_env.bt_mem_end; 05414 reg->bt_mem_end |= reg->capture_history; 05415 } 05416 05417 #ifdef USE_COMBINATION_EXPLOSION_CHECK 05418 if (scan_env.backrefed_mem == 0 05419 #ifdef USE_SUBEXP_CALL 05420 || scan_env.num_call == 0 05421 #endif 05422 ) { 05423 setup_comb_exp_check(root, 0, &scan_env); 05424 #ifdef USE_SUBEXP_CALL 05425 if (scan_env.has_recursion != 0) { 05426 scan_env.num_comb_exp_check = 0; 05427 } 05428 else 05429 #endif 05430 if (scan_env.comb_exp_max_regnum > 0) { 05431 int i; 05432 for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) { 05433 if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) { 05434 scan_env.num_comb_exp_check = 0; 05435 break; 05436 } 05437 } 05438 } 05439 } 05440 05441 reg->num_comb_exp_check = scan_env.num_comb_exp_check; 05442 #endif 05443 05444 clear_optimize_info(reg); 05445 #ifndef ONIG_DONT_OPTIMIZE 05446 r = set_optimize_info_from_tree(root, reg, &scan_env); 05447 if (r != 0) goto err_unset; 05448 #endif 05449 05450 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) { 05451 xfree(scan_env.mem_nodes_dynamic); 05452 scan_env.mem_nodes_dynamic = (Node** )NULL; 05453 } 05454 05455 r = compile_tree(root, reg); 05456 if (r == 0) { 05457 r = add_opcode(reg, OP_END); 05458 #ifdef USE_SUBEXP_CALL 05459 if (scan_env.num_call > 0) { 05460 r = unset_addr_list_fix(&uslist, reg); 05461 unset_addr_list_end(&uslist); 05462 if (r) goto err; 05463 } 05464 #endif 05465 05466 if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0)) 05467 reg->stack_pop_level = STACK_POP_LEVEL_ALL; 05468 else { 05469 if (reg->bt_mem_start != 0) 05470 reg->stack_pop_level = STACK_POP_LEVEL_MEM_START; 05471 else 05472 reg->stack_pop_level = STACK_POP_LEVEL_FREE; 05473 } 05474 } 05475 #ifdef USE_SUBEXP_CALL 05476 else if (scan_env.num_call > 0) { 05477 unset_addr_list_end(&uslist); 05478 } 05479 #endif 05480 onig_node_free(root); 05481 05482 #ifdef ONIG_DEBUG_COMPILE 05483 #ifdef USE_NAMED_GROUP 05484 if (!onig_is_prelude()) onig_print_names(stderr, reg); 05485 #endif 05486 if (!onig_is_prelude()) print_compiled_byte_code_list(stderr, reg); 05487 #endif 05488 05489 end: 05490 reg->state = ONIG_STATE_NORMAL; 05491 return r; 05492 05493 err_unset: 05494 #ifdef USE_SUBEXP_CALL 05495 if (scan_env.num_call > 0) { 05496 unset_addr_list_end(&uslist); 05497 } 05498 #endif 05499 err: 05500 if (IS_NOT_NULL(scan_env.error)) { 05501 if (IS_NOT_NULL(einfo)) { 05502 einfo->enc = scan_env.enc; 05503 einfo->par = scan_env.error; 05504 einfo->par_end = scan_env.error_end; 05505 } 05506 } 05507 05508 onig_node_free(root); 05509 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) 05510 xfree(scan_env.mem_nodes_dynamic); 05511 return r; 05512 } 05513 05514 #ifdef USE_RECOMPILE_API 05515 extern int 05516 onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, 05517 OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, 05518 OnigErrorInfo* einfo) 05519 { 05520 int r; 05521 regex_t *new_reg; 05522 05523 r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo); 05524 if (r) return r; 05525 if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) { 05526 onig_transfer(reg, new_reg); 05527 } 05528 else { 05529 onig_chain_link_add(reg, new_reg); 05530 } 05531 return 0; 05532 } 05533 #endif 05534 05535 static int onig_inited = 0; 05536 05537 extern int 05538 onig_reg_init(regex_t* reg, OnigOptionType option, 05539 OnigCaseFoldType case_fold_flag, 05540 OnigEncoding enc, const OnigSyntaxType* syntax) 05541 { 05542 if (! onig_inited) 05543 onig_init(); 05544 05545 if (IS_NULL(reg)) 05546 return ONIGERR_INVALID_ARGUMENT; 05547 05548 if (ONIGENC_IS_UNDEF(enc)) 05549 return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED; 05550 05551 if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) 05552 == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) { 05553 return ONIGERR_INVALID_COMBINATION_OF_OPTIONS; 05554 } 05555 05556 (reg)->state = ONIG_STATE_MODIFY; 05557 05558 if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) { 05559 option |= syntax->options; 05560 option &= ~ONIG_OPTION_SINGLELINE; 05561 } 05562 else 05563 option |= syntax->options; 05564 05565 (reg)->enc = enc; 05566 (reg)->options = option; 05567 (reg)->syntax = syntax; 05568 (reg)->optimize = 0; 05569 (reg)->exact = (UChar* )NULL; 05570 (reg)->int_map = (int* )NULL; 05571 (reg)->int_map_backward = (int* )NULL; 05572 (reg)->chain = (regex_t* )NULL; 05573 05574 (reg)->p = (UChar* )NULL; 05575 (reg)->alloc = 0; 05576 (reg)->used = 0; 05577 (reg)->name_table = (void* )NULL; 05578 05579 (reg)->case_fold_flag = case_fold_flag; 05580 return 0; 05581 } 05582 05583 extern int 05584 onig_new_without_alloc(regex_t* reg, const UChar* pattern, 05585 const UChar* pattern_end, OnigOptionType option, OnigEncoding enc, 05586 OnigSyntaxType* syntax, OnigErrorInfo* einfo) 05587 { 05588 int r; 05589 05590 r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax); 05591 if (r) return r; 05592 05593 r = onig_compile(reg, pattern, pattern_end, einfo, NULL, 0); 05594 return r; 05595 } 05596 05597 extern int 05598 onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end, 05599 OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax, 05600 OnigErrorInfo* einfo) 05601 { 05602 int r; 05603 05604 *reg = (regex_t* )xmalloc(sizeof(regex_t)); 05605 if (IS_NULL(*reg)) return ONIGERR_MEMORY; 05606 05607 r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax); 05608 if (r) goto err; 05609 05610 r = onig_compile(*reg, pattern, pattern_end, einfo, NULL, 0); 05611 if (r) { 05612 err: 05613 onig_free(*reg); 05614 *reg = NULL; 05615 } 05616 return r; 05617 } 05618 05619 05620 extern int 05621 onig_init(void) 05622 { 05623 if (onig_inited != 0) 05624 return 0; 05625 05626 THREAD_SYSTEM_INIT; 05627 THREAD_ATOMIC_START; 05628 05629 onig_inited = 1; 05630 05631 onigenc_init(); 05632 /* onigenc_set_default_caseconv_table((UChar* )0); */ 05633 05634 #ifdef ONIG_DEBUG_STATISTICS 05635 onig_statistics_init(); 05636 #endif 05637 05638 THREAD_ATOMIC_END; 05639 return 0; 05640 } 05641 05642 05643 extern int 05644 onig_end(void) 05645 { 05646 THREAD_ATOMIC_START; 05647 05648 #ifdef ONIG_DEBUG_STATISTICS 05649 if (!onig_is_prelude()) onig_print_statistics(stderr); 05650 #endif 05651 05652 #ifdef USE_SHARED_CCLASS_TABLE 05653 onig_free_shared_cclass_table(); 05654 #endif 05655 05656 #ifdef USE_PARSE_TREE_NODE_RECYCLE 05657 onig_free_node_list(); 05658 #endif 05659 05660 onig_inited = 0; 05661 05662 THREAD_ATOMIC_END; 05663 THREAD_SYSTEM_END; 05664 return 0; 05665 } 05666 05667 extern int 05668 onig_is_in_code_range(const UChar* p, OnigCodePoint code) 05669 { 05670 OnigCodePoint n, *data; 05671 OnigCodePoint low, high, x; 05672 05673 GET_CODE_POINT(n, p); 05674 data = (OnigCodePoint* )p; 05675 data++; 05676 05677 for (low = 0, high = n; low < high; ) { 05678 x = (low + high) >> 1; 05679 if (code > data[x * 2 + 1]) 05680 low = x + 1; 05681 else 05682 high = x; 05683 } 05684 05685 return ((low < n && code >= data[low * 2]) ? 1 : 0); 05686 } 05687 05688 extern int 05689 onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc) 05690 { 05691 int found; 05692 05693 if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) { 05694 if (IS_NULL(cc->mbuf)) { 05695 found = 0; 05696 } 05697 else { 05698 found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0); 05699 } 05700 } 05701 else { 05702 found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1); 05703 } 05704 05705 if (IS_NCCLASS_NOT(cc)) 05706 return !found; 05707 else 05708 return found; 05709 } 05710 05711 extern int 05712 onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc) 05713 { 05714 int len; 05715 05716 if (ONIGENC_MBC_MINLEN(enc) > 1) { 05717 len = 2; 05718 } 05719 else { 05720 len = ONIGENC_CODE_TO_MBCLEN(enc, code); 05721 } 05722 return onig_is_code_in_cc_len(len, code, cc); 05723 } 05724 05725 05726 #ifdef ONIG_DEBUG 05727 05728 /* arguments type */ 05729 #define ARG_SPECIAL -1 05730 #define ARG_NON 0 05731 #define ARG_RELADDR 1 05732 #define ARG_ABSADDR 2 05733 #define ARG_LENGTH 3 05734 #define ARG_MEMNUM 4 05735 #define ARG_OPTION 5 05736 #define ARG_STATE_CHECK 6 05737 05738 OnigOpInfoType OnigOpInfo[] = { 05739 { OP_FINISH, "finish", ARG_NON }, 05740 { OP_END, "end", ARG_NON }, 05741 { OP_EXACT1, "exact1", ARG_SPECIAL }, 05742 { OP_EXACT2, "exact2", ARG_SPECIAL }, 05743 { OP_EXACT3, "exact3", ARG_SPECIAL }, 05744 { OP_EXACT4, "exact4", ARG_SPECIAL }, 05745 { OP_EXACT5, "exact5", ARG_SPECIAL }, 05746 { OP_EXACTN, "exactn", ARG_SPECIAL }, 05747 { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL }, 05748 { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL }, 05749 { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL }, 05750 { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL }, 05751 { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL }, 05752 { OP_EXACTMBN, "exactmbn", ARG_SPECIAL }, 05753 { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL }, 05754 { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL }, 05755 { OP_CCLASS, "cclass", ARG_SPECIAL }, 05756 { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL }, 05757 { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL }, 05758 { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL }, 05759 { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL }, 05760 { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL }, 05761 { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL }, 05762 { OP_ANYCHAR, "anychar", ARG_NON }, 05763 { OP_ANYCHAR_ML, "anychar-ml", ARG_NON }, 05764 { OP_ANYCHAR_STAR, "anychar*", ARG_NON }, 05765 { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON }, 05766 { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL }, 05767 { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL }, 05768 { OP_WORD, "word", ARG_NON }, 05769 { OP_NOT_WORD, "not-word", ARG_NON }, 05770 { OP_WORD_BOUND, "word-bound", ARG_NON }, 05771 { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON }, 05772 { OP_WORD_BEGIN, "word-begin", ARG_NON }, 05773 { OP_WORD_END, "word-end", ARG_NON }, 05774 { OP_BEGIN_BUF, "begin-buf", ARG_NON }, 05775 { OP_END_BUF, "end-buf", ARG_NON }, 05776 { OP_BEGIN_LINE, "begin-line", ARG_NON }, 05777 { OP_END_LINE, "end-line", ARG_NON }, 05778 { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON }, 05779 { OP_BEGIN_POSITION, "begin-position", ARG_NON }, 05780 { OP_BACKREF1, "backref1", ARG_NON }, 05781 { OP_BACKREF2, "backref2", ARG_NON }, 05782 { OP_BACKREFN, "backrefn", ARG_MEMNUM }, 05783 { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL }, 05784 { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL }, 05785 { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL }, 05786 { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL }, 05787 { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM }, 05788 { OP_MEMORY_START, "mem-start", ARG_MEMNUM }, 05789 { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM }, 05790 { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM }, 05791 { OP_MEMORY_END, "mem-end", ARG_MEMNUM }, 05792 { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM }, 05793 { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION }, 05794 { OP_SET_OPTION, "set-option", ARG_OPTION }, 05795 { OP_FAIL, "fail", ARG_NON }, 05796 { OP_JUMP, "jump", ARG_RELADDR }, 05797 { OP_PUSH, "push", ARG_RELADDR }, 05798 { OP_POP, "pop", ARG_NON }, 05799 { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL }, 05800 { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL }, 05801 { OP_REPEAT, "repeat", ARG_SPECIAL }, 05802 { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL }, 05803 { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM }, 05804 { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM }, 05805 { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM }, 05806 { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM }, 05807 { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM }, 05808 { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM }, 05809 { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM }, 05810 { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM }, 05811 { OP_PUSH_POS, "push-pos", ARG_NON }, 05812 { OP_POP_POS, "pop-pos", ARG_NON }, 05813 { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR }, 05814 { OP_FAIL_POS, "fail-pos", ARG_NON }, 05815 { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON }, 05816 { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON }, 05817 { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL }, 05818 { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL }, 05819 { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON }, 05820 { OP_CALL, "call", ARG_ABSADDR }, 05821 { OP_RETURN, "return", ARG_NON }, 05822 { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL }, 05823 { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL }, 05824 { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK }, 05825 { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK }, 05826 { OP_STATE_CHECK_ANYCHAR_ML_STAR, 05827 "state-check-anychar-ml*", ARG_STATE_CHECK }, 05828 { -1, "", ARG_NON } 05829 }; 05830 05831 static const char* 05832 op2name(int opcode) 05833 { 05834 int i; 05835 05836 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { 05837 if (opcode == OnigOpInfo[i].opcode) 05838 return OnigOpInfo[i].name; 05839 } 05840 return ""; 05841 } 05842 05843 static int 05844 op2arg_type(int opcode) 05845 { 05846 int i; 05847 05848 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { 05849 if (opcode == OnigOpInfo[i].opcode) 05850 return OnigOpInfo[i].arg_type; 05851 } 05852 return ARG_SPECIAL; 05853 } 05854 05855 static void 05856 Indent(FILE* f, int indent) 05857 { 05858 int i; 05859 for (i = 0; i < indent; i++) putc(' ', f); 05860 } 05861 05862 static void 05863 p_string(FILE* f, int len, UChar* s) 05864 { 05865 fputs(":", f); 05866 while (len-- > 0) { fputc(*s++, f); } 05867 } 05868 05869 static void 05870 p_len_string(FILE* f, LengthType len, int mb_len, UChar* s) 05871 { 05872 int x = len * mb_len; 05873 05874 fprintf(f, ":%d:", len); 05875 while (x-- > 0) { fputc(*s++, f); } 05876 } 05877 05878 extern void 05879 onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp, 05880 OnigEncoding enc) 05881 { 05882 int i, n, arg_type; 05883 RelAddrType addr; 05884 LengthType len; 05885 MemNumType mem; 05886 StateCheckNumType scn; 05887 OnigCodePoint code; 05888 UChar *q; 05889 05890 fprintf(f, "[%s", op2name(*bp)); 05891 arg_type = op2arg_type(*bp); 05892 if (arg_type != ARG_SPECIAL) { 05893 bp++; 05894 switch (arg_type) { 05895 case ARG_NON: 05896 break; 05897 case ARG_RELADDR: 05898 GET_RELADDR_INC(addr, bp); 05899 fprintf(f, ":(%d)", addr); 05900 break; 05901 case ARG_ABSADDR: 05902 GET_ABSADDR_INC(addr, bp); 05903 fprintf(f, ":(%d)", addr); 05904 break; 05905 case ARG_LENGTH: 05906 GET_LENGTH_INC(len, bp); 05907 fprintf(f, ":%d", len); 05908 break; 05909 case ARG_MEMNUM: 05910 mem = *((MemNumType* )bp); 05911 bp += SIZE_MEMNUM; 05912 fprintf(f, ":%d", mem); 05913 break; 05914 case ARG_OPTION: 05915 { 05916 OnigOptionType option = *((OnigOptionType* )bp); 05917 bp += SIZE_OPTION; 05918 fprintf(f, ":%d", option); 05919 } 05920 break; 05921 05922 case ARG_STATE_CHECK: 05923 scn = *((StateCheckNumType* )bp); 05924 bp += SIZE_STATE_CHECK_NUM; 05925 fprintf(f, ":%d", scn); 05926 break; 05927 } 05928 } 05929 else { 05930 switch (*bp++) { 05931 case OP_EXACT1: 05932 case OP_ANYCHAR_STAR_PEEK_NEXT: 05933 case OP_ANYCHAR_ML_STAR_PEEK_NEXT: 05934 p_string(f, 1, bp++); break; 05935 case OP_EXACT2: 05936 p_string(f, 2, bp); bp += 2; break; 05937 case OP_EXACT3: 05938 p_string(f, 3, bp); bp += 3; break; 05939 case OP_EXACT4: 05940 p_string(f, 4, bp); bp += 4; break; 05941 case OP_EXACT5: 05942 p_string(f, 5, bp); bp += 5; break; 05943 case OP_EXACTN: 05944 GET_LENGTH_INC(len, bp); 05945 p_len_string(f, len, 1, bp); 05946 bp += len; 05947 break; 05948 05949 case OP_EXACTMB2N1: 05950 p_string(f, 2, bp); bp += 2; break; 05951 case OP_EXACTMB2N2: 05952 p_string(f, 4, bp); bp += 4; break; 05953 case OP_EXACTMB2N3: 05954 p_string(f, 6, bp); bp += 6; break; 05955 case OP_EXACTMB2N: 05956 GET_LENGTH_INC(len, bp); 05957 p_len_string(f, len, 2, bp); 05958 bp += len * 2; 05959 break; 05960 case OP_EXACTMB3N: 05961 GET_LENGTH_INC(len, bp); 05962 p_len_string(f, len, 3, bp); 05963 bp += len * 3; 05964 break; 05965 case OP_EXACTMBN: 05966 { 05967 int mb_len; 05968 05969 GET_LENGTH_INC(mb_len, bp); 05970 GET_LENGTH_INC(len, bp); 05971 fprintf(f, ":%d:%d:", mb_len, len); 05972 n = len * mb_len; 05973 while (n-- > 0) { fputc(*bp++, f); } 05974 } 05975 break; 05976 05977 case OP_EXACT1_IC: 05978 len = enclen(enc, bp, bpend); 05979 p_string(f, len, bp); 05980 bp += len; 05981 break; 05982 case OP_EXACTN_IC: 05983 GET_LENGTH_INC(len, bp); 05984 p_len_string(f, len, 1, bp); 05985 bp += len; 05986 break; 05987 05988 case OP_CCLASS: 05989 n = bitset_on_num((BitSetRef )bp); 05990 bp += SIZE_BITSET; 05991 fprintf(f, ":%d", n); 05992 break; 05993 05994 case OP_CCLASS_NOT: 05995 n = bitset_on_num((BitSetRef )bp); 05996 bp += SIZE_BITSET; 05997 fprintf(f, ":%d", n); 05998 break; 05999 06000 case OP_CCLASS_MB: 06001 case OP_CCLASS_MB_NOT: 06002 GET_LENGTH_INC(len, bp); 06003 q = bp; 06004 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS 06005 ALIGNMENT_RIGHT(q); 06006 #endif 06007 GET_CODE_POINT(code, q); 06008 bp += len; 06009 fprintf(f, ":%d:%d", (int )code, len); 06010 break; 06011 06012 case OP_CCLASS_MIX: 06013 case OP_CCLASS_MIX_NOT: 06014 n = bitset_on_num((BitSetRef )bp); 06015 bp += SIZE_BITSET; 06016 GET_LENGTH_INC(len, bp); 06017 q = bp; 06018 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS 06019 ALIGNMENT_RIGHT(q); 06020 #endif 06021 GET_CODE_POINT(code, q); 06022 bp += len; 06023 fprintf(f, ":%d:%d:%d", n, (int )code, len); 06024 break; 06025 06026 case OP_CCLASS_NODE: 06027 { 06028 CClassNode *cc; 06029 06030 GET_POINTER_INC(cc, bp); 06031 n = bitset_on_num(cc->bs); 06032 fprintf(f, ":%"PRIuPTR":%d", (uintptr_t)cc, n); 06033 } 06034 break; 06035 06036 case OP_BACKREFN_IC: 06037 mem = *((MemNumType* )bp); 06038 bp += SIZE_MEMNUM; 06039 fprintf(f, ":%d", mem); 06040 break; 06041 06042 case OP_BACKREF_MULTI_IC: 06043 case OP_BACKREF_MULTI: 06044 fputs(" ", f); 06045 GET_LENGTH_INC(len, bp); 06046 for (i = 0; i < len; i++) { 06047 GET_MEMNUM_INC(mem, bp); 06048 if (i > 0) fputs(", ", f); 06049 fprintf(f, "%d", mem); 06050 } 06051 break; 06052 06053 case OP_BACKREF_WITH_LEVEL: 06054 { 06055 OnigOptionType option; 06056 LengthType level; 06057 06058 GET_OPTION_INC(option, bp); 06059 fprintf(f, ":%d", option); 06060 GET_LENGTH_INC(level, bp); 06061 fprintf(f, ":%d", level); 06062 06063 fputs(" ", f); 06064 GET_LENGTH_INC(len, bp); 06065 for (i = 0; i < len; i++) { 06066 GET_MEMNUM_INC(mem, bp); 06067 if (i > 0) fputs(", ", f); 06068 fprintf(f, "%d", mem); 06069 } 06070 } 06071 break; 06072 06073 case OP_REPEAT: 06074 case OP_REPEAT_NG: 06075 { 06076 mem = *((MemNumType* )bp); 06077 bp += SIZE_MEMNUM; 06078 addr = *((RelAddrType* )bp); 06079 bp += SIZE_RELADDR; 06080 fprintf(f, ":%d:%d", mem, addr); 06081 } 06082 break; 06083 06084 case OP_PUSH_OR_JUMP_EXACT1: 06085 case OP_PUSH_IF_PEEK_NEXT: 06086 addr = *((RelAddrType* )bp); 06087 bp += SIZE_RELADDR; 06088 fprintf(f, ":(%d)", addr); 06089 p_string(f, 1, bp); 06090 bp += 1; 06091 break; 06092 06093 case OP_LOOK_BEHIND: 06094 GET_LENGTH_INC(len, bp); 06095 fprintf(f, ":%d", len); 06096 break; 06097 06098 case OP_PUSH_LOOK_BEHIND_NOT: 06099 GET_RELADDR_INC(addr, bp); 06100 GET_LENGTH_INC(len, bp); 06101 fprintf(f, ":%d:(%d)", len, addr); 06102 break; 06103 06104 case OP_STATE_CHECK_PUSH: 06105 case OP_STATE_CHECK_PUSH_OR_JUMP: 06106 scn = *((StateCheckNumType* )bp); 06107 bp += SIZE_STATE_CHECK_NUM; 06108 addr = *((RelAddrType* )bp); 06109 bp += SIZE_RELADDR; 06110 fprintf(f, ":%d:(%d)", scn, addr); 06111 break; 06112 06113 default: 06114 fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n", 06115 *--bp); 06116 } 06117 } 06118 fputs("]", f); 06119 if (nextp) *nextp = bp; 06120 } 06121 06122 static void 06123 print_compiled_byte_code_list(FILE* f, regex_t* reg) 06124 { 06125 int ncode; 06126 UChar* bp = reg->p; 06127 UChar* end = reg->p + reg->used; 06128 06129 fprintf(f, "code length: %d\n", reg->used); 06130 06131 ncode = -1; 06132 while (bp < end) { 06133 ncode++; 06134 if (bp > reg->p) { 06135 if (ncode % 5 == 0) 06136 fprintf(f, "\n%ld:", bp-reg->p); 06137 else 06138 fprintf(f, " %ld:", bp-reg->p); 06139 } 06140 onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc); 06141 } 06142 06143 fprintf(f, "\n"); 06144 } 06145 06146 static void 06147 print_indent_tree(FILE* f, Node* node, int indent) 06148 { 06149 int i, type, container_p = 0; 06150 int add = 3; 06151 UChar* p; 06152 06153 Indent(f, indent); 06154 if (IS_NULL(node)) { 06155 fprintf(f, "ERROR: null node!!!\n"); 06156 exit (0); 06157 } 06158 06159 type = NTYPE(node); 06160 switch (type) { 06161 case NT_LIST: 06162 case NT_ALT: 06163 if (NTYPE(node) == NT_LIST) 06164 fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t)node); 06165 else 06166 fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t)node); 06167 06168 print_indent_tree(f, NCAR(node), indent + add); 06169 while (IS_NOT_NULL(node = NCDR(node))) { 06170 if (NTYPE(node) != type) { 06171 fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node)); 06172 exit(0); 06173 } 06174 print_indent_tree(f, NCAR(node), indent + add); 06175 } 06176 break; 06177 06178 case NT_STR: 06179 fprintf(f, "<string%s:%"PRIxPTR">", 06180 (NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t)node); 06181 for (p = NSTR(node)->s; p < NSTR(node)->end; p++) { 06182 if (*p >= 0x20 && *p < 0x7f) 06183 fputc(*p, f); 06184 else { 06185 fprintf(f, " 0x%02x", *p); 06186 } 06187 } 06188 break; 06189 06190 case NT_CCLASS: 06191 fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t)node); 06192 if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f); 06193 if (NCCLASS(node)->mbuf) { 06194 BBuf* bbuf = NCCLASS(node)->mbuf; 06195 for (i = 0; i < (int)bbuf->used; i++) { 06196 if (i > 0) fprintf(f, ","); 06197 fprintf(f, "%0x", bbuf->p[i]); 06198 } 06199 } 06200 break; 06201 06202 case NT_CTYPE: 06203 fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t)node); 06204 switch (NCTYPE(node)->ctype) { 06205 case ONIGENC_CTYPE_WORD: 06206 if (NCTYPE(node)->not != 0) 06207 fputs("not word", f); 06208 else 06209 fputs("word", f); 06210 break; 06211 06212 default: 06213 fprintf(f, "ERROR: undefined ctype.\n"); 06214 exit(0); 06215 } 06216 break; 06217 06218 case NT_CANY: 06219 fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t)node); 06220 break; 06221 06222 case NT_ANCHOR: 06223 fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t)node); 06224 switch (NANCHOR(node)->type) { 06225 case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break; 06226 case ANCHOR_END_BUF: fputs("end buf", f); break; 06227 case ANCHOR_BEGIN_LINE: fputs("begin line", f); break; 06228 case ANCHOR_END_LINE: fputs("end line", f); break; 06229 case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break; 06230 case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break; 06231 06232 case ANCHOR_WORD_BOUND: fputs("word bound", f); break; 06233 case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break; 06234 #ifdef USE_WORD_BEGIN_END 06235 case ANCHOR_WORD_BEGIN: fputs("word begin", f); break; 06236 case ANCHOR_WORD_END: fputs("word end", f); break; 06237 #endif 06238 case ANCHOR_PREC_READ: fputs("prec read", f); container_p = TRUE; break; 06239 case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); container_p = TRUE; break; 06240 case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); container_p = TRUE; break; 06241 case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); container_p = TRUE; break; 06242 06243 default: 06244 fprintf(f, "ERROR: undefined anchor type.\n"); 06245 break; 06246 } 06247 break; 06248 06249 case NT_BREF: 06250 { 06251 int* p; 06252 BRefNode* br = NBREF(node); 06253 p = BACKREFS_P(br); 06254 fprintf(f, "<backref:%"PRIxPTR">", (intptr_t)node); 06255 for (i = 0; i < br->back_num; i++) { 06256 if (i > 0) fputs(", ", f); 06257 fprintf(f, "%d", p[i]); 06258 } 06259 } 06260 break; 06261 06262 #ifdef USE_SUBEXP_CALL 06263 case NT_CALL: 06264 { 06265 CallNode* cn = NCALL(node); 06266 fprintf(f, "<call:%"PRIxPTR">", (intptr_t)node); 06267 p_string(f, cn->name_end - cn->name, cn->name); 06268 } 06269 break; 06270 #endif 06271 06272 case NT_QTFR: 06273 fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t)node, 06274 NQTFR(node)->lower, NQTFR(node)->upper, 06275 (NQTFR(node)->greedy ? "" : "?")); 06276 print_indent_tree(f, NQTFR(node)->target, indent + add); 06277 break; 06278 06279 case NT_ENCLOSE: 06280 fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t)node); 06281 switch (NENCLOSE(node)->type) { 06282 case ENCLOSE_OPTION: 06283 fprintf(f, "option:%d\n", NENCLOSE(node)->option); 06284 print_indent_tree(f, NENCLOSE(node)->target, indent + add); 06285 break; 06286 case ENCLOSE_MEMORY: 06287 fprintf(f, "memory:%d", NENCLOSE(node)->regnum); 06288 break; 06289 case ENCLOSE_STOP_BACKTRACK: 06290 fprintf(f, "stop-bt"); 06291 break; 06292 06293 default: 06294 break; 06295 } 06296 fprintf(f, "\n"); 06297 print_indent_tree(f, NENCLOSE(node)->target, indent + add); 06298 break; 06299 06300 default: 06301 fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node)); 06302 break; 06303 } 06304 06305 if (type != NT_LIST && type != NT_ALT && type != NT_QTFR && 06306 type != NT_ENCLOSE) 06307 fprintf(f, "\n"); 06308 06309 if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add); 06310 06311 fflush(f); 06312 } 06313 #endif /* ONIG_DEBUG */ 06314 06315 #ifdef ONIG_DEBUG_PARSE_TREE 06316 static void 06317 print_tree(FILE* f, Node* node) 06318 { 06319 print_indent_tree(f, node, 0); 06320 } 06321 #endif 06322