Ruby 1.9.3p327(2012-11-10revision37606)
regcomp.c
Go to the documentation of this file.
00001 /**********************************************************************
00002   regcomp.c -  Oniguruma (regular expression library)
00003 **********************************************************************/
00004 /*-
00005  * Copyright (c) 2002-2008  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
00006  * All rights reserved.
00007  *
00008  * Redistribution and use in source and binary forms, with or without
00009  * modification, are permitted provided that the following conditions
00010  * are met:
00011  * 1. Redistributions of source code must retain the above copyright
00012  *    notice, this list of conditions and the following disclaimer.
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in the
00015  *    documentation and/or other materials provided with the distribution.
00016  *
00017  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
00018  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00019  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00020  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
00021  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00022  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00023  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00024  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00025  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00026  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00027  * SUCH DAMAGE.
00028  */
00029 
00030 #include "regparse.h"
00031 
00032 OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
00033 
00034 extern OnigCaseFoldType
00035 onig_get_default_case_fold_flag(void)
00036 {
00037   return OnigDefaultCaseFoldFlag;
00038 }
00039 
00040 extern int
00041 onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
00042 {
00043   OnigDefaultCaseFoldFlag = case_fold_flag;
00044   return 0;
00045 }
00046 
00047 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
00048 static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
00049 #endif
00050 
00051 static UChar*
00052 str_dup(UChar* s, UChar* end)
00053 {
00054   ptrdiff_t len = end - s;
00055 
00056   if (len > 0) {
00057     UChar* r = (UChar* )xmalloc(len + 1);
00058     CHECK_NULL_RETURN(r);
00059     xmemcpy(r, s, len);
00060     r[len] = (UChar )0;
00061     return r;
00062   }
00063   else return NULL;
00064 }
00065 
00066 static void
00067 swap_node(Node* a, Node* b)
00068 {
00069   Node c;
00070   c = *a; *a = *b; *b = c;
00071 
00072   if (NTYPE(a) == NT_STR) {
00073     StrNode* sn = NSTR(a);
00074     if (sn->capa == 0) {
00075       size_t len = sn->end - sn->s;
00076       sn->s   = sn->buf;
00077       sn->end = sn->s + len;
00078     }
00079   }
00080 
00081   if (NTYPE(b) == NT_STR) {
00082     StrNode* sn = NSTR(b);
00083     if (sn->capa == 0) {
00084       size_t len = sn->end - sn->s;
00085       sn->s   = sn->buf;
00086       sn->end = sn->s + len;
00087     }
00088   }
00089 }
00090 
00091 static OnigDistance
00092 distance_add(OnigDistance d1, OnigDistance d2)
00093 {
00094   if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
00095     return ONIG_INFINITE_DISTANCE;
00096   else {
00097     if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
00098     else return ONIG_INFINITE_DISTANCE;
00099   }
00100 }
00101 
00102 static OnigDistance
00103 distance_multiply(OnigDistance d, int m)
00104 {
00105   if (m == 0) return 0;
00106 
00107   if (d < ONIG_INFINITE_DISTANCE / m)
00108     return d * m;
00109   else
00110     return ONIG_INFINITE_DISTANCE;
00111 }
00112 
00113 static int
00114 bitset_is_empty(BitSetRef bs)
00115 {
00116   int i;
00117   for (i = 0; i < (int )BITSET_SIZE; i++) {
00118     if (bs[i] != 0) return 0;
00119   }
00120   return 1;
00121 }
00122 
00123 #ifdef ONIG_DEBUG
00124 static int
00125 onig_is_prelude(void)
00126 {
00127     return !rb_const_defined(rb_cThread, rb_intern_const("MUTEX_FOR_THREAD_EXCLUSIVE"));
00128 }
00129 
00130 static int
00131 bitset_on_num(BitSetRef bs)
00132 {
00133   int i, n;
00134 
00135   n = 0;
00136   for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
00137     if (BITSET_AT(bs, i)) n++;
00138   }
00139   return n;
00140 }
00141 #endif
00142 
00143 extern int
00144 onig_bbuf_init(BBuf* buf, OnigDistance size)
00145 {
00146   if (size <= 0) {
00147     size   = 0;
00148     buf->p = NULL;
00149   }
00150   else {
00151     buf->p = (UChar* )xmalloc(size);
00152     if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
00153   }
00154 
00155   buf->alloc = (unsigned int)size;
00156   buf->used  = 0;
00157   return 0;
00158 }
00159 
00160 
00161 #ifdef USE_SUBEXP_CALL
00162 
00163 static int
00164 unset_addr_list_init(UnsetAddrList* uslist, int size)
00165 {
00166   UnsetAddr* p;
00167 
00168   p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
00169   CHECK_NULL_RETURN_MEMERR(p);
00170   uslist->num   = 0;
00171   uslist->alloc = size;
00172   uslist->us    = p;
00173   return 0;
00174 }
00175 
00176 static void
00177 unset_addr_list_end(UnsetAddrList* uslist)
00178 {
00179   if (IS_NOT_NULL(uslist->us))
00180     xfree(uslist->us);
00181 }
00182 
00183 static int
00184 unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
00185 {
00186   UnsetAddr* p;
00187   int size;
00188 
00189   if (uslist->num >= uslist->alloc) {
00190     size = uslist->alloc * 2;
00191     p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
00192     CHECK_NULL_RETURN_MEMERR(p);
00193     uslist->alloc = size;
00194     uslist->us    = p;
00195   }
00196 
00197   uslist->us[uslist->num].offset = offset;
00198   uslist->us[uslist->num].target = node;
00199   uslist->num++;
00200   return 0;
00201 }
00202 #endif /* USE_SUBEXP_CALL */
00203 
00204 
00205 static int
00206 add_opcode(regex_t* reg, int opcode)
00207 {
00208   BBUF_ADD1(reg, opcode);
00209   return 0;
00210 }
00211 
00212 #ifdef USE_COMBINATION_EXPLOSION_CHECK
00213 static int
00214 add_state_check_num(regex_t* reg, int num)
00215 {
00216   StateCheckNumType n = (StateCheckNumType )num;
00217 
00218   BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
00219   return 0;
00220 }
00221 #endif
00222 
00223 static int
00224 add_rel_addr(regex_t* reg, int addr)
00225 {
00226   RelAddrType ra = (RelAddrType )addr;
00227 
00228   BBUF_ADD(reg, &ra, SIZE_RELADDR);
00229   return 0;
00230 }
00231 
00232 static int
00233 add_abs_addr(regex_t* reg, int addr)
00234 {
00235   AbsAddrType ra = (AbsAddrType )addr;
00236 
00237   BBUF_ADD(reg, &ra, SIZE_ABSADDR);
00238   return 0;
00239 }
00240 
00241 static int
00242 add_length(regex_t* reg, OnigDistance len)
00243 {
00244   LengthType l = (LengthType )len;
00245 
00246   BBUF_ADD(reg, &l, SIZE_LENGTH);
00247   return 0;
00248 }
00249 
00250 static int
00251 add_mem_num(regex_t* reg, int num)
00252 {
00253   MemNumType n = (MemNumType )num;
00254 
00255   BBUF_ADD(reg, &n, SIZE_MEMNUM);
00256   return 0;
00257 }
00258 
00259 static int
00260 add_pointer(regex_t* reg, void* addr)
00261 {
00262   PointerType ptr = (PointerType )addr;
00263 
00264   BBUF_ADD(reg, &ptr, SIZE_POINTER);
00265   return 0;
00266 }
00267 
00268 static int
00269 add_option(regex_t* reg, OnigOptionType option)
00270 {
00271   BBUF_ADD(reg, &option, SIZE_OPTION);
00272   return 0;
00273 }
00274 
00275 static int
00276 add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
00277 {
00278   int r;
00279 
00280   r = add_opcode(reg, opcode);
00281   if (r) return r;
00282   r = add_rel_addr(reg, addr);
00283   return r;
00284 }
00285 
00286 static int
00287 add_bytes(regex_t* reg, UChar* bytes, OnigDistance len)
00288 {
00289   BBUF_ADD(reg, bytes, len);
00290   return 0;
00291 }
00292 
00293 static int
00294 add_bitset(regex_t* reg, BitSetRef bs)
00295 {
00296   BBUF_ADD(reg, bs, SIZE_BITSET);
00297   return 0;
00298 }
00299 
00300 static int
00301 add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
00302 {
00303   int r;
00304 
00305   r = add_opcode(reg, opcode);
00306   if (r) return r;
00307   r = add_option(reg, option);
00308   return r;
00309 }
00310 
00311 static int compile_length_tree(Node* node, regex_t* reg);
00312 static int compile_tree(Node* node, regex_t* reg);
00313 
00314 
00315 #define IS_NEED_STR_LEN_OP_EXACT(op) \
00316    ((op) == OP_EXACTN    || (op) == OP_EXACTMB2N ||\
00317     (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN  || (op) == OP_EXACTN_IC)
00318 
00319 static int
00320 select_str_opcode(int mb_len, OnigDistance str_len, int ignore_case)
00321 {
00322   int op;
00323 
00324   if (ignore_case) {
00325     switch (str_len) {
00326     case 1:  op = OP_EXACT1_IC; break;
00327     default: op = OP_EXACTN_IC; break;
00328     }
00329   }
00330   else {
00331     switch (mb_len) {
00332     case 1:
00333       switch (str_len) {
00334       case 1:  op = OP_EXACT1; break;
00335       case 2:  op = OP_EXACT2; break;
00336       case 3:  op = OP_EXACT3; break;
00337       case 4:  op = OP_EXACT4; break;
00338       case 5:  op = OP_EXACT5; break;
00339       default: op = OP_EXACTN; break;
00340       }
00341       break;
00342 
00343     case 2:
00344       switch (str_len) {
00345       case 1:  op = OP_EXACTMB2N1; break;
00346       case 2:  op = OP_EXACTMB2N2; break;
00347       case 3:  op = OP_EXACTMB2N3; break;
00348       default: op = OP_EXACTMB2N;  break;
00349       }
00350       break;
00351 
00352     case 3:
00353       op = OP_EXACTMB3N;
00354       break;
00355 
00356     default:
00357       op = OP_EXACTMBN;
00358       break;
00359     }
00360   }
00361   return op;
00362 }
00363 
00364 static int
00365 compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
00366 {
00367   int r;
00368   int saved_num_null_check = reg->num_null_check;
00369 
00370   if (empty_info != 0) {
00371     r = add_opcode(reg, OP_NULL_CHECK_START);
00372     if (r) return r;
00373     r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
00374     if (r) return r;
00375     reg->num_null_check++;
00376   }
00377 
00378   r = compile_tree(node, reg);
00379   if (r) return r;
00380 
00381   if (empty_info != 0) {
00382     if (empty_info == NQ_TARGET_IS_EMPTY)
00383       r = add_opcode(reg, OP_NULL_CHECK_END);
00384     else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
00385       r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
00386     else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
00387       r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
00388 
00389     if (r) return r;
00390     r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
00391   }
00392   return r;
00393 }
00394 
00395 #ifdef USE_SUBEXP_CALL
00396 static int
00397 compile_call(CallNode* node, regex_t* reg)
00398 {
00399   int r;
00400 
00401   r = add_opcode(reg, OP_CALL);
00402   if (r) return r;
00403   r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
00404                           node->target);
00405   if (r) return r;
00406   r = add_abs_addr(reg, 0 /*dummy addr.*/);
00407   return r;
00408 }
00409 #endif
00410 
00411 static int
00412 compile_tree_n_times(Node* node, int n, regex_t* reg)
00413 {
00414   int i, r;
00415 
00416   for (i = 0; i < n; i++) {
00417     r = compile_tree(node, reg);
00418     if (r) return r;
00419   }
00420   return 0;
00421 }
00422 
00423 static int
00424 add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance str_len,
00425                           regex_t* reg ARG_UNUSED, int ignore_case)
00426 {
00427   int len;
00428   int op = select_str_opcode(mb_len, str_len, ignore_case);
00429 
00430   len = SIZE_OPCODE;
00431 
00432   if (op == OP_EXACTMBN)  len += SIZE_LENGTH;
00433   if (IS_NEED_STR_LEN_OP_EXACT(op))
00434     len += SIZE_LENGTH;
00435 
00436   len += mb_len * str_len;
00437   return len;
00438 }
00439 
00440 static int
00441 add_compile_string(UChar* s, int mb_len, OnigDistance str_len,
00442                    regex_t* reg, int ignore_case)
00443 {
00444   int op = select_str_opcode(mb_len, str_len, ignore_case);
00445   add_opcode(reg, op);
00446 
00447   if (op == OP_EXACTMBN)
00448     add_length(reg, mb_len);
00449 
00450   if (IS_NEED_STR_LEN_OP_EXACT(op)) {
00451     if (op == OP_EXACTN_IC)
00452       add_length(reg, mb_len * str_len);
00453     else
00454       add_length(reg, str_len);
00455   }
00456 
00457   add_bytes(reg, s, mb_len * str_len);
00458   return 0;
00459 }
00460 
00461 
00462 static int
00463 compile_length_string_node(Node* node, regex_t* reg)
00464 {
00465   int rlen, r, len, prev_len, slen, ambig;
00466   OnigEncoding enc = reg->enc;
00467   UChar *p, *prev;
00468   StrNode* sn;
00469 
00470   sn = NSTR(node);
00471   if (sn->end <= sn->s)
00472     return 0;
00473 
00474   ambig = NSTRING_IS_AMBIG(node);
00475 
00476   p = prev = sn->s;
00477   prev_len = enclen(enc, p, sn->end);
00478   p += prev_len;
00479   slen = 1;
00480   rlen = 0;
00481 
00482   for (; p < sn->end; ) {
00483     len = enclen(enc, p, sn->end);
00484     if (len == prev_len) {
00485       slen++;
00486     }
00487     else {
00488       r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
00489       rlen += r;
00490       prev = p;
00491       slen = 1;
00492       prev_len = len;
00493     }
00494     p += len;
00495   }
00496   r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
00497   rlen += r;
00498   return rlen;
00499 }
00500 
00501 static int
00502 compile_length_string_raw_node(StrNode* sn, regex_t* reg)
00503 {
00504   if (sn->end <= sn->s)
00505     return 0;
00506 
00507   return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
00508 }
00509 
00510 static int
00511 compile_string_node(Node* node, regex_t* reg)
00512 {
00513   int r, len, prev_len, slen, ambig;
00514   OnigEncoding enc = reg->enc;
00515   UChar *p, *prev, *end;
00516   StrNode* sn;
00517 
00518   sn = NSTR(node);
00519   if (sn->end <= sn->s)
00520     return 0;
00521 
00522   end = sn->end;
00523   ambig = NSTRING_IS_AMBIG(node);
00524 
00525   p = prev = sn->s;
00526   prev_len = enclen(enc, p, end);
00527   p += prev_len;
00528   slen = 1;
00529 
00530   for (; p < end; ) {
00531     len = enclen(enc, p, end);
00532     if (len == prev_len) {
00533       slen++;
00534     }
00535     else {
00536       r = add_compile_string(prev, prev_len, slen, reg, ambig);
00537       if (r) return r;
00538 
00539       prev  = p;
00540       slen  = 1;
00541       prev_len = len;
00542     }
00543 
00544     p += len;
00545   }
00546   return add_compile_string(prev, prev_len, slen, reg, ambig);
00547 }
00548 
00549 static int
00550 compile_string_raw_node(StrNode* sn, regex_t* reg)
00551 {
00552   if (sn->end <= sn->s)
00553     return 0;
00554 
00555   return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
00556 }
00557 
00558 static int
00559 add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
00560 {
00561 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
00562   add_length(reg, mbuf->used);
00563   return add_bytes(reg, mbuf->p, mbuf->used);
00564 #else
00565   int r, pad_size;
00566   UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
00567 
00568   GET_ALIGNMENT_PAD_SIZE(p, pad_size);
00569   add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
00570   if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
00571 
00572   r = add_bytes(reg, mbuf->p, mbuf->used);
00573 
00574   /* padding for return value from compile_length_cclass_node() to be fix. */
00575   pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
00576   if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
00577   return r;
00578 #endif
00579 }
00580 
00581 static int
00582 compile_length_cclass_node(CClassNode* cc, regex_t* reg)
00583 {
00584   int len;
00585 
00586   if (IS_NCCLASS_SHARE(cc)) {
00587     len = SIZE_OPCODE + SIZE_POINTER;
00588     return len;
00589   }
00590 
00591   if (IS_NULL(cc->mbuf)) {
00592     len = SIZE_OPCODE + SIZE_BITSET;
00593   }
00594   else {
00595     if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
00596       len = SIZE_OPCODE;
00597     }
00598     else {
00599       len = SIZE_OPCODE + SIZE_BITSET;
00600     }
00601 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
00602     len += SIZE_LENGTH + cc->mbuf->used;
00603 #else
00604     len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
00605 #endif
00606   }
00607 
00608   return len;
00609 }
00610 
00611 static int
00612 compile_cclass_node(CClassNode* cc, regex_t* reg)
00613 {
00614   int r;
00615 
00616   if (IS_NCCLASS_SHARE(cc)) {
00617     add_opcode(reg, OP_CCLASS_NODE);
00618     r = add_pointer(reg, cc);
00619     return r;
00620   }
00621 
00622   if (IS_NULL(cc->mbuf)) {
00623     if (IS_NCCLASS_NOT(cc))
00624       add_opcode(reg, OP_CCLASS_NOT);
00625     else
00626       add_opcode(reg, OP_CCLASS);
00627 
00628     r = add_bitset(reg, cc->bs);
00629   }
00630   else {
00631     if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
00632       if (IS_NCCLASS_NOT(cc))
00633         add_opcode(reg, OP_CCLASS_MB_NOT);
00634       else
00635         add_opcode(reg, OP_CCLASS_MB);
00636 
00637       r = add_multi_byte_cclass(cc->mbuf, reg);
00638     }
00639     else {
00640       if (IS_NCCLASS_NOT(cc))
00641         add_opcode(reg, OP_CCLASS_MIX_NOT);
00642       else
00643         add_opcode(reg, OP_CCLASS_MIX);
00644 
00645       r = add_bitset(reg, cc->bs);
00646       if (r) return r;
00647       r = add_multi_byte_cclass(cc->mbuf, reg);
00648     }
00649   }
00650 
00651   return r;
00652 }
00653 
00654 static int
00655 entry_repeat_range(regex_t* reg, int id, int lower, int upper)
00656 {
00657 #define REPEAT_RANGE_ALLOC  4
00658 
00659   OnigRepeatRange* p;
00660 
00661   if (reg->repeat_range_alloc == 0) {
00662     p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
00663     CHECK_NULL_RETURN_MEMERR(p);
00664     reg->repeat_range = p;
00665     reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
00666   }
00667   else if (reg->repeat_range_alloc <= id) {
00668     int n;
00669     n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
00670     p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
00671                                     sizeof(OnigRepeatRange) * n);
00672     CHECK_NULL_RETURN_MEMERR(p);
00673     reg->repeat_range = p;
00674     reg->repeat_range_alloc = n;
00675   }
00676   else {
00677     p = reg->repeat_range;
00678   }
00679 
00680   p[id].lower = lower;
00681   p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
00682   return 0;
00683 }
00684 
00685 static int
00686 compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
00687                           regex_t* reg)
00688 {
00689   int r;
00690   int num_repeat = reg->num_repeat;
00691 
00692   r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
00693   if (r) return r;
00694   r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
00695   reg->num_repeat++;
00696   if (r) return r;
00697   r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
00698   if (r) return r;
00699 
00700   r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
00701   if (r) return r;
00702 
00703   r = compile_tree_empty_check(qn->target, reg, empty_info);
00704   if (r) return r;
00705 
00706   if (
00707 #ifdef USE_SUBEXP_CALL
00708       reg->num_call > 0 ||
00709 #endif
00710       IS_QUANTIFIER_IN_REPEAT(qn)) {
00711     r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
00712   }
00713   else {
00714     r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
00715   }
00716   if (r) return r;
00717   r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
00718   return r;
00719 }
00720 
00721 static int
00722 is_anychar_star_quantifier(QtfrNode* qn)
00723 {
00724   if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
00725       NTYPE(qn->target) == NT_CANY)
00726     return 1;
00727   else
00728     return 0;
00729 }
00730 
00731 #define QUANTIFIER_EXPAND_LIMIT_SIZE   50
00732 #define CKN_ON   (ckn > 0)
00733 
00734 #ifdef USE_COMBINATION_EXPLOSION_CHECK
00735 
00736 static int
00737 compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
00738 {
00739   int len, mod_tlen, cklen;
00740   int ckn;
00741   int infinite = IS_REPEAT_INFINITE(qn->upper);
00742   int empty_info = qn->target_empty_info;
00743   int tlen = compile_length_tree(qn->target, reg);
00744 
00745   if (tlen < 0) return tlen;
00746 
00747   ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
00748 
00749   cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
00750 
00751   /* anychar repeat */
00752   if (NTYPE(qn->target) == NT_CANY) {
00753     if (qn->greedy && infinite) {
00754       if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
00755         return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
00756       else
00757         return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
00758     }
00759   }
00760 
00761   if (empty_info != 0)
00762     mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
00763   else
00764     mod_tlen = tlen;
00765 
00766   if (infinite && qn->lower <= 1) {
00767     if (qn->greedy) {
00768       if (qn->lower == 1)
00769         len = SIZE_OP_JUMP;
00770       else
00771         len = 0;
00772 
00773       len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
00774     }
00775     else {
00776       if (qn->lower == 0)
00777         len = SIZE_OP_JUMP;
00778       else
00779         len = 0;
00780 
00781       len += mod_tlen + SIZE_OP_PUSH + cklen;
00782     }
00783   }
00784   else if (qn->upper == 0) {
00785     if (qn->is_refered != 0) /* /(?<n>..){0}/ */
00786       len = SIZE_OP_JUMP + tlen;
00787     else
00788       len = 0;
00789   }
00790   else if (qn->upper == 1 && qn->greedy) {
00791     if (qn->lower == 0) {
00792       if (CKN_ON) {
00793         len = SIZE_OP_STATE_CHECK_PUSH + tlen;
00794       }
00795       else {
00796         len = SIZE_OP_PUSH + tlen;
00797       }
00798     }
00799     else {
00800       len = tlen;
00801     }
00802   }
00803   else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
00804     len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
00805   }
00806   else {
00807     len = SIZE_OP_REPEAT_INC
00808         + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
00809     if (CKN_ON)
00810       len += SIZE_OP_STATE_CHECK;
00811   }
00812 
00813   return len;
00814 }
00815 
00816 static int
00817 compile_quantifier_node(QtfrNode* qn, regex_t* reg)
00818 {
00819   int r, mod_tlen;
00820   int ckn;
00821   int infinite = IS_REPEAT_INFINITE(qn->upper);
00822   int empty_info = qn->target_empty_info;
00823   int tlen = compile_length_tree(qn->target, reg);
00824 
00825   if (tlen < 0) return tlen;
00826 
00827   ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
00828 
00829   if (is_anychar_star_quantifier(qn)) {
00830     r = compile_tree_n_times(qn->target, qn->lower, reg);
00831     if (r) return r;
00832     if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
00833       if (IS_MULTILINE(reg->options))
00834         r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
00835       else
00836         r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
00837       if (r) return r;
00838       if (CKN_ON) {
00839         r = add_state_check_num(reg, ckn);
00840         if (r) return r;
00841       }
00842 
00843       return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
00844     }
00845     else {
00846       if (IS_MULTILINE(reg->options)) {
00847         r = add_opcode(reg, (CKN_ON ?
00848                                OP_STATE_CHECK_ANYCHAR_ML_STAR
00849                              : OP_ANYCHAR_ML_STAR));
00850       }
00851       else {
00852         r = add_opcode(reg, (CKN_ON ?
00853                                OP_STATE_CHECK_ANYCHAR_STAR
00854                              : OP_ANYCHAR_STAR));
00855       }
00856       if (r) return r;
00857       if (CKN_ON)
00858         r = add_state_check_num(reg, ckn);
00859 
00860       return r;
00861     }
00862   }
00863 
00864   if (empty_info != 0)
00865     mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
00866   else
00867     mod_tlen = tlen;
00868 
00869   if (infinite && qn->lower <= 1) {
00870     if (qn->greedy) {
00871       if (qn->lower == 1) {
00872         r = add_opcode_rel_addr(reg, OP_JUMP,
00873                         (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
00874         if (r) return r;
00875       }
00876 
00877       if (CKN_ON) {
00878         r = add_opcode(reg, OP_STATE_CHECK_PUSH);
00879         if (r) return r;
00880         r = add_state_check_num(reg, ckn);
00881         if (r) return r;
00882         r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
00883       }
00884       else {
00885         r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
00886       }
00887       if (r) return r;
00888       r = compile_tree_empty_check(qn->target, reg, empty_info);
00889       if (r) return r;
00890       r = add_opcode_rel_addr(reg, OP_JUMP,
00891               -(mod_tlen + (int )SIZE_OP_JUMP
00892                 + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
00893     }
00894     else {
00895       if (qn->lower == 0) {
00896         r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
00897         if (r) return r;
00898       }
00899       r = compile_tree_empty_check(qn->target, reg, empty_info);
00900       if (r) return r;
00901       if (CKN_ON) {
00902         r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
00903         if (r) return r;
00904         r = add_state_check_num(reg, ckn);
00905         if (r) return r;
00906         r = add_rel_addr(reg,
00907                  -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
00908       }
00909       else
00910         r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
00911     }
00912   }
00913   else if (qn->upper == 0) {
00914     if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
00915       r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
00916       if (r) return r;
00917       r = compile_tree(qn->target, reg);
00918     }
00919     else
00920       r = 0;
00921   }
00922   else if (qn->upper == 1 && qn->greedy) {
00923     if (qn->lower == 0) {
00924       if (CKN_ON) {
00925         r = add_opcode(reg, OP_STATE_CHECK_PUSH);
00926         if (r) return r;
00927         r = add_state_check_num(reg, ckn);
00928         if (r) return r;
00929         r = add_rel_addr(reg, tlen);
00930       }
00931       else {
00932         r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
00933       }
00934       if (r) return r;
00935     }
00936 
00937     r = compile_tree(qn->target, reg);
00938   }
00939   else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
00940     if (CKN_ON) {
00941       r = add_opcode(reg, OP_STATE_CHECK_PUSH);
00942       if (r) return r;
00943       r = add_state_check_num(reg, ckn);
00944       if (r) return r;
00945       r = add_rel_addr(reg, SIZE_OP_JUMP);
00946     }
00947     else {
00948       r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
00949     }
00950 
00951     if (r) return r;
00952     r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
00953     if (r) return r;
00954     r = compile_tree(qn->target, reg);
00955   }
00956   else {
00957     r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
00958     if (CKN_ON) {
00959       if (r) return r;
00960       r = add_opcode(reg, OP_STATE_CHECK);
00961       if (r) return r;
00962       r = add_state_check_num(reg, ckn);
00963     }
00964   }
00965   return r;
00966 }
00967 
00968 #else /* USE_COMBINATION_EXPLOSION_CHECK */
00969 
00970 static int
00971 compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
00972 {
00973   int len, mod_tlen;
00974   int infinite = IS_REPEAT_INFINITE(qn->upper);
00975   int empty_info = qn->target_empty_info;
00976   int tlen = compile_length_tree(qn->target, reg);
00977 
00978   if (tlen < 0) return tlen;
00979 
00980   /* anychar repeat */
00981   if (NTYPE(qn->target) == NT_CANY) {
00982     if (qn->greedy && infinite) {
00983       if (IS_NOT_NULL(qn->next_head_exact))
00984         return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
00985       else
00986         return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
00987     }
00988   }
00989 
00990   if (empty_info != 0)
00991     mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
00992   else
00993     mod_tlen = tlen;
00994 
00995   if (infinite &&
00996       (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
00997     if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
00998       len = SIZE_OP_JUMP;
00999     }
01000     else {
01001       len = tlen * qn->lower;
01002     }
01003 
01004     if (qn->greedy) {
01005       if (IS_NOT_NULL(qn->head_exact))
01006         len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
01007       else if (IS_NOT_NULL(qn->next_head_exact))
01008         len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
01009       else
01010         len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
01011     }
01012     else
01013       len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
01014   }
01015   else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
01016     len = SIZE_OP_JUMP + tlen;
01017   }
01018   else if (!infinite && qn->greedy &&
01019            (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
01020                                       <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
01021     len = tlen * qn->lower;
01022     len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
01023   }
01024   else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
01025     len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
01026   }
01027   else {
01028     len = SIZE_OP_REPEAT_INC
01029         + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
01030   }
01031 
01032   return len;
01033 }
01034 
01035 static int
01036 compile_quantifier_node(QtfrNode* qn, regex_t* reg)
01037 {
01038   int i, r, mod_tlen;
01039   int infinite = IS_REPEAT_INFINITE(qn->upper);
01040   int empty_info = qn->target_empty_info;
01041   int tlen = compile_length_tree(qn->target, reg);
01042 
01043   if (tlen < 0) return tlen;
01044 
01045   if (is_anychar_star_quantifier(qn)) {
01046     r = compile_tree_n_times(qn->target, qn->lower, reg);
01047     if (r) return r;
01048     if (IS_NOT_NULL(qn->next_head_exact)) {
01049       if (IS_MULTILINE(reg->options))
01050         r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
01051       else
01052         r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
01053       if (r) return r;
01054       return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
01055     }
01056     else {
01057       if (IS_MULTILINE(reg->options))
01058         return add_opcode(reg, OP_ANYCHAR_ML_STAR);
01059       else
01060         return add_opcode(reg, OP_ANYCHAR_STAR);
01061     }
01062   }
01063 
01064   if (empty_info != 0)
01065     mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
01066   else
01067     mod_tlen = tlen;
01068 
01069   if (infinite &&
01070       (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
01071     if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
01072       if (qn->greedy) {
01073         if (IS_NOT_NULL(qn->head_exact))
01074           r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
01075         else if (IS_NOT_NULL(qn->next_head_exact))
01076           r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
01077         else
01078           r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
01079       }
01080       else {
01081         r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
01082       }
01083       if (r) return r;
01084     }
01085     else {
01086       r = compile_tree_n_times(qn->target, qn->lower, reg);
01087       if (r) return r;
01088     }
01089 
01090     if (qn->greedy) {
01091       if (IS_NOT_NULL(qn->head_exact)) {
01092         r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
01093                              mod_tlen + SIZE_OP_JUMP);
01094         if (r) return r;
01095         add_bytes(reg, NSTR(qn->head_exact)->s, 1);
01096         r = compile_tree_empty_check(qn->target, reg, empty_info);
01097         if (r) return r;
01098         r = add_opcode_rel_addr(reg, OP_JUMP,
01099         -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
01100       }
01101       else if (IS_NOT_NULL(qn->next_head_exact)) {
01102         r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
01103                                 mod_tlen + SIZE_OP_JUMP);
01104         if (r) return r;
01105         add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
01106         r = compile_tree_empty_check(qn->target, reg, empty_info);
01107         if (r) return r;
01108         r = add_opcode_rel_addr(reg, OP_JUMP,
01109           -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
01110       }
01111       else {
01112         r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
01113         if (r) return r;
01114         r = compile_tree_empty_check(qn->target, reg, empty_info);
01115         if (r) return r;
01116         r = add_opcode_rel_addr(reg, OP_JUMP,
01117                      -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
01118       }
01119     }
01120     else {
01121       r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
01122       if (r) return r;
01123       r = compile_tree_empty_check(qn->target, reg, empty_info);
01124       if (r) return r;
01125       r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
01126     }
01127   }
01128   else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
01129     r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
01130     if (r) return r;
01131     r = compile_tree(qn->target, reg);
01132   }
01133   else if (!infinite && qn->greedy &&
01134            (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
01135                                   <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
01136     int n = qn->upper - qn->lower;
01137 
01138     r = compile_tree_n_times(qn->target, qn->lower, reg);
01139     if (r) return r;
01140 
01141     for (i = 0; i < n; i++) {
01142       r = add_opcode_rel_addr(reg, OP_PUSH,
01143                            (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
01144       if (r) return r;
01145       r = compile_tree(qn->target, reg);
01146       if (r) return r;
01147     }
01148   }
01149   else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
01150     r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
01151     if (r) return r;
01152     r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
01153     if (r) return r;
01154     r = compile_tree(qn->target, reg);
01155   }
01156   else {
01157     r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
01158   }
01159   return r;
01160 }
01161 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
01162 
01163 static int
01164 compile_length_option_node(EncloseNode* node, regex_t* reg)
01165 {
01166   int tlen;
01167   OnigOptionType prev = reg->options;
01168 
01169   reg->options = node->option;
01170   tlen = compile_length_tree(node->target, reg);
01171   reg->options = prev;
01172 
01173   if (tlen < 0) return tlen;
01174 
01175   if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
01176     return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
01177            + tlen + SIZE_OP_SET_OPTION;
01178   }
01179   else
01180     return tlen;
01181 }
01182 
01183 static int
01184 compile_option_node(EncloseNode* node, regex_t* reg)
01185 {
01186   int r;
01187   OnigOptionType prev = reg->options;
01188 
01189   if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
01190     r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
01191     if (r) return r;
01192     r = add_opcode_option(reg, OP_SET_OPTION, prev);
01193     if (r) return r;
01194     r = add_opcode(reg, OP_FAIL);
01195     if (r) return r;
01196   }
01197 
01198   reg->options = node->option;
01199   r = compile_tree(node->target, reg);
01200   reg->options = prev;
01201 
01202   if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
01203     if (r) return r;
01204     r = add_opcode_option(reg, OP_SET_OPTION, prev);
01205   }
01206   return r;
01207 }
01208 
01209 static int
01210 compile_length_enclose_node(EncloseNode* node, regex_t* reg)
01211 {
01212   int len;
01213   int tlen;
01214 
01215   if (node->type == ENCLOSE_OPTION)
01216     return compile_length_option_node(node, reg);
01217 
01218   if (node->target) {
01219     tlen = compile_length_tree(node->target, reg);
01220     if (tlen < 0) return tlen;
01221   }
01222   else
01223     tlen = 0;
01224 
01225   switch (node->type) {
01226   case ENCLOSE_MEMORY:
01227 #ifdef USE_SUBEXP_CALL
01228     if (IS_ENCLOSE_CALLED(node)) {
01229       len = SIZE_OP_MEMORY_START_PUSH + tlen
01230           + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
01231       if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
01232         len += (IS_ENCLOSE_RECURSION(node)
01233                 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
01234       else
01235         len += (IS_ENCLOSE_RECURSION(node)
01236                 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
01237     }
01238     else
01239 #endif
01240     {
01241       if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
01242         len = SIZE_OP_MEMORY_START_PUSH;
01243       else
01244         len = SIZE_OP_MEMORY_START;
01245 
01246       len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
01247                      ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
01248     }
01249     break;
01250 
01251   case ENCLOSE_STOP_BACKTRACK:
01252     if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
01253       QtfrNode* qn = NQTFR(node->target);
01254       tlen = compile_length_tree(qn->target, reg);
01255       if (tlen < 0) return tlen;
01256 
01257       len = tlen * qn->lower
01258           + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
01259     }
01260     else {
01261       len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
01262     }
01263     break;
01264 
01265   default:
01266     return ONIGERR_TYPE_BUG;
01267     break;
01268   }
01269 
01270   return len;
01271 }
01272 
01273 static int get_char_length_tree(Node* node, regex_t* reg, int* len);
01274 
01275 static int
01276 compile_enclose_node(EncloseNode* node, regex_t* reg)
01277 {
01278   int r, len;
01279 
01280   if (node->type == ENCLOSE_OPTION)
01281     return compile_option_node(node, reg);
01282 
01283   switch (node->type) {
01284   case ENCLOSE_MEMORY:
01285 #ifdef USE_SUBEXP_CALL
01286     if (IS_ENCLOSE_CALLED(node)) {
01287       r = add_opcode(reg, OP_CALL);
01288       if (r) return r;
01289       node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
01290       node->state |= NST_ADDR_FIXED;
01291       r = add_abs_addr(reg, (int )node->call_addr);
01292       if (r) return r;
01293       len = compile_length_tree(node->target, reg);
01294       len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
01295       if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
01296         len += (IS_ENCLOSE_RECURSION(node)
01297                 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
01298       else
01299         len += (IS_ENCLOSE_RECURSION(node)
01300                 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
01301 
01302       r = add_opcode_rel_addr(reg, OP_JUMP, len);
01303       if (r) return r;
01304     }
01305 #endif
01306     if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
01307       r = add_opcode(reg, OP_MEMORY_START_PUSH);
01308     else
01309       r = add_opcode(reg, OP_MEMORY_START);
01310     if (r) return r;
01311     r = add_mem_num(reg, node->regnum);
01312     if (r) return r;
01313     r = compile_tree(node->target, reg);
01314     if (r) return r;
01315 #ifdef USE_SUBEXP_CALL
01316     if (IS_ENCLOSE_CALLED(node)) {
01317       if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
01318         r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
01319                              ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
01320       else
01321         r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
01322                              ? OP_MEMORY_END_REC : OP_MEMORY_END));
01323 
01324       if (r) return r;
01325       r = add_mem_num(reg, node->regnum);
01326       if (r) return r;
01327       r = add_opcode(reg, OP_RETURN);
01328     }
01329     else
01330 #endif
01331     {
01332       if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
01333         r = add_opcode(reg, OP_MEMORY_END_PUSH);
01334       else
01335         r = add_opcode(reg, OP_MEMORY_END);
01336       if (r) return r;
01337       r = add_mem_num(reg, node->regnum);
01338     }
01339     break;
01340 
01341   case ENCLOSE_STOP_BACKTRACK:
01342     if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
01343       QtfrNode* qn = NQTFR(node->target);
01344       r = compile_tree_n_times(qn->target, qn->lower, reg);
01345       if (r) return r;
01346 
01347       len = compile_length_tree(qn->target, reg);
01348       if (len < 0) return len;
01349 
01350       r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
01351       if (r) return r;
01352       r = compile_tree(qn->target, reg);
01353       if (r) return r;
01354       r = add_opcode(reg, OP_POP);
01355       if (r) return r;
01356       r = add_opcode_rel_addr(reg, OP_JUMP,
01357          -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
01358     }
01359     else {
01360       r = add_opcode(reg, OP_PUSH_STOP_BT);
01361       if (r) return r;
01362       r = compile_tree(node->target, reg);
01363       if (r) return r;
01364       r = add_opcode(reg, OP_POP_STOP_BT);
01365     }
01366     break;
01367 
01368   default:
01369     return ONIGERR_TYPE_BUG;
01370     break;
01371   }
01372 
01373   return r;
01374 }
01375 
01376 static int
01377 compile_length_anchor_node(AnchorNode* node, regex_t* reg)
01378 {
01379   int len;
01380   int tlen = 0;
01381 
01382   if (node->target) {
01383     tlen = compile_length_tree(node->target, reg);
01384     if (tlen < 0) return tlen;
01385   }
01386 
01387   switch (node->type) {
01388   case ANCHOR_PREC_READ:
01389     len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
01390     break;
01391   case ANCHOR_PREC_READ_NOT:
01392     len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
01393     break;
01394   case ANCHOR_LOOK_BEHIND:
01395     len = SIZE_OP_LOOK_BEHIND + tlen;
01396     break;
01397   case ANCHOR_LOOK_BEHIND_NOT:
01398     len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
01399     break;
01400 
01401   default:
01402     len = SIZE_OPCODE;
01403     break;
01404   }
01405 
01406   return len;
01407 }
01408 
01409 static int
01410 compile_anchor_node(AnchorNode* node, regex_t* reg)
01411 {
01412   int r, len;
01413 
01414   switch (node->type) {
01415   case ANCHOR_BEGIN_BUF:      r = add_opcode(reg, OP_BEGIN_BUF);      break;
01416   case ANCHOR_END_BUF:        r = add_opcode(reg, OP_END_BUF);        break;
01417   case ANCHOR_BEGIN_LINE:     r = add_opcode(reg, OP_BEGIN_LINE);     break;
01418   case ANCHOR_END_LINE:       r = add_opcode(reg, OP_END_LINE);       break;
01419   case ANCHOR_SEMI_END_BUF:   r = add_opcode(reg, OP_SEMI_END_BUF);   break;
01420   case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
01421 
01422   case ANCHOR_WORD_BOUND:     r = add_opcode(reg, OP_WORD_BOUND);     break;
01423   case ANCHOR_NOT_WORD_BOUND: r = add_opcode(reg, OP_NOT_WORD_BOUND); break;
01424 #ifdef USE_WORD_BEGIN_END
01425   case ANCHOR_WORD_BEGIN:     r = add_opcode(reg, OP_WORD_BEGIN);     break;
01426   case ANCHOR_WORD_END:       r = add_opcode(reg, OP_WORD_END);       break;
01427 #endif
01428 
01429   case ANCHOR_PREC_READ:
01430     r = add_opcode(reg, OP_PUSH_POS);
01431     if (r) return r;
01432     r = compile_tree(node->target, reg);
01433     if (r) return r;
01434     r = add_opcode(reg, OP_POP_POS);
01435     break;
01436 
01437   case ANCHOR_PREC_READ_NOT:
01438     len = compile_length_tree(node->target, reg);
01439     if (len < 0) return len;
01440     r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
01441     if (r) return r;
01442     r = compile_tree(node->target, reg);
01443     if (r) return r;
01444     r = add_opcode(reg, OP_FAIL_POS);
01445     break;
01446 
01447   case ANCHOR_LOOK_BEHIND:
01448     {
01449       int n;
01450       r = add_opcode(reg, OP_LOOK_BEHIND);
01451       if (r) return r;
01452       if (node->char_len < 0) {
01453         r = get_char_length_tree(node->target, reg, &n);
01454         if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
01455       }
01456       else
01457         n = node->char_len;
01458       r = add_length(reg, n);
01459       if (r) return r;
01460       r = compile_tree(node->target, reg);
01461     }
01462     break;
01463 
01464   case ANCHOR_LOOK_BEHIND_NOT:
01465     {
01466       int n;
01467       len = compile_length_tree(node->target, reg);
01468       r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
01469                            len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
01470       if (r) return r;
01471       if (node->char_len < 0) {
01472         r = get_char_length_tree(node->target, reg, &n);
01473         if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
01474       }
01475       else
01476         n = node->char_len;
01477       r = add_length(reg, n);
01478       if (r) return r;
01479       r = compile_tree(node->target, reg);
01480       if (r) return r;
01481       r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
01482     }
01483     break;
01484 
01485   default:
01486     return ONIGERR_TYPE_BUG;
01487     break;
01488   }
01489 
01490   return r;
01491 }
01492 
01493 static int
01494 compile_length_tree(Node* node, regex_t* reg)
01495 {
01496   int len, type, r;
01497 
01498   type = NTYPE(node);
01499   switch (type) {
01500   case NT_LIST:
01501     len = 0;
01502     do {
01503       r = compile_length_tree(NCAR(node), reg);
01504       if (r < 0) return r;
01505       len += r;
01506     } while (IS_NOT_NULL(node = NCDR(node)));
01507     r = len;
01508     break;
01509 
01510   case NT_ALT:
01511     {
01512       int n;
01513 
01514       n = r = 0;
01515       do {
01516         r += compile_length_tree(NCAR(node), reg);
01517         n++;
01518       } while (IS_NOT_NULL(node = NCDR(node)));
01519       r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
01520     }
01521     break;
01522 
01523   case NT_STR:
01524     if (NSTRING_IS_RAW(node))
01525       r = compile_length_string_raw_node(NSTR(node), reg);
01526     else
01527       r = compile_length_string_node(node, reg);
01528     break;
01529 
01530   case NT_CCLASS:
01531     r = compile_length_cclass_node(NCCLASS(node), reg);
01532     break;
01533 
01534   case NT_CTYPE:
01535   case NT_CANY:
01536     r = SIZE_OPCODE;
01537     break;
01538 
01539   case NT_BREF:
01540     {
01541       BRefNode* br = NBREF(node);
01542 
01543 #ifdef USE_BACKREF_WITH_LEVEL
01544       if (IS_BACKREF_NEST_LEVEL(br)) {
01545         r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
01546             SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
01547       }
01548       else
01549 #endif
01550       if (br->back_num == 1) {
01551         r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
01552              ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
01553       }
01554       else {
01555         r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
01556       }
01557     }
01558     break;
01559 
01560 #ifdef USE_SUBEXP_CALL
01561   case NT_CALL:
01562     r = SIZE_OP_CALL;
01563     break;
01564 #endif
01565 
01566   case NT_QTFR:
01567     r = compile_length_quantifier_node(NQTFR(node), reg);
01568     break;
01569 
01570   case NT_ENCLOSE:
01571     r = compile_length_enclose_node(NENCLOSE(node), reg);
01572     break;
01573 
01574   case NT_ANCHOR:
01575     r = compile_length_anchor_node(NANCHOR(node), reg);
01576     break;
01577 
01578   default:
01579     return ONIGERR_TYPE_BUG;
01580     break;
01581   }
01582 
01583   return r;
01584 }
01585 
01586 static int
01587 compile_tree(Node* node, regex_t* reg)
01588 {
01589   int n, type, len, pos, r = 0;
01590 
01591   type = NTYPE(node);
01592   switch (type) {
01593   case NT_LIST:
01594     do {
01595       r = compile_tree(NCAR(node), reg);
01596     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
01597     break;
01598 
01599   case NT_ALT:
01600     {
01601       Node* x = node;
01602       len = 0;
01603       do {
01604         len += compile_length_tree(NCAR(x), reg);
01605         if (NCDR(x) != NULL) {
01606           len += SIZE_OP_PUSH + SIZE_OP_JUMP;
01607         }
01608       } while (IS_NOT_NULL(x = NCDR(x)));
01609       pos = reg->used + len;  /* goal position */
01610 
01611       do {
01612         len = compile_length_tree(NCAR(node), reg);
01613         if (IS_NOT_NULL(NCDR(node))) {
01614           r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
01615           if (r) break;
01616         }
01617         r = compile_tree(NCAR(node), reg);
01618         if (r) break;
01619         if (IS_NOT_NULL(NCDR(node))) {
01620           len = pos - (reg->used + SIZE_OP_JUMP);
01621           r = add_opcode_rel_addr(reg, OP_JUMP, len);
01622           if (r) break;
01623         }
01624       } while (IS_NOT_NULL(node = NCDR(node)));
01625     }
01626     break;
01627 
01628   case NT_STR:
01629     if (NSTRING_IS_RAW(node))
01630       r = compile_string_raw_node(NSTR(node), reg);
01631     else
01632       r = compile_string_node(node, reg);
01633     break;
01634 
01635   case NT_CCLASS:
01636     r = compile_cclass_node(NCCLASS(node), reg);
01637     break;
01638 
01639   case NT_CTYPE:
01640     {
01641       int op;
01642 
01643       switch (NCTYPE(node)->ctype) {
01644       case ONIGENC_CTYPE_WORD:
01645         if (NCTYPE(node)->not != 0)  op = OP_NOT_WORD;
01646         else                         op = OP_WORD;
01647         break;
01648       default:
01649         return ONIGERR_TYPE_BUG;
01650         break;
01651       }
01652       r = add_opcode(reg, op);
01653     }
01654     break;
01655 
01656   case NT_CANY:
01657     if (IS_MULTILINE(reg->options))
01658       r = add_opcode(reg, OP_ANYCHAR_ML);
01659     else
01660       r = add_opcode(reg, OP_ANYCHAR);
01661     break;
01662 
01663   case NT_BREF:
01664     {
01665       BRefNode* br = NBREF(node);
01666 
01667 #ifdef USE_BACKREF_WITH_LEVEL
01668       if (IS_BACKREF_NEST_LEVEL(br)) {
01669         r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
01670         if (r) return r;
01671         r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
01672         if (r) return r;
01673         r = add_length(reg, br->nest_level);
01674         if (r) return r;
01675 
01676         goto add_bacref_mems;
01677       }
01678       else
01679 #endif
01680       if (br->back_num == 1) {
01681         n = br->back_static[0];
01682         if (IS_IGNORECASE(reg->options)) {
01683           r = add_opcode(reg, OP_BACKREFN_IC);
01684           if (r) return r;
01685           r = add_mem_num(reg, n);
01686         }
01687         else {
01688           switch (n) {
01689           case 1:  r = add_opcode(reg, OP_BACKREF1); break;
01690           case 2:  r = add_opcode(reg, OP_BACKREF2); break;
01691           default:
01692             r = add_opcode(reg, OP_BACKREFN);
01693             if (r) return r;
01694             r = add_mem_num(reg, n);
01695             break;
01696           }
01697         }
01698       }
01699       else {
01700         int i;
01701         int* p;
01702 
01703         if (IS_IGNORECASE(reg->options)) {
01704           r = add_opcode(reg, OP_BACKREF_MULTI_IC);
01705         }
01706         else {
01707           r = add_opcode(reg, OP_BACKREF_MULTI);
01708         }
01709         if (r) return r;
01710 
01711 #ifdef USE_BACKREF_WITH_LEVEL
01712       add_bacref_mems:
01713 #endif
01714         r = add_length(reg, br->back_num);
01715         if (r) return r;
01716         p = BACKREFS_P(br);
01717         for (i = br->back_num - 1; i >= 0; i--) {
01718           r = add_mem_num(reg, p[i]);
01719           if (r) return r;
01720         }
01721       }
01722     }
01723     break;
01724 
01725 #ifdef USE_SUBEXP_CALL
01726   case NT_CALL:
01727     r = compile_call(NCALL(node), reg);
01728     break;
01729 #endif
01730 
01731   case NT_QTFR:
01732     r = compile_quantifier_node(NQTFR(node), reg);
01733     break;
01734 
01735   case NT_ENCLOSE:
01736     r = compile_enclose_node(NENCLOSE(node), reg);
01737     break;
01738 
01739   case NT_ANCHOR:
01740     r = compile_anchor_node(NANCHOR(node), reg);
01741     break;
01742 
01743   default:
01744 #ifdef ONIG_DEBUG
01745     fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
01746 #endif
01747     break;
01748   }
01749 
01750   return r;
01751 }
01752 
01753 #ifdef USE_NAMED_GROUP
01754 
01755 static int
01756 noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
01757 {
01758   int r = 0;
01759   Node* node = *plink;
01760 
01761   switch (NTYPE(node)) {
01762   case NT_LIST:
01763   case NT_ALT:
01764     do {
01765       r = noname_disable_map(&(NCAR(node)), map, counter);
01766     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
01767     break;
01768 
01769   case NT_QTFR:
01770     {
01771       Node** ptarget = &(NQTFR(node)->target);
01772       Node*  old = *ptarget;
01773       r = noname_disable_map(ptarget, map, counter);
01774       if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
01775         onig_reduce_nested_quantifier(node, *ptarget);
01776       }
01777     }
01778     break;
01779 
01780   case NT_ENCLOSE:
01781     {
01782       EncloseNode* en = NENCLOSE(node);
01783       if (en->type == ENCLOSE_MEMORY) {
01784         if (IS_ENCLOSE_NAMED_GROUP(en)) {
01785           (*counter)++;
01786           map[en->regnum].new_val = *counter;
01787           en->regnum = *counter;
01788           r = noname_disable_map(&(en->target), map, counter);
01789         }
01790         else {
01791           *plink = en->target;
01792           en->target = NULL_NODE;
01793           onig_node_free(node);
01794           r = noname_disable_map(plink, map, counter);
01795         }
01796       }
01797       else
01798         r = noname_disable_map(&(en->target), map, counter);
01799     }
01800     break;
01801 
01802   case NT_ANCHOR:
01803     {
01804       AnchorNode* an = NANCHOR(node);
01805       switch (an->type) {
01806       case ANCHOR_PREC_READ:
01807       case ANCHOR_PREC_READ_NOT:
01808       case ANCHOR_LOOK_BEHIND:
01809       case ANCHOR_LOOK_BEHIND_NOT:
01810         r = noname_disable_map(&(an->target), map, counter);
01811         break;
01812       }
01813     }
01814     break;
01815 
01816   default:
01817     break;
01818   }
01819 
01820   return r;
01821 }
01822 
01823 static int
01824 renumber_node_backref(Node* node, GroupNumRemap* map)
01825 {
01826   int i, pos, n, old_num;
01827   int *backs;
01828   BRefNode* bn = NBREF(node);
01829 
01830   if (! IS_BACKREF_NAME_REF(bn))
01831     return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
01832 
01833   old_num = bn->back_num;
01834   if (IS_NULL(bn->back_dynamic))
01835     backs = bn->back_static;
01836   else
01837     backs = bn->back_dynamic;
01838 
01839   for (i = 0, pos = 0; i < old_num; i++) {
01840     n = map[backs[i]].new_val;
01841     if (n > 0) {
01842       backs[pos] = n;
01843       pos++;
01844     }
01845   }
01846 
01847   bn->back_num = pos;
01848   return 0;
01849 }
01850 
01851 static int
01852 renumber_by_map(Node* node, GroupNumRemap* map)
01853 {
01854   int r = 0;
01855 
01856   switch (NTYPE(node)) {
01857   case NT_LIST:
01858   case NT_ALT:
01859     do {
01860       r = renumber_by_map(NCAR(node), map);
01861     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
01862     break;
01863   case NT_QTFR:
01864     r = renumber_by_map(NQTFR(node)->target, map);
01865     break;
01866   case NT_ENCLOSE:
01867     r = renumber_by_map(NENCLOSE(node)->target, map);
01868     break;
01869 
01870   case NT_BREF:
01871     r = renumber_node_backref(node, map);
01872     break;
01873 
01874   case NT_ANCHOR:
01875     {
01876       AnchorNode* an = NANCHOR(node);
01877       switch (an->type) {
01878       case ANCHOR_PREC_READ:
01879       case ANCHOR_PREC_READ_NOT:
01880       case ANCHOR_LOOK_BEHIND:
01881       case ANCHOR_LOOK_BEHIND_NOT:
01882         r = renumber_by_map(an->target, map);
01883         break;
01884       }
01885     }
01886     break;
01887 
01888   default:
01889     break;
01890   }
01891 
01892   return r;
01893 }
01894 
01895 static int
01896 numbered_ref_check(Node* node)
01897 {
01898   int r = 0;
01899 
01900   switch (NTYPE(node)) {
01901   case NT_LIST:
01902   case NT_ALT:
01903     do {
01904       r = numbered_ref_check(NCAR(node));
01905     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
01906     break;
01907   case NT_QTFR:
01908     r = numbered_ref_check(NQTFR(node)->target);
01909     break;
01910   case NT_ENCLOSE:
01911     r = numbered_ref_check(NENCLOSE(node)->target);
01912     break;
01913 
01914   case NT_BREF:
01915     if (! IS_BACKREF_NAME_REF(NBREF(node)))
01916       return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
01917     break;
01918 
01919   default:
01920     break;
01921   }
01922 
01923   return r;
01924 }
01925 
01926 static int
01927 disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
01928 {
01929   int r, i, pos, counter;
01930   BitStatusType loc;
01931   GroupNumRemap* map;
01932 
01933   map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
01934   CHECK_NULL_RETURN_MEMERR(map);
01935   for (i = 1; i <= env->num_mem; i++) {
01936     map[i].new_val = 0;
01937   }
01938   counter = 0;
01939   r = noname_disable_map(root, map, &counter);
01940   if (r != 0) return r;
01941 
01942   r = renumber_by_map(*root, map);
01943   if (r != 0) return r;
01944 
01945   for (i = 1, pos = 1; i <= env->num_mem; i++) {
01946     if (map[i].new_val > 0) {
01947       SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
01948       pos++;
01949     }
01950   }
01951 
01952   loc = env->capture_history;
01953   BIT_STATUS_CLEAR(env->capture_history);
01954   for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
01955     if (BIT_STATUS_AT(loc, i)) {
01956       BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
01957     }
01958   }
01959 
01960   env->num_mem = env->num_named;
01961   reg->num_mem = env->num_named;
01962 
01963   return onig_renumber_name_table(reg, map);
01964 }
01965 #endif /* USE_NAMED_GROUP */
01966 
01967 #ifdef USE_SUBEXP_CALL
01968 static int
01969 unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
01970 {
01971   int i, offset;
01972   EncloseNode* en;
01973   AbsAddrType addr;
01974 
01975   for (i = 0; i < uslist->num; i++) {
01976     en = NENCLOSE(uslist->us[i].target);
01977     if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
01978     addr = en->call_addr;
01979     offset = uslist->us[i].offset;
01980 
01981     BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
01982   }
01983   return 0;
01984 }
01985 #endif
01986 
01987 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
01988 static int
01989 quantifiers_memory_node_info(Node* node)
01990 {
01991   int r = 0;
01992 
01993   switch (NTYPE(node)) {
01994   case NT_LIST:
01995   case NT_ALT:
01996     {
01997       int v;
01998       do {
01999         v = quantifiers_memory_node_info(NCAR(node));
02000         if (v > r) r = v;
02001       } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
02002     }
02003     break;
02004 
02005 #ifdef USE_SUBEXP_CALL
02006   case NT_CALL:
02007     if (IS_CALL_RECURSION(NCALL(node))) {
02008       return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
02009     }
02010     else
02011       r = quantifiers_memory_node_info(NCALL(node)->target);
02012     break;
02013 #endif
02014 
02015   case NT_QTFR:
02016     {
02017       QtfrNode* qn = NQTFR(node);
02018       if (qn->upper != 0) {
02019         r = quantifiers_memory_node_info(qn->target);
02020       }
02021     }
02022     break;
02023 
02024   case NT_ENCLOSE:
02025     {
02026       EncloseNode* en = NENCLOSE(node);
02027       switch (en->type) {
02028       case ENCLOSE_MEMORY:
02029         return NQ_TARGET_IS_EMPTY_MEM;
02030         break;
02031 
02032       case ENCLOSE_OPTION:
02033       case ENCLOSE_STOP_BACKTRACK:
02034         r = quantifiers_memory_node_info(en->target);
02035         break;
02036       default:
02037         break;
02038       }
02039     }
02040     break;
02041 
02042   case NT_BREF:
02043   case NT_STR:
02044   case NT_CTYPE:
02045   case NT_CCLASS:
02046   case NT_CANY:
02047   case NT_ANCHOR:
02048   default:
02049     break;
02050   }
02051 
02052   return r;
02053 }
02054 #endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
02055 
02056 static int
02057 get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
02058 {
02059   OnigDistance tmin;
02060   int r = 0;
02061 
02062   *min = 0;
02063   switch (NTYPE(node)) {
02064   case NT_BREF:
02065     {
02066       int i;
02067       int* backs;
02068       Node** nodes = SCANENV_MEM_NODES(env);
02069       BRefNode* br = NBREF(node);
02070       if (br->state & NST_RECURSION) break;
02071 
02072       backs = BACKREFS_P(br);
02073       if (backs[0] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
02074       r = get_min_match_length(nodes[backs[0]], min, env);
02075       if (r != 0) break;
02076       for (i = 1; i < br->back_num; i++) {
02077         if (backs[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
02078         r = get_min_match_length(nodes[backs[i]], &tmin, env);
02079         if (r != 0) break;
02080         if (*min > tmin) *min = tmin;
02081       }
02082     }
02083     break;
02084 
02085 #ifdef USE_SUBEXP_CALL
02086   case NT_CALL:
02087     if (IS_CALL_RECURSION(NCALL(node))) {
02088       EncloseNode* en = NENCLOSE(NCALL(node)->target);
02089       if (IS_ENCLOSE_MIN_FIXED(en))
02090         *min = en->min_len;
02091     }
02092     else
02093       r = get_min_match_length(NCALL(node)->target, min, env);
02094     break;
02095 #endif
02096 
02097   case NT_LIST:
02098     do {
02099       r = get_min_match_length(NCAR(node), &tmin, env);
02100       if (r == 0) *min += tmin;
02101     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02102     break;
02103 
02104   case NT_ALT:
02105     {
02106       Node *x, *y;
02107       y = node;
02108       do {
02109         x = NCAR(y);
02110         r = get_min_match_length(x, &tmin, env);
02111         if (r != 0) break;
02112         if (y == node) *min = tmin;
02113         else if (*min > tmin) *min = tmin;
02114       } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
02115     }
02116     break;
02117 
02118   case NT_STR:
02119     {
02120       StrNode* sn = NSTR(node);
02121       *min = sn->end - sn->s;
02122     }
02123     break;
02124 
02125   case NT_CTYPE:
02126     *min = 1;
02127     break;
02128 
02129   case NT_CCLASS:
02130   case NT_CANY:
02131     *min = 1;
02132     break;
02133 
02134   case NT_QTFR:
02135     {
02136       QtfrNode* qn = NQTFR(node);
02137 
02138       if (qn->lower > 0) {
02139         r = get_min_match_length(qn->target, min, env);
02140         if (r == 0)
02141           *min = distance_multiply(*min, qn->lower);
02142       }
02143     }
02144     break;
02145 
02146   case NT_ENCLOSE:
02147     {
02148       EncloseNode* en = NENCLOSE(node);
02149       switch (en->type) {
02150       case ENCLOSE_MEMORY:
02151 #ifdef USE_SUBEXP_CALL
02152         if (IS_ENCLOSE_MIN_FIXED(en))
02153           *min = en->min_len;
02154         else {
02155           r = get_min_match_length(en->target, min, env);
02156           if (r == 0) {
02157             en->min_len = *min;
02158             SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
02159           }
02160         }
02161         break;
02162 #endif
02163       case ENCLOSE_OPTION:
02164       case ENCLOSE_STOP_BACKTRACK:
02165         r = get_min_match_length(en->target, min, env);
02166         break;
02167       }
02168     }
02169     break;
02170 
02171   case NT_ANCHOR:
02172   default:
02173     break;
02174   }
02175 
02176   return r;
02177 }
02178 
02179 static int
02180 get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
02181 {
02182   OnigDistance tmax;
02183   int r = 0;
02184 
02185   *max = 0;
02186   switch (NTYPE(node)) {
02187   case NT_LIST:
02188     do {
02189       r = get_max_match_length(NCAR(node), &tmax, env);
02190       if (r == 0)
02191         *max = distance_add(*max, tmax);
02192     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02193     break;
02194 
02195   case NT_ALT:
02196     do {
02197       r = get_max_match_length(NCAR(node), &tmax, env);
02198       if (r == 0 && *max < tmax) *max = tmax;
02199     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02200     break;
02201 
02202   case NT_STR:
02203     {
02204       StrNode* sn = NSTR(node);
02205       *max = sn->end - sn->s;
02206     }
02207     break;
02208 
02209   case NT_CTYPE:
02210     *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
02211     break;
02212 
02213   case NT_CCLASS:
02214   case NT_CANY:
02215     *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
02216     break;
02217 
02218   case NT_BREF:
02219     {
02220       int i;
02221       int* backs;
02222       Node** nodes = SCANENV_MEM_NODES(env);
02223       BRefNode* br = NBREF(node);
02224       if (br->state & NST_RECURSION) {
02225         *max = ONIG_INFINITE_DISTANCE;
02226         break;
02227       }
02228       backs = BACKREFS_P(br);
02229       for (i = 0; i < br->back_num; i++) {
02230         if (backs[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
02231         r = get_max_match_length(nodes[backs[i]], &tmax, env);
02232         if (r != 0) break;
02233         if (*max < tmax) *max = tmax;
02234       }
02235     }
02236     break;
02237 
02238 #ifdef USE_SUBEXP_CALL
02239   case NT_CALL:
02240     if (! IS_CALL_RECURSION(NCALL(node)))
02241       r = get_max_match_length(NCALL(node)->target, max, env);
02242     else
02243       *max = ONIG_INFINITE_DISTANCE;
02244     break;
02245 #endif
02246 
02247   case NT_QTFR:
02248     {
02249       QtfrNode* qn = NQTFR(node);
02250 
02251       if (qn->upper != 0) {
02252         r = get_max_match_length(qn->target, max, env);
02253         if (r == 0 && *max != 0) {
02254           if (! IS_REPEAT_INFINITE(qn->upper))
02255             *max = distance_multiply(*max, qn->upper);
02256           else
02257             *max = ONIG_INFINITE_DISTANCE;
02258         }
02259       }
02260     }
02261     break;
02262 
02263   case NT_ENCLOSE:
02264     {
02265       EncloseNode* en = NENCLOSE(node);
02266       switch (en->type) {
02267       case ENCLOSE_MEMORY:
02268 #ifdef USE_SUBEXP_CALL
02269         if (IS_ENCLOSE_MAX_FIXED(en))
02270           *max = en->max_len;
02271         else {
02272           r = get_max_match_length(en->target, max, env);
02273           if (r == 0) {
02274             en->max_len = *max;
02275             SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
02276           }
02277         }
02278         break;
02279 #endif
02280       case ENCLOSE_OPTION:
02281       case ENCLOSE_STOP_BACKTRACK:
02282         r = get_max_match_length(en->target, max, env);
02283         break;
02284       }
02285     }
02286     break;
02287 
02288   case NT_ANCHOR:
02289   default:
02290     break;
02291   }
02292 
02293   return r;
02294 }
02295 
02296 #define GET_CHAR_LEN_VARLEN           -1
02297 #define GET_CHAR_LEN_TOP_ALT_VARLEN   -2
02298 
02299 /* fixed size pattern node only */
02300 static int
02301 get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
02302 {
02303   int tlen;
02304   int r = 0;
02305 
02306   level++;
02307   *len = 0;
02308   switch (NTYPE(node)) {
02309   case NT_LIST:
02310     do {
02311       r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
02312       if (r == 0)
02313         *len = (int)distance_add(*len, tlen);
02314     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02315     break;
02316 
02317   case NT_ALT:
02318     {
02319       int tlen2;
02320       int varlen = 0;
02321 
02322       r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
02323       while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
02324         r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
02325         if (r == 0) {
02326           if (tlen != tlen2)
02327             varlen = 1;
02328         }
02329       }
02330       if (r == 0) {
02331         if (varlen != 0) {
02332           if (level == 1)
02333             r = GET_CHAR_LEN_TOP_ALT_VARLEN;
02334           else
02335             r = GET_CHAR_LEN_VARLEN;
02336         }
02337         else
02338           *len = tlen;
02339       }
02340     }
02341     break;
02342 
02343   case NT_STR:
02344     {
02345       StrNode* sn = NSTR(node);
02346       UChar *s = sn->s;
02347       while (s < sn->end) {
02348         s += enclen(reg->enc, s, sn->end);
02349         (*len)++;
02350       }
02351     }
02352     break;
02353 
02354   case NT_QTFR:
02355     {
02356       QtfrNode* qn = NQTFR(node);
02357       if (qn->lower == qn->upper) {
02358         r = get_char_length_tree1(qn->target, reg, &tlen, level);
02359         if (r == 0)
02360           *len = (int)distance_multiply(tlen, qn->lower);
02361       }
02362       else
02363         r = GET_CHAR_LEN_VARLEN;
02364     }
02365     break;
02366 
02367 #ifdef USE_SUBEXP_CALL
02368   case NT_CALL:
02369     if (! IS_CALL_RECURSION(NCALL(node)))
02370       r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
02371     else
02372       r = GET_CHAR_LEN_VARLEN;
02373     break;
02374 #endif
02375 
02376   case NT_CTYPE:
02377     *len = 1;
02378     break;
02379 
02380   case NT_CCLASS:
02381   case NT_CANY:
02382     *len = 1;
02383     break;
02384 
02385   case NT_ENCLOSE:
02386     {
02387       EncloseNode* en = NENCLOSE(node);
02388       switch (en->type) {
02389       case ENCLOSE_MEMORY:
02390 #ifdef USE_SUBEXP_CALL
02391         if (IS_ENCLOSE_CLEN_FIXED(en))
02392           *len = en->char_len;
02393         else {
02394           r = get_char_length_tree1(en->target, reg, len, level);
02395           if (r == 0) {
02396             en->char_len = *len;
02397             SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
02398           }
02399         }
02400         break;
02401 #endif
02402       case ENCLOSE_OPTION:
02403       case ENCLOSE_STOP_BACKTRACK:
02404         r = get_char_length_tree1(en->target, reg, len, level);
02405         break;
02406       default:
02407         break;
02408       }
02409     }
02410     break;
02411 
02412   case NT_ANCHOR:
02413     break;
02414 
02415   default:
02416     r = GET_CHAR_LEN_VARLEN;
02417     break;
02418   }
02419 
02420   return r;
02421 }
02422 
02423 static int
02424 get_char_length_tree(Node* node, regex_t* reg, int* len)
02425 {
02426   return get_char_length_tree1(node, reg, len, 0);
02427 }
02428 
02429 /* x is not included y ==>  1 : 0 */
02430 static int
02431 is_not_included(Node* x, Node* y, regex_t* reg)
02432 {
02433   int i;
02434   OnigDistance len;
02435   OnigCodePoint code;
02436   UChar *p, c;
02437   int ytype;
02438 
02439  retry:
02440   ytype = NTYPE(y);
02441   switch (NTYPE(x)) {
02442   case NT_CTYPE:
02443     {
02444       switch (ytype) {
02445       case NT_CTYPE:
02446         if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
02447             NCTYPE(y)->not   != NCTYPE(x)->not)
02448           return 1;
02449         else
02450           return 0;
02451         break;
02452 
02453       case NT_CCLASS:
02454       swap:
02455         {
02456           Node* tmp;
02457           tmp = x; x = y; y = tmp;
02458           goto retry;
02459         }
02460         break;
02461 
02462       case NT_STR:
02463         goto swap;
02464         break;
02465 
02466       default:
02467         break;
02468       }
02469     }
02470     break;
02471 
02472   case NT_CCLASS:
02473     {
02474       CClassNode* xc = NCCLASS(x);
02475       switch (ytype) {
02476       case NT_CTYPE:
02477         switch (NCTYPE(y)->ctype) {
02478         case ONIGENC_CTYPE_WORD:
02479           if (NCTYPE(y)->not == 0) {
02480             if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
02481               for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
02482                 if (BITSET_AT(xc->bs, i)) {
02483                   if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
02484                 }
02485               }
02486               return 1;
02487             }
02488             return 0;
02489           }
02490           else {
02491             for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
02492               if (! IS_CODE_SB_WORD(reg->enc, i)) {
02493                 if (!IS_NCCLASS_NOT(xc)) {
02494                   if (BITSET_AT(xc->bs, i))
02495                     return 0;
02496                 }
02497                 else {
02498                   if (! BITSET_AT(xc->bs, i))
02499                     return 0;
02500                 }
02501               }
02502             }
02503             return 1;
02504           }
02505           break;
02506 
02507         default:
02508           break;
02509         }
02510         break;
02511 
02512       case NT_CCLASS:
02513         {
02514           int v;
02515           CClassNode* yc = NCCLASS(y);
02516 
02517           for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
02518             v = BITSET_AT(xc->bs, i);
02519             if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
02520                 (v == 0 && IS_NCCLASS_NOT(xc))) {
02521               v = BITSET_AT(yc->bs, i);
02522               if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
02523                   (v == 0 && IS_NCCLASS_NOT(yc)))
02524                 return 0;
02525             }
02526           }
02527           if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
02528               (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
02529             return 1;
02530           return 0;
02531         }
02532         break;
02533 
02534       case NT_STR:
02535         goto swap;
02536         break;
02537 
02538       default:
02539         break;
02540       }
02541     }
02542     break;
02543 
02544   case NT_STR:
02545     {
02546       StrNode* xs = NSTR(x);
02547       if (NSTRING_LEN(x) == 0)
02548         break;
02549 
02550       c = *(xs->s);
02551       switch (ytype) {
02552       case NT_CTYPE:
02553         switch (NCTYPE(y)->ctype) {
02554         case ONIGENC_CTYPE_WORD:
02555           if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
02556             return NCTYPE(y)->not;
02557           else
02558             return !(NCTYPE(y)->not);
02559           break;
02560         default:
02561           break;
02562         }
02563         break;
02564 
02565       case NT_CCLASS:
02566         {
02567           CClassNode* cc = NCCLASS(y);
02568 
02569           code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
02570                                      xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
02571           return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
02572         }
02573         break;
02574 
02575       case NT_STR:
02576         {
02577           UChar *q;
02578           StrNode* ys = NSTR(y);
02579           len = NSTRING_LEN(x);
02580           if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
02581           if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
02582             /* tiny version */
02583             return 0;
02584           }
02585           else {
02586             for (i = 0, p = ys->s, q = xs->s; (OnigDistance)i < len; i++, p++, q++) {
02587               if (*p != *q) return 1;
02588             }
02589           }
02590         }
02591         break;
02592 
02593       default:
02594         break;
02595       }
02596     }
02597     break;
02598 
02599   default:
02600     break;
02601   }
02602 
02603   return 0;
02604 }
02605 
02606 static Node*
02607 get_head_value_node(Node* node, int exact, regex_t* reg)
02608 {
02609   Node* n = NULL_NODE;
02610 
02611   switch (NTYPE(node)) {
02612   case NT_BREF:
02613   case NT_ALT:
02614   case NT_CANY:
02615 #ifdef USE_SUBEXP_CALL
02616   case NT_CALL:
02617 #endif
02618     break;
02619 
02620   case NT_CTYPE:
02621   case NT_CCLASS:
02622     if (exact == 0) {
02623       n = node;
02624     }
02625     break;
02626 
02627   case NT_LIST:
02628     n = get_head_value_node(NCAR(node), exact, reg);
02629     break;
02630 
02631   case NT_STR:
02632     {
02633       StrNode* sn = NSTR(node);
02634 
02635       if (sn->end <= sn->s)
02636         break;
02637 
02638       if (exact != 0 &&
02639           !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
02640       }
02641       else {
02642         n = node;
02643       }
02644     }
02645     break;
02646 
02647   case NT_QTFR:
02648     {
02649       QtfrNode* qn = NQTFR(node);
02650       if (qn->lower > 0) {
02651         if (IS_NOT_NULL(qn->head_exact))
02652           n = qn->head_exact;
02653         else
02654           n = get_head_value_node(qn->target, exact, reg);
02655       }
02656     }
02657     break;
02658 
02659   case NT_ENCLOSE:
02660     {
02661       EncloseNode* en = NENCLOSE(node);
02662       switch (en->type) {
02663       case ENCLOSE_OPTION:
02664         {
02665           OnigOptionType options = reg->options;
02666 
02667           reg->options = NENCLOSE(node)->option;
02668           n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
02669           reg->options = options;
02670         }
02671         break;
02672 
02673       case ENCLOSE_MEMORY:
02674       case ENCLOSE_STOP_BACKTRACK:
02675         n = get_head_value_node(en->target, exact, reg);
02676         break;
02677       }
02678     }
02679     break;
02680 
02681   case NT_ANCHOR:
02682     if (NANCHOR(node)->type == ANCHOR_PREC_READ)
02683       n = get_head_value_node(NANCHOR(node)->target, exact, reg);
02684     break;
02685 
02686   default:
02687     break;
02688   }
02689 
02690   return n;
02691 }
02692 
02693 static int
02694 check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
02695 {
02696   int type, r = 0;
02697 
02698   type = NTYPE(node);
02699   if ((NTYPE2BIT(type) & type_mask) == 0)
02700     return 1;
02701 
02702   switch (type) {
02703   case NT_LIST:
02704   case NT_ALT:
02705     do {
02706       r = check_type_tree(NCAR(node), type_mask, enclose_mask,
02707                           anchor_mask);
02708     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02709     break;
02710 
02711   case NT_QTFR:
02712     r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
02713                         anchor_mask);
02714     break;
02715 
02716   case NT_ENCLOSE:
02717     {
02718       EncloseNode* en = NENCLOSE(node);
02719       if ((en->type & enclose_mask) == 0)
02720         return 1;
02721 
02722       r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
02723     }
02724     break;
02725 
02726   case NT_ANCHOR:
02727     type = NANCHOR(node)->type;
02728     if ((type & anchor_mask) == 0)
02729       return 1;
02730 
02731     if (NANCHOR(node)->target)
02732       r = check_type_tree(NANCHOR(node)->target,
02733                           type_mask, enclose_mask, anchor_mask);
02734     break;
02735 
02736   default:
02737     break;
02738   }
02739   return r;
02740 }
02741 
02742 #ifdef USE_SUBEXP_CALL
02743 
02744 #define RECURSION_EXIST       1
02745 #define RECURSION_INFINITE    2
02746 
02747 static int
02748 subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
02749 {
02750   int type;
02751   int r = 0;
02752 
02753   type = NTYPE(node);
02754   switch (type) {
02755   case NT_LIST:
02756     {
02757       Node *x;
02758       OnigDistance min;
02759       int ret;
02760 
02761       x = node;
02762       do {
02763         ret = subexp_inf_recursive_check(NCAR(x), env, head);
02764         if (ret < 0 || ret == RECURSION_INFINITE) return ret;
02765         r |= ret;
02766         if (head) {
02767           ret = get_min_match_length(NCAR(x), &min, env);
02768           if (ret != 0) return ret;
02769           if (min != 0) head = 0;
02770         }
02771       } while (IS_NOT_NULL(x = NCDR(x)));
02772     }
02773     break;
02774 
02775   case NT_ALT:
02776     {
02777       int ret;
02778       r = RECURSION_EXIST;
02779       do {
02780         ret = subexp_inf_recursive_check(NCAR(node), env, head);
02781         if (ret < 0 || ret == RECURSION_INFINITE) return ret;
02782         r &= ret;
02783       } while (IS_NOT_NULL(node = NCDR(node)));
02784     }
02785     break;
02786 
02787   case NT_QTFR:
02788     r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
02789     if (r == RECURSION_EXIST) {
02790       if (NQTFR(node)->lower == 0) r = 0;
02791     }
02792     break;
02793 
02794   case NT_ANCHOR:
02795     {
02796       AnchorNode* an = NANCHOR(node);
02797       switch (an->type) {
02798       case ANCHOR_PREC_READ:
02799       case ANCHOR_PREC_READ_NOT:
02800       case ANCHOR_LOOK_BEHIND:
02801       case ANCHOR_LOOK_BEHIND_NOT:
02802         r = subexp_inf_recursive_check(an->target, env, head);
02803         break;
02804       }
02805     }
02806     break;
02807 
02808   case NT_CALL:
02809     r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
02810     break;
02811 
02812   case NT_ENCLOSE:
02813     if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
02814       return 0;
02815     else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
02816       return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
02817     else {
02818       SET_ENCLOSE_STATUS(node, NST_MARK2);
02819       r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
02820       CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
02821     }
02822     break;
02823 
02824   default:
02825     break;
02826   }
02827 
02828   return r;
02829 }
02830 
02831 static int
02832 subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
02833 {
02834   int type;
02835   int r = 0;
02836 
02837   type = NTYPE(node);
02838   switch (type) {
02839   case NT_LIST:
02840   case NT_ALT:
02841     do {
02842       r = subexp_inf_recursive_check_trav(NCAR(node), env);
02843     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02844     break;
02845 
02846   case NT_QTFR:
02847     r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
02848     break;
02849 
02850   case NT_ANCHOR:
02851     {
02852       AnchorNode* an = NANCHOR(node);
02853       switch (an->type) {
02854       case ANCHOR_PREC_READ:
02855       case ANCHOR_PREC_READ_NOT:
02856       case ANCHOR_LOOK_BEHIND:
02857       case ANCHOR_LOOK_BEHIND_NOT:
02858         r = subexp_inf_recursive_check_trav(an->target, env);
02859         break;
02860       }
02861     }
02862     break;
02863 
02864   case NT_ENCLOSE:
02865     {
02866       EncloseNode* en = NENCLOSE(node);
02867 
02868       if (IS_ENCLOSE_RECURSION(en)) {
02869         SET_ENCLOSE_STATUS(node, NST_MARK1);
02870         r = subexp_inf_recursive_check(en->target, env, 1);
02871         if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
02872         CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
02873       }
02874       r = subexp_inf_recursive_check_trav(en->target, env);
02875     }
02876 
02877     break;
02878 
02879   default:
02880     break;
02881   }
02882 
02883   return r;
02884 }
02885 
02886 static int
02887 subexp_recursive_check(Node* node)
02888 {
02889   int r = 0;
02890 
02891   switch (NTYPE(node)) {
02892   case NT_LIST:
02893   case NT_ALT:
02894     do {
02895       r |= subexp_recursive_check(NCAR(node));
02896     } while (IS_NOT_NULL(node = NCDR(node)));
02897     break;
02898 
02899   case NT_QTFR:
02900     r = subexp_recursive_check(NQTFR(node)->target);
02901     break;
02902 
02903   case NT_ANCHOR:
02904     {
02905       AnchorNode* an = NANCHOR(node);
02906       switch (an->type) {
02907       case ANCHOR_PREC_READ:
02908       case ANCHOR_PREC_READ_NOT:
02909       case ANCHOR_LOOK_BEHIND:
02910       case ANCHOR_LOOK_BEHIND_NOT:
02911         r = subexp_recursive_check(an->target);
02912         break;
02913       }
02914     }
02915     break;
02916 
02917   case NT_CALL:
02918     r = subexp_recursive_check(NCALL(node)->target);
02919     if (r != 0) SET_CALL_RECURSION(node);
02920     break;
02921 
02922   case NT_ENCLOSE:
02923     if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
02924       return 0;
02925     else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
02926       return 1; /* recursion */
02927     else {
02928       SET_ENCLOSE_STATUS(node, NST_MARK2);
02929       r = subexp_recursive_check(NENCLOSE(node)->target);
02930       CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
02931     }
02932     break;
02933 
02934   default:
02935     break;
02936   }
02937 
02938   return r;
02939 }
02940 
02941 
02942 static int
02943 subexp_recursive_check_trav(Node* node, ScanEnv* env)
02944 {
02945 #define FOUND_CALLED_NODE    1
02946 
02947   int type;
02948   int r = 0;
02949 
02950   type = NTYPE(node);
02951   switch (type) {
02952   case NT_LIST:
02953   case NT_ALT:
02954     {
02955       int ret;
02956       do {
02957         ret = subexp_recursive_check_trav(NCAR(node), env);
02958         if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
02959         else if (ret < 0) return ret;
02960       } while (IS_NOT_NULL(node = NCDR(node)));
02961     }
02962     break;
02963 
02964   case NT_QTFR:
02965     r = subexp_recursive_check_trav(NQTFR(node)->target, env);
02966     if (NQTFR(node)->upper == 0) {
02967       if (r == FOUND_CALLED_NODE)
02968         NQTFR(node)->is_refered = 1;
02969     }
02970     break;
02971 
02972   case NT_ANCHOR:
02973     {
02974       AnchorNode* an = NANCHOR(node);
02975       switch (an->type) {
02976       case ANCHOR_PREC_READ:
02977       case ANCHOR_PREC_READ_NOT:
02978       case ANCHOR_LOOK_BEHIND:
02979       case ANCHOR_LOOK_BEHIND_NOT:
02980         r = subexp_recursive_check_trav(an->target, env);
02981         break;
02982       }
02983     }
02984     break;
02985 
02986   case NT_ENCLOSE:
02987     {
02988       EncloseNode* en = NENCLOSE(node);
02989 
02990       if (! IS_ENCLOSE_RECURSION(en)) {
02991         if (IS_ENCLOSE_CALLED(en)) {
02992           SET_ENCLOSE_STATUS(node, NST_MARK1);
02993           r = subexp_recursive_check(en->target);
02994           if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
02995           CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
02996         }
02997       }
02998       r = subexp_recursive_check_trav(en->target, env);
02999       if (IS_ENCLOSE_CALLED(en))
03000         r |= FOUND_CALLED_NODE;
03001     }
03002     break;
03003 
03004   default:
03005     break;
03006   }
03007 
03008   return r;
03009 }
03010 
03011 static int
03012 setup_subexp_call(Node* node, ScanEnv* env)
03013 {
03014   int type;
03015   int r = 0;
03016 
03017   type = NTYPE(node);
03018   switch (type) {
03019   case NT_LIST:
03020     do {
03021       r = setup_subexp_call(NCAR(node), env);
03022     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
03023     break;
03024 
03025   case NT_ALT:
03026     do {
03027       r = setup_subexp_call(NCAR(node), env);
03028     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
03029     break;
03030 
03031   case NT_QTFR:
03032     r = setup_subexp_call(NQTFR(node)->target, env);
03033     break;
03034   case NT_ENCLOSE:
03035     r = setup_subexp_call(NENCLOSE(node)->target, env);
03036     break;
03037 
03038   case NT_CALL:
03039     {
03040       CallNode* cn = NCALL(node);
03041       Node** nodes = SCANENV_MEM_NODES(env);
03042 
03043       if (cn->group_num != 0) {
03044         int gnum = cn->group_num;
03045 
03046 #ifdef USE_NAMED_GROUP
03047         if (env->num_named > 0 &&
03048             IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
03049             !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
03050           return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
03051         }
03052 #endif
03053         if (gnum > env->num_mem) {
03054           onig_scan_env_set_error_string(env,
03055                  ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
03056           return ONIGERR_UNDEFINED_GROUP_REFERENCE;
03057         }
03058 
03059 #ifdef USE_NAMED_GROUP
03060       set_call_attr:
03061 #endif
03062         cn->target = nodes[cn->group_num];
03063         if (IS_NULL(cn->target)) {
03064           onig_scan_env_set_error_string(env,
03065                  ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
03066           return ONIGERR_UNDEFINED_NAME_REFERENCE;
03067         }
03068         SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
03069         BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
03070         cn->unset_addr_list = env->unset_addr_list;
03071       }
03072 #ifdef USE_NAMED_GROUP
03073       else {
03074         int *refs;
03075 
03076         int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
03077                                            &refs);
03078         if (n <= 0) {
03079           onig_scan_env_set_error_string(env,
03080                  ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
03081           return ONIGERR_UNDEFINED_NAME_REFERENCE;
03082         }
03083         else if (n > 1) {
03084           onig_scan_env_set_error_string(env,
03085             ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
03086           return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
03087         }
03088         else {
03089           cn->group_num = refs[0];
03090           goto set_call_attr;
03091         }
03092       }
03093 #endif
03094     }
03095     break;
03096 
03097   case NT_ANCHOR:
03098     {
03099       AnchorNode* an = NANCHOR(node);
03100 
03101       switch (an->type) {
03102       case ANCHOR_PREC_READ:
03103       case ANCHOR_PREC_READ_NOT:
03104       case ANCHOR_LOOK_BEHIND:
03105       case ANCHOR_LOOK_BEHIND_NOT:
03106         r = setup_subexp_call(an->target, env);
03107         break;
03108       }
03109     }
03110     break;
03111 
03112   default:
03113     break;
03114   }
03115 
03116   return r;
03117 }
03118 #endif
03119 
03120 /* divide different length alternatives in look-behind.
03121   (?<=A|B) ==> (?<=A)|(?<=B)
03122   (?<!A|B) ==> (?<!A)(?<!B)
03123 */
03124 static int
03125 divide_look_behind_alternatives(Node* node)
03126 {
03127   Node *head, *np, *insert_node;
03128   AnchorNode* an = NANCHOR(node);
03129   int anc_type = an->type;
03130 
03131   head = an->target;
03132   np = NCAR(head);
03133   swap_node(node, head);
03134   NCAR(node) = head;
03135   NANCHOR(head)->target = np;
03136 
03137   np = node;
03138   while ((np = NCDR(np)) != NULL_NODE) {
03139     insert_node = onig_node_new_anchor(anc_type);
03140     CHECK_NULL_RETURN_MEMERR(insert_node);
03141     NANCHOR(insert_node)->target = NCAR(np);
03142     NCAR(np) = insert_node;
03143   }
03144 
03145   if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
03146     np = node;
03147     do {
03148       SET_NTYPE(np, NT_LIST);  /* alt -> list */
03149     } while ((np = NCDR(np)) != NULL_NODE);
03150   }
03151   return 0;
03152 }
03153 
03154 static int
03155 setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
03156 {
03157   int r, len;
03158   AnchorNode* an = NANCHOR(node);
03159 
03160   r = get_char_length_tree(an->target, reg, &len);
03161   if (r == 0)
03162     an->char_len = len;
03163   else if (r == GET_CHAR_LEN_VARLEN)
03164     r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
03165   else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
03166     if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
03167       r = divide_look_behind_alternatives(node);
03168     else
03169       r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
03170   }
03171 
03172   return r;
03173 }
03174 
03175 static int
03176 next_setup(Node* node, Node* next_node, regex_t* reg)
03177 {
03178   int type;
03179 
03180  retry:
03181   type = NTYPE(node);
03182   if (type == NT_QTFR) {
03183     QtfrNode* qn = NQTFR(node);
03184     if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
03185 #ifdef USE_QTFR_PEEK_NEXT
03186       Node* n = get_head_value_node(next_node, 1, reg);
03187       /* '\0': for UTF-16BE etc... */
03188       if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
03189         qn->next_head_exact = n;
03190       }
03191 #endif
03192       /* automatic posseivation a*b ==> (?>a*)b */
03193       if (qn->lower <= 1) {
03194         int ttype = NTYPE(qn->target);
03195         if (IS_NODE_TYPE_SIMPLE(ttype)) {
03196           Node *x, *y;
03197           x = get_head_value_node(qn->target, 0, reg);
03198           if (IS_NOT_NULL(x)) {
03199             y = get_head_value_node(next_node,  0, reg);
03200             if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
03201               Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
03202               CHECK_NULL_RETURN_MEMERR(en);
03203               SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
03204               swap_node(node, en);
03205               NENCLOSE(node)->target = en;
03206             }
03207           }
03208         }
03209       }
03210     }
03211   }
03212   else if (type == NT_ENCLOSE) {
03213     EncloseNode* en = NENCLOSE(node);
03214     if (en->type == ENCLOSE_MEMORY) {
03215       node = en->target;
03216       goto retry;
03217     }
03218   }
03219   return 0;
03220 }
03221 
03222 
03223 static int
03224 update_string_node_case_fold(regex_t* reg, Node *node)
03225 {
03226   UChar *p, *q, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
03227   UChar *sbuf, *ebuf, *sp;
03228   int r, i, len;
03229   OnigDistance sbuf_size;
03230   StrNode* sn = NSTR(node);
03231 
03232   end = sn->end;
03233   sbuf_size = (end - sn->s) * 2;
03234   sbuf = (UChar* )xmalloc(sbuf_size);
03235   CHECK_NULL_RETURN_MEMERR(sbuf);
03236   ebuf = sbuf + sbuf_size;
03237 
03238   sp = sbuf;
03239   p = sn->s;
03240   while (p < end) {
03241     len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
03242     q = buf;
03243     for (i = 0; i < len; i++) {
03244       if (sp >= ebuf) {
03245         sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2);
03246         CHECK_NULL_RETURN_MEMERR(sbuf);
03247         sp = sbuf + sbuf_size;
03248         sbuf_size *= 2;
03249         ebuf = sbuf + sbuf_size;
03250       }
03251 
03252       *sp++ = buf[i];
03253     }
03254   }
03255 
03256   r = onig_node_str_set(node, sbuf, sp);
03257   if (r != 0) {
03258     xfree(sbuf);
03259     return r;
03260   }
03261 
03262   xfree(sbuf);
03263   return 0;
03264 }
03265 
03266 static int
03267 expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
03268                                  regex_t* reg)
03269 {
03270   int r;
03271   Node *node;
03272 
03273   node = onig_node_new_str(s, end);
03274   if (IS_NULL(node)) return ONIGERR_MEMORY;
03275 
03276   r = update_string_node_case_fold(reg, node);
03277   if (r != 0) {
03278     onig_node_free(node);
03279     return r;
03280   }
03281 
03282   NSTRING_SET_AMBIG(node);
03283   NSTRING_SET_DONT_GET_OPT_INFO(node);
03284   *rnode = node;
03285   return 0;
03286 }
03287 
03288 static int
03289 expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
03290                             UChar *p, int slen, UChar *end,
03291                             regex_t* reg, Node **rnode)
03292 {
03293   int r, i, j, len, varlen;
03294   Node *anode, *var_anode, *snode, *xnode, *an;
03295   UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
03296 
03297   *rnode = var_anode = NULL_NODE;
03298 
03299   varlen = 0;
03300   for (i = 0; i < item_num; i++) {
03301     if (items[i].byte_len != slen) {
03302       varlen = 1;
03303       break;
03304     }
03305   }
03306 
03307   if (varlen != 0) {
03308     *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
03309     if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
03310 
03311     xnode = onig_node_new_list(NULL, NULL);
03312     if (IS_NULL(xnode)) goto mem_err;
03313     NCAR(var_anode) = xnode;
03314 
03315     anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
03316     if (IS_NULL(anode)) goto mem_err;
03317     NCAR(xnode) = anode;
03318   }
03319   else {
03320     *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
03321     if (IS_NULL(anode)) return ONIGERR_MEMORY;
03322   }
03323 
03324   snode = onig_node_new_str(p, p + slen);
03325   if (IS_NULL(snode)) goto mem_err;
03326 
03327   NCAR(anode) = snode;
03328 
03329   for (i = 0; i < item_num; i++) {
03330     snode = onig_node_new_str(NULL, NULL);
03331     if (IS_NULL(snode)) goto mem_err;
03332 
03333     for (j = 0; j < items[i].code_len; j++) {
03334       len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
03335       if (len < 0) {
03336         r = len;
03337         goto mem_err2;
03338       }
03339 
03340       r = onig_node_str_cat(snode, buf, buf + len);
03341       if (r != 0) goto mem_err2;
03342     }
03343 
03344     an = onig_node_new_alt(NULL_NODE, NULL_NODE);
03345     if (IS_NULL(an)) {
03346       goto mem_err2;
03347     }
03348 
03349     if (items[i].byte_len != slen) {
03350       Node *rem;
03351       UChar *q = p + items[i].byte_len;
03352 
03353       if (q < end) {
03354         r = expand_case_fold_make_rem_string(&rem, q, end, reg);
03355         if (r != 0) {
03356           onig_node_free(an);
03357           goto mem_err2;
03358         }
03359 
03360         xnode = onig_node_list_add(NULL_NODE, snode);
03361         if (IS_NULL(xnode)) {
03362           onig_node_free(an);
03363           onig_node_free(rem);
03364           goto mem_err2;
03365         }
03366         if (IS_NULL(onig_node_list_add(xnode, rem))) {
03367           onig_node_free(an);
03368           onig_node_free(xnode);
03369           onig_node_free(rem);
03370           goto mem_err;
03371         }
03372 
03373         NCAR(an) = xnode;
03374       }
03375       else {
03376         NCAR(an) = snode;
03377       }
03378 
03379       NCDR(var_anode) = an;
03380       var_anode = an;
03381     }
03382     else {
03383       NCAR(an)     = snode;
03384       NCDR(anode) = an;
03385       anode = an;
03386     }
03387   }
03388 
03389   return varlen;
03390 
03391  mem_err2:
03392   onig_node_free(snode);
03393 
03394  mem_err:
03395   onig_node_free(*rnode);
03396 
03397   return ONIGERR_MEMORY;
03398 }
03399 
03400 static int
03401 expand_case_fold_string(Node* node, regex_t* reg)
03402 {
03403 #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION  8
03404 
03405   int r, n, len, alt_num;
03406   UChar *start, *end, *p;
03407   Node *top_root, *root, *snode, *prev_node;
03408   OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
03409   StrNode* sn = NSTR(node);
03410 
03411   if (NSTRING_IS_AMBIG(node)) return 0;
03412 
03413   start = sn->s;
03414   end   = sn->end;
03415   if (start >= end) return 0;
03416 
03417   r = 0;
03418   top_root = root = prev_node = snode = NULL_NODE;
03419   alt_num = 1;
03420   p = start;
03421   while (p < end) {
03422     n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
03423                                            p, end, items);
03424     if (n < 0) {
03425       r = n;
03426       goto err;
03427     }
03428 
03429     len = enclen(reg->enc, p, end);
03430 
03431     if (n == 0) {
03432       if (IS_NULL(snode)) {
03433         if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
03434           top_root = root = onig_node_list_add(NULL_NODE, prev_node);
03435           if (IS_NULL(root)) {
03436             onig_node_free(prev_node);
03437             goto mem_err;
03438           }
03439         }
03440 
03441         prev_node = snode = onig_node_new_str(NULL, NULL);
03442         if (IS_NULL(snode)) goto mem_err;
03443         if (IS_NOT_NULL(root)) {
03444           if (IS_NULL(onig_node_list_add(root, snode))) {
03445             onig_node_free(snode);
03446             goto mem_err;
03447           }
03448         }
03449       }
03450 
03451       r = onig_node_str_cat(snode, p, p + len);
03452       if (r != 0) goto err;
03453     }
03454     else {
03455       alt_num *= (n + 1);
03456       if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
03457 
03458       if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
03459         top_root = root = onig_node_list_add(NULL_NODE, prev_node);
03460         if (IS_NULL(root)) {
03461           onig_node_free(prev_node);
03462           goto mem_err;
03463         }
03464       }
03465 
03466       r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
03467       if (r < 0) goto mem_err;
03468       if (r == 1) {
03469         if (IS_NULL(root)) {
03470           top_root = prev_node;
03471         }
03472         else {
03473           if (IS_NULL(onig_node_list_add(root, prev_node))) {
03474             onig_node_free(prev_node);
03475             goto mem_err;
03476           }
03477         }
03478 
03479         root = NCAR(prev_node);
03480       }
03481       else { /* r == 0 */
03482         if (IS_NOT_NULL(root)) {
03483           if (IS_NULL(onig_node_list_add(root, prev_node))) {
03484             onig_node_free(prev_node);
03485             goto mem_err;
03486           }
03487         }
03488       }
03489 
03490       snode = NULL_NODE;
03491     }
03492 
03493     p += len;
03494   }
03495 
03496   if (p < end) {
03497     Node *srem;
03498 
03499     r = expand_case_fold_make_rem_string(&srem, p, end, reg);
03500     if (r != 0) goto mem_err;
03501 
03502     if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
03503       top_root = root = onig_node_list_add(NULL_NODE, prev_node);
03504       if (IS_NULL(root)) {
03505         onig_node_free(srem);
03506         onig_node_free(prev_node);
03507         goto mem_err;
03508       }
03509     }
03510 
03511     if (IS_NULL(root)) {
03512       prev_node = srem;
03513     }
03514     else {
03515       if (IS_NULL(onig_node_list_add(root, srem))) {
03516         onig_node_free(srem);
03517         goto mem_err;
03518       }
03519     }
03520   }
03521 
03522   /* ending */
03523   top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
03524   swap_node(node, top_root);
03525   onig_node_free(top_root);
03526   return 0;
03527 
03528  mem_err:
03529   r = ONIGERR_MEMORY;
03530 
03531  err:
03532   onig_node_free(top_root);
03533   return r;
03534 }
03535 
03536 
03537 #ifdef USE_COMBINATION_EXPLOSION_CHECK
03538 
03539 #define CEC_THRES_NUM_BIG_REPEAT         512
03540 #define CEC_INFINITE_NUM          0x7fffffff
03541 
03542 #define CEC_IN_INFINITE_REPEAT    (1<<0)
03543 #define CEC_IN_FINITE_REPEAT      (1<<1)
03544 #define CEC_CONT_BIG_REPEAT       (1<<2)
03545 
03546 static int
03547 setup_comb_exp_check(Node* node, int state, ScanEnv* env)
03548 {
03549   int type;
03550   int r = state;
03551 
03552   type = NTYPE(node);
03553   switch (type) {
03554   case NT_LIST:
03555     {
03556       Node* prev = NULL_NODE;
03557       do {
03558         r = setup_comb_exp_check(NCAR(node), r, env);
03559         prev = NCAR(node);
03560       } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
03561     }
03562     break;
03563 
03564   case NT_ALT:
03565     {
03566       int ret;
03567       do {
03568         ret = setup_comb_exp_check(NCAR(node), state, env);
03569         r |= ret;
03570       } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
03571     }
03572     break;
03573 
03574   case NT_QTFR:
03575     {
03576       int child_state = state;
03577       int add_state = 0;
03578       QtfrNode* qn = NQTFR(node);
03579       Node* target = qn->target;
03580       int var_num;
03581 
03582       if (! IS_REPEAT_INFINITE(qn->upper)) {
03583         if (qn->upper > 1) {
03584           /* {0,1}, {1,1} are allowed */
03585           child_state |= CEC_IN_FINITE_REPEAT;
03586 
03587           /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
03588           if (env->backrefed_mem == 0) {
03589             if (NTYPE(qn->target) == NT_ENCLOSE) {
03590               EncloseNode* en = NENCLOSE(qn->target);
03591               if (en->type == ENCLOSE_MEMORY) {
03592                 if (NTYPE(en->target) == NT_QTFR) {
03593                   QtfrNode* q = NQTFR(en->target);
03594                   if (IS_REPEAT_INFINITE(q->upper)
03595                       && q->greedy == qn->greedy) {
03596                     qn->upper = (qn->lower == 0 ? 1 : qn->lower);
03597                     if (qn->upper == 1)
03598                       child_state = state;
03599                   }
03600                 }
03601               }
03602             }
03603           }
03604         }
03605       }
03606 
03607       if (state & CEC_IN_FINITE_REPEAT) {
03608         qn->comb_exp_check_num = -1;
03609       }
03610       else {
03611         if (IS_REPEAT_INFINITE(qn->upper)) {
03612           var_num = CEC_INFINITE_NUM;
03613           child_state |= CEC_IN_INFINITE_REPEAT;
03614         }
03615         else {
03616           var_num = qn->upper - qn->lower;
03617         }
03618 
03619         if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
03620           add_state |= CEC_CONT_BIG_REPEAT;
03621 
03622         if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
03623             ((state & CEC_CONT_BIG_REPEAT) != 0 &&
03624              var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
03625           if (qn->comb_exp_check_num == 0) {
03626             env->num_comb_exp_check++;
03627             qn->comb_exp_check_num = env->num_comb_exp_check;
03628             if (env->curr_max_regnum > env->comb_exp_max_regnum)
03629               env->comb_exp_max_regnum = env->curr_max_regnum;
03630           }
03631         }
03632       }
03633 
03634       r = setup_comb_exp_check(target, child_state, env);
03635       r |= add_state;
03636     }
03637     break;
03638 
03639   case NT_ENCLOSE:
03640     {
03641       EncloseNode* en = NENCLOSE(node);
03642 
03643       switch (en->type) {
03644       case ENCLOSE_MEMORY:
03645         {
03646           if (env->curr_max_regnum < en->regnum)
03647             env->curr_max_regnum = en->regnum;
03648 
03649           r = setup_comb_exp_check(en->target, state, env);
03650         }
03651         break;
03652 
03653       default:
03654         r = setup_comb_exp_check(en->target, state, env);
03655         break;
03656       }
03657     }
03658     break;
03659 
03660 #ifdef USE_SUBEXP_CALL
03661   case NT_CALL:
03662     if (IS_CALL_RECURSION(NCALL(node)))
03663       env->has_recursion = 1;
03664     else
03665       r = setup_comb_exp_check(NCALL(node)->target, state, env);
03666     break;
03667 #endif
03668 
03669   default:
03670     break;
03671   }
03672 
03673   return r;
03674 }
03675 #endif
03676 
03677 #define IN_ALT        (1<<0)
03678 #define IN_NOT        (1<<1)
03679 #define IN_REPEAT     (1<<2)
03680 #define IN_VAR_REPEAT (1<<3)
03681 
03682 /* setup_tree does the following work.
03683  1. check empty loop. (set qn->target_empty_info)
03684  2. expand ignore-case in char class.
03685  3. set memory status bit flags. (reg->mem_stats)
03686  4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
03687  5. find invalid patterns in look-behind.
03688  6. expand repeated string.
03689  */
03690 static int
03691 setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
03692 {
03693   int type;
03694   int r = 0;
03695 
03696 restart:
03697   type = NTYPE(node);
03698   switch (type) {
03699   case NT_LIST:
03700     {
03701       Node* prev = NULL_NODE;
03702       do {
03703         r = setup_tree(NCAR(node), reg, state, env);
03704         if (IS_NOT_NULL(prev) && r == 0) {
03705           r = next_setup(prev, NCAR(node), reg);
03706         }
03707         prev = NCAR(node);
03708       } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
03709     }
03710     break;
03711 
03712   case NT_ALT:
03713     do {
03714       r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
03715     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
03716     break;
03717 
03718   case NT_CCLASS:
03719     break;
03720 
03721   case NT_STR:
03722     if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
03723       r = expand_case_fold_string(node, reg);
03724     }
03725     break;
03726 
03727   case NT_CTYPE:
03728   case NT_CANY:
03729     break;
03730 
03731 #ifdef USE_SUBEXP_CALL
03732   case NT_CALL:
03733     break;
03734 #endif
03735 
03736   case NT_BREF:
03737     {
03738       int i;
03739       int* p;
03740       Node** nodes = SCANENV_MEM_NODES(env);
03741       BRefNode* br = NBREF(node);
03742       p = BACKREFS_P(br);
03743       for (i = 0; i < br->back_num; i++) {
03744         if (p[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
03745         BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
03746         BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
03747 #ifdef USE_BACKREF_WITH_LEVEL
03748         if (IS_BACKREF_NEST_LEVEL(br)) {
03749           BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
03750         }
03751 #endif
03752         SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
03753       }
03754     }
03755     break;
03756 
03757   case NT_QTFR:
03758     {
03759       OnigDistance d;
03760       QtfrNode* qn = NQTFR(node);
03761       Node* target = qn->target;
03762 
03763       if ((state & IN_REPEAT) != 0) {
03764         qn->state |= NST_IN_REPEAT;
03765       }
03766 
03767       if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
03768         r = get_min_match_length(target, &d, env);
03769         if (r) break;
03770         if (d == 0) {
03771           qn->target_empty_info = NQ_TARGET_IS_EMPTY;
03772 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
03773           r = quantifiers_memory_node_info(target);
03774           if (r < 0) break;
03775           if (r > 0) {
03776             qn->target_empty_info = r;
03777           }
03778 #endif
03779 #if 0
03780           r = get_max_match_length(target, &d, env);
03781           if (r == 0 && d == 0) {
03782             /*  ()* ==> ()?, ()+ ==> ()  */
03783             qn->upper = 1;
03784             if (qn->lower > 1) qn->lower = 1;
03785             if (NTYPE(target) == NT_STR) {
03786               qn->upper = qn->lower = 0;  /* /(?:)+/ ==> // */
03787             }
03788           }
03789 #endif
03790         }
03791       }
03792 
03793       state |= IN_REPEAT;
03794       if (qn->lower != qn->upper)
03795         state |= IN_VAR_REPEAT;
03796       r = setup_tree(target, reg, state, env);
03797       if (r) break;
03798 
03799       /* expand string */
03800 #define EXPAND_STRING_MAX_LENGTH  100
03801       if (NTYPE(target) == NT_STR) {
03802         if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper &&
03803             qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {
03804           OnigDistance len = NSTRING_LEN(target);
03805           StrNode* sn = NSTR(target);
03806 
03807           if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) {
03808             int i, n = qn->lower;
03809             onig_node_conv_to_str_node(node, NSTR(target)->flag);
03810             for (i = 0; i < n; i++) {
03811               r = onig_node_str_cat(node, sn->s, sn->end);
03812               if (r) break;
03813             }
03814             onig_node_free(target);
03815             break; /* break case NT_QTFR: */
03816           }
03817         }
03818       }
03819 
03820 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
03821       if (qn->greedy && (qn->target_empty_info != 0)) {
03822         if (NTYPE(target) == NT_QTFR) {
03823           QtfrNode* tqn = NQTFR(target);
03824           if (IS_NOT_NULL(tqn->head_exact)) {
03825             qn->head_exact  = tqn->head_exact;
03826             tqn->head_exact = NULL;
03827           }
03828         }
03829         else {
03830           qn->head_exact = get_head_value_node(qn->target, 1, reg);
03831         }
03832       }
03833 #endif
03834     }
03835     break;
03836 
03837   case NT_ENCLOSE:
03838     {
03839       EncloseNode* en = NENCLOSE(node);
03840 
03841       switch (en->type) {
03842       case ENCLOSE_OPTION:
03843         {
03844           OnigOptionType options = reg->options;
03845           reg->options = NENCLOSE(node)->option;
03846           r = setup_tree(NENCLOSE(node)->target, reg, state, env);
03847           reg->options = options;
03848         }
03849         break;
03850 
03851       case ENCLOSE_MEMORY:
03852         if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) {
03853           BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
03854           /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
03855         }
03856         r = setup_tree(en->target, reg, state, env);
03857         break;
03858 
03859       case ENCLOSE_STOP_BACKTRACK:
03860         {
03861           Node* target = en->target;
03862           r = setup_tree(target, reg, state, env);
03863           if (NTYPE(target) == NT_QTFR) {
03864             QtfrNode* tqn = NQTFR(target);
03865             if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
03866                 tqn->greedy != 0) {  /* (?>a*), a*+ etc... */
03867               int qtype = NTYPE(tqn->target);
03868               if (IS_NODE_TYPE_SIMPLE(qtype))
03869                 SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
03870             }
03871           }
03872         }
03873         break;
03874       }
03875     }
03876     break;
03877 
03878   case NT_ANCHOR:
03879     {
03880       AnchorNode* an = NANCHOR(node);
03881 
03882       switch (an->type) {
03883       case ANCHOR_PREC_READ:
03884         r = setup_tree(an->target, reg, state, env);
03885         break;
03886       case ANCHOR_PREC_READ_NOT:
03887         r = setup_tree(an->target, reg, (state | IN_NOT), env);
03888         break;
03889 
03890 /* allowed node types in look-behind */
03891 #define ALLOWED_TYPE_IN_LB  \
03892   ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
03893     BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
03894 
03895 #define ALLOWED_ENCLOSE_IN_LB       ( ENCLOSE_MEMORY )
03896 #define ALLOWED_ENCLOSE_IN_LB_NOT   0
03897 
03898 #define ALLOWED_ANCHOR_IN_LB \
03899 ( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
03900 #define ALLOWED_ANCHOR_IN_LB_NOT \
03901 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
03902 
03903       case ANCHOR_LOOK_BEHIND:
03904         {
03905           r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
03906                               ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
03907           if (r < 0) return r;
03908           if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
03909           r = setup_look_behind(node, reg, env);
03910           if (r != 0) return r;
03911           if (NTYPE(node) != NT_ANCHOR) goto restart;
03912           r = setup_tree(an->target, reg, state, env);
03913         }
03914         break;
03915 
03916       case ANCHOR_LOOK_BEHIND_NOT:
03917         {
03918           r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
03919                       ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
03920           if (r < 0) return r;
03921           if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
03922           r = setup_look_behind(node, reg, env);
03923           if (r != 0) return r;
03924           if (NTYPE(node) != NT_ANCHOR) goto restart;
03925           r = setup_tree(an->target, reg, (state | IN_NOT), env);
03926         }
03927         break;
03928       }
03929     }
03930     break;
03931 
03932   default:
03933     break;
03934   }
03935 
03936   return r;
03937 }
03938 
03939 /* set skip map for Boyer-Moor search */
03940 static int
03941 set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
03942             UChar skip[], int** int_skip)
03943 {
03944   OnigDistance i, len;
03945 
03946   len = end - s;
03947   if (len < ONIG_CHAR_TABLE_SIZE) {
03948     for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = len;
03949 
03950     for (i = 0; i < len - 1; i++)
03951       skip[s[i]] = len - 1 - i;
03952   }
03953   else {
03954     if (IS_NULL(*int_skip)) {
03955       *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
03956       if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
03957     }
03958     for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int)len;
03959 
03960     for (i = 0; i < len - 1; i++)
03961       (*int_skip)[s[i]] = (int)(len - 1 - i);
03962   }
03963   return 0;
03964 }
03965 
03966 #define OPT_EXACT_MAXLEN   24
03967 
03968 typedef struct {
03969   OnigDistance min;  /* min byte length */
03970   OnigDistance max;  /* max byte length */
03971 } MinMaxLen;
03972 
03973 typedef struct {
03974   MinMaxLen        mmd;
03975   OnigEncoding     enc;
03976   OnigOptionType   options;
03977   OnigCaseFoldType case_fold_flag;
03978   ScanEnv*         scan_env;
03979 } OptEnv;
03980 
03981 typedef struct {
03982   int left_anchor;
03983   int right_anchor;
03984 } OptAncInfo;
03985 
03986 typedef struct {
03987   MinMaxLen  mmd; /* info position */
03988   OptAncInfo anc;
03989 
03990   int   reach_end;
03991   int   ignore_case;
03992   int   len;
03993   UChar s[OPT_EXACT_MAXLEN];
03994 } OptExactInfo;
03995 
03996 typedef struct {
03997   MinMaxLen mmd; /* info position */
03998   OptAncInfo anc;
03999 
04000   int   value;      /* weighted value */
04001   UChar map[ONIG_CHAR_TABLE_SIZE];
04002 } OptMapInfo;
04003 
04004 typedef struct {
04005   MinMaxLen    len;
04006 
04007   OptAncInfo   anc;
04008   OptExactInfo exb;    /* boundary */
04009   OptExactInfo exm;    /* middle */
04010   OptExactInfo expr;   /* prec read (?=...) */
04011 
04012   OptMapInfo   map;   /* boundary */
04013 } NodeOptInfo;
04014 
04015 
04016 static int
04017 map_position_value(OnigEncoding enc, int i)
04018 {
04019   static const short int ByteValTable[] = {
04020      5,  1,  1,  1,  1,  1,  1,  1,  1, 10, 10,  1,  1, 10,  1,  1,
04021      1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
04022     12,  4,  7,  4,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,
04023      6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  5,
04024      5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
04025      6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  6,  5,  5,  5,
04026      5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
04027      6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  1
04028   };
04029 
04030   if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) {
04031     if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
04032       return 20;
04033     else
04034       return (int )ByteValTable[i];
04035   }
04036   else
04037     return 4;   /* Take it easy. */
04038 }
04039 
04040 static int
04041 distance_value(MinMaxLen* mm)
04042 {
04043   /* 1000 / (min-max-dist + 1) */
04044   static const short int dist_vals[] = {
04045     1000,  500,  333,  250,  200,  167,  143,  125,  111,  100,
04046       91,   83,   77,   71,   67,   63,   59,   56,   53,   50,
04047       48,   45,   43,   42,   40,   38,   37,   36,   34,   33,
04048       32,   31,   30,   29,   29,   28,   27,   26,   26,   25,
04049       24,   24,   23,   23,   22,   22,   21,   21,   20,   20,
04050       20,   19,   19,   19,   18,   18,   18,   17,   17,   17,
04051       16,   16,   16,   16,   15,   15,   15,   15,   14,   14,
04052       14,   14,   14,   14,   13,   13,   13,   13,   13,   13,
04053       12,   12,   12,   12,   12,   12,   11,   11,   11,   11,
04054       11,   11,   11,   11,   11,   10,   10,   10,   10,   10
04055   };
04056 
04057   OnigDistance d;
04058 
04059   if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
04060 
04061   d = mm->max - mm->min;
04062   if (d < sizeof(dist_vals)/sizeof(dist_vals[0]))
04063     /* return dist_vals[d] * 16 / (mm->min + 12); */
04064     return (int )dist_vals[d];
04065   else
04066     return 1;
04067 }
04068 
04069 static int
04070 comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
04071 {
04072   if (v2 <= 0) return -1;
04073   if (v1 <= 0) return  1;
04074 
04075   v1 *= distance_value(d1);
04076   v2 *= distance_value(d2);
04077 
04078   if (v2 > v1) return  1;
04079   if (v2 < v1) return -1;
04080 
04081   if (d2->min < d1->min) return  1;
04082   if (d2->min > d1->min) return -1;
04083   return 0;
04084 }
04085 
04086 static int
04087 is_equal_mml(MinMaxLen* a, MinMaxLen* b)
04088 {
04089   return (a->min == b->min && a->max == b->max) ? 1 : 0;
04090 }
04091 
04092 
04093 static void
04094 set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)
04095 {
04096   mml->min = min;
04097   mml->max = max;
04098 }
04099 
04100 static void
04101 clear_mml(MinMaxLen* mml)
04102 {
04103   mml->min = mml->max = 0;
04104 }
04105 
04106 static void
04107 copy_mml(MinMaxLen* to, MinMaxLen* from)
04108 {
04109   to->min = from->min;
04110   to->max = from->max;
04111 }
04112 
04113 static void
04114 add_mml(MinMaxLen* to, MinMaxLen* from)
04115 {
04116   to->min = distance_add(to->min, from->min);
04117   to->max = distance_add(to->max, from->max);
04118 }
04119 
04120 #if 0
04121 static void
04122 add_len_mml(MinMaxLen* to, OnigDistance len)
04123 {
04124   to->min = distance_add(to->min, len);
04125   to->max = distance_add(to->max, len);
04126 }
04127 #endif
04128 
04129 static void
04130 alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
04131 {
04132   if (to->min > from->min) to->min = from->min;
04133   if (to->max < from->max) to->max = from->max;
04134 }
04135 
04136 static void
04137 copy_opt_env(OptEnv* to, OptEnv* from)
04138 {
04139   *to = *from;
04140 }
04141 
04142 static void
04143 clear_opt_anc_info(OptAncInfo* anc)
04144 {
04145   anc->left_anchor  = 0;
04146   anc->right_anchor = 0;
04147 }
04148 
04149 static void
04150 copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
04151 {
04152   *to = *from;
04153 }
04154 
04155 static void
04156 concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
04157                     OnigDistance left_len, OnigDistance right_len)
04158 {
04159   clear_opt_anc_info(to);
04160 
04161   to->left_anchor = left->left_anchor;
04162   if (left_len == 0) {
04163     to->left_anchor |= right->left_anchor;
04164   }
04165 
04166   to->right_anchor = right->right_anchor;
04167   if (right_len == 0) {
04168     to->right_anchor |= left->right_anchor;
04169   }
04170 }
04171 
04172 static int
04173 is_left_anchor(int anc)
04174 {
04175   if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
04176       anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
04177       anc == ANCHOR_PREC_READ_NOT)
04178     return 0;
04179 
04180   return 1;
04181 }
04182 
04183 static int
04184 is_set_opt_anc_info(OptAncInfo* to, int anc)
04185 {
04186   if ((to->left_anchor & anc) != 0) return 1;
04187 
04188   return ((to->right_anchor & anc) != 0 ? 1 : 0);
04189 }
04190 
04191 static void
04192 add_opt_anc_info(OptAncInfo* to, int anc)
04193 {
04194   if (is_left_anchor(anc))
04195     to->left_anchor |= anc;
04196   else
04197     to->right_anchor |= anc;
04198 }
04199 
04200 static void
04201 remove_opt_anc_info(OptAncInfo* to, int anc)
04202 {
04203   if (is_left_anchor(anc))
04204     to->left_anchor &= ~anc;
04205   else
04206     to->right_anchor &= ~anc;
04207 }
04208 
04209 static void
04210 alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)
04211 {
04212   to->left_anchor  &= add->left_anchor;
04213   to->right_anchor &= add->right_anchor;
04214 }
04215 
04216 static int
04217 is_full_opt_exact_info(OptExactInfo* ex)
04218 {
04219   return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
04220 }
04221 
04222 static void
04223 clear_opt_exact_info(OptExactInfo* ex)
04224 {
04225   clear_mml(&ex->mmd);
04226   clear_opt_anc_info(&ex->anc);
04227   ex->reach_end   = 0;
04228   ex->ignore_case = 0;
04229   ex->len         = 0;
04230   ex->s[0]        = '\0';
04231 }
04232 
04233 static void
04234 copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
04235 {
04236   *to = *from;
04237 }
04238 
04239 static void
04240 concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
04241 {
04242   int i, j, len;
04243   UChar *p, *end;
04244   OptAncInfo tanc;
04245 
04246   if (! to->ignore_case && add->ignore_case) {
04247     if (to->len >= add->len) return ;  /* avoid */
04248 
04249     to->ignore_case = 1;
04250   }
04251 
04252   p = add->s;
04253   end = p + add->len;
04254   for (i = to->len; p < end; ) {
04255     len = enclen(enc, p, end);
04256     if (i + len > OPT_EXACT_MAXLEN) break;
04257     for (j = 0; j < len && p < end; j++)
04258       to->s[i++] = *p++;
04259   }
04260 
04261   to->len = i;
04262   to->reach_end = (p == end ? add->reach_end : 0);
04263 
04264   concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
04265   if (! to->reach_end) tanc.right_anchor = 0;
04266   copy_opt_anc_info(&to->anc, &tanc);
04267 }
04268 
04269 static void
04270 concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end,
04271                           int raw ARG_UNUSED, OnigEncoding enc)
04272 {
04273   int i, j, len;
04274   UChar *p;
04275 
04276   for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
04277     len = enclen(enc, p, end);
04278     if (i + len > OPT_EXACT_MAXLEN) break;
04279     for (j = 0; j < len && p < end; j++)
04280       to->s[i++] = *p++;
04281   }
04282 
04283   to->len = i;
04284 }
04285 
04286 static void
04287 alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
04288 {
04289   int i, j, len;
04290 
04291   if (add->len == 0 || to->len == 0) {
04292     clear_opt_exact_info(to);
04293     return ;
04294   }
04295 
04296   if (! is_equal_mml(&to->mmd, &add->mmd)) {
04297     clear_opt_exact_info(to);
04298     return ;
04299   }
04300 
04301   for (i = 0; i < to->len && i < add->len; ) {
04302     if (to->s[i] != add->s[i]) break;
04303     len = enclen(env->enc, to->s + i, to->s + to->len);
04304 
04305     for (j = 1; j < len; j++) {
04306       if (to->s[i+j] != add->s[i+j]) break;
04307     }
04308     if (j < len) break;
04309     i += len;
04310   }
04311 
04312   if (! add->reach_end || i < add->len || i < to->len) {
04313     to->reach_end = 0;
04314   }
04315   to->len = i;
04316   to->ignore_case |= add->ignore_case;
04317 
04318   alt_merge_opt_anc_info(&to->anc, &add->anc);
04319   if (! to->reach_end) to->anc.right_anchor = 0;
04320 }
04321 
04322 static void
04323 select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
04324 {
04325   int v1, v2;
04326 
04327   v1 = now->len;
04328   v2 = alt->len;
04329 
04330   if (v2 == 0) {
04331     return ;
04332   }
04333   else if (v1 == 0) {
04334     copy_opt_exact_info(now, alt);
04335     return ;
04336   }
04337   else if (v1 <= 2 && v2 <= 2) {
04338     /* ByteValTable[x] is big value --> low price */
04339     v2 = map_position_value(enc, now->s[0]);
04340     v1 = map_position_value(enc, alt->s[0]);
04341 
04342     if (now->len > 1) v1 += 5;
04343     if (alt->len > 1) v2 += 5;
04344   }
04345 
04346   if (now->ignore_case == 0) v1 *= 2;
04347   if (alt->ignore_case == 0) v2 *= 2;
04348 
04349   if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
04350     copy_opt_exact_info(now, alt);
04351 }
04352 
04353 static void
04354 clear_opt_map_info(OptMapInfo* map)
04355 {
04356   static const OptMapInfo clean_info = {
04357     {0, 0}, {0, 0}, 0,
04358     {
04359       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04360       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04361       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04362       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04363       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04364       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04365       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04366       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04367       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04368       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04369       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04370       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04371       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04372       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04373       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04374       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
04375     }
04376   };
04377 
04378   xmemcpy(map, &clean_info, sizeof(OptMapInfo));
04379 }
04380 
04381 static void
04382 copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
04383 {
04384   *to = *from;
04385 }
04386 
04387 static void
04388 add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
04389 {
04390   if (map->map[c] == 0) {
04391     map->map[c] = 1;
04392     map->value += map_position_value(enc, c);
04393   }
04394 }
04395 
04396 static int
04397 add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
04398                           OnigEncoding enc, OnigCaseFoldType case_fold_flag)
04399 {
04400   OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
04401   UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
04402   int i, n;
04403 
04404   add_char_opt_map_info(map, p[0], enc);
04405 
04406   case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
04407   n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
04408   if (n < 0) return n;
04409 
04410   for (i = 0; i < n; i++) {
04411     ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
04412     add_char_opt_map_info(map, buf[0], enc);
04413   }
04414 
04415   return 0;
04416 }
04417 
04418 static void
04419 select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)
04420 {
04421   const int z = 1<<15; /* 32768: something big value */
04422 
04423   int v1, v2;
04424 
04425   if (alt->value == 0) return ;
04426   if (now->value == 0) {
04427     copy_opt_map_info(now, alt);
04428     return ;
04429   }
04430 
04431   v1 = z / now->value;
04432   v2 = z / alt->value;
04433   if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
04434     copy_opt_map_info(now, alt);
04435 }
04436 
04437 static int
04438 comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
04439 {
04440 #define COMP_EM_BASE  20
04441   int ve, vm;
04442 
04443   if (m->value <= 0) return -1;
04444 
04445   ve = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2);
04446   vm = COMP_EM_BASE * 5 * 2 / m->value;
04447   return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
04448 }
04449 
04450 static void
04451 alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
04452 {
04453   int i, val;
04454 
04455   /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
04456   if (to->value == 0) return ;
04457   if (add->value == 0 || to->mmd.max < add->mmd.min) {
04458     clear_opt_map_info(to);
04459     return ;
04460   }
04461 
04462   alt_merge_mml(&to->mmd, &add->mmd);
04463 
04464   val = 0;
04465   for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
04466     if (add->map[i])
04467       to->map[i] = 1;
04468 
04469     if (to->map[i])
04470       val += map_position_value(enc, i);
04471   }
04472   to->value = val;
04473 
04474   alt_merge_opt_anc_info(&to->anc, &add->anc);
04475 }
04476 
04477 static void
04478 set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)
04479 {
04480   copy_mml(&(opt->exb.mmd),  mmd);
04481   copy_mml(&(opt->expr.mmd), mmd);
04482   copy_mml(&(opt->map.mmd),  mmd);
04483 }
04484 
04485 static void
04486 clear_node_opt_info(NodeOptInfo* opt)
04487 {
04488   clear_mml(&opt->len);
04489   clear_opt_anc_info(&opt->anc);
04490   clear_opt_exact_info(&opt->exb);
04491   clear_opt_exact_info(&opt->exm);
04492   clear_opt_exact_info(&opt->expr);
04493   clear_opt_map_info(&opt->map);
04494 }
04495 
04496 static void
04497 copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
04498 {
04499   *to = *from;
04500 }
04501 
04502 static void
04503 concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
04504 {
04505   int exb_reach, exm_reach;
04506   OptAncInfo tanc;
04507 
04508   concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
04509   copy_opt_anc_info(&to->anc, &tanc);
04510 
04511   if (add->exb.len > 0 && to->len.max == 0) {
04512     concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
04513                         to->len.max, add->len.max);
04514     copy_opt_anc_info(&add->exb.anc, &tanc);
04515   }
04516 
04517   if (add->map.value > 0 && to->len.max == 0) {
04518     if (add->map.mmd.max == 0)
04519       add->map.anc.left_anchor |= to->anc.left_anchor;
04520   }
04521 
04522   exb_reach = to->exb.reach_end;
04523   exm_reach = to->exm.reach_end;
04524 
04525   if (add->len.max != 0)
04526     to->exb.reach_end = to->exm.reach_end = 0;
04527 
04528   if (add->exb.len > 0) {
04529     if (exb_reach) {
04530       concat_opt_exact_info(&to->exb, &add->exb, enc);
04531       clear_opt_exact_info(&add->exb);
04532     }
04533     else if (exm_reach) {
04534       concat_opt_exact_info(&to->exm, &add->exb, enc);
04535       clear_opt_exact_info(&add->exb);
04536     }
04537   }
04538   select_opt_exact_info(enc, &to->exm, &add->exb);
04539   select_opt_exact_info(enc, &to->exm, &add->exm);
04540 
04541   if (to->expr.len > 0) {
04542     if (add->len.max > 0) {
04543       if (to->expr.len > (int )add->len.max)
04544         to->expr.len = (int)add->len.max;
04545 
04546       if (to->expr.mmd.max == 0)
04547         select_opt_exact_info(enc, &to->exb, &to->expr);
04548       else
04549         select_opt_exact_info(enc, &to->exm, &to->expr);
04550     }
04551   }
04552   else if (add->expr.len > 0) {
04553     copy_opt_exact_info(&to->expr, &add->expr);
04554   }
04555 
04556   select_opt_map_info(&to->map, &add->map);
04557 
04558   add_mml(&to->len, &add->len);
04559 }
04560 
04561 static void
04562 alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
04563 {
04564   alt_merge_opt_anc_info  (&to->anc,  &add->anc);
04565   alt_merge_opt_exact_info(&to->exb,  &add->exb, env);
04566   alt_merge_opt_exact_info(&to->exm,  &add->exm, env);
04567   alt_merge_opt_exact_info(&to->expr, &add->expr, env);
04568   alt_merge_opt_map_info(env->enc, &to->map,  &add->map);
04569 
04570   alt_merge_mml(&to->len, &add->len);
04571 }
04572 
04573 
04574 #define MAX_NODE_OPT_INFO_REF_COUNT    5
04575 
04576 static int
04577 optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
04578 {
04579   int type;
04580   int r = 0;
04581 
04582   clear_node_opt_info(opt);
04583   set_bound_node_opt_info(opt, &env->mmd);
04584 
04585   type = NTYPE(node);
04586   switch (type) {
04587   case NT_LIST:
04588     {
04589       OptEnv nenv;
04590       NodeOptInfo nopt;
04591       Node* nd = node;
04592 
04593       copy_opt_env(&nenv, env);
04594       do {
04595         r = optimize_node_left(NCAR(nd), &nopt, &nenv);
04596         if (r == 0) {
04597           add_mml(&nenv.mmd, &nopt.len);
04598           concat_left_node_opt_info(env->enc, opt, &nopt);
04599         }
04600       } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
04601     }
04602     break;
04603 
04604   case NT_ALT:
04605     {
04606       NodeOptInfo nopt;
04607       Node* nd = node;
04608 
04609       do {
04610         r = optimize_node_left(NCAR(nd), &nopt, env);
04611         if (r == 0) {
04612           if (nd == node) copy_node_opt_info(opt, &nopt);
04613           else            alt_merge_node_opt_info(opt, &nopt, env);
04614         }
04615       } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
04616     }
04617     break;
04618 
04619   case NT_STR:
04620     {
04621       StrNode* sn = NSTR(node);
04622       OnigDistance slen = sn->end - sn->s;
04623       int is_raw = NSTRING_IS_RAW(node);
04624 
04625       if (! NSTRING_IS_AMBIG(node)) {
04626         concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
04627                                   NSTRING_IS_RAW(node), env->enc);
04628         if (slen > 0) {
04629           add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
04630         }
04631         set_mml(&opt->len, slen, slen);
04632       }
04633       else {
04634         OnigDistance max;
04635 
04636         if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
04637           int n = onigenc_strlen(env->enc, sn->s, sn->end);
04638           max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
04639         }
04640         else {
04641           concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
04642                                     is_raw, env->enc);
04643           opt->exb.ignore_case = 1;
04644 
04645           if (slen > 0) {
04646             r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
04647                                           env->enc, env->case_fold_flag);
04648             if (r != 0) break;
04649           }
04650 
04651           max = slen;
04652         }
04653 
04654         set_mml(&opt->len, slen, max);
04655       }
04656 
04657       if ((OnigDistance)opt->exb.len == slen)
04658         opt->exb.reach_end = 1;
04659     }
04660     break;
04661 
04662   case NT_CCLASS:
04663     {
04664       int i, z;
04665       CClassNode* cc = NCCLASS(node);
04666 
04667       /* no need to check ignore case. (setted in setup_tree()) */
04668 
04669       if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
04670         OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
04671         OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
04672 
04673         set_mml(&opt->len, min, max);
04674       }
04675       else {
04676         for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
04677           z = BITSET_AT(cc->bs, i);
04678           if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
04679             add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
04680           }
04681         }
04682         set_mml(&opt->len, 1, 1);
04683       }
04684     }
04685     break;
04686 
04687   case NT_CTYPE:
04688     {
04689       int i, min, max;
04690 
04691       max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
04692 
04693       if (max == 1) {
04694         min = 1;
04695 
04696         switch (NCTYPE(node)->ctype) {
04697         case ONIGENC_CTYPE_WORD:
04698           if (NCTYPE(node)->not != 0) {
04699             for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
04700               if (! ONIGENC_IS_CODE_WORD(env->enc, i)) {
04701                 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
04702               }
04703             }
04704           }
04705           else {
04706             for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
04707               if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
04708                 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
04709               }
04710             }
04711           }
04712           break;
04713         }
04714       }
04715       else {
04716         min = ONIGENC_MBC_MINLEN(env->enc);
04717       }
04718       set_mml(&opt->len, min, max);
04719     }
04720     break;
04721 
04722   case NT_CANY:
04723     {
04724       OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
04725       OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
04726       set_mml(&opt->len, min, max);
04727     }
04728     break;
04729 
04730   case NT_ANCHOR:
04731     switch (NANCHOR(node)->type) {
04732     case ANCHOR_BEGIN_BUF:
04733     case ANCHOR_BEGIN_POSITION:
04734     case ANCHOR_BEGIN_LINE:
04735     case ANCHOR_END_BUF:
04736     case ANCHOR_SEMI_END_BUF:
04737     case ANCHOR_END_LINE:
04738       add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
04739       break;
04740 
04741     case ANCHOR_PREC_READ:
04742       {
04743         NodeOptInfo nopt;
04744 
04745         r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
04746         if (r == 0) {
04747           if (nopt.exb.len > 0)
04748             copy_opt_exact_info(&opt->expr, &nopt.exb);
04749           else if (nopt.exm.len > 0)
04750             copy_opt_exact_info(&opt->expr, &nopt.exm);
04751 
04752           opt->expr.reach_end = 0;
04753 
04754           if (nopt.map.value > 0)
04755             copy_opt_map_info(&opt->map, &nopt.map);
04756         }
04757       }
04758       break;
04759 
04760     case ANCHOR_PREC_READ_NOT:
04761     case ANCHOR_LOOK_BEHIND: /* Sorry, I can't make use of it. */
04762     case ANCHOR_LOOK_BEHIND_NOT:
04763       break;
04764     }
04765     break;
04766 
04767   case NT_BREF:
04768     {
04769       int i;
04770       int* backs;
04771       OnigDistance min, max, tmin, tmax;
04772       Node** nodes = SCANENV_MEM_NODES(env->scan_env);
04773       BRefNode* br = NBREF(node);
04774 
04775       if (br->state & NST_RECURSION) {
04776         set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
04777         break;
04778       }
04779       backs = BACKREFS_P(br);
04780       r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
04781       if (r != 0) break;
04782       r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
04783       if (r != 0) break;
04784       for (i = 1; i < br->back_num; i++) {
04785         r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
04786         if (r != 0) break;
04787         r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
04788         if (r != 0) break;
04789         if (min > tmin) min = tmin;
04790         if (max < tmax) max = tmax;
04791       }
04792       if (r == 0) set_mml(&opt->len, min, max);
04793     }
04794     break;
04795 
04796 #ifdef USE_SUBEXP_CALL
04797   case NT_CALL:
04798     if (IS_CALL_RECURSION(NCALL(node)))
04799       set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
04800     else {
04801       OnigOptionType save = env->options;
04802       env->options = NENCLOSE(NCALL(node)->target)->option;
04803       r = optimize_node_left(NCALL(node)->target, opt, env);
04804       env->options = save;
04805     }
04806     break;
04807 #endif
04808 
04809   case NT_QTFR:
04810     {
04811       int i;
04812       OnigDistance min, max;
04813       NodeOptInfo nopt;
04814       QtfrNode* qn = NQTFR(node);
04815 
04816       r = optimize_node_left(qn->target, &nopt, env);
04817       if (r) break;
04818 
04819       if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) {
04820         if (env->mmd.max == 0 &&
04821             NTYPE(qn->target) == NT_CANY && qn->greedy) {
04822           if (IS_MULTILINE(env->options))
04823             add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
04824           else
04825             add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
04826         }
04827       }
04828       else {
04829         if (qn->lower > 0) {
04830           copy_node_opt_info(opt, &nopt);
04831           if (nopt.exb.len > 0) {
04832             if (nopt.exb.reach_end) {
04833               for (i = 2; i <= qn->lower &&
04834                           ! is_full_opt_exact_info(&opt->exb); i++) {
04835                 concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
04836               }
04837               if (i < qn->lower) {
04838                 opt->exb.reach_end = 0;
04839               }
04840             }
04841           }
04842 
04843           if (qn->lower != qn->upper) {
04844             opt->exb.reach_end = 0;
04845             opt->exm.reach_end = 0;
04846           }
04847           if (qn->lower > 1)
04848             opt->exm.reach_end = 0;
04849         }
04850       }
04851 
04852       min = distance_multiply(nopt.len.min, qn->lower);
04853       if (IS_REPEAT_INFINITE(qn->upper))
04854         max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
04855       else
04856         max = distance_multiply(nopt.len.max, qn->upper);
04857 
04858       set_mml(&opt->len, min, max);
04859     }
04860     break;
04861 
04862   case NT_ENCLOSE:
04863     {
04864       EncloseNode* en = NENCLOSE(node);
04865 
04866       switch (en->type) {
04867       case ENCLOSE_OPTION:
04868         {
04869           OnigOptionType save = env->options;
04870 
04871           env->options = en->option;
04872           r = optimize_node_left(en->target, opt, env);
04873           env->options = save;
04874         }
04875         break;
04876 
04877       case ENCLOSE_MEMORY:
04878 #ifdef USE_SUBEXP_CALL
04879         en->opt_count++;
04880         if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
04881           OnigDistance min, max;
04882 
04883           min = 0;
04884           max = ONIG_INFINITE_DISTANCE;
04885           if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
04886           if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
04887           set_mml(&opt->len, min, max);
04888         }
04889         else
04890 #endif
04891         {
04892           r = optimize_node_left(en->target, opt, env);
04893 
04894           if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
04895             if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
04896               remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
04897           }
04898         }
04899         break;
04900 
04901       case ENCLOSE_STOP_BACKTRACK:
04902         r = optimize_node_left(en->target, opt, env);
04903         break;
04904       }
04905     }
04906     break;
04907 
04908   default:
04909 #ifdef ONIG_DEBUG
04910     if (!onig_is_prelude()) fprintf(stderr, "optimize_node_left: undefined node type %d\n",
04911             NTYPE(node));
04912 #endif
04913     r = ONIGERR_TYPE_BUG;
04914     break;
04915   }
04916 
04917   return r;
04918 }
04919 
04920 static int
04921 set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
04922 {
04923   int r;
04924 
04925   if (e->len == 0) return 0;
04926 
04927   if (e->ignore_case) {
04928     reg->exact = (UChar* )xmalloc(e->len);
04929     CHECK_NULL_RETURN_MEMERR(reg->exact);
04930     xmemcpy(reg->exact, e->s, e->len);
04931     reg->exact_end = reg->exact + e->len;
04932     reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
04933   }
04934   else {
04935     int allow_reverse;
04936 
04937     reg->exact = str_dup(e->s, e->s + e->len);
04938     CHECK_NULL_RETURN_MEMERR(reg->exact);
04939     reg->exact_end = reg->exact + e->len;
04940 
04941     allow_reverse =
04942         ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
04943 
04944     if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
04945       r = set_bm_skip(reg->exact, reg->exact_end, reg->enc,
04946                       reg->map, &(reg->int_map));
04947       if (r) return r;
04948 
04949       reg->optimize = (allow_reverse != 0
04950                        ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
04951     }
04952     else {
04953       reg->optimize = ONIG_OPTIMIZE_EXACT;
04954     }
04955   }
04956 
04957   reg->dmin = e->mmd.min;
04958   reg->dmax = e->mmd.max;
04959 
04960   if (reg->dmin != ONIG_INFINITE_DISTANCE) {
04961     reg->threshold_len = (int)(reg->dmin + (reg->exact_end - reg->exact));
04962   }
04963 
04964   return 0;
04965 }
04966 
04967 static void
04968 set_optimize_map_info(regex_t* reg, OptMapInfo* m)
04969 {
04970   int i;
04971 
04972   for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
04973     reg->map[i] = m->map[i];
04974 
04975   reg->optimize   = ONIG_OPTIMIZE_MAP;
04976   reg->dmin       = m->mmd.min;
04977   reg->dmax       = m->mmd.max;
04978 
04979   if (reg->dmin != ONIG_INFINITE_DISTANCE) {
04980     reg->threshold_len = (int)(reg->dmin + 1);
04981   }
04982 }
04983 
04984 static void
04985 set_sub_anchor(regex_t* reg, OptAncInfo* anc)
04986 {
04987   reg->sub_anchor |= anc->left_anchor  & ANCHOR_BEGIN_LINE;
04988   reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
04989 }
04990 
04991 #ifdef ONIG_DEBUG
04992 static void print_optimize_info(FILE* f, regex_t* reg);
04993 #endif
04994 
04995 static int
04996 set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
04997 {
04998 
04999   int r;
05000   NodeOptInfo opt;
05001   OptEnv env;
05002 
05003   env.enc            = reg->enc;
05004   env.options        = reg->options;
05005   env.case_fold_flag = reg->case_fold_flag;
05006   env.scan_env   = scan_env;
05007   clear_mml(&env.mmd);
05008 
05009   r = optimize_node_left(node, &opt, &env);
05010   if (r) return r;
05011 
05012   reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
05013         ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML);
05014 
05015   reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF);
05016 
05017   if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
05018     reg->anchor_dmin = opt.len.min;
05019     reg->anchor_dmax = opt.len.max;
05020   }
05021 
05022   if (opt.exb.len > 0 || opt.exm.len > 0) {
05023     select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
05024     if (opt.map.value > 0 &&
05025         comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
05026       goto set_map;
05027     }
05028     else {
05029       r = set_optimize_exact_info(reg, &opt.exb);
05030       set_sub_anchor(reg, &opt.exb.anc);
05031     }
05032   }
05033   else if (opt.map.value > 0) {
05034   set_map:
05035     set_optimize_map_info(reg, &opt.map);
05036     set_sub_anchor(reg, &opt.map.anc);
05037   }
05038   else {
05039     reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
05040     if (opt.len.max == 0)
05041       reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
05042   }
05043 
05044 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
05045   if (!onig_is_prelude()) print_optimize_info(stderr, reg);
05046 #endif
05047   return r;
05048 }
05049 
05050 static void
05051 clear_optimize_info(regex_t* reg)
05052 {
05053   reg->optimize      = ONIG_OPTIMIZE_NONE;
05054   reg->anchor        = 0;
05055   reg->anchor_dmin   = 0;
05056   reg->anchor_dmax   = 0;
05057   reg->sub_anchor    = 0;
05058   reg->exact_end     = (UChar* )NULL;
05059   reg->threshold_len = 0;
05060   if (IS_NOT_NULL(reg->exact)) {
05061     xfree(reg->exact);
05062     reg->exact = (UChar* )NULL;
05063   }
05064 }
05065 
05066 #ifdef ONIG_DEBUG
05067 
05068 static void print_enc_string(FILE* fp, OnigEncoding enc,
05069                              const UChar *s, const UChar *end)
05070 {
05071   fprintf(fp, "\nPATTERN: /");
05072 
05073   if (ONIGENC_MBC_MINLEN(enc) > 1) {
05074     const UChar *p;
05075     OnigCodePoint code;
05076 
05077     p = s;
05078     while (p < end) {
05079       code = ONIGENC_MBC_TO_CODE(enc, p, end);
05080       if (code >= 0x80) {
05081         fprintf(fp, " 0x%04x ", (int )code);
05082       }
05083       else {
05084         fputc((int )code, fp);
05085       }
05086 
05087       p += enclen(enc, p, end);
05088     }
05089   }
05090   else {
05091     while (s < end) {
05092       fputc((int )*s, fp);
05093       s++;
05094     }
05095   }
05096 
05097   fprintf(fp, "/ (%s)\n", enc->name);
05098 }
05099 
05100 static void
05101 print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
05102 {
05103   if (a == ONIG_INFINITE_DISTANCE)
05104     fputs("inf", f);
05105   else
05106     fprintf(f, "(%"PRIuSIZE")", a);
05107 
05108   fputs("-", f);
05109 
05110   if (b == ONIG_INFINITE_DISTANCE)
05111     fputs("inf", f);
05112   else
05113     fprintf(f, "(%"PRIuSIZE")", b);
05114 }
05115 
05116 static void
05117 print_anchor(FILE* f, int anchor)
05118 {
05119   int q = 0;
05120 
05121   fprintf(f, "[");
05122 
05123   if (anchor & ANCHOR_BEGIN_BUF) {
05124     fprintf(f, "begin-buf");
05125     q = 1;
05126   }
05127   if (anchor & ANCHOR_BEGIN_LINE) {
05128     if (q) fprintf(f, ", ");
05129     q = 1;
05130     fprintf(f, "begin-line");
05131   }
05132   if (anchor & ANCHOR_BEGIN_POSITION) {
05133     if (q) fprintf(f, ", ");
05134     q = 1;
05135     fprintf(f, "begin-pos");
05136   }
05137   if (anchor & ANCHOR_END_BUF) {
05138     if (q) fprintf(f, ", ");
05139     q = 1;
05140     fprintf(f, "end-buf");
05141   }
05142   if (anchor & ANCHOR_SEMI_END_BUF) {
05143     if (q) fprintf(f, ", ");
05144     q = 1;
05145     fprintf(f, "semi-end-buf");
05146   }
05147   if (anchor & ANCHOR_END_LINE) {
05148     if (q) fprintf(f, ", ");
05149     q = 1;
05150     fprintf(f, "end-line");
05151   }
05152   if (anchor & ANCHOR_ANYCHAR_STAR) {
05153     if (q) fprintf(f, ", ");
05154     q = 1;
05155     fprintf(f, "anychar-star");
05156   }
05157   if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
05158     if (q) fprintf(f, ", ");
05159     fprintf(f, "anychar-star-pl");
05160   }
05161 
05162   fprintf(f, "]");
05163 }
05164 
05165 static void
05166 print_optimize_info(FILE* f, regex_t* reg)
05167 {
05168   static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
05169                               "EXACT_IC", "MAP" };
05170 
05171   fprintf(f, "optimize: %s\n", on[reg->optimize]);
05172   fprintf(f, "  anchor: "); print_anchor(f, reg->anchor);
05173   if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
05174     print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
05175   fprintf(f, "\n");
05176 
05177   if (reg->optimize) {
05178     fprintf(f, "  sub anchor: "); print_anchor(f, reg->sub_anchor);
05179     fprintf(f, "\n");
05180   }
05181   fprintf(f, "\n");
05182 
05183   if (reg->exact) {
05184     UChar *p;
05185     fprintf(f, "exact: [");
05186     for (p = reg->exact; p < reg->exact_end; p++) {
05187       fputc(*p, f);
05188     }
05189     fprintf(f, "]: length: %ld\n", (reg->exact_end - reg->exact));
05190   }
05191   else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
05192     int c, i, n = 0;
05193 
05194     for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
05195       if (reg->map[i]) n++;
05196 
05197     fprintf(f, "map: n=%d\n", n);
05198     if (n > 0) {
05199       c = 0;
05200       fputc('[', f);
05201       for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
05202         if (reg->map[i] != 0) {
05203           if (c > 0)  fputs(", ", f);
05204           c++;
05205           if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
05206               ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
05207             fputc(i, f);
05208           else
05209             fprintf(f, "%d", i);
05210         }
05211       }
05212       fprintf(f, "]\n");
05213     }
05214   }
05215 }
05216 #endif /* ONIG_DEBUG */
05217 
05218 
05219 extern void
05220 onig_free_body(regex_t* reg)
05221 {
05222   if (IS_NOT_NULL(reg)) {
05223     if (IS_NOT_NULL(reg->p))                xfree(reg->p);
05224     if (IS_NOT_NULL(reg->exact))            xfree(reg->exact);
05225     if (IS_NOT_NULL(reg->int_map))          xfree(reg->int_map);
05226     if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);
05227     if (IS_NOT_NULL(reg->repeat_range))     xfree(reg->repeat_range);
05228     if (IS_NOT_NULL(reg->chain))            onig_free(reg->chain);
05229 
05230 #ifdef USE_NAMED_GROUP
05231     onig_names_free(reg);
05232 #endif
05233   }
05234 }
05235 
05236 extern void
05237 onig_free(regex_t* reg)
05238 {
05239   if (IS_NOT_NULL(reg)) {
05240     onig_free_body(reg);
05241     xfree(reg);
05242   }
05243 }
05244 
05245 size_t
05246 onig_memsize(const regex_t *reg)
05247 {
05248     size_t size = sizeof(regex_t);
05249     if (IS_NOT_NULL(reg->p))                size += reg->alloc;
05250     if (IS_NOT_NULL(reg->exact))            size += reg->exact_end - reg->exact;
05251     if (IS_NOT_NULL(reg->int_map))          size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
05252     if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
05253     if (IS_NOT_NULL(reg->repeat_range))     size += reg->repeat_range_alloc * sizeof(OnigRepeatRange);
05254     if (IS_NOT_NULL(reg->chain))            size += onig_memsize(reg->chain);
05255 
05256     return size;
05257 }
05258 
05259 #define REGEX_TRANSFER(to,from) do {\
05260   (to)->state = ONIG_STATE_MODIFY;\
05261   onig_free_body(to);\
05262   xmemcpy(to, from, sizeof(regex_t));\
05263   xfree(from);\
05264 } while (0)
05265 
05266 extern void
05267 onig_transfer(regex_t* to, regex_t* from)
05268 {
05269   THREAD_ATOMIC_START;
05270   REGEX_TRANSFER(to, from);
05271   THREAD_ATOMIC_END;
05272 }
05273 
05274 #define REGEX_CHAIN_HEAD(reg) do {\
05275   while (IS_NOT_NULL((reg)->chain)) {\
05276     (reg) = (reg)->chain;\
05277   }\
05278 } while (0)
05279 
05280 extern void
05281 onig_chain_link_add(regex_t* to, regex_t* add)
05282 {
05283   THREAD_ATOMIC_START;
05284   REGEX_CHAIN_HEAD(to);
05285   to->chain = add;
05286   THREAD_ATOMIC_END;
05287 }
05288 
05289 extern void
05290 onig_chain_reduce(regex_t* reg)
05291 {
05292   regex_t *head, *prev;
05293 
05294   prev = reg;
05295   head = prev->chain;
05296   if (IS_NOT_NULL(head)) {
05297     reg->state = ONIG_STATE_MODIFY;
05298     while (IS_NOT_NULL(head->chain)) {
05299       prev = head;
05300       head = head->chain;
05301     }
05302     prev->chain = (regex_t* )NULL;
05303     REGEX_TRANSFER(reg, head);
05304   }
05305 }
05306 
05307 #ifdef ONIG_DEBUG
05308 static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg));
05309 #endif
05310 #ifdef ONIG_DEBUG_PARSE_TREE
05311 static void print_tree P_((FILE* f, Node* node));
05312 #endif
05313 
05314 extern int
05315 onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
05316               OnigErrorInfo* einfo, const char *sourcefile, int sourceline)
05317 {
05318 #define COMPILE_INIT_SIZE  20
05319 
05320   int r;
05321   OnigDistance init_size;
05322   Node*  root;
05323   ScanEnv  scan_env = {0};
05324 #ifdef USE_SUBEXP_CALL
05325   UnsetAddrList  uslist;
05326 #endif
05327 
05328   if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
05329 
05330   scan_env.sourcefile = sourcefile;
05331   scan_env.sourceline = sourceline;
05332   reg->state = ONIG_STATE_COMPILING;
05333 
05334 #ifdef ONIG_DEBUG
05335   if (!onig_is_prelude()) print_enc_string(stderr, reg->enc, pattern, pattern_end);
05336 #endif
05337 
05338   if (reg->alloc == 0) {
05339     init_size = (pattern_end - pattern) * 2;
05340     if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
05341     r = BBUF_INIT(reg, init_size);
05342     if (r != 0) goto end;
05343   }
05344   else
05345     reg->used = 0;
05346 
05347   reg->num_mem            = 0;
05348   reg->num_repeat         = 0;
05349   reg->num_null_check     = 0;
05350   reg->repeat_range_alloc = 0;
05351   reg->repeat_range       = (OnigRepeatRange* )NULL;
05352 #ifdef USE_COMBINATION_EXPLOSION_CHECK
05353   reg->num_comb_exp_check = 0;
05354 #endif
05355 
05356   r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
05357   if (r != 0) goto err;
05358 
05359 #ifdef ONIG_DEBUG_PARSE_TREE
05360 # if 0
05361   fprintf(stderr, "ORIGINAL PARSE TREE:\n");
05362   if (!onig_is_prelude()) {
05363     print_tree(stderr, root);
05364   }
05365 # endif
05366 #endif
05367 
05368 #ifdef USE_NAMED_GROUP
05369   /* mixed use named group and no-named group */
05370   if (scan_env.num_named > 0 &&
05371       IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
05372       !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
05373     if (scan_env.num_named != scan_env.num_mem)
05374       r = disable_noname_group_capture(&root, reg, &scan_env);
05375     else
05376       r = numbered_ref_check(root);
05377 
05378     if (r != 0) goto err;
05379   }
05380 #endif
05381 
05382 #ifdef USE_SUBEXP_CALL
05383   if (scan_env.num_call > 0) {
05384     r = unset_addr_list_init(&uslist, scan_env.num_call);
05385     if (r != 0) goto err;
05386     scan_env.unset_addr_list = &uslist;
05387     r = setup_subexp_call(root, &scan_env);
05388     if (r != 0) goto err_unset;
05389     r = subexp_recursive_check_trav(root, &scan_env);
05390     if (r  < 0) goto err_unset;
05391     r = subexp_inf_recursive_check_trav(root, &scan_env);
05392     if (r != 0) goto err_unset;
05393 
05394     reg->num_call = scan_env.num_call;
05395   }
05396   else
05397     reg->num_call = 0;
05398 #endif
05399 
05400   r = setup_tree(root, reg, 0, &scan_env);
05401   if (r != 0) goto err_unset;
05402 
05403 #ifdef ONIG_DEBUG_PARSE_TREE
05404   if (!onig_is_prelude()) print_tree(stderr, root);
05405 #endif
05406 
05407   reg->capture_history  = scan_env.capture_history;
05408   reg->bt_mem_start     = scan_env.bt_mem_start;
05409   reg->bt_mem_start    |= reg->capture_history;
05410   if (IS_FIND_CONDITION(reg->options))
05411     BIT_STATUS_ON_ALL(reg->bt_mem_end);
05412   else {
05413     reg->bt_mem_end  = scan_env.bt_mem_end;
05414     reg->bt_mem_end |= reg->capture_history;
05415   }
05416 
05417 #ifdef USE_COMBINATION_EXPLOSION_CHECK
05418   if (scan_env.backrefed_mem == 0
05419 #ifdef USE_SUBEXP_CALL
05420       || scan_env.num_call == 0
05421 #endif
05422       ) {
05423     setup_comb_exp_check(root, 0, &scan_env);
05424 #ifdef USE_SUBEXP_CALL
05425     if (scan_env.has_recursion != 0) {
05426       scan_env.num_comb_exp_check = 0;
05427     }
05428     else
05429 #endif
05430     if (scan_env.comb_exp_max_regnum > 0) {
05431       int i;
05432       for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
05433         if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
05434           scan_env.num_comb_exp_check = 0;
05435           break;
05436         }
05437       }
05438     }
05439   }
05440 
05441   reg->num_comb_exp_check = scan_env.num_comb_exp_check;
05442 #endif
05443 
05444   clear_optimize_info(reg);
05445 #ifndef ONIG_DONT_OPTIMIZE
05446   r = set_optimize_info_from_tree(root, reg, &scan_env);
05447   if (r != 0) goto err_unset;
05448 #endif
05449 
05450   if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
05451     xfree(scan_env.mem_nodes_dynamic);
05452     scan_env.mem_nodes_dynamic = (Node** )NULL;
05453   }
05454 
05455   r = compile_tree(root, reg);
05456   if (r == 0) {
05457     r = add_opcode(reg, OP_END);
05458 #ifdef USE_SUBEXP_CALL
05459     if (scan_env.num_call > 0) {
05460       r = unset_addr_list_fix(&uslist, reg);
05461       unset_addr_list_end(&uslist);
05462       if (r) goto err;
05463     }
05464 #endif
05465 
05466     if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
05467       reg->stack_pop_level = STACK_POP_LEVEL_ALL;
05468     else {
05469       if (reg->bt_mem_start != 0)
05470         reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
05471       else
05472         reg->stack_pop_level = STACK_POP_LEVEL_FREE;
05473     }
05474   }
05475 #ifdef USE_SUBEXP_CALL
05476   else if (scan_env.num_call > 0) {
05477     unset_addr_list_end(&uslist);
05478   }
05479 #endif
05480   onig_node_free(root);
05481 
05482 #ifdef ONIG_DEBUG_COMPILE
05483 #ifdef USE_NAMED_GROUP
05484   if (!onig_is_prelude()) onig_print_names(stderr, reg);
05485 #endif
05486   if (!onig_is_prelude()) print_compiled_byte_code_list(stderr, reg);
05487 #endif
05488 
05489  end:
05490   reg->state = ONIG_STATE_NORMAL;
05491   return r;
05492 
05493  err_unset:
05494 #ifdef USE_SUBEXP_CALL
05495   if (scan_env.num_call > 0) {
05496     unset_addr_list_end(&uslist);
05497   }
05498 #endif
05499  err:
05500   if (IS_NOT_NULL(scan_env.error)) {
05501     if (IS_NOT_NULL(einfo)) {
05502       einfo->enc     = scan_env.enc;
05503       einfo->par     = scan_env.error;
05504       einfo->par_end = scan_env.error_end;
05505     }
05506   }
05507 
05508   onig_node_free(root);
05509   if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
05510       xfree(scan_env.mem_nodes_dynamic);
05511   return r;
05512 }
05513 
05514 #ifdef USE_RECOMPILE_API
05515 extern int
05516 onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
05517             OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
05518             OnigErrorInfo* einfo)
05519 {
05520   int r;
05521   regex_t *new_reg;
05522 
05523   r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo);
05524   if (r) return r;
05525   if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
05526     onig_transfer(reg, new_reg);
05527   }
05528   else {
05529     onig_chain_link_add(reg, new_reg);
05530   }
05531   return 0;
05532 }
05533 #endif
05534 
05535 static int onig_inited = 0;
05536 
05537 extern int
05538 onig_reg_init(regex_t* reg, OnigOptionType option,
05539               OnigCaseFoldType case_fold_flag,
05540               OnigEncoding enc, const OnigSyntaxType* syntax)
05541 {
05542   if (! onig_inited)
05543     onig_init();
05544 
05545   if (IS_NULL(reg))
05546     return ONIGERR_INVALID_ARGUMENT;
05547 
05548   if (ONIGENC_IS_UNDEF(enc))
05549     return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED;
05550 
05551   if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
05552       == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
05553     return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
05554   }
05555 
05556   (reg)->state = ONIG_STATE_MODIFY;
05557 
05558   if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
05559     option |= syntax->options;
05560     option &= ~ONIG_OPTION_SINGLELINE;
05561   }
05562   else
05563     option |= syntax->options;
05564 
05565   (reg)->enc              = enc;
05566   (reg)->options          = option;
05567   (reg)->syntax           = syntax;
05568   (reg)->optimize         = 0;
05569   (reg)->exact            = (UChar* )NULL;
05570   (reg)->int_map          = (int* )NULL;
05571   (reg)->int_map_backward = (int* )NULL;
05572   (reg)->chain            = (regex_t* )NULL;
05573 
05574   (reg)->p                = (UChar* )NULL;
05575   (reg)->alloc            = 0;
05576   (reg)->used             = 0;
05577   (reg)->name_table       = (void* )NULL;
05578 
05579   (reg)->case_fold_flag   = case_fold_flag;
05580   return 0;
05581 }
05582 
05583 extern int
05584 onig_new_without_alloc(regex_t* reg, const UChar* pattern,
05585           const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
05586           OnigSyntaxType* syntax, OnigErrorInfo* einfo)
05587 {
05588   int r;
05589 
05590   r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
05591   if (r) return r;
05592 
05593   r = onig_compile(reg, pattern, pattern_end, einfo, NULL, 0);
05594   return r;
05595 }
05596 
05597 extern int
05598 onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
05599           OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax,
05600           OnigErrorInfo* einfo)
05601 {
05602   int r;
05603 
05604   *reg = (regex_t* )xmalloc(sizeof(regex_t));
05605   if (IS_NULL(*reg)) return ONIGERR_MEMORY;
05606 
05607   r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
05608   if (r) goto err;
05609 
05610   r = onig_compile(*reg, pattern, pattern_end, einfo, NULL, 0);
05611   if (r) {
05612   err:
05613     onig_free(*reg);
05614     *reg = NULL;
05615   }
05616   return r;
05617 }
05618 
05619 
05620 extern int
05621 onig_init(void)
05622 {
05623   if (onig_inited != 0)
05624     return 0;
05625 
05626   THREAD_SYSTEM_INIT;
05627   THREAD_ATOMIC_START;
05628 
05629   onig_inited = 1;
05630 
05631   onigenc_init();
05632   /* onigenc_set_default_caseconv_table((UChar* )0); */
05633 
05634 #ifdef ONIG_DEBUG_STATISTICS
05635   onig_statistics_init();
05636 #endif
05637 
05638   THREAD_ATOMIC_END;
05639   return 0;
05640 }
05641 
05642 
05643 extern int
05644 onig_end(void)
05645 {
05646   THREAD_ATOMIC_START;
05647 
05648 #ifdef ONIG_DEBUG_STATISTICS
05649   if (!onig_is_prelude()) onig_print_statistics(stderr);
05650 #endif
05651 
05652 #ifdef USE_SHARED_CCLASS_TABLE
05653   onig_free_shared_cclass_table();
05654 #endif
05655 
05656 #ifdef USE_PARSE_TREE_NODE_RECYCLE
05657   onig_free_node_list();
05658 #endif
05659 
05660   onig_inited = 0;
05661 
05662   THREAD_ATOMIC_END;
05663   THREAD_SYSTEM_END;
05664   return 0;
05665 }
05666 
05667 extern int
05668 onig_is_in_code_range(const UChar* p, OnigCodePoint code)
05669 {
05670   OnigCodePoint n, *data;
05671   OnigCodePoint low, high, x;
05672 
05673   GET_CODE_POINT(n, p);
05674   data = (OnigCodePoint* )p;
05675   data++;
05676 
05677   for (low = 0, high = n; low < high; ) {
05678     x = (low + high) >> 1;
05679     if (code > data[x * 2 + 1])
05680       low = x + 1;
05681     else
05682       high = x;
05683   }
05684 
05685   return ((low < n && code >= data[low * 2]) ? 1 : 0);
05686 }
05687 
05688 extern int
05689 onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc)
05690 {
05691   int found;
05692 
05693   if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
05694     if (IS_NULL(cc->mbuf)) {
05695       found = 0;
05696     }
05697     else {
05698       found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
05699     }
05700   }
05701   else {
05702     found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
05703   }
05704 
05705   if (IS_NCCLASS_NOT(cc))
05706     return !found;
05707   else
05708     return found;
05709 }
05710 
05711 extern int
05712 onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
05713 {
05714   int len;
05715 
05716   if (ONIGENC_MBC_MINLEN(enc) > 1) {
05717     len = 2;
05718   }
05719   else {
05720     len = ONIGENC_CODE_TO_MBCLEN(enc, code);
05721   }
05722   return onig_is_code_in_cc_len(len, code, cc);
05723 }
05724 
05725 
05726 #ifdef ONIG_DEBUG
05727 
05728 /* arguments type */
05729 #define ARG_SPECIAL     -1
05730 #define ARG_NON          0
05731 #define ARG_RELADDR      1
05732 #define ARG_ABSADDR      2
05733 #define ARG_LENGTH       3
05734 #define ARG_MEMNUM       4
05735 #define ARG_OPTION       5
05736 #define ARG_STATE_CHECK  6
05737 
05738 OnigOpInfoType OnigOpInfo[] = {
05739   { OP_FINISH,            "finish",          ARG_NON },
05740   { OP_END,               "end",             ARG_NON },
05741   { OP_EXACT1,            "exact1",          ARG_SPECIAL },
05742   { OP_EXACT2,            "exact2",          ARG_SPECIAL },
05743   { OP_EXACT3,            "exact3",          ARG_SPECIAL },
05744   { OP_EXACT4,            "exact4",          ARG_SPECIAL },
05745   { OP_EXACT5,            "exact5",          ARG_SPECIAL },
05746   { OP_EXACTN,            "exactn",          ARG_SPECIAL },
05747   { OP_EXACTMB2N1,        "exactmb2-n1",     ARG_SPECIAL },
05748   { OP_EXACTMB2N2,        "exactmb2-n2",     ARG_SPECIAL },
05749   { OP_EXACTMB2N3,        "exactmb2-n3",     ARG_SPECIAL },
05750   { OP_EXACTMB2N,         "exactmb2-n",      ARG_SPECIAL },
05751   { OP_EXACTMB3N,         "exactmb3n"  ,     ARG_SPECIAL },
05752   { OP_EXACTMBN,          "exactmbn",        ARG_SPECIAL },
05753   { OP_EXACT1_IC,         "exact1-ic",       ARG_SPECIAL },
05754   { OP_EXACTN_IC,         "exactn-ic",       ARG_SPECIAL },
05755   { OP_CCLASS,            "cclass",          ARG_SPECIAL },
05756   { OP_CCLASS_MB,         "cclass-mb",       ARG_SPECIAL },
05757   { OP_CCLASS_MIX,        "cclass-mix",      ARG_SPECIAL },
05758   { OP_CCLASS_NOT,        "cclass-not",      ARG_SPECIAL },
05759   { OP_CCLASS_MB_NOT,     "cclass-mb-not",   ARG_SPECIAL },
05760   { OP_CCLASS_MIX_NOT,    "cclass-mix-not",  ARG_SPECIAL },
05761   { OP_CCLASS_NODE,       "cclass-node",     ARG_SPECIAL },
05762   { OP_ANYCHAR,           "anychar",         ARG_NON },
05763   { OP_ANYCHAR_ML,        "anychar-ml",      ARG_NON },
05764   { OP_ANYCHAR_STAR,      "anychar*",        ARG_NON },
05765   { OP_ANYCHAR_ML_STAR,   "anychar-ml*",     ARG_NON },
05766   { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
05767   { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
05768   { OP_WORD,                "word",            ARG_NON },
05769   { OP_NOT_WORD,            "not-word",        ARG_NON },
05770   { OP_WORD_BOUND,          "word-bound",      ARG_NON },
05771   { OP_NOT_WORD_BOUND,      "not-word-bound",  ARG_NON },
05772   { OP_WORD_BEGIN,          "word-begin",      ARG_NON },
05773   { OP_WORD_END,            "word-end",        ARG_NON },
05774   { OP_BEGIN_BUF,           "begin-buf",       ARG_NON },
05775   { OP_END_BUF,             "end-buf",         ARG_NON },
05776   { OP_BEGIN_LINE,          "begin-line",      ARG_NON },
05777   { OP_END_LINE,            "end-line",        ARG_NON },
05778   { OP_SEMI_END_BUF,        "semi-end-buf",    ARG_NON },
05779   { OP_BEGIN_POSITION,      "begin-position",  ARG_NON },
05780   { OP_BACKREF1,            "backref1",             ARG_NON },
05781   { OP_BACKREF2,            "backref2",             ARG_NON },
05782   { OP_BACKREFN,            "backrefn",             ARG_MEMNUM  },
05783   { OP_BACKREFN_IC,         "backrefn-ic",          ARG_SPECIAL },
05784   { OP_BACKREF_MULTI,       "backref_multi",        ARG_SPECIAL },
05785   { OP_BACKREF_MULTI_IC,    "backref_multi-ic",     ARG_SPECIAL },
05786   { OP_BACKREF_WITH_LEVEL,    "backref_at_level",     ARG_SPECIAL },
05787   { OP_MEMORY_START_PUSH,   "mem-start-push",       ARG_MEMNUM  },
05788   { OP_MEMORY_START,        "mem-start",            ARG_MEMNUM  },
05789   { OP_MEMORY_END_PUSH,     "mem-end-push",         ARG_MEMNUM  },
05790   { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec",     ARG_MEMNUM  },
05791   { OP_MEMORY_END,          "mem-end",              ARG_MEMNUM  },
05792   { OP_MEMORY_END_REC,      "mem-end-rec",          ARG_MEMNUM  },
05793   { OP_SET_OPTION_PUSH,     "set-option-push",      ARG_OPTION  },
05794   { OP_SET_OPTION,          "set-option",           ARG_OPTION  },
05795   { OP_FAIL,                "fail",                 ARG_NON },
05796   { OP_JUMP,                "jump",                 ARG_RELADDR },
05797   { OP_PUSH,                "push",                 ARG_RELADDR },
05798   { OP_POP,                 "pop",                  ARG_NON },
05799   { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1",      ARG_SPECIAL },
05800   { OP_PUSH_IF_PEEK_NEXT,   "push-if-peek-next",    ARG_SPECIAL },
05801   { OP_REPEAT,              "repeat",               ARG_SPECIAL },
05802   { OP_REPEAT_NG,           "repeat-ng",            ARG_SPECIAL },
05803   { OP_REPEAT_INC,          "repeat-inc",           ARG_MEMNUM  },
05804   { OP_REPEAT_INC_NG,       "repeat-inc-ng",        ARG_MEMNUM  },
05805   { OP_REPEAT_INC_SG,       "repeat-inc-sg",        ARG_MEMNUM  },
05806   { OP_REPEAT_INC_NG_SG,    "repeat-inc-ng-sg",     ARG_MEMNUM  },
05807   { OP_NULL_CHECK_START,    "null-check-start",     ARG_MEMNUM  },
05808   { OP_NULL_CHECK_END,      "null-check-end",       ARG_MEMNUM  },
05809   { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM  },
05810   { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM  },
05811   { OP_PUSH_POS,             "push-pos",             ARG_NON },
05812   { OP_POP_POS,              "pop-pos",              ARG_NON },
05813   { OP_PUSH_POS_NOT,         "push-pos-not",         ARG_RELADDR },
05814   { OP_FAIL_POS,             "fail-pos",             ARG_NON },
05815   { OP_PUSH_STOP_BT,         "push-stop-bt",         ARG_NON },
05816   { OP_POP_STOP_BT,          "pop-stop-bt",          ARG_NON },
05817   { OP_LOOK_BEHIND,          "look-behind",          ARG_SPECIAL },
05818   { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
05819   { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
05820   { OP_CALL,                 "call",                 ARG_ABSADDR },
05821   { OP_RETURN,               "return",               ARG_NON },
05822   { OP_STATE_CHECK_PUSH,         "state-check-push",         ARG_SPECIAL },
05823   { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
05824   { OP_STATE_CHECK,              "state-check",              ARG_STATE_CHECK },
05825   { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*",     ARG_STATE_CHECK },
05826   { OP_STATE_CHECK_ANYCHAR_ML_STAR,
05827     "state-check-anychar-ml*", ARG_STATE_CHECK },
05828   { -1, "", ARG_NON }
05829 };
05830 
05831 static const char*
05832 op2name(int opcode)
05833 {
05834   int i;
05835 
05836   for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
05837     if (opcode == OnigOpInfo[i].opcode)
05838       return OnigOpInfo[i].name;
05839   }
05840   return "";
05841 }
05842 
05843 static int
05844 op2arg_type(int opcode)
05845 {
05846   int i;
05847 
05848   for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
05849     if (opcode == OnigOpInfo[i].opcode)
05850       return OnigOpInfo[i].arg_type;
05851   }
05852   return ARG_SPECIAL;
05853 }
05854 
05855 static void
05856 Indent(FILE* f, int indent)
05857 {
05858   int i;
05859   for (i = 0; i < indent; i++) putc(' ', f);
05860 }
05861 
05862 static void
05863 p_string(FILE* f, int len, UChar* s)
05864 {
05865   fputs(":", f);
05866   while (len-- > 0) { fputc(*s++, f); }
05867 }
05868 
05869 static void
05870 p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
05871 {
05872   int x = len * mb_len;
05873 
05874   fprintf(f, ":%d:", len);
05875   while (x-- > 0) { fputc(*s++, f); }
05876 }
05877 
05878 extern void
05879 onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
05880                               OnigEncoding enc)
05881 {
05882   int i, n, arg_type;
05883   RelAddrType addr;
05884   LengthType len;
05885   MemNumType mem;
05886   StateCheckNumType scn;
05887   OnigCodePoint code;
05888   UChar *q;
05889 
05890   fprintf(f, "[%s", op2name(*bp));
05891   arg_type = op2arg_type(*bp);
05892   if (arg_type != ARG_SPECIAL) {
05893     bp++;
05894     switch (arg_type) {
05895     case ARG_NON:
05896       break;
05897     case ARG_RELADDR:
05898       GET_RELADDR_INC(addr, bp);
05899       fprintf(f, ":(%d)", addr);
05900       break;
05901     case ARG_ABSADDR:
05902       GET_ABSADDR_INC(addr, bp);
05903       fprintf(f, ":(%d)", addr);
05904       break;
05905     case ARG_LENGTH:
05906       GET_LENGTH_INC(len, bp);
05907       fprintf(f, ":%d", len);
05908       break;
05909     case ARG_MEMNUM:
05910       mem = *((MemNumType* )bp);
05911       bp += SIZE_MEMNUM;
05912       fprintf(f, ":%d", mem);
05913       break;
05914     case ARG_OPTION:
05915       {
05916         OnigOptionType option = *((OnigOptionType* )bp);
05917         bp += SIZE_OPTION;
05918         fprintf(f, ":%d", option);
05919       }
05920       break;
05921 
05922     case ARG_STATE_CHECK:
05923       scn = *((StateCheckNumType* )bp);
05924       bp += SIZE_STATE_CHECK_NUM;
05925       fprintf(f, ":%d", scn);
05926       break;
05927     }
05928   }
05929   else {
05930     switch (*bp++) {
05931     case OP_EXACT1:
05932     case OP_ANYCHAR_STAR_PEEK_NEXT:
05933     case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
05934       p_string(f, 1, bp++); break;
05935     case OP_EXACT2:
05936       p_string(f, 2, bp); bp += 2; break;
05937     case OP_EXACT3:
05938       p_string(f, 3, bp); bp += 3; break;
05939     case OP_EXACT4:
05940       p_string(f, 4, bp); bp += 4; break;
05941     case OP_EXACT5:
05942       p_string(f, 5, bp); bp += 5; break;
05943     case OP_EXACTN:
05944       GET_LENGTH_INC(len, bp);
05945       p_len_string(f, len, 1, bp);
05946       bp += len;
05947       break;
05948 
05949     case OP_EXACTMB2N1:
05950       p_string(f, 2, bp); bp += 2; break;
05951     case OP_EXACTMB2N2:
05952       p_string(f, 4, bp); bp += 4; break;
05953     case OP_EXACTMB2N3:
05954       p_string(f, 6, bp); bp += 6; break;
05955     case OP_EXACTMB2N:
05956       GET_LENGTH_INC(len, bp);
05957       p_len_string(f, len, 2, bp);
05958       bp += len * 2;
05959       break;
05960     case OP_EXACTMB3N:
05961       GET_LENGTH_INC(len, bp);
05962       p_len_string(f, len, 3, bp);
05963       bp += len * 3;
05964       break;
05965     case OP_EXACTMBN:
05966       {
05967         int mb_len;
05968 
05969         GET_LENGTH_INC(mb_len, bp);
05970         GET_LENGTH_INC(len, bp);
05971         fprintf(f, ":%d:%d:", mb_len, len);
05972         n = len * mb_len;
05973         while (n-- > 0) { fputc(*bp++, f); }
05974       }
05975       break;
05976 
05977     case OP_EXACT1_IC:
05978       len = enclen(enc, bp, bpend);
05979       p_string(f, len, bp);
05980       bp += len;
05981       break;
05982     case OP_EXACTN_IC:
05983       GET_LENGTH_INC(len, bp);
05984       p_len_string(f, len, 1, bp);
05985       bp += len;
05986       break;
05987 
05988     case OP_CCLASS:
05989       n = bitset_on_num((BitSetRef )bp);
05990       bp += SIZE_BITSET;
05991       fprintf(f, ":%d", n);
05992       break;
05993 
05994     case OP_CCLASS_NOT:
05995       n = bitset_on_num((BitSetRef )bp);
05996       bp += SIZE_BITSET;
05997       fprintf(f, ":%d", n);
05998       break;
05999 
06000     case OP_CCLASS_MB:
06001     case OP_CCLASS_MB_NOT:
06002       GET_LENGTH_INC(len, bp);
06003       q = bp;
06004 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
06005       ALIGNMENT_RIGHT(q);
06006 #endif
06007       GET_CODE_POINT(code, q);
06008       bp += len;
06009       fprintf(f, ":%d:%d", (int )code, len);
06010       break;
06011 
06012     case OP_CCLASS_MIX:
06013     case OP_CCLASS_MIX_NOT:
06014       n = bitset_on_num((BitSetRef )bp);
06015       bp += SIZE_BITSET;
06016       GET_LENGTH_INC(len, bp);
06017       q = bp;
06018 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
06019       ALIGNMENT_RIGHT(q);
06020 #endif
06021       GET_CODE_POINT(code, q);
06022       bp += len;
06023       fprintf(f, ":%d:%d:%d", n, (int )code, len);
06024       break;
06025 
06026     case OP_CCLASS_NODE:
06027       {
06028         CClassNode *cc;
06029 
06030         GET_POINTER_INC(cc, bp);
06031         n = bitset_on_num(cc->bs);
06032         fprintf(f, ":%"PRIuPTR":%d", (uintptr_t)cc, n);
06033       }
06034       break;
06035 
06036     case OP_BACKREFN_IC:
06037       mem = *((MemNumType* )bp);
06038       bp += SIZE_MEMNUM;
06039       fprintf(f, ":%d", mem);
06040       break;
06041 
06042     case OP_BACKREF_MULTI_IC:
06043     case OP_BACKREF_MULTI:
06044       fputs(" ", f);
06045       GET_LENGTH_INC(len, bp);
06046       for (i = 0; i < len; i++) {
06047         GET_MEMNUM_INC(mem, bp);
06048         if (i > 0) fputs(", ", f);
06049         fprintf(f, "%d", mem);
06050       }
06051       break;
06052 
06053     case OP_BACKREF_WITH_LEVEL:
06054       {
06055         OnigOptionType option;
06056         LengthType level;
06057 
06058         GET_OPTION_INC(option, bp);
06059         fprintf(f, ":%d", option);
06060         GET_LENGTH_INC(level, bp);
06061         fprintf(f, ":%d", level);
06062 
06063         fputs(" ", f);
06064         GET_LENGTH_INC(len, bp);
06065         for (i = 0; i < len; i++) {
06066           GET_MEMNUM_INC(mem, bp);
06067           if (i > 0) fputs(", ", f);
06068           fprintf(f, "%d", mem);
06069         }
06070       }
06071       break;
06072 
06073     case OP_REPEAT:
06074     case OP_REPEAT_NG:
06075       {
06076         mem = *((MemNumType* )bp);
06077         bp += SIZE_MEMNUM;
06078         addr = *((RelAddrType* )bp);
06079         bp += SIZE_RELADDR;
06080         fprintf(f, ":%d:%d", mem, addr);
06081       }
06082       break;
06083 
06084     case OP_PUSH_OR_JUMP_EXACT1:
06085     case OP_PUSH_IF_PEEK_NEXT:
06086       addr = *((RelAddrType* )bp);
06087       bp += SIZE_RELADDR;
06088       fprintf(f, ":(%d)", addr);
06089       p_string(f, 1, bp);
06090       bp += 1;
06091       break;
06092 
06093     case OP_LOOK_BEHIND:
06094       GET_LENGTH_INC(len, bp);
06095       fprintf(f, ":%d", len);
06096       break;
06097 
06098     case OP_PUSH_LOOK_BEHIND_NOT:
06099       GET_RELADDR_INC(addr, bp);
06100       GET_LENGTH_INC(len, bp);
06101       fprintf(f, ":%d:(%d)", len, addr);
06102       break;
06103 
06104     case OP_STATE_CHECK_PUSH:
06105     case OP_STATE_CHECK_PUSH_OR_JUMP:
06106       scn = *((StateCheckNumType* )bp);
06107       bp += SIZE_STATE_CHECK_NUM;
06108       addr = *((RelAddrType* )bp);
06109       bp += SIZE_RELADDR;
06110       fprintf(f, ":%d:(%d)", scn, addr);
06111       break;
06112 
06113     default:
06114       fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
06115               *--bp);
06116     }
06117   }
06118   fputs("]", f);
06119   if (nextp) *nextp = bp;
06120 }
06121 
06122 static void
06123 print_compiled_byte_code_list(FILE* f, regex_t* reg)
06124 {
06125   int ncode;
06126   UChar* bp = reg->p;
06127   UChar* end = reg->p + reg->used;
06128 
06129   fprintf(f, "code length: %d\n", reg->used);
06130 
06131   ncode = -1;
06132   while (bp < end) {
06133     ncode++;
06134     if (bp > reg->p) {
06135       if (ncode % 5 == 0)
06136         fprintf(f, "\n%ld:", bp-reg->p);
06137       else
06138         fprintf(f, " %ld:", bp-reg->p);
06139     }
06140     onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc);
06141   }
06142 
06143   fprintf(f, "\n");
06144 }
06145 
06146 static void
06147 print_indent_tree(FILE* f, Node* node, int indent)
06148 {
06149   int i, type, container_p = 0;
06150   int add = 3;
06151   UChar* p;
06152 
06153   Indent(f, indent);
06154   if (IS_NULL(node)) {
06155     fprintf(f, "ERROR: null node!!!\n");
06156     exit (0);
06157   }
06158 
06159   type = NTYPE(node);
06160   switch (type) {
06161   case NT_LIST:
06162   case NT_ALT:
06163     if (NTYPE(node) == NT_LIST)
06164       fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t)node);
06165     else
06166       fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t)node);
06167 
06168     print_indent_tree(f, NCAR(node), indent + add);
06169     while (IS_NOT_NULL(node = NCDR(node))) {
06170       if (NTYPE(node) != type) {
06171         fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
06172         exit(0);
06173       }
06174       print_indent_tree(f, NCAR(node), indent + add);
06175     }
06176     break;
06177 
06178   case NT_STR:
06179     fprintf(f, "<string%s:%"PRIxPTR">",
06180             (NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t)node);
06181     for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
06182       if (*p >= 0x20 && *p < 0x7f)
06183         fputc(*p, f);
06184       else {
06185         fprintf(f, " 0x%02x", *p);
06186       }
06187     }
06188     break;
06189 
06190   case NT_CCLASS:
06191     fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t)node);
06192     if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f);
06193     if (NCCLASS(node)->mbuf) {
06194       BBuf* bbuf = NCCLASS(node)->mbuf;
06195       for (i = 0; i < (int)bbuf->used; i++) {
06196         if (i > 0) fprintf(f, ",");
06197         fprintf(f, "%0x", bbuf->p[i]);
06198       }
06199     }
06200     break;
06201 
06202   case NT_CTYPE:
06203     fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t)node);
06204     switch (NCTYPE(node)->ctype) {
06205     case ONIGENC_CTYPE_WORD:
06206       if (NCTYPE(node)->not != 0)
06207         fputs("not word",       f);
06208       else
06209         fputs("word",           f);
06210       break;
06211 
06212     default:
06213       fprintf(f, "ERROR: undefined ctype.\n");
06214       exit(0);
06215     }
06216     break;
06217 
06218   case NT_CANY:
06219     fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t)node);
06220     break;
06221 
06222   case NT_ANCHOR:
06223     fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t)node);
06224     switch (NANCHOR(node)->type) {
06225     case ANCHOR_BEGIN_BUF:      fputs("begin buf",      f); break;
06226     case ANCHOR_END_BUF:        fputs("end buf",        f); break;
06227     case ANCHOR_BEGIN_LINE:     fputs("begin line",     f); break;
06228     case ANCHOR_END_LINE:       fputs("end line",       f); break;
06229     case ANCHOR_SEMI_END_BUF:   fputs("semi end buf",   f); break;
06230     case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
06231 
06232     case ANCHOR_WORD_BOUND:      fputs("word bound",     f); break;
06233     case ANCHOR_NOT_WORD_BOUND:  fputs("not word bound", f); break;
06234 #ifdef USE_WORD_BEGIN_END
06235     case ANCHOR_WORD_BEGIN:      fputs("word begin", f);     break;
06236     case ANCHOR_WORD_END:        fputs("word end", f);       break;
06237 #endif
06238     case ANCHOR_PREC_READ:       fputs("prec read",      f); container_p = TRUE; break;
06239     case ANCHOR_PREC_READ_NOT:   fputs("prec read not",  f); container_p = TRUE; break;
06240     case ANCHOR_LOOK_BEHIND:     fputs("look_behind",    f); container_p = TRUE; break;
06241     case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); container_p = TRUE; break;
06242 
06243     default:
06244       fprintf(f, "ERROR: undefined anchor type.\n");
06245       break;
06246     }
06247     break;
06248 
06249   case NT_BREF:
06250     {
06251       int* p;
06252       BRefNode* br = NBREF(node);
06253       p = BACKREFS_P(br);
06254       fprintf(f, "<backref:%"PRIxPTR">", (intptr_t)node);
06255       for (i = 0; i < br->back_num; i++) {
06256         if (i > 0) fputs(", ", f);
06257         fprintf(f, "%d", p[i]);
06258       }
06259     }
06260     break;
06261 
06262 #ifdef USE_SUBEXP_CALL
06263   case NT_CALL:
06264     {
06265       CallNode* cn = NCALL(node);
06266       fprintf(f, "<call:%"PRIxPTR">", (intptr_t)node);
06267       p_string(f, cn->name_end - cn->name, cn->name);
06268     }
06269     break;
06270 #endif
06271 
06272   case NT_QTFR:
06273     fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t)node,
06274             NQTFR(node)->lower, NQTFR(node)->upper,
06275             (NQTFR(node)->greedy ? "" : "?"));
06276     print_indent_tree(f, NQTFR(node)->target, indent + add);
06277     break;
06278 
06279   case NT_ENCLOSE:
06280     fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t)node);
06281     switch (NENCLOSE(node)->type) {
06282     case ENCLOSE_OPTION:
06283       fprintf(f, "option:%d\n", NENCLOSE(node)->option);
06284       print_indent_tree(f, NENCLOSE(node)->target, indent + add);
06285       break;
06286     case ENCLOSE_MEMORY:
06287       fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
06288       break;
06289     case ENCLOSE_STOP_BACKTRACK:
06290       fprintf(f, "stop-bt");
06291       break;
06292 
06293     default:
06294       break;
06295     }
06296     fprintf(f, "\n");
06297     print_indent_tree(f, NENCLOSE(node)->target, indent + add);
06298     break;
06299 
06300   default:
06301     fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
06302     break;
06303   }
06304 
06305   if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
06306       type != NT_ENCLOSE)
06307     fprintf(f, "\n");
06308 
06309   if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add);
06310 
06311   fflush(f);
06312 }
06313 #endif /* ONIG_DEBUG */
06314 
06315 #ifdef ONIG_DEBUG_PARSE_TREE
06316 static void
06317 print_tree(FILE* f, Node* node)
06318 {
06319   print_indent_tree(f, node, 0);
06320 }
06321 #endif
06322