Ruby 1.9.3p327(2012-11-10revision37606)
|
00001 #ifndef ONIGURUMA_REGINT_H 00002 #define ONIGURUMA_REGINT_H 00003 /********************************************************************** 00004 regint.h - Oniguruma (regular expression library) 00005 **********************************************************************/ 00006 /*- 00007 * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> 00008 * All rights reserved. 00009 * 00010 * Redistribution and use in source and binary forms, with or without 00011 * modification, are permitted provided that the following conditions 00012 * are met: 00013 * 1. Redistributions of source code must retain the above copyright 00014 * notice, this list of conditions and the following disclaimer. 00015 * 2. Redistributions in binary form must reproduce the above copyright 00016 * notice, this list of conditions and the following disclaimer in the 00017 * documentation and/or other materials provided with the distribution. 00018 * 00019 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 00020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00021 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 00022 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 00023 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 00024 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 00025 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 00026 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00027 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 00028 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 00029 * SUCH DAMAGE. 
00030 */ 00031 00032 /* for debug */ 00033 /* #define ONIG_DEBUG_PARSE_TREE */ 00034 /* #define ONIG_DEBUG_COMPILE */ 00035 /* #define ONIG_DEBUG_SEARCH */ 00036 /* #define ONIG_DEBUG_MATCH */ 00037 /* #define ONIG_DONT_OPTIMIZE */ 00038 00039 /* for byte-code statistical data. */ 00040 /* #define ONIG_DEBUG_STATISTICS */ 00041 00042 #if defined(ONIG_DEBUG_PARSE_TREE) || defined(ONIG_DEBUG_MATCH) || \ 00043 defined(ONIG_DEBUG_SEARCH) || defined(ONIG_DEBUG_COMPILE) || \ 00044 defined(ONIG_DEBUG_STATISTICS) 00045 #ifndef ONIG_DEBUG 00046 #define ONIG_DEBUG 00047 #endif 00048 #endif 00049 00050 #if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \ 00051 defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD86) || \ 00052 defined(__mc68020__) 00053 #define PLATFORM_UNALIGNED_WORD_ACCESS 00054 #endif 00055 00056 /* config */ 00057 /* spec. config */ 00058 #define USE_NAMED_GROUP 00059 #define USE_SUBEXP_CALL 00060 #define USE_BACKREF_WITH_LEVEL /* \k<name+n>, \k<name-n> */ 00061 #define USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT /* /(?:()|())*\2/ */ 00062 #define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */ 00063 #define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR 00064 /* #define USE_RECOMPILE_API */ 00065 /* !!! moved to regenc.h. 
*/ /* #define USE_CRNL_AS_LINE_TERMINATOR */ 00066 00067 /* internal config */ 00068 #define USE_PARSE_TREE_NODE_RECYCLE 00069 #define USE_OP_PUSH_OR_JUMP_EXACT 00070 #define USE_QTFR_PEEK_NEXT 00071 #define USE_ST_LIBRARY 00072 #define USE_SHARED_CCLASS_TABLE 00073 00074 #define INIT_MATCH_STACK_SIZE 160 00075 #define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */ 00076 00077 #if defined(__GNUC__) 00078 # define ARG_UNUSED __attribute__ ((unused)) 00079 #else 00080 # define ARG_UNUSED 00081 #endif 00082 00083 /* */ 00084 /* escape other system UChar definition */ 00085 #ifndef RUBY_DEFINES_H 00086 #include "ruby/ruby.h" 00087 #undef xmalloc 00088 #undef xrealloc 00089 #undef xcalloc 00090 #undef xfree 00091 #endif 00092 #ifdef ONIG_ESCAPE_UCHAR_COLLISION 00093 #undef ONIG_ESCAPE_UCHAR_COLLISION 00094 #endif 00095 #undef USE_MATCH_RANGE_IS_COMPLETE_RANGE 00096 #undef USE_CAPTURE_HISTORY 00097 #define USE_VARIABLE_META_CHARS 00098 #define USE_WORD_BEGIN_END /* "<": word-begin, ">": word-end */ 00099 #define USE_POSIX_REGION_OPTION /* needed for POSIX API support */ 00100 #define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE 00101 /* #define USE_COMBINATION_EXPLOSION_CHECK */ /* (X*)* */ 00102 /* #define USE_MULTI_THREAD_SYSTEM */ 00103 #define THREAD_SYSTEM_INIT /* depend on thread system */ 00104 #define THREAD_SYSTEM_END /* depend on thread system */ 00105 #define THREAD_ATOMIC_START /* depend on thread system */ 00106 #define THREAD_ATOMIC_END /* depend on thread system */ 00107 #define THREAD_PASS /* depend on thread system */ 00108 #ifndef xmalloc 00109 #define xmalloc malloc 00110 #define xrealloc realloc 00111 #define xcalloc calloc 00112 #define xfree free 00113 #endif 00114 00115 #ifdef RUBY 00116 00117 #define CHECK_INTERRUPT_IN_MATCH_AT rb_thread_check_ints() 00118 #define onig_st_init_table st_init_table 00119 #define onig_st_init_table_with_size st_init_table_with_size 00120 #define onig_st_init_numtable st_init_numtable 00121 #define 
onig_st_init_numtable_with_size st_init_numtable_with_size 00122 #define onig_st_init_strtable st_init_strtable 00123 #define onig_st_init_strtable_with_size st_init_strtable_with_size 00124 #define onig_st_delete st_delete 00125 #define onig_st_delete_safe st_delete_safe 00126 #define onig_st_insert st_insert 00127 #define onig_st_lookup st_lookup 00128 #define onig_st_foreach st_foreach 00129 #define onig_st_add_direct st_add_direct 00130 #define onig_st_free_table st_free_table 00131 #define onig_st_cleanup_safe st_cleanup_safe 00132 #define onig_st_copy st_copy 00133 #define onig_st_nothing_key_clone st_nothing_key_clone 00134 #define onig_st_nothing_key_free st_nothing_key_free 00135 #define onig_st_is_member st_is_member 00136 00137 #define USE_UPPER_CASE_TABLE 00138 #else 00139 00140 #define st_init_table onig_st_init_table 00141 #define st_init_table_with_size onig_st_init_table_with_size 00142 #define st_init_numtable onig_st_init_numtable 00143 #define st_init_numtable_with_size onig_st_init_numtable_with_size 00144 #define st_init_strtable onig_st_init_strtable 00145 #define st_init_strtable_with_size onig_st_init_strtable_with_size 00146 #define st_delete onig_st_delete 00147 #define st_delete_safe onig_st_delete_safe 00148 #define st_insert onig_st_insert 00149 #define st_lookup onig_st_lookup 00150 #define st_foreach onig_st_foreach 00151 #define st_add_direct onig_st_add_direct 00152 #define st_free_table onig_st_free_table 00153 #define st_cleanup_safe onig_st_cleanup_safe 00154 #define st_copy onig_st_copy 00155 #define st_nothing_key_clone onig_st_nothing_key_clone 00156 #define st_nothing_key_free onig_st_nothing_key_free 00157 /* */ 00158 #define onig_st_is_member st_is_member 00159 00160 #define CHECK_INTERRUPT_IN_MATCH_AT 00161 00162 #endif 00163 00164 #define STATE_CHECK_STRING_THRESHOLD_LEN 7 00165 #define STATE_CHECK_BUFF_MAX_SIZE 0x4000 00166 00167 #define THREAD_PASS_LIMIT_COUNT 8 00168 #define xmemset memset 00169 #define xmemcpy memcpy 
00170 #define xmemmove memmove 00171 00172 #if defined(_WIN32) && !defined(__GNUC__) 00173 #define xalloca _alloca 00174 #define xvsnprintf _vsnprintf 00175 #else 00176 #define xalloca alloca 00177 #define xvsnprintf vsnprintf 00178 #endif 00179 00180 00181 #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) 00182 #define ONIG_STATE_INC(reg) (reg)->state++ 00183 #define ONIG_STATE_DEC(reg) (reg)->state-- 00184 00185 #define ONIG_STATE_INC_THREAD(reg) do {\ 00186 THREAD_ATOMIC_START;\ 00187 (reg)->state++;\ 00188 THREAD_ATOMIC_END;\ 00189 } while(0) 00190 #define ONIG_STATE_DEC_THREAD(reg) do {\ 00191 THREAD_ATOMIC_START;\ 00192 (reg)->state--;\ 00193 THREAD_ATOMIC_END;\ 00194 } while(0) 00195 #else 00196 #define ONIG_STATE_INC(reg) /* Nothing */ 00197 #define ONIG_STATE_DEC(reg) /* Nothing */ 00198 #define ONIG_STATE_INC_THREAD(reg) /* Nothing */ 00199 #define ONIG_STATE_DEC_THREAD(reg) /* Nothing */ 00200 #endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */ 00201 00202 #ifdef HAVE_STDLIB_H 00203 #include <stdlib.h> 00204 #endif 00205 00206 #if defined(HAVE_ALLOCA_H) && (defined(_AIX) || !defined(__GNUC__)) 00207 #include <alloca.h> 00208 #endif 00209 00210 #ifdef HAVE_STRING_H 00211 # include <string.h> 00212 #else 00213 # include <strings.h> 00214 #endif 00215 00216 #include <ctype.h> 00217 #ifdef HAVE_SYS_TYPES_H 00218 #include <sys/types.h> 00219 #endif 00220 00221 #ifdef ONIG_DEBUG 00222 # include <stdio.h> 00223 #endif 00224 00225 #include "regenc.h" 00226 00227 #if defined __GNUC__ && __GNUC__ >= 4 00228 #pragma GCC visibility push(default) 00229 #endif 00230 00231 #ifdef MIN 00232 #undef MIN 00233 #endif 00234 #ifdef MAX 00235 #undef MAX 00236 #endif 00237 #define MIN(a,b) (((a)>(b))?(b):(a)) 00238 #define MAX(a,b) (((a)<(b))?(b):(a)) 00239 00240 #define IS_NULL(p) (((void*)(p)) == (void*)0) 00241 #define IS_NOT_NULL(p) (((void*)(p)) != (void*)0) 00242 #define CHECK_NULL_RETURN(p) if (IS_NULL(p)) return NULL 00243 #define 
CHECK_NULL_RETURN_MEMERR(p) if (IS_NULL(p)) return ONIGERR_MEMORY 00244 #define NULL_UCHARP ((UChar* )0) 00245 00246 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS 00247 00248 #define PLATFORM_GET_INC(val,p,type) do{\ 00249 val = *(type* )p;\ 00250 (p) += sizeof(type);\ 00251 } while(0) 00252 00253 #else 00254 00255 #define PLATFORM_GET_INC(val,p,type) do{\ 00256 xmemcpy(&val, (p), sizeof(type));\ 00257 (p) += sizeof(type);\ 00258 } while(0) 00259 00260 /* sizeof(OnigCodePoint) */ 00261 #define WORD_ALIGNMENT_SIZE SIZEOF_LONG 00262 00263 #define GET_ALIGNMENT_PAD_SIZE(addr,pad_size) do {\ 00264 (pad_size) = WORD_ALIGNMENT_SIZE \ 00265 - ((uintptr_t )(addr) % WORD_ALIGNMENT_SIZE);\ 00266 if ((pad_size) == WORD_ALIGNMENT_SIZE) (pad_size) = 0;\ 00267 } while (0) 00268 00269 #define ALIGNMENT_RIGHT(addr) do {\ 00270 (addr) += (WORD_ALIGNMENT_SIZE - 1);\ 00271 (addr) -= ((uintptr_t )(addr) % WORD_ALIGNMENT_SIZE);\ 00272 } while (0) 00273 00274 #endif /* PLATFORM_UNALIGNED_WORD_ACCESS */ 00275 00276 /* stack pop level */ 00277 #define STACK_POP_LEVEL_FREE 0 00278 #define STACK_POP_LEVEL_MEM_START 1 00279 #define STACK_POP_LEVEL_ALL 2 00280 00281 /* optimize flags */ 00282 #define ONIG_OPTIMIZE_NONE 0 00283 #define ONIG_OPTIMIZE_EXACT 1 /* Slow Search */ 00284 #define ONIG_OPTIMIZE_EXACT_BM 2 /* Boyer Moore Search */ 00285 #define ONIG_OPTIMIZE_EXACT_BM_NOT_REV 3 /* BM (but not simple match) */ 00286 #define ONIG_OPTIMIZE_EXACT_IC 4 /* Slow Search (ignore case) */ 00287 #define ONIG_OPTIMIZE_MAP 5 /* char map */ 00288 00289 /* bit status */ 00290 typedef unsigned int BitStatusType; 00291 00292 #define BIT_STATUS_BITS_NUM (sizeof(BitStatusType) * 8) 00293 #define BIT_STATUS_CLEAR(stats) (stats) = 0 00294 #define BIT_STATUS_ON_ALL(stats) (stats) = ~((BitStatusType )0) 00295 #define BIT_STATUS_AT(stats,n) \ 00296 ((n) < (int )BIT_STATUS_BITS_NUM ? 
((stats) & (1 << n)) : ((stats) & 1)) 00297 00298 #define BIT_STATUS_ON_AT(stats,n) do {\ 00299 if ((n) < (int )BIT_STATUS_BITS_NUM) \ 00300 (stats) |= (1 << (n));\ 00301 else\ 00302 (stats) |= 1;\ 00303 } while (0) 00304 00305 #define BIT_STATUS_ON_AT_SIMPLE(stats,n) do {\ 00306 if ((n) < (int )BIT_STATUS_BITS_NUM)\ 00307 (stats) |= (1 << (n));\ 00308 } while (0) 00309 00310 00311 #define INT_MAX_LIMIT ((1UL << (SIZEOF_INT * 8 - 1)) - 1) 00312 00313 #define DIGITVAL(code) ((code) - '0') 00314 #define ODIGITVAL(code) DIGITVAL(code) 00315 #define XDIGITVAL(enc,code) \ 00316 (ONIGENC_IS_CODE_DIGIT(enc,code) ? DIGITVAL(code) \ 00317 : (ONIGENC_IS_CODE_UPPER(enc,code) ? (code) - 'A' + 10 : (code) - 'a' + 10)) 00318 00319 #define IS_SINGLELINE(option) ((option) & ONIG_OPTION_SINGLELINE) 00320 #define IS_MULTILINE(option) ((option) & ONIG_OPTION_MULTILINE) 00321 #define IS_IGNORECASE(option) ((option) & ONIG_OPTION_IGNORECASE) 00322 #define IS_EXTEND(option) ((option) & ONIG_OPTION_EXTEND) 00323 #define IS_FIND_LONGEST(option) ((option) & ONIG_OPTION_FIND_LONGEST) 00324 #define IS_FIND_NOT_EMPTY(option) ((option) & ONIG_OPTION_FIND_NOT_EMPTY) 00325 #define IS_FIND_CONDITION(option) ((option) & \ 00326 (ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY)) 00327 #define IS_NOTBOL(option) ((option) & ONIG_OPTION_NOTBOL) 00328 #define IS_NOTEOL(option) ((option) & ONIG_OPTION_NOTEOL) 00329 #define IS_POSIX_REGION(option) ((option) & ONIG_OPTION_POSIX_REGION) 00330 00331 /* OP_SET_OPTION is required for these options. 00332 #define IS_DYNAMIC_OPTION(option) \ 00333 (((option) & (ONIG_OPTION_MULTILINE | ONIG_OPTION_IGNORECASE)) != 0) 00334 */ 00335 /* ignore-case and multibyte status are included in compiled code. 
*/ 00336 #define IS_DYNAMIC_OPTION(option) 0 00337 00338 #define DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag) \ 00339 ((case_fold_flag) & ~INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) 00340 00341 #define REPEAT_INFINITE -1 00342 #define IS_REPEAT_INFINITE(n) ((n) == REPEAT_INFINITE) 00343 00344 /* bitset */ 00345 #define BITS_PER_BYTE 8 00346 #define SINGLE_BYTE_SIZE (1 << BITS_PER_BYTE) 00347 #define BITS_IN_ROOM (sizeof(Bits) * BITS_PER_BYTE) 00348 #define BITSET_SIZE (SINGLE_BYTE_SIZE / BITS_IN_ROOM) 00349 00350 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS 00351 typedef unsigned int Bits; 00352 #else 00353 typedef unsigned char Bits; 00354 #endif 00355 typedef Bits BitSet[BITSET_SIZE]; 00356 typedef Bits* BitSetRef; 00357 00358 #define SIZE_BITSET (int)sizeof(BitSet) 00359 00360 #define BITSET_CLEAR(bs) do {\ 00361 int i;\ 00362 for (i = 0; i < (int )BITSET_SIZE; i++) { (bs)[i] = 0; } \ 00363 } while (0) 00364 00365 #define BS_ROOM(bs,pos) (bs)[pos / BITS_IN_ROOM] 00366 #define BS_BIT(pos) (1 << (pos % BITS_IN_ROOM)) 00367 00368 #define BITSET_AT(bs, pos) (BS_ROOM(bs,pos) & BS_BIT(pos)) 00369 #define BITSET_SET_BIT(bs, pos) BS_ROOM(bs,pos) |= BS_BIT(pos) 00370 #define BITSET_CLEAR_BIT(bs, pos) BS_ROOM(bs,pos) &= ~(BS_BIT(pos)) 00371 #define BITSET_INVERT_BIT(bs, pos) BS_ROOM(bs,pos) ^= BS_BIT(pos) 00372 00373 /* bytes buffer */ 00374 typedef struct _BBuf { 00375 UChar* p; 00376 unsigned int used; 00377 unsigned int alloc; 00378 } BBuf; 00379 00380 #define BBUF_INIT(buf,size) onig_bbuf_init((BBuf* )(buf), (size)) 00381 00382 #define BBUF_SIZE_INC(buf,inc) do{\ 00383 (buf)->alloc += (inc);\ 00384 (buf)->p = (UChar* )xrealloc((buf)->p, (buf)->alloc);\ 00385 if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\ 00386 } while (0) 00387 00388 #define BBUF_EXPAND(buf,low) do{\ 00389 do { (buf)->alloc *= 2; } while ((buf)->alloc < (unsigned int )low);\ 00390 (buf)->p = (UChar* )xrealloc((buf)->p, (buf)->alloc);\ 00391 if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\ 00392 } while (0) 00393 
/*
 * Make sure at least `size` bytes are allocated, growing by doubling.
 * If the current allocation is 0, or doubling would wrap around unsigned int,
 * the new size falls back to `size` itself so the loop always terminates.
 * Returns ONIGERR_MEMORY from the enclosing function on allocation failure;
 * buf->p and buf->alloc are left untouched in that case.
 */
#define BBUF_ENSURE_SIZE(buf,size) do{\
  unsigned int bbuf_size_      = (unsigned int )(size);\
  unsigned int bbuf_new_alloc_ = (buf)->alloc;\
  while (bbuf_new_alloc_ < bbuf_size_) {\
    if (bbuf_new_alloc_ == 0 || bbuf_new_alloc_ > (~0U >> 1))\
      bbuf_new_alloc_ = bbuf_size_;\
    else\
      bbuf_new_alloc_ *= 2;\
  }\
  if ((buf)->alloc != bbuf_new_alloc_) {\
    UChar* bbuf_p_ = (UChar* )xrealloc((buf)->p, bbuf_new_alloc_);\
    if (IS_NULL(bbuf_p_)) return(ONIGERR_MEMORY);\
    (buf)->p     = bbuf_p_;\
    (buf)->alloc = bbuf_new_alloc_;\
  }\
} while (0)

/*
 * Copy n bytes to offset pos, expanding the buffer when needed and moving
 * `used` forward if the write ends past it.
 */
#define BBUF_WRITE(buf,pos,bytes,n) do{\
  int bbuf_used_ = (pos) + (int)(n);\
  if ((buf)->alloc < (unsigned int )bbuf_used_) BBUF_EXPAND((buf),bbuf_used_);\
  xmemcpy((buf)->p + (pos), (bytes), (n));\
  if ((buf)->used < (unsigned int )bbuf_used_) (buf)->used = bbuf_used_;\
} while (0)

/* single-byte variant of BBUF_WRITE */
#define BBUF_WRITE1(buf,pos,byte) do{\
  int bbuf_used_ = (pos) + 1;\
  if ((buf)->alloc < (unsigned int )bbuf_used_) BBUF_EXPAND((buf),bbuf_used_);\
  (buf)->p[(pos)] = (byte);\
  if ((buf)->used < (unsigned int )bbuf_used_) (buf)->used = bbuf_used_;\
} while (0)

#define BBUF_ADD(buf,bytes,n)       BBUF_WRITE((buf),(buf)->used,(bytes),(n))
#define BBUF_ADD1(buf,byte)         BBUF_WRITE1((buf),(buf)->used,(byte))
#define BBUF_GET_ADD_ADDRESS(buf)   ((buf)->p + (buf)->used)
#define BBUF_GET_OFFSET_POS(buf)    ((buf)->used)

/* from < to */
#define BBUF_MOVE_RIGHT(buf,from,to,n) do {\
  if ((unsigned int )((to)+(n)) > (buf)->alloc) BBUF_EXPAND((buf),(to) + (n));\
  xmemmove((buf)->p + (to), (buf)->p + (from), (n));\
  if ((unsigned int )((to)+(n)) > (buf)->used) (buf)->used = (to) + (n);\
} while (0)

/* from > to */
#define BBUF_MOVE_LEFT(buf,from,to,n) do {\
  xmemmove((buf)->p + (to), (buf)->p + (from), (n));\
} while (0)

/* from > to; moves the tail [from, used) down to `to` and shrinks `used` */
#define BBUF_MOVE_LEFT_REDUCE(buf,from,to) do {\
  xmemmove((buf)->p + (to), (buf)->p + (from), (buf)->used - (from));\
  (buf)->used -= ((from) - (to));\
} while (0)

/* insert n bytes at pos, shifting any existing bytes at or after pos right */
#define BBUF_INSERT(buf,pos,bytes,n) do {\
  if ((pos) >= (buf)->used) {\
    BBUF_WRITE(buf,pos,bytes,n);\
  }\
  else {\
    BBUF_MOVE_RIGHT((buf),(pos),(pos) + (n),((buf)->used - (pos)));\
    xmemcpy((buf)->p + (pos), (bytes), (n));\
  }\
} while (0)

#define BBUF_GET_BYTE(buf, pos) (buf)->p[(pos)]


#define ANCHOR_BEGIN_BUF        (1<<0)
#define ANCHOR_BEGIN_LINE       (1<<1)
#define ANCHOR_BEGIN_POSITION   (1<<2)
#define ANCHOR_END_BUF          (1<<3)
#define ANCHOR_SEMI_END_BUF     (1<<4)
#define ANCHOR_END_LINE         (1<<5)

#define ANCHOR_WORD_BOUND       (1<<6)
#define ANCHOR_NOT_WORD_BOUND   (1<<7)
#define ANCHOR_WORD_BEGIN       (1<<8)
#define ANCHOR_WORD_END         (1<<9)
#define ANCHOR_PREC_READ        (1<<10)
#define ANCHOR_PREC_READ_NOT    (1<<11)
#define ANCHOR_LOOK_BEHIND      (1<<12)
#define ANCHOR_LOOK_BEHIND_NOT  (1<<13)

#define ANCHOR_ANYCHAR_STAR     (1<<14)   /* ".*" optimize info */
#define ANCHOR_ANYCHAR_STAR_ML  (1<<15)   /* ".*" optimize info (multi-line) */

/* operation code */
enum OpCode {
  OP_FINISH = 0,        /* matching process terminator (no more alternative) */
  OP_END    = 1,        /* pattern code terminator (success end) */

  OP_EXACT1 = 2,        /* single byte, N = 1 */
  OP_EXACT2,            /* single byte, N = 2 */
  OP_EXACT3,            /* single byte, N = 3 */
  OP_EXACT4,            /* single byte, N = 4 */
  OP_EXACT5,            /* single byte, N = 5 */
  OP_EXACTN,            /* single byte */
  OP_EXACTMB2N1,        /* mb-length = 2 N = 1 */
  OP_EXACTMB2N2,        /* mb-length = 2 N = 2 */
  OP_EXACTMB2N3,        /* mb-length = 2 N = 3 */
  OP_EXACTMB2N,         /* mb-length = 2 */
  OP_EXACTMB3N,         /* mb-length = 3 */
  OP_EXACTMBN,          /* other length */

  OP_EXACT1_IC,         /* single byte, N = 1, ignore case */
  OP_EXACTN_IC,         /* single byte,        ignore case */

  OP_CCLASS,
  OP_CCLASS_MB,
  OP_CCLASS_MIX,
  OP_CCLASS_NOT,
  OP_CCLASS_MB_NOT,
  OP_CCLASS_MIX_NOT,
  OP_CCLASS_NODE,       /* pointer to CClassNode node */

  OP_ANYCHAR,                 /* "."  */
  OP_ANYCHAR_ML,              /* "."  multi-line */
  OP_ANYCHAR_STAR,            /* ".*" */
  OP_ANYCHAR_ML_STAR,         /* ".*" multi-line */
  OP_ANYCHAR_STAR_PEEK_NEXT,
  OP_ANYCHAR_ML_STAR_PEEK_NEXT,

  OP_WORD,
  OP_NOT_WORD,
  OP_WORD_BOUND,
  OP_NOT_WORD_BOUND,
  OP_WORD_BEGIN,
  OP_WORD_END,

  OP_BEGIN_BUF,
  OP_END_BUF,
  OP_BEGIN_LINE,
  OP_END_LINE,
  OP_SEMI_END_BUF,
  OP_BEGIN_POSITION,

  OP_BACKREF1,
  OP_BACKREF2,
  OP_BACKREFN,
  OP_BACKREFN_IC,
  OP_BACKREF_MULTI,
  OP_BACKREF_MULTI_IC,
  OP_BACKREF_WITH_LEVEL,    /* \k<xxx+n>, \k<xxx-n> */

  OP_MEMORY_START,
  OP_MEMORY_START_PUSH,   /* push back-tracker to stack */
  OP_MEMORY_END_PUSH,     /* push back-tracker to stack */
  OP_MEMORY_END_PUSH_REC, /* push back-tracker to stack */
  OP_MEMORY_END,
  OP_MEMORY_END_REC,      /* push marker to stack */

  OP_FAIL,               /* pop stack and move */
  OP_JUMP,
  OP_PUSH,
  OP_POP,
  OP_PUSH_OR_JUMP_EXACT1,  /* if match exact then push, else jump. */
  OP_PUSH_IF_PEEK_NEXT,    /* if match exact then push, else none. */
  OP_REPEAT,               /* {n,m} */
  OP_REPEAT_NG,            /* {n,m}? (non greedy) */
  OP_REPEAT_INC,
  OP_REPEAT_INC_NG,        /* non greedy */
  OP_REPEAT_INC_SG,        /* search and get in stack */
  OP_REPEAT_INC_NG_SG,     /* search and get in stack (non greedy) */
  OP_NULL_CHECK_START,     /* null loop checker start */
  OP_NULL_CHECK_END,       /* null loop checker end   */
  OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */
  OP_NULL_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */

  OP_PUSH_POS,             /* (?=...)  start */
  OP_POP_POS,              /* (?=...)  end   */
  OP_PUSH_POS_NOT,         /* (?!...)  start */
  OP_FAIL_POS,             /* (?!...)  end   */
  OP_PUSH_STOP_BT,         /* (?>...)  start */
  OP_POP_STOP_BT,          /* (?>...)  end   */
  OP_LOOK_BEHIND,          /* (?<=...) start (no needs end opcode) */
  OP_PUSH_LOOK_BEHIND_NOT, /* (?<!...) start */
  OP_FAIL_LOOK_BEHIND_NOT, /* (?<!...) end   */

  OP_CALL,                 /* \g<name> */
  OP_RETURN,

  OP_STATE_CHECK_PUSH,         /* combination explosion check and push */
  OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump  */
  OP_STATE_CHECK,              /* check only */
  OP_STATE_CHECK_ANYCHAR_STAR,
  OP_STATE_CHECK_ANYCHAR_ML_STAR,

  /* no need: IS_DYNAMIC_OPTION() == 0 */
  OP_SET_OPTION_PUSH,    /* set option and push recover option */
  OP_SET_OPTION          /* set option */
};

/* operand types embedded in the compiled byte code */
typedef int RelAddrType;
typedef int AbsAddrType;
typedef int LengthType;
typedef int RepeatNumType;
typedef short int MemNumType;
typedef short int StateCheckNumType;
typedef void* PointerType;

#define SIZE_OPCODE           1
#define SIZE_RELADDR          (int)sizeof(RelAddrType)
#define SIZE_ABSADDR          (int)sizeof(AbsAddrType)
#define SIZE_LENGTH           (int)sizeof(LengthType)
#define SIZE_MEMNUM           (int)sizeof(MemNumType)
#define SIZE_STATE_CHECK_NUM  (int)sizeof(StateCheckNumType)
#define SIZE_REPEATNUM        (int)sizeof(RepeatNumType)
#define SIZE_OPTION           (int)sizeof(OnigOptionType)
#define SIZE_CODE_POINT       (int)sizeof(OnigCodePoint)
#define SIZE_POINTER          (int)sizeof(PointerType)


/* read one operand of the given type at p and advance p past it */
#define GET_RELADDR_INC(addr,p)    PLATFORM_GET_INC(addr,   p, RelAddrType)
#define GET_ABSADDR_INC(addr,p)    PLATFORM_GET_INC(addr,   p, AbsAddrType)
#define GET_LENGTH_INC(len,p)      PLATFORM_GET_INC(len,    p, LengthType)
#define GET_MEMNUM_INC(num,p)      PLATFORM_GET_INC(num,    p, MemNumType)
#define GET_REPEATNUM_INC(num,p)   PLATFORM_GET_INC(num,    p, RepeatNumType)
#define GET_OPTION_INC(option,p)   PLATFORM_GET_INC(option, p, OnigOptionType)
#define GET_POINTER_INC(ptr,p)     PLATFORM_GET_INC(ptr,    p, PointerType)
#define GET_STATE_CHECK_NUM_INC(num,p)  PLATFORM_GET_INC(num, p, StateCheckNumType)

/* code point's address must be aligned address. */
#define GET_CODE_POINT(code,p)   code = *((OnigCodePoint* )(p))
#define GET_BYTE_INC(byte,p) do{\
  byte = *(p);\
  (p)++;\
} while(0)


/* op-code + arg size */
#define SIZE_OP_ANYCHAR_STAR            SIZE_OPCODE
#define SIZE_OP_ANYCHAR_STAR_PEEK_NEXT (SIZE_OPCODE + 1)
#define SIZE_OP_JUMP                   (SIZE_OPCODE + SIZE_RELADDR)
#define SIZE_OP_PUSH                   (SIZE_OPCODE + SIZE_RELADDR)
#define SIZE_OP_POP                     SIZE_OPCODE
#define SIZE_OP_PUSH_OR_JUMP_EXACT1    (SIZE_OPCODE + SIZE_RELADDR + 1)
#define SIZE_OP_PUSH_IF_PEEK_NEXT      (SIZE_OPCODE + SIZE_RELADDR + 1)
#define SIZE_OP_REPEAT_INC             (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_REPEAT_INC_NG          (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_PUSH_POS                SIZE_OPCODE
#define SIZE_OP_PUSH_POS_NOT           (SIZE_OPCODE + SIZE_RELADDR)
#define SIZE_OP_POP_POS                 SIZE_OPCODE
#define SIZE_OP_FAIL_POS                SIZE_OPCODE
#define SIZE_OP_SET_OPTION             (SIZE_OPCODE + SIZE_OPTION)
#define SIZE_OP_SET_OPTION_PUSH        (SIZE_OPCODE + SIZE_OPTION)
#define SIZE_OP_FAIL                    SIZE_OPCODE
#define SIZE_OP_MEMORY_START           (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_MEMORY_START_PUSH      (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_MEMORY_END_PUSH        (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_MEMORY_END_PUSH_REC    (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_MEMORY_END             (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_MEMORY_END_REC         (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_PUSH_STOP_BT            SIZE_OPCODE
#define SIZE_OP_POP_STOP_BT             SIZE_OPCODE
#define SIZE_OP_NULL_CHECK_START       (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_NULL_CHECK_END         (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_LOOK_BEHIND            (SIZE_OPCODE + SIZE_LENGTH)
#define SIZE_OP_PUSH_LOOK_BEHIND_NOT   (SIZE_OPCODE + SIZE_RELADDR + SIZE_LENGTH)
00645 #define SIZE_OP_FAIL_LOOK_BEHIND_NOT SIZE_OPCODE 00646 #define SIZE_OP_CALL (SIZE_OPCODE + SIZE_ABSADDR) 00647 #define SIZE_OP_RETURN SIZE_OPCODE 00648 00649 #ifdef USE_COMBINATION_EXPLOSION_CHECK 00650 #define SIZE_OP_STATE_CHECK (SIZE_OPCODE + SIZE_STATE_CHECK_NUM) 00651 #define SIZE_OP_STATE_CHECK_PUSH (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR) 00652 #define SIZE_OP_STATE_CHECK_PUSH_OR_JUMP (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR) 00653 #define SIZE_OP_STATE_CHECK_ANYCHAR_STAR (SIZE_OPCODE + SIZE_STATE_CHECK_NUM) 00654 #endif 00655 00656 #define MC_ESC(syn) (syn)->meta_char_table.esc 00657 #define MC_ANYCHAR(syn) (syn)->meta_char_table.anychar 00658 #define MC_ANYTIME(syn) (syn)->meta_char_table.anytime 00659 #define MC_ZERO_OR_ONE_TIME(syn) (syn)->meta_char_table.zero_or_one_time 00660 #define MC_ONE_OR_MORE_TIME(syn) (syn)->meta_char_table.one_or_more_time 00661 #define MC_ANYCHAR_ANYTIME(syn) (syn)->meta_char_table.anychar_anytime 00662 00663 #define IS_MC_ESC_CODE(code, syn) \ 00664 ((code) == MC_ESC(syn) && \ 00665 !IS_SYNTAX_OP2((syn), ONIG_SYN_OP2_INEFFECTIVE_ESCAPE)) 00666 00667 00668 #define SYN_POSIX_COMMON_OP \ 00669 ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_POSIX_BRACKET | \ 00670 ONIG_SYN_OP_DECIMAL_BACKREF | \ 00671 ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_ASTERISK_ZERO_INF | \ 00672 ONIG_SYN_OP_LINE_ANCHOR | \ 00673 ONIG_SYN_OP_ESC_CONTROL_CHARS ) 00674 00675 #define SYN_GNU_REGEX_OP \ 00676 ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | \ 00677 ONIG_SYN_OP_POSIX_BRACKET | ONIG_SYN_OP_DECIMAL_BACKREF | \ 00678 ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_LPAREN_SUBEXP | \ 00679 ONIG_SYN_OP_VBAR_ALT | \ 00680 ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF | \ 00681 ONIG_SYN_OP_QMARK_ZERO_ONE | \ 00682 ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR | ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR | \ 00683 ONIG_SYN_OP_ESC_W_WORD | \ 00684 ONIG_SYN_OP_ESC_B_WORD_BOUND | ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | \ 00685 
ONIG_SYN_OP_ESC_S_WHITE_SPACE | ONIG_SYN_OP_ESC_D_DIGIT | \ 00686 ONIG_SYN_OP_LINE_ANCHOR ) 00687 00688 #define SYN_GNU_REGEX_BV \ 00689 ( ONIG_SYN_CONTEXT_INDEP_ANCHORS | ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | \ 00690 ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | ONIG_SYN_ALLOW_INVALID_INTERVAL | \ 00691 ONIG_SYN_BACKSLASH_ESCAPE_IN_CC | ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC ) 00692 00693 00694 #define NCCLASS_FLAGS(cc) ((cc)->flags) 00695 #define NCCLASS_FLAG_SET(cc,flag) (NCCLASS_FLAGS(cc) |= (flag)) 00696 #define NCCLASS_FLAG_CLEAR(cc,flag) (NCCLASS_FLAGS(cc) &= ~(flag)) 00697 #define IS_NCCLASS_FLAG_ON(cc,flag) ((NCCLASS_FLAGS(cc) & (flag)) != 0) 00698 00699 /* cclass node */ 00700 #define FLAG_NCCLASS_NOT (1<<0) 00701 #define FLAG_NCCLASS_SHARE (1<<1) 00702 00703 #define NCCLASS_SET_NOT(nd) NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_NOT) 00704 #define NCCLASS_SET_SHARE(nd) NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_SHARE) 00705 #define NCCLASS_CLEAR_NOT(nd) NCCLASS_FLAG_CLEAR(nd, FLAG_NCCLASS_NOT) 00706 #define IS_NCCLASS_NOT(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_NOT) 00707 #define IS_NCCLASS_SHARE(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_SHARE) 00708 00709 typedef struct { 00710 int type; 00711 /* struct _Node* next; */ 00712 /* unsigned int flags; */ 00713 } NodeBase; 00714 00715 typedef struct { 00716 NodeBase base; 00717 unsigned int flags; 00718 BitSet bs; 00719 BBuf* mbuf; /* multi-byte info or NULL */ 00720 } CClassNode; 00721 00722 typedef intptr_t OnigStackIndex; 00723 00724 typedef struct _OnigStackType { 00725 unsigned int type; 00726 union { 00727 struct { 00728 UChar *pcode; /* byte code position */ 00729 UChar *pstr; /* string position */ 00730 UChar *pstr_prev; /* previous char position of pstr */ 00731 #ifdef USE_COMBINATION_EXPLOSION_CHECK 00732 unsigned int state_check; 00733 #endif 00734 } state; 00735 struct { 00736 int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */ 00737 UChar *pcode; /* byte code position (head of repeated target) */ 00738 int num; /* repeat id */ 
00739 } repeat; 00740 struct { 00741 OnigStackIndex si; /* index of stack */ 00742 } repeat_inc; 00743 struct { 00744 int num; /* memory num */ 00745 UChar *pstr; /* start/end position */ 00746 /* Following information is setted, if this stack type is MEM-START */ 00747 OnigStackIndex start; /* prev. info (for backtrack "(...)*" ) */ 00748 OnigStackIndex end; /* prev. info (for backtrack "(...)*" ) */ 00749 } mem; 00750 struct { 00751 int num; /* null check id */ 00752 UChar *pstr; /* start position */ 00753 } null_check; 00754 #ifdef USE_SUBEXP_CALL 00755 struct { 00756 UChar *ret_addr; /* byte code position */ 00757 int num; /* null check id */ 00758 UChar *pstr; /* string position */ 00759 } call_frame; 00760 #endif 00761 } u; 00762 } OnigStackType; 00763 00764 typedef struct { 00765 void* stack_p; 00766 size_t stack_n; 00767 OnigOptionType options; 00768 OnigRegion* region; 00769 const UChar* start; /* search start position (for \G: BEGIN_POSITION) */ 00770 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE 00771 int best_len; /* for ONIG_OPTION_FIND_LONGEST */ 00772 UChar* best_s; 00773 #endif 00774 #ifdef USE_COMBINATION_EXPLOSION_CHECK 00775 void* state_check_buff; 00776 int state_check_buff_size; 00777 #endif 00778 } OnigMatchArg; 00779 00780 00781 #define IS_CODE_SB_WORD(enc,code) \ 00782 (ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_WORD(enc,code)) 00783 00784 #ifdef ONIG_DEBUG 00785 00786 typedef struct { 00787 short int opcode; 00788 const char* name; 00789 short int arg_type; 00790 } OnigOpInfoType; 00791 00792 extern OnigOpInfoType OnigOpInfo[]; 00793 00794 /* extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar* bpend, UChar** nextp, OnigEncoding enc)); */ 00795 00796 #ifdef ONIG_DEBUG_STATISTICS 00797 extern void onig_statistics_init P_((void)); 00798 extern void onig_print_statistics P_((FILE* f)); 00799 #endif 00800 #endif 00801 00802 extern UChar* onig_error_code_to_format P_((int code)); 00803 extern void onig_snprintf_with_pattern 
PV_((UChar buf[], int bufsize, OnigEncoding enc, UChar* pat, UChar* pat_end, const UChar *fmt, ...)); 00804 extern int onig_bbuf_init P_((BBuf* buf, OnigDistance size)); 00805 extern int onig_compile P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo, const char *sourcefile, int sourceline)); 00806 extern void onig_chain_reduce P_((regex_t* reg)); 00807 extern void onig_chain_link_add P_((regex_t* to, regex_t* add)); 00808 extern void onig_transfer P_((regex_t* to, regex_t* from)); 00809 extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc)); 00810 extern int onig_is_code_in_cc_len P_((int enclen, OnigCodePoint code, CClassNode* cc)); 00811 00812 /* strend hash */ 00813 typedef void hash_table_type; 00814 #ifdef RUBY 00815 #include "ruby/st.h" 00816 typedef st_data_t hash_data_type; 00817 #else 00818 typedef unsigned long hash_data_type; 00819 #endif 00820 00821 extern hash_table_type* onig_st_init_strend_table_with_size P_((st_index_t size)); 00822 extern int onig_st_lookup_strend P_((hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type *value)); 00823 extern int onig_st_insert_strend P_((hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type value)); 00824 00825 /* encoding property management */ 00826 #define PROPERTY_LIST_ADD_PROP(Name, CR) \ 00827 r = onigenc_property_list_add_property((UChar* )Name, CR,\ 00828 &PropertyNameTable, &PropertyList, &PropertyListNum,\ 00829 &PropertyListSize);\ 00830 if (r != 0) goto end 00831 00832 #define PROPERTY_LIST_INIT_CHECK \ 00833 if (PropertyInited == 0) {\ 00834 int r = onigenc_property_list_init(init_property_list);\ 00835 if (r != 0) return r;\ 00836 } 00837 00838 extern int onigenc_property_list_add_property P_((UChar* name, const OnigCodePoint* prop, hash_table_type **table, const OnigCodePoint*** plist, int *pnum, int *psize)); 00839 00840 typedef int 
(*ONIGENC_INIT_PROPERTY_LIST_FUNC_TYPE)(void); 00841 00842 extern int onigenc_property_list_init P_((ONIGENC_INIT_PROPERTY_LIST_FUNC_TYPE)); 00843 00844 extern size_t onig_memsize P_((const regex_t *reg)); 00845 00846 #if defined __GNUC__ && __GNUC__ >= 4 00847 #pragma GCC visibility pop 00848 #endif 00849 00850 #endif /* ONIGURUMA_REGINT_H */ 00851