Ruby 1.9.3p327 (2012-11-10 revision 37606)
|
00001 /********************************************************************** 00002 regsyntax.c - Oniguruma (regular expression library) 00003 **********************************************************************/ 00004 /*- 00005 * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> 00006 * All rights reserved. 00007 * 00008 * Redistribution and use in source and binary forms, with or without 00009 * modification, are permitted provided that the following conditions 00010 * are met: 00011 * 1. Redistributions of source code must retain the above copyright 00012 * notice, this list of conditions and the following disclaimer. 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in the 00015 * documentation and/or other materials provided with the distribution. 00016 * 00017 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 00018 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00019 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 00020 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 00021 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 00022 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 00023 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 00024 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00025 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 00026 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 00027 * SUCH DAMAGE. 
00028 */ 00029 00030 #include "regint.h" 00031 00032 const OnigSyntaxType OnigSyntaxASIS = { 00033 0 00034 , ONIG_SYN_OP2_INEFFECTIVE_ESCAPE 00035 , 0 00036 , ONIG_OPTION_NONE 00037 , 00038 { 00039 (OnigCodePoint )'\\' /* esc */ 00040 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ 00041 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ 00042 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ 00043 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ 00044 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ 00045 } 00046 }; 00047 00048 const OnigSyntaxType OnigSyntaxPosixBasic = { 00049 ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_ESC_LPAREN_SUBEXP | 00050 ONIG_SYN_OP_ESC_BRACE_INTERVAL ) 00051 , 0 00052 , 0 00053 , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE ) 00054 , 00055 { 00056 (OnigCodePoint )'\\' /* esc */ 00057 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ 00058 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ 00059 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ 00060 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ 00061 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ 00062 } 00063 }; 00064 00065 const OnigSyntaxType OnigSyntaxPosixExtended = { 00066 ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_LPAREN_SUBEXP | 00067 ONIG_SYN_OP_BRACE_INTERVAL | 00068 ONIG_SYN_OP_PLUS_ONE_INF | ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_VBAR_ALT ) 00069 , 0 00070 , ( ONIG_SYN_CONTEXT_INDEP_ANCHORS | 00071 ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | 00072 ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP | 00073 ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC ) 00074 , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE ) 00075 , 00076 { 00077 (OnigCodePoint )'\\' /* esc */ 00078 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' 
*/ 00079 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ 00080 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ 00081 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ 00082 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ 00083 } 00084 }; 00085 00086 const OnigSyntaxType OnigSyntaxEmacs = { 00087 ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | 00088 ONIG_SYN_OP_ESC_BRACE_INTERVAL | 00089 ONIG_SYN_OP_ESC_LPAREN_SUBEXP | ONIG_SYN_OP_ESC_VBAR_ALT | 00090 ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF | 00091 ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_DECIMAL_BACKREF | 00092 ONIG_SYN_OP_LINE_ANCHOR | ONIG_SYN_OP_ESC_CONTROL_CHARS ) 00093 , ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR 00094 , ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC 00095 , ONIG_OPTION_NONE 00096 , 00097 { 00098 (OnigCodePoint )'\\' /* esc */ 00099 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ 00100 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ 00101 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' 
*/ 00102 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ 00103 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ 00104 } 00105 }; 00106 00107 const OnigSyntaxType OnigSyntaxGrep = { 00108 ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_POSIX_BRACKET | 00109 ONIG_SYN_OP_ESC_BRACE_INTERVAL | ONIG_SYN_OP_ESC_LPAREN_SUBEXP | 00110 ONIG_SYN_OP_ESC_VBAR_ALT | 00111 ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_ESC_PLUS_ONE_INF | 00112 ONIG_SYN_OP_ESC_QMARK_ZERO_ONE | ONIG_SYN_OP_LINE_ANCHOR | 00113 ONIG_SYN_OP_ESC_W_WORD | ONIG_SYN_OP_ESC_B_WORD_BOUND | 00114 ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | ONIG_SYN_OP_DECIMAL_BACKREF ) 00115 , 0 00116 , ( ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC | ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC ) 00117 , ONIG_OPTION_NONE 00118 , 00119 { 00120 (OnigCodePoint )'\\' /* esc */ 00121 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ 00122 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ 00123 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ 00124 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ 00125 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ 00126 } 00127 }; 00128 00129 const OnigSyntaxType OnigSyntaxGnuRegex = { 00130 SYN_GNU_REGEX_OP 00131 , 0 00132 , SYN_GNU_REGEX_BV 00133 , ONIG_OPTION_NONE 00134 , 00135 { 00136 (OnigCodePoint )'\\' /* esc */ 00137 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ 00138 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ 00139 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' 
*/ 00140 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ 00141 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ 00142 } 00143 }; 00144 00145 const OnigSyntaxType OnigSyntaxJava = { 00146 (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | 00147 ONIG_SYN_OP_ESC_CONTROL_CHARS | ONIG_SYN_OP_ESC_C_CONTROL | 00148 ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 ) 00149 & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) 00150 , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | ONIG_SYN_OP2_QMARK_GROUP_EFFECT | 00151 ONIG_SYN_OP2_OPTION_PERL | ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT | 00152 ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL | ONIG_SYN_OP2_CCLASS_SET_OP | 00153 ONIG_SYN_OP2_ESC_V_VTAB | ONIG_SYN_OP2_ESC_U_HEX4 | 00154 ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY ) 00155 , ( SYN_GNU_REGEX_BV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND ) 00156 , ONIG_OPTION_SINGLELINE 00157 , 00158 { 00159 (OnigCodePoint )'\\' /* esc */ 00160 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ 00161 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ 00162 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' 
*/ 00163 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ 00164 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ 00165 } 00166 }; 00167 00168 const OnigSyntaxType OnigSyntaxPerl = { 00169 (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | 00170 ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 | 00171 ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS | 00172 ONIG_SYN_OP_ESC_C_CONTROL ) 00173 & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) 00174 , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | 00175 ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL | 00176 ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY | 00177 ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT ) 00178 , SYN_GNU_REGEX_BV 00179 , ONIG_OPTION_SINGLELINE 00180 , 00181 { 00182 (OnigCodePoint )'\\' /* esc */ 00183 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ 00184 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ 00185 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' 
*/ 00186 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ 00187 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ 00188 } 00189 }; 00190 00191 /* Perl + named group */ 00192 const OnigSyntaxType OnigSyntaxPerl_NG = { 00193 (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | 00194 ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 | 00195 ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS | 00196 ONIG_SYN_OP_ESC_C_CONTROL ) 00197 & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) 00198 , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | 00199 ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL | 00200 ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY | 00201 ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT | 00202 ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | 00203 ONIG_SYN_OP2_ESC_K_NAMED_BACKREF | 00204 ONIG_SYN_OP2_ESC_G_SUBEXP_CALL ) 00205 , ( SYN_GNU_REGEX_BV | 00206 ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP | 00207 ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME ) 00208 , ONIG_OPTION_SINGLELINE 00209 , 00210 { 00211 (OnigCodePoint )'\\' /* esc */ 00212 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ 00213 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ 00214 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' 
*/ 00215 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ 00216 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ 00217 } 00218 }; 00219 00220 00221 00222 extern int 00223 onig_set_default_syntax(const OnigSyntaxType* syntax) 00224 { 00225 if (IS_NULL(syntax)) 00226 syntax = ONIG_SYNTAX_RUBY; 00227 00228 OnigDefaultSyntax = syntax; 00229 return 0; 00230 } 00231 00232 extern void 00233 onig_copy_syntax(OnigSyntaxType* to, const OnigSyntaxType* from) 00234 { 00235 *to = *from; 00236 } 00237 00238 extern void 00239 onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op) 00240 { 00241 syntax->op = op; 00242 } 00243 00244 extern void 00245 onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2) 00246 { 00247 syntax->op2 = op2; 00248 } 00249 00250 extern void 00251 onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior) 00252 { 00253 syntax->behavior = behavior; 00254 } 00255 00256 extern void 00257 onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options) 00258 { 00259 syntax->options = options; 00260 } 00261 00262 extern unsigned int 00263 onig_get_syntax_op(OnigSyntaxType* syntax) 00264 { 00265 return syntax->op; 00266 } 00267 00268 extern unsigned int 00269 onig_get_syntax_op2(OnigSyntaxType* syntax) 00270 { 00271 return syntax->op2; 00272 } 00273 00274 extern unsigned int 00275 onig_get_syntax_behavior(OnigSyntaxType* syntax) 00276 { 00277 return syntax->behavior; 00278 } 00279 00280 extern OnigOptionType 00281 onig_get_syntax_options(OnigSyntaxType* syntax) 00282 { 00283 return syntax->options; 00284 } 00285 00286 #ifdef USE_VARIABLE_META_CHARS 00287 extern int onig_set_meta_char(OnigSyntaxType* enc, 00288 unsigned int what, OnigCodePoint code) 00289 { 00290 switch (what) { 00291 case ONIG_META_CHAR_ESCAPE: 00292 enc->meta_char_table.esc = code; 00293 break; 00294 case ONIG_META_CHAR_ANYCHAR: 00295 enc->meta_char_table.anychar = code; 00296 break; 00297 case ONIG_META_CHAR_ANYTIME: 
00298 enc->meta_char_table.anytime = code; 00299 break; 00300 case ONIG_META_CHAR_ZERO_OR_ONE_TIME: 00301 enc->meta_char_table.zero_or_one_time = code; 00302 break; 00303 case ONIG_META_CHAR_ONE_OR_MORE_TIME: 00304 enc->meta_char_table.one_or_more_time = code; 00305 break; 00306 case ONIG_META_CHAR_ANYCHAR_ANYTIME: 00307 enc->meta_char_table.anychar_anytime = code; 00308 break; 00309 default: 00310 return ONIGERR_INVALID_ARGUMENT; 00311 break; 00312 } 00313 return 0; 00314 } 00315 #endif /* USE_VARIABLE_META_CHARS */ 00316