Ruby 1.9.3p327(2012-11-10revision37606)
regsyntax.c
Go to the documentation of this file.
00001 /**********************************************************************
00002   regsyntax.c -  Oniguruma (regular expression library)
00003 **********************************************************************/
00004 /*-
00005  * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
00006  * All rights reserved.
00007  *
00008  * Redistribution and use in source and binary forms, with or without
00009  * modification, are permitted provided that the following conditions
00010  * are met:
00011  * 1. Redistributions of source code must retain the above copyright
00012  *    notice, this list of conditions and the following disclaimer.
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in the
00015  *    documentation and/or other materials provided with the distribution.
00016  *
00017  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
00018  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00019  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00020  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
00021  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00022  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00023  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00024  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00025  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00026  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00027  * SUCH DAMAGE.
00028  */
00029 
00030 #include "regint.h"
00031 
00032 const OnigSyntaxType OnigSyntaxASIS = {
00033     0
00034   , ONIG_SYN_OP2_INEFFECTIVE_ESCAPE
00035   , 0
00036   , ONIG_OPTION_NONE
00037   ,
00038   {
00039       (OnigCodePoint )'\\'                       /* esc */
00040     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
00041     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
00042     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
00043     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
00044     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
00045   }
00046 };
00047 
00048 const OnigSyntaxType OnigSyntaxPosixBasic = {
00049   ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
00050     ONIG_SYN_OP_ESC_BRACE_INTERVAL )
00051   , 0
00052   , 0
00053   , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
00054   ,
00055   {
00056       (OnigCodePoint )'\\'                       /* esc */
00057     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
00058     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
00059     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
00060     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
00061     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
00062   }
00063 };
00064 
00065 const OnigSyntaxType OnigSyntaxPosixExtended = {
00066   ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_LPAREN_SUBEXP |
00067     ONIG_SYN_OP_BRACE_INTERVAL |
00068     ONIG_SYN_OP_PLUS_ONE_INF | ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_VBAR_ALT )
00069   , 0
00070   , ( ONIG_SYN_CONTEXT_INDEP_ANCHORS |
00071       ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS |
00072       ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP |
00073       ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
00074   , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
00075   ,
00076   {
00077       (OnigCodePoint )'\\'                       /* esc */
00078     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
00079     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
00080     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
00081     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
00082     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
00083   }
00084 };
00085 
00086 const OnigSyntaxType OnigSyntaxEmacs = {
00087   ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC |
00088     ONIG_SYN_OP_ESC_BRACE_INTERVAL |
00089     ONIG_SYN_OP_ESC_LPAREN_SUBEXP | ONIG_SYN_OP_ESC_VBAR_ALT |
00090     ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF |
00091     ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_DECIMAL_BACKREF |
00092     ONIG_SYN_OP_LINE_ANCHOR | ONIG_SYN_OP_ESC_CONTROL_CHARS )
00093   , ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR
00094   , ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC
00095   , ONIG_OPTION_NONE
00096   ,
00097   {
00098       (OnigCodePoint )'\\'                       /* esc */
00099     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
00100     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
00101     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
00102     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
00103     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
00104   }
00105 };
00106 
00107 const OnigSyntaxType OnigSyntaxGrep = {
00108   ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_POSIX_BRACKET |
00109     ONIG_SYN_OP_ESC_BRACE_INTERVAL | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
00110     ONIG_SYN_OP_ESC_VBAR_ALT |
00111     ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_ESC_PLUS_ONE_INF |
00112     ONIG_SYN_OP_ESC_QMARK_ZERO_ONE | ONIG_SYN_OP_LINE_ANCHOR |
00113     ONIG_SYN_OP_ESC_W_WORD | ONIG_SYN_OP_ESC_B_WORD_BOUND |
00114     ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | ONIG_SYN_OP_DECIMAL_BACKREF )
00115   , 0
00116   , ( ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC | ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC )
00117   , ONIG_OPTION_NONE
00118   ,
00119   {
00120       (OnigCodePoint )'\\'                       /* esc */
00121     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
00122     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
00123     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
00124     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
00125     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
00126   }
00127 };
00128 
00129 const OnigSyntaxType OnigSyntaxGnuRegex = {
00130   SYN_GNU_REGEX_OP
00131   , 0
00132   , SYN_GNU_REGEX_BV
00133   , ONIG_OPTION_NONE
00134   ,
00135   {
00136       (OnigCodePoint )'\\'                       /* esc */
00137     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
00138     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
00139     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
00140     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
00141     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
00142   }
00143 };
00144 
00145 const OnigSyntaxType OnigSyntaxJava = {
00146   (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
00147      ONIG_SYN_OP_ESC_CONTROL_CHARS | ONIG_SYN_OP_ESC_C_CONTROL |
00148      ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 )
00149    & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
00150   , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
00151       ONIG_SYN_OP2_OPTION_PERL | ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
00152       ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL | ONIG_SYN_OP2_CCLASS_SET_OP |
00153       ONIG_SYN_OP2_ESC_V_VTAB | ONIG_SYN_OP2_ESC_U_HEX4 |
00154       ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY )
00155   , ( SYN_GNU_REGEX_BV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND )
00156   , ONIG_OPTION_SINGLELINE
00157   ,
00158   {
00159       (OnigCodePoint )'\\'                       /* esc */
00160     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
00161     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
00162     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
00163     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
00164     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
00165   }
00166 };
00167 
00168 const OnigSyntaxType OnigSyntaxPerl = {
00169   (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
00170      ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
00171      ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
00172      ONIG_SYN_OP_ESC_C_CONTROL )
00173    & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
00174   , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
00175       ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
00176       ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
00177       ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT )
00178   , SYN_GNU_REGEX_BV
00179   , ONIG_OPTION_SINGLELINE
00180   ,
00181   {
00182       (OnigCodePoint )'\\'                       /* esc */
00183     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
00184     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
00185     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
00186     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
00187     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
00188   }
00189 };
00190 
00191 /* Perl + named group */
00192 const OnigSyntaxType OnigSyntaxPerl_NG = {
00193   (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
00194      ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
00195      ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
00196      ONIG_SYN_OP_ESC_C_CONTROL )
00197    & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
00198   , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
00199       ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
00200       ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY  |
00201       ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
00202       ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP       |
00203       ONIG_SYN_OP2_ESC_K_NAMED_BACKREF        |
00204       ONIG_SYN_OP2_ESC_G_SUBEXP_CALL )
00205   , ( SYN_GNU_REGEX_BV |
00206       ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
00207       ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME )
00208   , ONIG_OPTION_SINGLELINE
00209   ,
00210   {
00211       (OnigCodePoint )'\\'                       /* esc */
00212     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
00213     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
00214     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
00215     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
00216     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
00217   }
00218 };
00219 
00220 
00221 
00222 extern int
00223 onig_set_default_syntax(const OnigSyntaxType* syntax)
00224 {
00225   if (IS_NULL(syntax))
00226     syntax = ONIG_SYNTAX_RUBY;
00227 
00228   OnigDefaultSyntax = syntax;
00229   return 0;
00230 }
00231 
00232 extern void
00233 onig_copy_syntax(OnigSyntaxType* to, const OnigSyntaxType* from)
00234 {
00235   *to = *from;
00236 }
00237 
00238 extern void
00239 onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op)
00240 {
00241   syntax->op = op;
00242 }
00243 
00244 extern void
00245 onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2)
00246 {
00247   syntax->op2 = op2;
00248 }
00249 
00250 extern void
00251 onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior)
00252 {
00253   syntax->behavior = behavior;
00254 }
00255 
00256 extern void
00257 onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options)
00258 {
00259   syntax->options = options;
00260 }
00261 
00262 extern unsigned int
00263 onig_get_syntax_op(OnigSyntaxType* syntax)
00264 {
00265   return syntax->op;
00266 }
00267 
00268 extern unsigned int
00269 onig_get_syntax_op2(OnigSyntaxType* syntax)
00270 {
00271   return syntax->op2;
00272 }
00273 
00274 extern unsigned int
00275 onig_get_syntax_behavior(OnigSyntaxType* syntax)
00276 {
00277   return syntax->behavior;
00278 }
00279 
00280 extern OnigOptionType
00281 onig_get_syntax_options(OnigSyntaxType* syntax)
00282 {
00283   return syntax->options;
00284 }
00285 
#ifdef USE_VARIABLE_META_CHARS
/*
 * Store `code` in the meta_char_table entry of `enc` selected by `what`.
 *
 * `what` must be one of the ONIG_META_CHAR_* selectors handled below.
 * Returns 0 after storing, or ONIGERR_INVALID_ARGUMENT (leaving the table
 * untouched) for any other selector.  Despite its name, `enc` is a syntax
 * table, not an encoding.
 */
extern int onig_set_meta_char(OnigSyntaxType *enc,
                              unsigned int what, OnigCodePoint code)
{
  OnigCodePoint *slot;

  /* Pick the table entry first; the store happens once, after the switch. */
  switch (what) {
  case ONIG_META_CHAR_ESCAPE:
    slot = &enc->meta_char_table.esc;
    break;
  case ONIG_META_CHAR_ANYCHAR:
    slot = &enc->meta_char_table.anychar;
    break;
  case ONIG_META_CHAR_ANYTIME:
    slot = &enc->meta_char_table.anytime;
    break;
  case ONIG_META_CHAR_ZERO_OR_ONE_TIME:
    slot = &enc->meta_char_table.zero_or_one_time;
    break;
  case ONIG_META_CHAR_ONE_OR_MORE_TIME:
    slot = &enc->meta_char_table.one_or_more_time;
    break;
  case ONIG_META_CHAR_ANYCHAR_ANYTIME:
    slot = &enc->meta_char_table.anychar_anytime;
    break;
  default:
    return ONIGERR_INVALID_ARGUMENT;
  }

  *slot = code;
  return 0;
}
#endif /* USE_VARIABLE_META_CHARS */
00316