/* Ruby 1.9.3p327 (2012-11-10 revision 37606) */
00001 /********************************************************************** 00002 regenc.c - Oniguruma (regular expression library) 00003 **********************************************************************/ 00004 /*- 00005 * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> 00006 * All rights reserved. 00007 * 00008 * Redistribution and use in source and binary forms, with or without 00009 * modification, are permitted provided that the following conditions 00010 * are met: 00011 * 1. Redistributions of source code must retain the above copyright 00012 * notice, this list of conditions and the following disclaimer. 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in the 00015 * documentation and/or other materials provided with the distribution. 00016 * 00017 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 00018 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00019 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 00020 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 00021 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 00022 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 00023 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 00024 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00025 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 00026 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 00027 * SUCH DAMAGE. 
00028 */ 00029 00030 #include "regint.h" 00031 00032 OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT; 00033 00034 extern int 00035 onigenc_init(void) 00036 { 00037 return 0; 00038 } 00039 00040 extern OnigEncoding 00041 onigenc_get_default_encoding(void) 00042 { 00043 return OnigEncDefaultCharEncoding; 00044 } 00045 00046 extern int 00047 onigenc_set_default_encoding(OnigEncoding enc) 00048 { 00049 OnigEncDefaultCharEncoding = enc; 00050 return 0; 00051 } 00052 00053 extern int 00054 onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc) 00055 { 00056 int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e); 00057 if (ONIGENC_MBCLEN_CHARFOUND_P(ret)) 00058 return ONIGENC_MBCLEN_CHARFOUND_LEN(ret); 00059 else if (ONIGENC_MBCLEN_NEEDMORE_P(ret)) 00060 return (int)(e-p)+ONIGENC_MBCLEN_NEEDMORE_LEN(ret); 00061 return 1; 00062 } 00063 00064 extern UChar* 00065 onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end) 00066 { 00067 UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end); 00068 if (p < s) { 00069 p += enclen(enc, p, end); 00070 } 00071 return p; 00072 } 00073 00074 extern UChar* 00075 onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc, 00076 const UChar* start, const UChar* s, const UChar* end, const UChar** prev) 00077 { 00078 UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end); 00079 00080 if (p < s) { 00081 if (prev) *prev = (const UChar* )p; 00082 p += enclen(enc, p, end); 00083 } 00084 else { 00085 if (prev) *prev = (const UChar* )NULL; /* Sorry */ 00086 } 00087 return p; 00088 } 00089 00090 extern UChar* 00091 onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end) 00092 { 00093 if (s <= start) 00094 return (UChar* )NULL; 00095 00096 return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1, end); 00097 } 00098 00099 extern UChar* 00100 onigenc_step_back(OnigEncoding enc, const UChar* 
start, const UChar* s, const UChar* end, int n) 00101 { 00102 while (ONIG_IS_NOT_NULL(s) && n-- > 0) { 00103 if (s <= start) 00104 return (UChar* )NULL; 00105 00106 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1, end); 00107 } 00108 return (UChar* )s; 00109 } 00110 00111 extern UChar* 00112 onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n) 00113 { 00114 UChar* q = (UChar* )p; 00115 while (n-- > 0) { 00116 q += ONIGENC_MBC_ENC_LEN(enc, q, end); 00117 } 00118 return (q <= end ? q : NULL); 00119 } 00120 00121 extern int 00122 onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end) 00123 { 00124 int n = 0; 00125 UChar* q = (UChar* )p; 00126 00127 while (q < end) { 00128 q += ONIGENC_MBC_ENC_LEN(enc, q, end); 00129 n++; 00130 } 00131 return n; 00132 } 00133 00134 extern int 00135 onigenc_strlen_null(OnigEncoding enc, const UChar* s) 00136 { 00137 int n = 0; 00138 UChar* p = (UChar* )s; 00139 UChar* e; 00140 00141 while (1) { 00142 if (*p == '\0') { 00143 UChar* q; 00144 int len = ONIGENC_MBC_MINLEN(enc); 00145 00146 if (len == 1) return n; 00147 q = p + 1; 00148 while (len > 1) { 00149 if (*q != '\0') break; 00150 q++; 00151 len--; 00152 } 00153 if (len == 1) return n; 00154 } 00155 e = p + ONIGENC_MBC_MAXLEN(enc); 00156 p += ONIGENC_MBC_ENC_LEN(enc, p, e); 00157 n++; 00158 } 00159 } 00160 00161 extern int 00162 onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s) 00163 { 00164 UChar* start = (UChar* )s; 00165 UChar* p = (UChar* )s; 00166 UChar* e; 00167 00168 while (1) { 00169 if (*p == '\0') { 00170 UChar* q; 00171 int len = ONIGENC_MBC_MINLEN(enc); 00172 00173 if (len == 1) return (int )(p - start); 00174 q = p + 1; 00175 while (len > 1) { 00176 if (*q != '\0') break; 00177 q++; 00178 len--; 00179 } 00180 if (len == 1) return (int )(p - start); 00181 } 00182 e = p + ONIGENC_MBC_MAXLEN(enc); 00183 p += ONIGENC_MBC_ENC_LEN(enc, p, e); 00184 } 00185 } 00186 00187 const UChar OnigEncAsciiToLowerCaseTable[] = { 00188 '\000', 
'\001', '\002', '\003', '\004', '\005', '\006', '\007', 00189 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', 00190 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', 00191 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', 00192 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', 00193 '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', 00194 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', 00195 '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', 00196 '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', 00197 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', 00198 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', 00199 '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', 00200 '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', 00201 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', 00202 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', 00203 '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', 00204 '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', 00205 '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', 00206 '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', 00207 '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', 00208 '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', 00209 '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', 00210 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', 00211 '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', 00212 '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', 00213 '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', 00214 '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', 00215 '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', 00216 '\340', '\341', '\342', '\343', '\344', '\345', 
'\346', '\347', 00217 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', 00218 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', 00219 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377', 00220 }; 00221 00222 #ifdef USE_UPPER_CASE_TABLE 00223 const UChar OnigEncAsciiToUpperCaseTable[256] = { 00224 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', 00225 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', 00226 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', 00227 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', 00228 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', 00229 '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', 00230 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', 00231 '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', 00232 '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107', 00233 '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117', 00234 '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127', 00235 '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137', 00236 '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107', 00237 '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117', 00238 '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127', 00239 '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177', 00240 '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', 00241 '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', 00242 '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', 00243 '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', 00244 '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', 00245 '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', 00246 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', 00247 '\270', '\271', '\272', '\273', '\274', '\275', 
'\276', '\277', 00248 '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', 00249 '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', 00250 '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', 00251 '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', 00252 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', 00253 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', 00254 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', 00255 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377', 00256 }; 00257 #endif 00258 00259 const unsigned short OnigEncAsciiCtypeTable[256] = { 00260 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 00261 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, 00262 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 00263 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 00264 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 00265 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 00266 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 00267 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 00268 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, 00269 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 00270 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 00271 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, 00272 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, 00273 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 00274 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 00275 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, 00276 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 00277 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 00278 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 00279 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 00280 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 00281 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 00282 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 00283 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 00284 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 00285 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 00286 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 00287 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 00288 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 00289 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 00290 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 00291 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 00292 }; 00293 00294 const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = { 00295 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', 00296 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', 00297 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', 00298 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', 00299 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', 00300 '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', 00301 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', 00302 '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', 00303 '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', 00304 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', 00305 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', 00306 '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', 00307 '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', 00308 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', 00309 '\160', '\161', '\162', '\163', 
'\164', '\165', '\166', '\167', 00310 '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', 00311 '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', 00312 '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', 00313 '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', 00314 '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', 00315 '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', 00316 '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', 00317 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', 00318 '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', 00319 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', 00320 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', 00321 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327', 00322 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', 00323 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', 00324 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', 00325 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', 00326 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' 00327 }; 00328 00329 #ifdef USE_UPPER_CASE_TABLE 00330 const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = { 00331 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', 00332 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', 00333 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', 00334 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', 00335 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', 00336 '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', 00337 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', 00338 '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', 00339 '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107', 00340 '\110', '\111', '\112', '\113', 
'\114', '\115', '\116', '\117', 00341 '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127', 00342 '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137', 00343 '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107', 00344 '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117', 00345 '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127', 00346 '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177', 00347 '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', 00348 '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', 00349 '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', 00350 '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', 00351 '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', 00352 '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', 00353 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', 00354 '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', 00355 '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', 00356 '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', 00357 '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', 00358 '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', 00359 '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', 00360 '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', 00361 '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\367', 00362 '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\377', 00363 }; 00364 #endif 00365 00366 extern void 00367 onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED) 00368 { 00369 /* nothing */ 00370 /* obsoleted. 
*/ 00371 } 00372 00373 extern UChar* 00374 onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end) 00375 { 00376 return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end); 00377 } 00378 00379 const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = { 00380 { 0x41, 0x61 }, 00381 { 0x42, 0x62 }, 00382 { 0x43, 0x63 }, 00383 { 0x44, 0x64 }, 00384 { 0x45, 0x65 }, 00385 { 0x46, 0x66 }, 00386 { 0x47, 0x67 }, 00387 { 0x48, 0x68 }, 00388 { 0x49, 0x69 }, 00389 { 0x4a, 0x6a }, 00390 { 0x4b, 0x6b }, 00391 { 0x4c, 0x6c }, 00392 { 0x4d, 0x6d }, 00393 { 0x4e, 0x6e }, 00394 { 0x4f, 0x6f }, 00395 { 0x50, 0x70 }, 00396 { 0x51, 0x71 }, 00397 { 0x52, 0x72 }, 00398 { 0x53, 0x73 }, 00399 { 0x54, 0x74 }, 00400 { 0x55, 0x75 }, 00401 { 0x56, 0x76 }, 00402 { 0x57, 0x77 }, 00403 { 0x58, 0x78 }, 00404 { 0x59, 0x79 }, 00405 { 0x5a, 0x7a } 00406 }; 00407 00408 extern int 00409 onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED, 00410 OnigApplyAllCaseFoldFunc f, void* arg, 00411 OnigEncoding enc ARG_UNUSED) 00412 { 00413 OnigCodePoint code; 00414 int i, r; 00415 00416 for (i = 0; 00417 i < (int )(sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes)); 00418 i++) { 00419 code = OnigAsciiLowerMap[i].to; 00420 r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg); 00421 if (r != 0) return r; 00422 00423 code = OnigAsciiLowerMap[i].from; 00424 r = (*f)(OnigAsciiLowerMap[i].to, &code, 1, arg); 00425 if (r != 0) return r; 00426 } 00427 00428 return 0; 00429 } 00430 00431 extern int 00432 onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED, 00433 const OnigUChar* p, const OnigUChar* end ARG_UNUSED, OnigCaseFoldCodeItem items[], 00434 OnigEncoding enc ARG_UNUSED) 00435 { 00436 if (0x41 <= *p && *p <= 0x5a) { 00437 items[0].byte_len = 1; 00438 items[0].code_len = 1; 00439 items[0].code[0] = (OnigCodePoint )(*p + 0x20); 00440 return 1; 00441 } 00442 else if (0x61 <= *p && *p <= 0x7a) { 00443 items[0].byte_len = 1; 00444 
items[0].code_len = 1; 00445 items[0].code[0] = (OnigCodePoint )(*p - 0x20); 00446 return 1; 00447 } 00448 else 00449 return 0; 00450 } 00451 00452 static int 00453 ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED, 00454 OnigApplyAllCaseFoldFunc f, void* arg) 00455 { 00456 OnigCodePoint ss[] = { 0x73, 0x73 }; 00457 00458 return (*f)((OnigCodePoint )0xdf, ss, 2, arg); 00459 } 00460 00461 extern int 00462 onigenc_apply_all_case_fold_with_map(int map_size, 00463 const OnigPairCaseFoldCodes map[], 00464 int ess_tsett_flag, OnigCaseFoldType flag, 00465 OnigApplyAllCaseFoldFunc f, void* arg) 00466 { 00467 OnigCodePoint code; 00468 int i, r; 00469 00470 r = onigenc_ascii_apply_all_case_fold(flag, f, arg, 0); 00471 if (r != 0) return r; 00472 00473 for (i = 0; i < map_size; i++) { 00474 code = map[i].to; 00475 r = (*f)(map[i].from, &code, 1, arg); 00476 if (r != 0) return r; 00477 00478 code = map[i].from; 00479 r = (*f)(map[i].to, &code, 1, arg); 00480 if (r != 0) return r; 00481 } 00482 00483 if (ess_tsett_flag != 0) 00484 return ss_apply_all_case_fold(flag, f, arg); 00485 00486 return 0; 00487 } 00488 00489 extern int 00490 onigenc_get_case_fold_codes_by_str_with_map(int map_size, 00491 const OnigPairCaseFoldCodes map[], 00492 int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED, 00493 const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) 00494 { 00495 if (0x41 <= *p && *p <= 0x5a) { 00496 items[0].byte_len = 1; 00497 items[0].code_len = 1; 00498 items[0].code[0] = (OnigCodePoint )(*p + 0x20); 00499 if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1 00500 && (*(p+1) == 0x53 || *(p+1) == 0x73)) { 00501 /* SS */ 00502 items[1].byte_len = 2; 00503 items[1].code_len = 1; 00504 items[1].code[0] = (OnigCodePoint )0xdf; 00505 return 2; 00506 } 00507 else 00508 return 1; 00509 } 00510 else if (0x61 <= *p && *p <= 0x7a) { 00511 items[0].byte_len = 1; 00512 items[0].code_len = 1; 00513 items[0].code[0] = (OnigCodePoint )(*p - 0x20); 00514 if (*p == 0x73 
&& ess_tsett_flag != 0 && end > p + 1 00515 && (*(p+1) == 0x73 || *(p+1) == 0x53)) { 00516 /* ss */ 00517 items[1].byte_len = 2; 00518 items[1].code_len = 1; 00519 items[1].code[0] = (OnigCodePoint )0xdf; 00520 return 2; 00521 } 00522 else 00523 return 1; 00524 } 00525 else if (*p == 0xdf && ess_tsett_flag != 0) { 00526 items[0].byte_len = 1; 00527 items[0].code_len = 2; 00528 items[0].code[0] = (OnigCodePoint )'s'; 00529 items[0].code[1] = (OnigCodePoint )'s'; 00530 00531 items[1].byte_len = 1; 00532 items[1].code_len = 2; 00533 items[1].code[0] = (OnigCodePoint )'S'; 00534 items[1].code[1] = (OnigCodePoint )'S'; 00535 00536 items[2].byte_len = 1; 00537 items[2].code_len = 2; 00538 items[2].code[0] = (OnigCodePoint )'s'; 00539 items[2].code[1] = (OnigCodePoint )'S'; 00540 00541 items[3].byte_len = 1; 00542 items[3].code_len = 2; 00543 items[3].code[0] = (OnigCodePoint )'S'; 00544 items[3].code[1] = (OnigCodePoint )'s'; 00545 00546 return 4; 00547 } 00548 else { 00549 int i; 00550 00551 for (i = 0; i < map_size; i++) { 00552 if (*p == map[i].from) { 00553 items[0].byte_len = 1; 00554 items[0].code_len = 1; 00555 items[0].code[0] = map[i].to; 00556 return 1; 00557 } 00558 else if (*p == map[i].to) { 00559 items[0].byte_len = 1; 00560 items[0].code_len = 1; 00561 items[0].code[0] = map[i].from; 00562 return 1; 00563 } 00564 } 00565 } 00566 00567 return 0; 00568 } 00569 00570 00571 extern int 00572 onigenc_not_support_get_ctype_code_range(OnigCtype ctype, 00573 OnigCodePoint* sb_out, const OnigCodePoint* ranges[], 00574 OnigEncoding enc) 00575 { 00576 return ONIG_NO_SUPPORT_CONFIG; 00577 } 00578 00579 extern int 00580 onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end, OnigEncoding enc ARG_UNUSED) 00581 { 00582 if (p < end) { 00583 if (*p == 0x0a) return 1; 00584 } 00585 return 0; 00586 } 00587 00588 /* for single byte encodings */ 00589 extern int 00590 onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p, 00591 const UChar*end, 
UChar* lower, OnigEncoding enc ARG_UNUSED) 00592 { 00593 *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p); 00594 00595 (*p)++; 00596 return 1; /* return byte length of converted char to lower */ 00597 } 00598 00599 #if 0 00600 extern int 00601 onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag ARG_UNUSED, 00602 const UChar** pp, const UChar* end ARG_UNUSED) 00603 { 00604 const UChar* p = *pp; 00605 00606 (*pp)++; 00607 return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p); 00608 } 00609 #endif 00610 00611 extern int 00612 onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED, const UChar* e ARG_UNUSED, 00613 OnigEncoding enc ARG_UNUSED) 00614 { 00615 return 1; 00616 } 00617 00618 extern OnigCodePoint 00619 onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED, 00620 OnigEncoding enc ARG_UNUSED) 00621 { 00622 return (OnigCodePoint )(*p); 00623 } 00624 00625 extern int 00626 onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED, OnigEncoding enc ARG_UNUSED) 00627 { 00628 return 1; 00629 } 00630 00631 extern int 00632 onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc ARG_UNUSED) 00633 { 00634 if (code > 0xff) 00635 rb_raise(rb_eRangeError, "%u out of char range", code); 00636 *buf = (UChar )(code & 0xff); 00637 return 1; 00638 } 00639 00640 extern UChar* 00641 onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED, const UChar* s, 00642 const UChar* end, 00643 OnigEncoding enc ARG_UNUSED) 00644 { 00645 return (UChar* )s; 00646 } 00647 00648 extern int 00649 onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED, const UChar* end ARG_UNUSED, 00650 OnigEncoding enc ARG_UNUSED) 00651 { 00652 return TRUE; 00653 } 00654 00655 extern int 00656 onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED, const UChar* end ARG_UNUSED, 00657 OnigEncoding enc ARG_UNUSED) 00658 { 00659 return FALSE; 00660 } 00661 00662 extern int 00663 onigenc_ascii_is_code_ctype(OnigCodePoint 
code, unsigned int ctype, 00664 OnigEncoding enc ARG_UNUSED) 00665 { 00666 if (code < 128) 00667 return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); 00668 else 00669 return FALSE; 00670 } 00671 00672 extern OnigCodePoint 00673 onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end) 00674 { 00675 int c, i, len; 00676 OnigCodePoint n; 00677 00678 len = enclen(enc, p, end); 00679 n = (OnigCodePoint )(*p++); 00680 if (len == 1) return n; 00681 00682 for (i = 1; i < len; i++) { 00683 if (p >= end) break; 00684 c = *p++; 00685 n <<= 8; n += c; 00686 } 00687 return n; 00688 } 00689 00690 extern int 00691 onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED, 00692 const UChar** pp, const UChar* end ARG_UNUSED, 00693 UChar* lower) 00694 { 00695 int len; 00696 const UChar *p = *pp; 00697 00698 if (ONIGENC_IS_MBC_ASCII(p)) { 00699 *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); 00700 (*pp)++; 00701 return 1; 00702 } 00703 else { 00704 int i; 00705 00706 len = enclen(enc, p, end); 00707 for (i = 0; i < len; i++) { 00708 *lower++ = *p++; 00709 } 00710 (*pp) += len; 00711 return len; /* return byte length of converted to lower char */ 00712 } 00713 } 00714 00715 #if 0 00716 extern int 00717 onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag, 00718 const UChar** pp ARG_UNUSED, const UChar* end ARG_UNUSED) 00719 { 00720 const UChar* p = *pp; 00721 00722 if (ONIGENC_IS_MBC_ASCII(p)) { 00723 (*pp)++; 00724 return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p); 00725 } 00726 00727 (*pp) += enclen(enc, p); 00728 return FALSE; 00729 } 00730 #endif 00731 00732 extern int 00733 onigenc_mb2_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED) 00734 { 00735 if (code <= 0xff) return 1; 00736 if (code <= 0xffff) return 2; 00737 return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; 00738 } 00739 00740 extern int 00741 onigenc_mb4_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED) 00742 { 00743 if ((code & 0xff000000) != 0) return 4; 
00744 else if ((code & 0xff0000) != 0) return 3; 00745 else if ((code & 0xff00) != 0) return 2; 00746 else return 1; 00747 } 00748 00749 extern int 00750 onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf) 00751 { 00752 UChar *p = buf; 00753 00754 if ((code & 0xff00) != 0) { 00755 *p++ = (UChar )((code >> 8) & 0xff); 00756 } 00757 *p++ = (UChar )(code & 0xff); 00758 00759 #if 1 00760 if (enclen(enc, buf, p) != (p - buf)) 00761 return ONIGERR_INVALID_CODE_POINT_VALUE; 00762 #endif 00763 return (int)(p - buf); 00764 } 00765 00766 extern int 00767 onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf) 00768 { 00769 UChar *p = buf; 00770 00771 if ((code & 0xff000000) != 0) { 00772 *p++ = (UChar )((code >> 24) & 0xff); 00773 } 00774 if ((code & 0xff0000) != 0 || p != buf) { 00775 *p++ = (UChar )((code >> 16) & 0xff); 00776 } 00777 if ((code & 0xff00) != 0 || p != buf) { 00778 *p++ = (UChar )((code >> 8) & 0xff); 00779 } 00780 *p++ = (UChar )(code & 0xff); 00781 00782 #if 1 00783 if (enclen(enc, buf, p) != (p - buf)) 00784 return ONIGERR_INVALID_CODE_POINT_VALUE; 00785 #endif 00786 return (int)(p - buf); 00787 } 00788 00789 extern int 00790 onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end) 00791 { 00792 static const PosixBracketEntryType PBS[] = { 00793 PosixBracketEntryInit("Alnum", ONIGENC_CTYPE_ALNUM), 00794 PosixBracketEntryInit("Alpha", ONIGENC_CTYPE_ALPHA), 00795 PosixBracketEntryInit("Blank", ONIGENC_CTYPE_BLANK), 00796 PosixBracketEntryInit("Cntrl", ONIGENC_CTYPE_CNTRL), 00797 PosixBracketEntryInit("Digit", ONIGENC_CTYPE_DIGIT), 00798 PosixBracketEntryInit("Graph", ONIGENC_CTYPE_GRAPH), 00799 PosixBracketEntryInit("Lower", ONIGENC_CTYPE_LOWER), 00800 PosixBracketEntryInit("Print", ONIGENC_CTYPE_PRINT), 00801 PosixBracketEntryInit("Punct", ONIGENC_CTYPE_PUNCT), 00802 PosixBracketEntryInit("Space", ONIGENC_CTYPE_SPACE), 00803 PosixBracketEntryInit("Upper", ONIGENC_CTYPE_UPPER), 00804 
PosixBracketEntryInit("XDigit", ONIGENC_CTYPE_XDIGIT), 00805 PosixBracketEntryInit("ASCII", ONIGENC_CTYPE_ASCII), 00806 PosixBracketEntryInit("Word", ONIGENC_CTYPE_WORD), 00807 }; 00808 00809 const PosixBracketEntryType *pb, *pbe; 00810 int len; 00811 00812 len = onigenc_strlen(enc, p, end); 00813 for (pbe = (pb = PBS) + sizeof(PBS)/sizeof(PBS[0]); pb < pbe; ++pb) { 00814 if (len == pb->len && 00815 STRNCASECMP((char *)p, (char *)pb->name, len) == 0) 00816 return pb->ctype; 00817 } 00818 00819 return ONIGERR_INVALID_CHAR_PROPERTY_NAME; 00820 } 00821 00822 extern int 00823 onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code, 00824 unsigned int ctype) 00825 { 00826 if (code < 128) 00827 return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); 00828 else { 00829 if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) { 00830 return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE); 00831 } 00832 } 00833 00834 return FALSE; 00835 } 00836 00837 extern int 00838 onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code, 00839 unsigned int ctype) 00840 { 00841 if (code < 128) 00842 return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); 00843 else { 00844 if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) { 00845 return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? 
TRUE : FALSE); 00846 } 00847 } 00848 00849 return FALSE; 00850 } 00851 00852 extern int 00853 onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end, 00854 const UChar* sascii /* ascii */, int n) 00855 { 00856 int x, c; 00857 00858 while (n-- > 0) { 00859 if (p >= end) return (int )(*sascii); 00860 00861 c = (int )ONIGENC_MBC_TO_CODE(enc, p, end); 00862 x = *sascii - c; 00863 if (x) return x; 00864 00865 sascii++; 00866 p += enclen(enc, p, end); 00867 } 00868 return 0; 00869 } 00870 00871 /* Property management */ 00872 static int 00873 resize_property_list(int new_size, const OnigCodePoint*** plist, int* psize) 00874 { 00875 size_t size; 00876 const OnigCodePoint **list = *plist; 00877 00878 size = sizeof(OnigCodePoint*) * new_size; 00879 if (IS_NULL(list)) { 00880 list = (const OnigCodePoint** )xmalloc(size); 00881 } 00882 else { 00883 list = (const OnigCodePoint** )xrealloc((void* )list, size); 00884 } 00885 00886 if (IS_NULL(list)) return ONIGERR_MEMORY; 00887 00888 *plist = list; 00889 *psize = new_size; 00890 00891 return 0; 00892 } 00893 00894 extern int 00895 onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop, 00896 hash_table_type **table, const OnigCodePoint*** plist, int *pnum, 00897 int *psize) 00898 { 00899 #define PROP_INIT_SIZE 16 00900 00901 int r; 00902 00903 if (*psize <= *pnum) { 00904 int new_size = (*psize == 0 ? 
PROP_INIT_SIZE : *psize * 2); 00905 r = resize_property_list(new_size, plist, psize); 00906 if (r != 0) return r; 00907 } 00908 00909 (*plist)[*pnum] = prop; 00910 00911 if (ONIG_IS_NULL(*table)) { 00912 *table = onig_st_init_strend_table_with_size(PROP_INIT_SIZE); 00913 if (ONIG_IS_NULL(*table)) return ONIGERR_MEMORY; 00914 } 00915 00916 *pnum = *pnum + 1; 00917 onig_st_insert_strend(*table, name, name + strlen((char* )name), 00918 (hash_data_type )(*pnum + ONIGENC_MAX_STD_CTYPE)); 00919 return 0; 00920 } 00921 00922 extern int 00923 onigenc_property_list_init(int (*f)(void)) 00924 { 00925 int r; 00926 00927 THREAD_ATOMIC_START; 00928 00929 r = f(); 00930 00931 THREAD_ATOMIC_END; 00932 return r; 00933 } 00934