1use std::error;
2use std::fmt;
3use std::result;
4
5use hir;
6
/// Result alias used by the class lookup routines in this module; the error
/// type is always this module's `Error`.
pub type Result<T> = result::Result<T, Error>;
9
/// A static table of `(char, char)` pairs.
///
/// `hir_class` turns each pair `(s, e)` into `hir::ClassUnicodeRange::new(s, e)`,
/// so each pair is presumably an inclusive (start, end) codepoint range.
type Range = &'static [(char, char)];
13
/// The ways in which a Unicode class lookup in this module can fail.
#[derive(Debug)]
pub enum Error {
    /// A property (or binary/one-letter class) name could not be resolved.
    /// Also returned by the feature-gated lookups in this module when the
    /// feature providing their table is disabled.
    PropertyNotFound,
    /// The property was resolved, but the requested value for it was not.
    PropertyValueNotFound,
    /// Returned by `perl_word`, `perl_space` and `perl_digit` when none of
    /// the features providing their tables is enabled.
    // Only constructed under some feature combinations, hence the allow.
    #[allow(dead_code)]
    PerlClassNotFound,
}
26
/// Result alias for the case folding routines (`simple_fold` and
/// `contains_simple_case_mapping`); the error type is `CaseFoldError`.
pub type FoldResult<T> = result::Result<T, CaseFoldError>;
29
/// Error returned by the case folding routines when Unicode-aware case
/// folding cannot be performed.
///
/// The single field is a private unit, so values of this type can only be
/// built inside this module.
#[derive(Debug)]
pub struct CaseFoldError(());

impl error::Error for CaseFoldError {}

impl fmt::Display for CaseFoldError {
    /// Writes a fixed, human readable explanation of the failure.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str(
            "Unicode-aware case folding is not available \
             (probably because the unicode-case feature is not enabled)",
        )
    }
}
49
/// Error returned by `is_word_character` when the Unicode-aware `\w` table
/// is not compiled in.
///
/// The single field is a private unit, so values of this type can only be
/// built inside this module.
#[derive(Debug)]
pub struct UnicodeWordError(());

impl error::Error for UnicodeWordError {}

impl fmt::Display for UnicodeWordError {
    /// Writes a fixed, human readable explanation of the failure.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str(
            "Unicode-aware \\w class is not available \
             (probably because the unicode-perl feature is not enabled)",
        )
    }
}
69
70pub fn simple_fold(
82 c: char,
83) -> FoldResult<result::Result<impl Iterator<Item = char>, Option<char>>> {
84 #[cfg(not(feature = "unicode-case"))]
85 fn imp(
86 _: char,
87 ) -> FoldResult<result::Result<impl Iterator<Item = char>, Option<char>>>
88 {
89 use std::option::IntoIter;
90 Err::<result::Result<IntoIter<char>, _>, _>(CaseFoldError(()))
91 }
92
93 #[cfg(feature = "unicode-case")]
94 fn imp(
95 c: char,
96 ) -> FoldResult<result::Result<impl Iterator<Item = char>, Option<char>>>
97 {
98 use unicode_tables::case_folding_simple::CASE_FOLDING_SIMPLE;
99
100 Ok(CASE_FOLDING_SIMPLE
101 .binary_search_by_key(&c, |&(c1, _)| c1)
102 .map(|i| CASE_FOLDING_SIMPLE[i].1.iter().map(|&c| c))
103 .map_err(|i| {
104 if i >= CASE_FOLDING_SIMPLE.len() {
105 None
106 } else {
107 Some(CASE_FOLDING_SIMPLE[i].0)
108 }
109 }))
110 }
111
112 imp(c)
113}
114
115pub fn contains_simple_case_mapping(
123 start: char,
124 end: char,
125) -> FoldResult<bool> {
126 #[cfg(not(feature = "unicode-case"))]
127 fn imp(_: char, _: char) -> FoldResult<bool> {
128 Err(CaseFoldError(()))
129 }
130
131 #[cfg(feature = "unicode-case")]
132 fn imp(start: char, end: char) -> FoldResult<bool> {
133 use std::cmp::Ordering;
134 use unicode_tables::case_folding_simple::CASE_FOLDING_SIMPLE;
135
136 assert!(start <= end);
137 Ok(CASE_FOLDING_SIMPLE
138 .binary_search_by(|&(c, _)| {
139 if start <= c && c <= end {
140 Ordering::Equal
141 } else if c > end {
142 Ordering::Greater
143 } else {
144 Ordering::Less
145 }
146 })
147 .is_ok())
148 }
149
150 imp(start, end)
151}
152
/// A request for a Unicode character class, as accepted by `class`.
///
/// Names are normalized with `symbolic_name_normalize` before lookup, so
/// case, spaces, underscores and hyphens in them are not significant.
#[derive(Debug)]
pub enum ClassQuery<'a> {
    /// A class named by a single character. It is resolved exactly like
    /// `Binary` applied to that character's string form.
    OneLetter(char),
    /// A class named by a single name. The name is tried as a property name,
    /// then as a general category, then as a script.
    Binary(&'a str),
    /// A class named by a property together with one of its values.
    ByValue {
        /// The (not yet normalized) property name.
        property_name: &'a str,
        /// The (not yet normalized) property value.
        property_value: &'a str,
    },
}
186
187impl<'a> ClassQuery<'a> {
188 fn canonicalize(&self) -> Result<CanonicalClassQuery> {
189 match *self {
190 ClassQuery::OneLetter(c) => self.canonical_binary(&c.to_string()),
191 ClassQuery::Binary(name) => self.canonical_binary(name),
192 ClassQuery::ByValue { property_name, property_value } => {
193 let property_name = symbolic_name_normalize(property_name);
194 let property_value = symbolic_name_normalize(property_value);
195
196 let canon_name = match canonical_prop(&property_name)? {
197 None => return Err(Error::PropertyNotFound),
198 Some(canon_name) => canon_name,
199 };
200 Ok(match canon_name {
201 "General_Category" => {
202 let canon = match canonical_gencat(&property_value)? {
203 None => return Err(Error::PropertyValueNotFound),
204 Some(canon) => canon,
205 };
206 CanonicalClassQuery::GeneralCategory(canon)
207 }
208 "Script" => {
209 let canon = match canonical_script(&property_value)? {
210 None => return Err(Error::PropertyValueNotFound),
211 Some(canon) => canon,
212 };
213 CanonicalClassQuery::Script(canon)
214 }
215 _ => {
216 let vals = match property_values(canon_name)? {
217 None => return Err(Error::PropertyValueNotFound),
218 Some(vals) => vals,
219 };
220 let canon_val =
221 match canonical_value(vals, &property_value) {
222 None => {
223 return Err(Error::PropertyValueNotFound)
224 }
225 Some(canon_val) => canon_val,
226 };
227 CanonicalClassQuery::ByValue {
228 property_name: canon_name,
229 property_value: canon_val,
230 }
231 }
232 })
233 }
234 }
235 }
236
237 fn canonical_binary(&self, name: &str) -> Result<CanonicalClassQuery> {
238 let norm = symbolic_name_normalize(name);
239
240 if let Some(canon) = canonical_prop(&norm)? {
241 return Ok(CanonicalClassQuery::Binary(canon));
242 }
243 if let Some(canon) = canonical_gencat(&norm)? {
244 return Ok(CanonicalClassQuery::GeneralCategory(canon));
245 }
246 if let Some(canon) = canonical_script(&norm)? {
247 return Ok(CanonicalClassQuery::Script(canon));
248 }
249 Err(Error::PropertyNotFound)
250 }
251}
252
/// A `ClassQuery` after resolution by `ClassQuery::canonicalize`: every name
/// is a canonical spelling taken from the static property tables.
#[derive(Debug, Eq, PartialEq)]
enum CanonicalClassQuery {
    /// A property referenced by name only; `class` looks it up with
    /// `bool_property`.
    Binary(&'static str),
    /// A general category; `class` looks it up with `gencat`.
    GeneralCategory(&'static str),
    /// A script; `class` looks it up with `script`.
    Script(&'static str),
    /// Any other property/value pair.
    ByValue {
        /// Canonical property name.
        property_name: &'static str,
        /// Canonical property value.
        property_value: &'static str,
    },
}
277
278pub fn class<'a>(query: ClassQuery<'a>) -> Result<hir::ClassUnicode> {
281 use self::CanonicalClassQuery::*;
282
283 match query.canonicalize()? {
284 Binary(name) => bool_property(name),
285 GeneralCategory(name) => gencat(name),
286 Script(name) => script(name),
287 ByValue { property_name: "Age", property_value } => {
288 let mut class = hir::ClassUnicode::empty();
289 for set in ages(property_value)? {
290 class.union(&hir_class(set));
291 }
292 Ok(class)
293 }
294 ByValue { property_name: "Script_Extensions", property_value } => {
295 script_extension(property_value)
296 }
297 ByValue {
298 property_name: "Grapheme_Cluster_Break",
299 property_value,
300 } => gcb(property_value),
301 ByValue { property_name: "Sentence_Break", property_value } => {
302 sb(property_value)
303 }
304 ByValue { property_name: "Word_Break", property_value } => {
305 wb(property_value)
306 }
307 _ => {
308 Err(Error::PropertyNotFound)
310 }
311 }
312}
313
314pub fn perl_word() -> Result<hir::ClassUnicode> {
318 #[cfg(not(feature = "unicode-perl"))]
319 fn imp() -> Result<hir::ClassUnicode> {
320 Err(Error::PerlClassNotFound)
321 }
322
323 #[cfg(feature = "unicode-perl")]
324 fn imp() -> Result<hir::ClassUnicode> {
325 use unicode_tables::perl_word::PERL_WORD;
326 Ok(hir_class(PERL_WORD))
327 }
328
329 imp()
330}
331
332pub fn perl_space() -> Result<hir::ClassUnicode> {
336 #[cfg(not(any(feature = "unicode-perl", feature = "unicode-bool")))]
337 fn imp() -> Result<hir::ClassUnicode> {
338 Err(Error::PerlClassNotFound)
339 }
340
341 #[cfg(all(feature = "unicode-perl", not(feature = "unicode-bool")))]
342 fn imp() -> Result<hir::ClassUnicode> {
343 use unicode_tables::perl_space::WHITE_SPACE;
344 Ok(hir_class(WHITE_SPACE))
345 }
346
347 #[cfg(feature = "unicode-bool")]
348 fn imp() -> Result<hir::ClassUnicode> {
349 use unicode_tables::property_bool::WHITE_SPACE;
350 Ok(hir_class(WHITE_SPACE))
351 }
352
353 imp()
354}
355
356pub fn perl_digit() -> Result<hir::ClassUnicode> {
360 #[cfg(not(any(feature = "unicode-perl", feature = "unicode-gencat")))]
361 fn imp() -> Result<hir::ClassUnicode> {
362 Err(Error::PerlClassNotFound)
363 }
364
365 #[cfg(all(feature = "unicode-perl", not(feature = "unicode-gencat")))]
366 fn imp() -> Result<hir::ClassUnicode> {
367 use unicode_tables::perl_decimal::DECIMAL_NUMBER;
368 Ok(hir_class(DECIMAL_NUMBER))
369 }
370
371 #[cfg(feature = "unicode-gencat")]
372 fn imp() -> Result<hir::ClassUnicode> {
373 use unicode_tables::general_category::DECIMAL_NUMBER;
374 Ok(hir_class(DECIMAL_NUMBER))
375 }
376
377 imp()
378}
379
380pub fn hir_class(ranges: &[(char, char)]) -> hir::ClassUnicode {
382 let hir_ranges: Vec<hir::ClassUnicodeRange> = ranges
383 .iter()
384 .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e))
385 .collect();
386 hir::ClassUnicode::new(hir_ranges)
387}
388
389pub fn is_word_character(c: char) -> result::Result<bool, UnicodeWordError> {
393 #[cfg(not(feature = "unicode-perl"))]
394 fn imp(_: char) -> result::Result<bool, UnicodeWordError> {
395 Err(UnicodeWordError(()))
396 }
397
398 #[cfg(feature = "unicode-perl")]
399 fn imp(c: char) -> result::Result<bool, UnicodeWordError> {
400 use is_word_byte;
401 use std::cmp::Ordering;
402 use unicode_tables::perl_word::PERL_WORD;
403
404 if c <= 0x7F as char && is_word_byte(c as u8) {
405 return Ok(true);
406 }
407 Ok(PERL_WORD
408 .binary_search_by(|&(start, end)| {
409 if start <= c && c <= end {
410 Ordering::Equal
411 } else if start > c {
412 Ordering::Greater
413 } else {
414 Ordering::Less
415 }
416 })
417 .is_ok())
418 }
419
420 imp(c)
421}
422
/// A static table of string pairs for one property.
///
/// `canonical_value` binary-searches it on the first element of each pair
/// (using a normalized value) and returns the second element, so the pairs
/// are presumably (normalized alias, canonical value name) sorted by alias.
type PropertyValues = &'static [(&'static str, &'static str)];
429
430fn canonical_gencat(normalized_value: &str) -> Result<Option<&'static str>> {
431 Ok(match normalized_value {
432 "any" => Some("Any"),
433 "assigned" => Some("Assigned"),
434 "ascii" => Some("ASCII"),
435 _ => {
436 let gencats = property_values("General_Category")?.unwrap();
437 canonical_value(gencats, normalized_value)
438 }
439 })
440}
441
442fn canonical_script(normalized_value: &str) -> Result<Option<&'static str>> {
443 let scripts = property_values("Script")?.unwrap();
444 Ok(canonical_value(scripts, normalized_value))
445}
446
447fn canonical_prop(normalized_name: &str) -> Result<Option<&'static str>> {
456 #[cfg(not(any(
457 feature = "unicode-age",
458 feature = "unicode-bool",
459 feature = "unicode-gencat",
460 feature = "unicode-perl",
461 feature = "unicode-script",
462 feature = "unicode-segment",
463 )))]
464 fn imp(_: &str) -> Result<Option<&'static str>> {
465 Err(Error::PropertyNotFound)
466 }
467
468 #[cfg(any(
469 feature = "unicode-age",
470 feature = "unicode-bool",
471 feature = "unicode-gencat",
472 feature = "unicode-perl",
473 feature = "unicode-script",
474 feature = "unicode-segment",
475 ))]
476 fn imp(name: &str) -> Result<Option<&'static str>> {
477 use unicode_tables::property_names::PROPERTY_NAMES;
478
479 Ok(PROPERTY_NAMES
480 .binary_search_by_key(&name, |&(n, _)| n)
481 .ok()
482 .map(|i| PROPERTY_NAMES[i].1))
483 }
484
485 imp(normalized_name)
486}
487
488fn canonical_value(
499 vals: PropertyValues,
500 normalized_value: &str,
501) -> Option<&'static str> {
502 vals.binary_search_by_key(&normalized_value, |&(n, _)| n)
503 .ok()
504 .map(|i| vals[i].1)
505}
506
507fn property_values(
511 canonical_property_name: &'static str,
512) -> Result<Option<PropertyValues>> {
513 #[cfg(not(any(
514 feature = "unicode-age",
515 feature = "unicode-bool",
516 feature = "unicode-gencat",
517 feature = "unicode-perl",
518 feature = "unicode-script",
519 feature = "unicode-segment",
520 )))]
521 fn imp(_: &'static str) -> Result<Option<PropertyValues>> {
522 Err(Error::PropertyValueNotFound)
523 }
524
525 #[cfg(any(
526 feature = "unicode-age",
527 feature = "unicode-bool",
528 feature = "unicode-gencat",
529 feature = "unicode-perl",
530 feature = "unicode-script",
531 feature = "unicode-segment",
532 ))]
533 fn imp(name: &'static str) -> Result<Option<PropertyValues>> {
534 use unicode_tables::property_values::PROPERTY_VALUES;
535
536 Ok(PROPERTY_VALUES
537 .binary_search_by_key(&name, |&(n, _)| n)
538 .ok()
539 .map(|i| PROPERTY_VALUES[i].1))
540 }
541
542 imp(canonical_property_name)
543}
544
545#[allow(dead_code)]
548fn property_set(
549 name_map: &'static [(&'static str, Range)],
550 canonical: &'static str,
551) -> Option<Range> {
552 name_map
553 .binary_search_by_key(&canonical, |x| x.0)
554 .ok()
555 .map(|i| name_map[i].1)
556}
557
558fn ages(canonical_age: &str) -> Result<impl Iterator<Item = Range>> {
565 #[cfg(not(feature = "unicode-age"))]
566 fn imp(_: &str) -> Result<impl Iterator<Item = Range>> {
567 use std::option::IntoIter;
568 Err::<IntoIter<Range>, _>(Error::PropertyNotFound)
569 }
570
571 #[cfg(feature = "unicode-age")]
572 fn imp(canonical_age: &str) -> Result<impl Iterator<Item = Range>> {
573 use unicode_tables::age;
574
575 const AGES: &'static [(&'static str, Range)] = &[
576 ("V1_1", age::V1_1),
577 ("V2_0", age::V2_0),
578 ("V2_1", age::V2_1),
579 ("V3_0", age::V3_0),
580 ("V3_1", age::V3_1),
581 ("V3_2", age::V3_2),
582 ("V4_0", age::V4_0),
583 ("V4_1", age::V4_1),
584 ("V5_0", age::V5_0),
585 ("V5_1", age::V5_1),
586 ("V5_2", age::V5_2),
587 ("V6_0", age::V6_0),
588 ("V6_1", age::V6_1),
589 ("V6_2", age::V6_2),
590 ("V6_3", age::V6_3),
591 ("V7_0", age::V7_0),
592 ("V8_0", age::V8_0),
593 ("V9_0", age::V9_0),
594 ("V10_0", age::V10_0),
595 ("V11_0", age::V11_0),
596 ("V12_0", age::V12_0),
597 ("V12_1", age::V12_1),
598 ];
599 assert_eq!(AGES.len(), age::BY_NAME.len(), "ages are out of sync");
600
601 let pos = AGES.iter().position(|&(age, _)| canonical_age == age);
602 match pos {
603 None => Err(Error::PropertyValueNotFound),
604 Some(i) => Ok(AGES[..i + 1].iter().map(|&(_, classes)| classes)),
605 }
606 }
607
608 imp(canonical_age)
609}
610
611fn gencat(canonical_name: &'static str) -> Result<hir::ClassUnicode> {
618 #[cfg(not(feature = "unicode-gencat"))]
619 fn imp(_: &'static str) -> Result<hir::ClassUnicode> {
620 Err(Error::PropertyNotFound)
621 }
622
623 #[cfg(feature = "unicode-gencat")]
624 fn imp(name: &'static str) -> Result<hir::ClassUnicode> {
625 use unicode_tables::general_category::BY_NAME;
626 match name {
627 "ASCII" => Ok(hir_class(&[('\0', '\x7F')])),
628 "Any" => Ok(hir_class(&[('\0', '\u{10FFFF}')])),
629 "Assigned" => {
630 let mut cls = gencat("Unassigned")?;
631 cls.negate();
632 Ok(cls)
633 }
634 name => property_set(BY_NAME, name)
635 .map(hir_class)
636 .ok_or(Error::PropertyValueNotFound),
637 }
638 }
639
640 match canonical_name {
641 "Decimal_Number" => perl_digit(),
642 name => imp(name),
643 }
644}
645
646fn script(canonical_name: &'static str) -> Result<hir::ClassUnicode> {
653 #[cfg(not(feature = "unicode-script"))]
654 fn imp(_: &'static str) -> Result<hir::ClassUnicode> {
655 Err(Error::PropertyNotFound)
656 }
657
658 #[cfg(feature = "unicode-script")]
659 fn imp(name: &'static str) -> Result<hir::ClassUnicode> {
660 use unicode_tables::script::BY_NAME;
661 property_set(BY_NAME, name)
662 .map(hir_class)
663 .ok_or(Error::PropertyValueNotFound)
664 }
665
666 imp(canonical_name)
667}
668
669fn script_extension(
676 canonical_name: &'static str,
677) -> Result<hir::ClassUnicode> {
678 #[cfg(not(feature = "unicode-script"))]
679 fn imp(_: &'static str) -> Result<hir::ClassUnicode> {
680 Err(Error::PropertyNotFound)
681 }
682
683 #[cfg(feature = "unicode-script")]
684 fn imp(name: &'static str) -> Result<hir::ClassUnicode> {
685 use unicode_tables::script_extension::BY_NAME;
686 property_set(BY_NAME, name)
687 .map(hir_class)
688 .ok_or(Error::PropertyValueNotFound)
689 }
690
691 imp(canonical_name)
692}
693
694fn bool_property(canonical_name: &'static str) -> Result<hir::ClassUnicode> {
702 #[cfg(not(feature = "unicode-bool"))]
703 fn imp(_: &'static str) -> Result<hir::ClassUnicode> {
704 Err(Error::PropertyNotFound)
705 }
706
707 #[cfg(feature = "unicode-bool")]
708 fn imp(name: &'static str) -> Result<hir::ClassUnicode> {
709 use unicode_tables::property_bool::BY_NAME;
710 property_set(BY_NAME, name)
711 .map(hir_class)
712 .ok_or(Error::PropertyNotFound)
713 }
714
715 match canonical_name {
716 "Decimal_Number" => perl_digit(),
717 "White_Space" => perl_space(),
718 name => imp(name),
719 }
720}
721
722fn gcb(canonical_name: &'static str) -> Result<hir::ClassUnicode> {
730 #[cfg(not(feature = "unicode-segment"))]
731 fn imp(_: &'static str) -> Result<hir::ClassUnicode> {
732 Err(Error::PropertyNotFound)
733 }
734
735 #[cfg(feature = "unicode-segment")]
736 fn imp(name: &'static str) -> Result<hir::ClassUnicode> {
737 use unicode_tables::grapheme_cluster_break::BY_NAME;
738 property_set(BY_NAME, name)
739 .map(hir_class)
740 .ok_or(Error::PropertyValueNotFound)
741 }
742
743 imp(canonical_name)
744}
745
746fn wb(canonical_name: &'static str) -> Result<hir::ClassUnicode> {
754 #[cfg(not(feature = "unicode-segment"))]
755 fn imp(_: &'static str) -> Result<hir::ClassUnicode> {
756 Err(Error::PropertyNotFound)
757 }
758
759 #[cfg(feature = "unicode-segment")]
760 fn imp(name: &'static str) -> Result<hir::ClassUnicode> {
761 use unicode_tables::word_break::BY_NAME;
762 property_set(BY_NAME, name)
763 .map(hir_class)
764 .ok_or(Error::PropertyValueNotFound)
765 }
766
767 imp(canonical_name)
768}
769
770fn sb(canonical_name: &'static str) -> Result<hir::ClassUnicode> {
778 #[cfg(not(feature = "unicode-segment"))]
779 fn imp(_: &'static str) -> Result<hir::ClassUnicode> {
780 Err(Error::PropertyNotFound)
781 }
782
783 #[cfg(feature = "unicode-segment")]
784 fn imp(name: &'static str) -> Result<hir::ClassUnicode> {
785 use unicode_tables::sentence_break::BY_NAME;
786 property_set(BY_NAME, name)
787 .map(hir_class)
788 .ok_or(Error::PropertyValueNotFound)
789 }
790
791 imp(canonical_name)
792}
793
794fn symbolic_name_normalize(x: &str) -> String {
796 let mut tmp = x.as_bytes().to_vec();
797 let len = symbolic_name_normalize_bytes(&mut tmp).len();
798 tmp.truncate(len);
799 String::from_utf8(tmp).unwrap()
806}
807
/// Normalizes a symbolic name in place and returns the prefix of `slice`
/// that holds the result.
///
/// The rules, applied in order:
///
/// * a leading `is` (in any letter case) is skipped;
/// * spaces, underscores and hyphens are dropped;
/// * ASCII uppercase letters are lowercased;
/// * non-ASCII bytes are dropped, so the result is always ASCII;
/// * if a leading `is` was skipped and the result is exactly `c`, the result
///   becomes `isc` instead.
///
/// Bytes of `slice` beyond the returned prefix are left in an unspecified
/// state.
fn symbolic_name_normalize_bytes(slice: &mut [u8]) -> &mut [u8] {
    let starts_with_is = slice.len() >= 2
        && (slice[0] == b'i' || slice[0] == b'I')
        && (slice[1] == b's' || slice[1] == b'S');
    let start = if starts_with_is { 2 } else { 0 };

    // Compact the kept bytes towards the front. The write cursor never
    // passes the read cursor, so unread bytes are never overwritten.
    let mut next_write = 0;
    for i in start..slice.len() {
        let b = slice[i];
        let is_separator = b == b' ' || b == b'_' || b == b'-';
        if is_separator || !b.is_ascii() {
            continue;
        }
        slice[next_write] = b.to_ascii_lowercase();
        next_write += 1;
    }

    // Restore the `is` prefix when only `c` remains after stripping it. At
    // least three input bytes exist here: the two of the prefix plus the one
    // that produced `c`.
    if starts_with_is && next_write == 1 && slice[0] == b'c' {
        slice[..3].copy_from_slice(b"isc");
        next_write = 3;
    }
    &mut slice[..next_write]
}
862
#[cfg(test)]
mod tests {
    use super::{
        contains_simple_case_mapping, simple_fold, symbolic_name_normalize,
        symbolic_name_normalize_bytes,
    };

    /// Folds `c`, requiring that it has an entry in the case folding table.
    #[cfg(feature = "unicode-case")]
    fn simple_fold_ok(c: char) -> impl Iterator<Item = char> {
        simple_fold(c).unwrap().unwrap()
    }

    /// Folds `c`, requiring that it has no entry in the case folding table,
    /// and returns the next key reported by `simple_fold`.
    #[cfg(feature = "unicode-case")]
    fn simple_fold_err(c: char) -> Option<char> {
        match simple_fold(c).unwrap() {
            Ok(_) => unreachable!("simple_fold returned Ok iterator"),
            Err(next) => next,
        }
    }

    /// Unwrapping shorthand for `contains_simple_case_mapping`.
    #[cfg(feature = "unicode-case")]
    fn contains_case_map(start: char, end: char) -> bool {
        contains_simple_case_mapping(start, end).unwrap()
    }

    #[test]
    #[cfg(feature = "unicode-case")]
    fn simple_fold_k() {
        // KELVIN SIGN renders just like the ASCII letter 'K', so it is
        // written as an escape to keep the two apart in the source text.
        const KELVIN_SIGN: char = '\u{212A}';

        let xs: Vec<char> = simple_fold_ok('k').collect();
        assert_eq!(xs, vec!['K', KELVIN_SIGN]);

        let xs: Vec<char> = simple_fold_ok('K').collect();
        assert_eq!(xs, vec!['k', KELVIN_SIGN]);

        let xs: Vec<char> = simple_fold_ok(KELVIN_SIGN).collect();
        assert_eq!(xs, vec!['K', 'k']);
    }

    #[test]
    #[cfg(feature = "unicode-case")]
    fn simple_fold_a() {
        let xs: Vec<char> = simple_fold_ok('a').collect();
        assert_eq!(xs, vec!['A']);

        let xs: Vec<char> = simple_fold_ok('A').collect();
        assert_eq!(xs, vec!['a']);
    }

    #[test]
    #[cfg(feature = "unicode-case")]
    fn simple_fold_empty() {
        assert_eq!(Some('A'), simple_fold_err('?'));
        assert_eq!(Some('A'), simple_fold_err('@'));
        assert_eq!(Some('a'), simple_fold_err('['));
        assert_eq!(Some('Ⰰ'), simple_fold_err('☃'));
    }

    #[test]
    #[cfg(feature = "unicode-case")]
    fn simple_fold_max() {
        assert_eq!(None, simple_fold_err('\u{10FFFE}'));
        assert_eq!(None, simple_fold_err('\u{10FFFF}'));
    }

    #[test]
    #[cfg(not(feature = "unicode-case"))]
    fn simple_fold_disabled() {
        assert!(simple_fold('a').is_err());
    }

    #[test]
    #[cfg(feature = "unicode-case")]
    fn range_contains() {
        assert!(contains_case_map('A', 'A'));
        assert!(contains_case_map('Z', 'Z'));
        assert!(contains_case_map('A', 'Z'));
        assert!(contains_case_map('@', 'A'));
        assert!(contains_case_map('Z', '['));
        assert!(contains_case_map('☃', 'Ⰰ'));

        assert!(!contains_case_map('[', '['));
        assert!(!contains_case_map('[', '`'));

        assert!(!contains_case_map('☃', '☃'));
    }

    #[test]
    #[cfg(not(feature = "unicode-case"))]
    fn range_contains_disabled() {
        assert!(contains_simple_case_mapping('a', 'a').is_err());
    }

    // A one-letter 'C' query must resolve to the 'Other' general category.
    #[test]
    #[cfg(feature = "unicode-gencat")]
    fn regression_466() {
        use super::{CanonicalClassQuery, ClassQuery};

        let q = ClassQuery::OneLetter('C');
        assert_eq!(
            q.canonicalize().unwrap(),
            CanonicalClassQuery::GeneralCategory("Other")
        );
    }

    #[test]
    fn sym_normalize() {
        let sym_norm = symbolic_name_normalize;

        assert_eq!(sym_norm("Line_Break"), "linebreak");
        assert_eq!(sym_norm("Line-break"), "linebreak");
        assert_eq!(sym_norm("linebreak"), "linebreak");
        assert_eq!(sym_norm("BA"), "ba");
        assert_eq!(sym_norm("ba"), "ba");
        assert_eq!(sym_norm("Greek"), "greek");
        assert_eq!(sym_norm("isGreek"), "greek");
        assert_eq!(sym_norm("IS_Greek"), "greek");
        assert_eq!(sym_norm("isc"), "isc");
        assert_eq!(sym_norm("is c"), "isc");
        assert_eq!(sym_norm("is_c"), "isc");
    }

    // Non-ASCII bytes must be dropped so that the result is valid UTF-8.
    #[test]
    fn valid_utf8_symbolic() {
        let mut x = b"abc\xFFxyz".to_vec();
        let y = symbolic_name_normalize_bytes(&mut x);
        assert_eq!(y, b"abcxyz");
    }
}