// ICU 4.6
00001 /* 00002 ******************************************************************************* 00003 * 00004 * Copyright (C) 2009-2010, International Business Machines 00005 * Corporation and others. All Rights Reserved. 00006 * 00007 ******************************************************************************* 00008 * file name: normalizer2.h 00009 * encoding: US-ASCII 00010 * tab size: 8 (not used) 00011 * indentation:4 00012 * 00013 * created on: 2009nov22 00014 * created by: Markus W. Scherer 00015 */ 00016 00017 #ifndef __NORMALIZER2_H__ 00018 #define __NORMALIZER2_H__ 00019 00025 #include "unicode/utypes.h" 00026 00027 #if !UCONFIG_NO_NORMALIZATION 00028 00029 #include "unicode/uniset.h" 00030 #include "unicode/unistr.h" 00031 #include "unicode/unorm2.h" 00032 00033 U_NAMESPACE_BEGIN 00034 00078 class U_COMMON_API Normalizer2 : public UObject { 00079 public: 00101 static const Normalizer2 * 00102 getInstance(const char *packageName, 00103 const char *name, 00104 UNormalization2Mode mode, 00105 UErrorCode &errorCode); 00106 00117 UnicodeString 00118 normalize(const UnicodeString &src, UErrorCode &errorCode) const { 00119 UnicodeString result; 00120 normalize(src, result, errorCode); 00121 return result; 00122 } 00136 virtual UnicodeString & 00137 normalize(const UnicodeString &src, 00138 UnicodeString &dest, 00139 UErrorCode &errorCode) const = 0; 00154 virtual UnicodeString & 00155 normalizeSecondAndAppend(UnicodeString &first, 00156 const UnicodeString &second, 00157 UErrorCode &errorCode) const = 0; 00172 virtual UnicodeString & 00173 append(UnicodeString &first, 00174 const UnicodeString &second, 00175 UErrorCode &errorCode) const = 0; 00176 00187 virtual UBool 00188 getDecomposition(UChar32 c, UnicodeString &decomposition) const = 0; 00189 00204 virtual UBool 00205 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0; 00206 00222 virtual UNormalizationCheckResult 00223 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0; 
00224 00247 virtual int32_t 00248 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0; 00249 00263 virtual UBool hasBoundaryBefore(UChar32 c) const = 0; 00264 00279 virtual UBool hasBoundaryAfter(UChar32 c) const = 0; 00280 00294 virtual UBool isInert(UChar32 c) const = 0; 00295 00296 private: 00297 // No ICU "poor man's RTTI" for this class nor its subclasses. 00298 virtual UClassID getDynamicClassID() const; 00299 }; 00300 00312 class U_COMMON_API FilteredNormalizer2 : public Normalizer2 { 00313 public: 00324 FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) : 00325 norm2(n2), set(filterSet) {} 00326 00340 virtual UnicodeString & 00341 normalize(const UnicodeString &src, 00342 UnicodeString &dest, 00343 UErrorCode &errorCode) const; 00358 virtual UnicodeString & 00359 normalizeSecondAndAppend(UnicodeString &first, 00360 const UnicodeString &second, 00361 UErrorCode &errorCode) const; 00376 virtual UnicodeString & 00377 append(UnicodeString &first, 00378 const UnicodeString &second, 00379 UErrorCode &errorCode) const; 00380 00391 virtual UBool 00392 getDecomposition(UChar32 c, UnicodeString &decomposition) const; 00393 00405 virtual UBool 00406 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const; 00418 virtual UNormalizationCheckResult 00419 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const; 00431 virtual int32_t 00432 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const; 00433 00442 virtual UBool hasBoundaryBefore(UChar32 c) const; 00443 00452 virtual UBool hasBoundaryAfter(UChar32 c) const; 00453 00461 virtual UBool isInert(UChar32 c) const; 00462 private: 00463 UnicodeString & 00464 normalize(const UnicodeString &src, 00465 UnicodeString &dest, 00466 USetSpanCondition spanCondition, 00467 UErrorCode &errorCode) const; 00468 00469 UnicodeString & 00470 normalizeSecondAndAppend(UnicodeString &first, 00471 const UnicodeString &second, 00472 UBool doNormalize, 00473 
UErrorCode &errorCode) const; 00474 00475 const Normalizer2 &norm2; 00476 const UnicodeSet &set; 00477 }; 00478 00479 U_NAMESPACE_END 00480 00481 #endif // !UCONFIG_NO_NORMALIZATION 00482 #endif // __NORMALIZER2_H__