Main Page | Class Hierarchy | Alphabetical List | Data Structures | File List | Data Fields | Globals

uniset.h

00001 /*
00002 **********************************************************************
00003 * Copyright (C) 1999-2004, International Business Machines Corporation and others. All Rights Reserved.
00004 **********************************************************************
00005 *   Date        Name        Description
00006 *   10/20/99    alan        Creation.
00007 **********************************************************************
00008 */
00009 
00010 #ifndef UNICODESET_H
00011 #define UNICODESET_H
00012 
00013 #include "unicode/unifilt.h"
00014 #include "unicode/unistr.h"
00015 #include "unicode/uset.h"
00016 
00017 U_NAMESPACE_BEGIN
00018 
00019 class ParsePosition;
00020 class SymbolTable;
00021 class UVector;
00022 class CaseEquivClass;
00023 class RuleCharacterIterator;
00024 
00256 class U_COMMON_API UnicodeSet : public UnicodeFilter {
00257     // Set type derived from UnicodeFilter, holding UChar32 values (list) plus a vector of strings. NOTE(review): exported via U_COMMON_API, so member/virtual order is presumably ABI-relevant.
00258     int32_t len; // length of list used; 0 <= len <= capacity
00259     int32_t capacity; // capacity of list
00260     int32_t bufferCapacity; // capacity of buffer
00261     UChar32* list; // MUST be terminated with HIGH
00262     UChar32* buffer; // internal buffer, may be NULL
00263     // String members are held in a separate vector (read back through getStringCount()/getString() below).
00264     UVector* strings; // maintained in sorted order
00265     // NOTE(review): pat looks like a stored pattern string related to toPattern()/_toPattern() - confirm in uniset.cpp.
00275     UnicodeString pat;
00276 
00277 public:
00278     // MIN_VALUE / MAX_VALUE: static UChar32 constants, declared only (no initializer in this header). Under U_CYGWIN the declarations also carry U_COMMON_API.
00283 #ifdef U_CYGWIN
00284     static U_COMMON_API const UChar32 MIN_VALUE;
00285 #else
00286     static const UChar32 MIN_VALUE;
00287 #endif
00288 
00293 #ifdef U_CYGWIN
00294     static U_COMMON_API const UChar32 MAX_VALUE;
00295 #else
00296     static const UChar32 MAX_VALUE;
00297 #endif
00298 
00299     //----------------------------------------------------------------
00300     // Constructors &c
00301     //----------------------------------------------------------------
00302 
00303 public:
00304     // Default constructor.
00309     UnicodeSet();
00310     // Constructs from a (start, end) pair of code points; presumably an inclusive range - confirm.
00319     UnicodeSet(UChar32 start, UChar32 end);
00320     // Constructs from a pattern string; status is a UErrorCode passed by non-const reference.
00329     UnicodeSet(const UnicodeString& pattern,
00330                UErrorCode& status);
00331     // Pattern constructor that additionally takes option flags and a SymbolTable pointer.
00344     UnicodeSet(const UnicodeString& pattern,
00345                uint32_t options,
00346                const SymbolTable* symbols,
00347                UErrorCode& status);
00348     // Pattern constructor that additionally takes a ParsePosition by non-const reference (presumably updated by the parse - confirm).
00362     UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
00363                uint32_t options,
00364                const SymbolTable* symbols,
00365                UErrorCode& status);
00366 
00367 #ifdef U_USE_UNICODESET_DEPRECATES
00368     // Category-based constructor; only declared when U_USE_UNICODESET_DEPRECATES is defined.
00374     UnicodeSet(int8_t category, UErrorCode& status);
00375 #endif
00376     // Copy constructor.
00381     UnicodeSet(const UnicodeSet& o);
00382     // Virtual destructor.
00387     virtual ~UnicodeSet();
00388     // Copy assignment.
00393     UnicodeSet& operator=(const UnicodeSet& o);
00394     // Equality comparison against another UnicodeSet (virtual).
00406     virtual UBool operator==(const UnicodeSet& o) const;
00407     // Inequality; defined inline after the class as the negation of operator==(o).
00413     UBool operator!=(const UnicodeSet& o) const;
00414     // Returns a UnicodeFunctor*. NOTE(review): presumably a newly allocated copy owned by the caller - confirm.
00421     virtual UnicodeFunctor* clone() const;
00422     // Returns an int32_t hash code for this object.
00430     virtual int32_t hashCode(void) const;
00431 
00432     //----------------------------------------------------------------
00433     // Public API
00434     //----------------------------------------------------------------
00435     // Takes a (start, end) pair and returns UnicodeSet& (presumably *this, for chaining - confirm).
00445     UnicodeSet& set(UChar32 start, UChar32 end);
00446     // Static check on the text of pattern at index pos; the exact rule is not visible in this header.
00452     static UBool resemblesPattern(const UnicodeString& pattern,
00453                                   int32_t pos);
00454     // applyPattern overloads mirror the pattern constructors: (pattern), (+ options, symbols), (+ ParsePosition).
00466     UnicodeSet& applyPattern(const UnicodeString& pattern,
00467                              UErrorCode& status);
00468 
00484     UnicodeSet& applyPattern(const UnicodeString& pattern,
00485                              uint32_t options,
00486                              const SymbolTable* symbols,
00487                              UErrorCode& status);
00488 
00519     UnicodeSet& applyPattern(const UnicodeString& pattern,
00520                              ParsePosition& pos,
00521                              uint32_t options,
00522                              const SymbolTable* symbols,
00523                              UErrorCode& status);
00524     // Takes result by non-const reference and returns a UnicodeString&; escapeUnprintable defaults to FALSE.
00537     virtual UnicodeString& toPattern(UnicodeString& result,
00538                              UBool escapeUnprintable = FALSE) const;
00539     // Takes a UProperty selector and an int32_t value; ec is a UErrorCode passed by non-const reference.
00561     UnicodeSet& applyIntPropertyValue(UProperty prop,
00562                                       int32_t value,
00563                                       UErrorCode& ec);
00564     // Same shape as above, but property and value are given as strings.
00592     UnicodeSet& applyPropertyAlias(const UnicodeString& prop,
00593                                    const UnicodeString& value,
00594                                    UErrorCode& ec);
00595     // Size and emptiness queries (both virtual, both const).
00604     virtual int32_t size(void) const;
00605 
00612     virtual UBool isEmpty(void) const;
00613     // contains overloads: a single code point, a (start, end) pair, or a string.
00620     virtual UBool contains(UChar32 c) const;
00621 
00630     virtual UBool contains(UChar32 start, UChar32 end) const;
00631 
00639     UBool contains(const UnicodeString& s) const;
00640     // containsAll overloads take another set or a string; containsNone also accepts a (start, end) pair.
00648     virtual UBool containsAll(const UnicodeSet& c) const;
00649 
00657     UBool containsAll(const UnicodeString& s) const;
00658 
00667     UBool containsNone(UChar32 start, UChar32 end) const;
00668 
00676     UBool containsNone(const UnicodeSet& c) const;
00677 
00685     UBool containsNone(const UnicodeString& s) const;
00686     // containsSome overloads are inline; each is defined after the class as !containsNone(...) with the same arguments.
00695     inline UBool containsSome(UChar32 start, UChar32 end) const;
00696 
00704     inline UBool containsSome(const UnicodeSet& s) const;
00705 
00713     inline UBool containsSome(const UnicodeString& s) const;
00714     // Takes a Replaceable text, an offset by non-const reference (presumably advanced on a match - confirm), a limit and an incremental flag.
00719     virtual UMatchDegree matches(const Replaceable& text,
00720                          int32_t& offset,
00721                          int32_t limit,
00722                          UBool incremental);
00723 
00724 private: // static/const helpers; NOTE(review): presumably used by matches() and the lookup functions - confirm
00746     static int32_t matchRest(const Replaceable& text,
00747                              int32_t start, int32_t limit,
00748                              const UnicodeString& s);
00749     // Maps a code point to an int32_t; NOTE(review): presumably an index into list - confirm.
00759     int32_t findCodePoint(UChar32 c) const;
00760 
00761 public:
00762     // Takes the destination set by non-const reference; this object is not modified (const member).
00770     virtual void addMatchSetTo(UnicodeSet& toUnionTo) const;
00771     // indexOf / charAt: presumably inverse mappings between a code point and its position in the set - confirm.
00780     int32_t indexOf(UChar32 c) const;
00781 
00791     UChar32 charAt(int32_t index) const;
00792     // add overloads: a (start, end) pair [virtual], a single code point, or a string; each returns UnicodeSet&.
00806     virtual UnicodeSet& add(UChar32 start, UChar32 end);
00807 
00814     UnicodeSet& add(UChar32 c);
00815 
00826     UnicodeSet& add(const UnicodeString& s);
00827 
00828  private: // string helpers; NOTE(review): presumably back the public string overloads - inferred from names, confirm
00834     static int32_t getSingleCP(const UnicodeString& s);
00835 
00836     void _add(const UnicodeString& s);
00837 
00838  public: // operations taking a UnicodeString; NOTE(review): presumably applied to each code point of s - confirm
00846     UnicodeSet& addAll(const UnicodeString& s);
00847 
00855     UnicodeSet& retainAll(const UnicodeString& s);
00856 
00864     UnicodeSet& complementAll(const UnicodeString& s);
00865 
00873     UnicodeSet& removeAll(const UnicodeString& s);
00874     // Static factories returning a raw UnicodeSet*. NOTE(review): ownership presumably passes to the caller - confirm.
00883     static UnicodeSet* U_EXPORT2 createFrom(const UnicodeString& s);
00884 
00885 
00893     static UnicodeSet* U_EXPORT2 createFromAll(const UnicodeString& s);
00894     // retain / remove / complement families follow the same overload shape as add (pair, single code point, string where declared).
00907     virtual UnicodeSet& retain(UChar32 start, UChar32 end);
00908 
00909 
00914     UnicodeSet& retain(UChar32 c);
00915 
00928     virtual UnicodeSet& remove(UChar32 start, UChar32 end);
00929 
00936     UnicodeSet& remove(UChar32 c);
00937 
00946     UnicodeSet& remove(const UnicodeString& s);
00947 
00954     virtual UnicodeSet& complement(void);
00955 
00969     virtual UnicodeSet& complement(UChar32 start, UChar32 end);
00970 
00977     UnicodeSet& complement(UChar32 c);
00978 
00988     UnicodeSet& complement(const UnicodeString& s);
00989     // Set-with-set operations (all virtual), each taking another UnicodeSet by const reference.
01001     virtual UnicodeSet& addAll(const UnicodeSet& c);
01002 
01013     virtual UnicodeSet& retainAll(const UnicodeSet& c);
01014 
01025     virtual UnicodeSet& removeAll(const UnicodeSet& c);
01026 
01036     virtual UnicodeSet& complementAll(const UnicodeSet& c);
01037     // Takes no arguments and returns UnicodeSet&.
01043     virtual UnicodeSet& clear(void);
01044     // Takes an int32_t attribute. NOTE(review): presumably option flags from unicode/uset.h - confirm.
01068     UnicodeSet& closeOver(int32_t attribute);
01069     // Range accessors: a count, plus start/end lookup by int32_t index.
01077     virtual int32_t getRangeCount(void) const;
01078 
01086     virtual UChar32 getRangeStart(int32_t index) const;
01087 
01095     virtual UChar32 getRangeEnd(int32_t index) const;
01096     // Writes into a caller-supplied uint16_t buffer of destCapacity units; returns int32_t (presumably the length - confirm); ec is the error code.
01145     int32_t serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const;
01146     // NOTE(review): presumably releases spare storage - confirm in uniset.cpp.
01152     virtual UnicodeSet& compact();
01153     // Class-ID accessors: static getStaticClassID() and virtual getDynamicClassID().
01165     static UClassID U_EXPORT2 getStaticClassID(void);
01166 
01175     virtual UClassID getDynamicClassID(void) const;
01176 
01177 private:
01178 
01179     // Private API for the USet API
01180 
01181     friend class USetAccess;
01182     // Private string accessors; reachable by the friend class USetAccess declared above.
01183     int32_t getStringCount() const;
01184 
01185     const UnicodeString* getString(int32_t index) const;
01186 
01187     //----------------------------------------------------------------
01188     // RuleBasedTransliterator support
01189     //----------------------------------------------------------------
01190 
01191 private:
01192     // Takes a uint8_t index value. NOTE(review): private and virtual, so presumably overrides a base-class hook - confirm.
01198     virtual UBool matchesIndexValue(uint8_t v) const;
01199 
01200 private:
01201 
01202     //----------------------------------------------------------------
01203     // Implementation: Pattern parsing
01204     //----------------------------------------------------------------
01205     // Private overload driven by a RuleCharacterIterator; also receives a UnicodeString (rebuiltPat) by non-const reference.
01206     void applyPattern(RuleCharacterIterator& chars,
01207                       const SymbolTable* symbols,
01208                       UnicodeString& rebuiltPat,
01209                       uint32_t options,
01210                       UErrorCode& ec);
01211 
01212     //----------------------------------------------------------------
01213     // Implementation: Utility methods
01214     //----------------------------------------------------------------
01215     // Capacity helpers; NOTE(review): presumably grow list and buffer respectively - confirm.
01216     void ensureCapacity(int32_t newLen);
01217 
01218     void ensureBufferCapacity(int32_t newLen);
01219     // NOTE(review): presumably exchanges list and buffer - confirm.
01220     void swapBuffers(void);
01221     // Returns UBool. NOTE(review): presumably reports whether allocating strings succeeded - confirm.
01222     UBool allocateStrings();
01223     // Private pattern-generation helpers; unlike toPattern(), escapeUnprintable has no default here.
01224     UnicodeString& _toPattern(UnicodeString& result,
01225                               UBool escapeUnprintable) const;
01226 
01227     UnicodeString& _generatePattern(UnicodeString& result,
01228                                     UBool escapeUnprintable) const;
01229     // Static append helpers writing into buf: one overload for a string, one for a single code point.
01230     static void _appendToPat(UnicodeString& buf, const UnicodeString& s, UBool escapeUnprintable);
01231 
01232     static void _appendToPat(UnicodeString& buf, UChar32 c, UBool escapeUnprintable);
01233 
01234     //----------------------------------------------------------------
01235     // Implementation: Fundamental operators
01236     //----------------------------------------------------------------
01237     // Each operator takes a raw UChar32 array (other, otherLen) plus an int8_t polarity argument.
01238     void exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity);
01239 
01240     void add(const UChar32* other, int32_t otherLen, int8_t polarity);
01241 
01242     void retain(const UChar32* other, int32_t otherLen, int8_t polarity);
01243     // Private property-pattern detection and parsing helpers, in string/position and iterator flavours.
01249     static UBool resemblesPropertyPattern(const UnicodeString& pattern,
01250                                           int32_t pos);
01251 
01252     static UBool resemblesPropertyPattern(RuleCharacterIterator& chars,
01253                                           int32_t iterOpts);
01254 
01293     UnicodeSet& applyPropertyPattern(const UnicodeString& pattern,
01294                                      ParsePosition& ppos,
01295                                      UErrorCode &ec);
01296 
01297     void applyPropertyPattern(RuleCharacterIterator& chars,
01298                               UnicodeString& rebuiltPat,
01299                               UErrorCode& ec);
01300     // Callback type: receives a code point and an opaque context pointer, returns UBool.
01305     typedef UBool (*Filter)(UChar32 codePoint, void* context);
01306     // Takes a Filter and its context; src is an int32_t selector of the same type getInclusions() accepts.
01316     void applyFilter(Filter filter,
01317                      void* context,
01318                      int32_t src,
01319                      UErrorCode &status);
01320     // Static lookup returning a const UnicodeSet* for the given src; errorCode is passed by non-const reference.
01324     static const UnicodeSet* getInclusions(int32_t src, UErrorCode &errorCode);
01325 
01326     friend class UnicodeSetIterator;
01327 
01328     //----------------------------------------------------------------
01329     // Implementation: closeOver
01330     //----------------------------------------------------------------
01331     // caseCloseOne overloads: a folded string, a CaseEquivClass, or a single UChar.
01332     void caseCloseOne(const UnicodeString& folded);
01333 
01334     void caseCloseOne(const CaseEquivClass& c);
01335 
01336     void caseCloseOne(UChar folded);
01337     // Static lookups returning a const CaseEquivClass* for a folded string or a folded UChar.
01338     static const CaseEquivClass* getCaseMapOf(const UnicodeString& folded);
01339 
01340     static const CaseEquivClass* getCaseMapOf(UChar folded);
01341 };
01342 
01343 inline UBool UnicodeSet::operator!=(const UnicodeSet& o) const {
01344     return !(this->operator==(o)); // inequality is exactly the negation of this class's operator==
01345 }
01346 
01347 inline UBool UnicodeSet::containsSome(UChar32 start, UChar32 end) const {
01348     return !(this->containsNone(start, end)); // "some" is the negation of containsNone for the same (start, end) pair
01349 }
01350 
01351 inline UBool UnicodeSet::containsSome(const UnicodeSet& s) const {
01352     return !(this->containsNone(s)); // "some" is the negation of the containsNone overload taking a set
01353 }
01354 
01355 inline UBool UnicodeSet::containsSome(const UnicodeString& s) const {
01356     return !(this->containsNone(s)); // "some" is the negation of the containsNone overload taking a string
01357 }
01358 
01359 U_NAMESPACE_END
01360 
01361 #endif

Generated on Tue Jul 26 00:40:01 2005 for ICU 3.2 by  doxygen 1.3.9.1