Ruby 1.9.3p327(2012-11-10revision37606)
|
00001 /********************************************************************** 00002 00003 transcode_data.h - 00004 00005 $Author: nobu $ 00006 created at: Mon 10 Dec 2007 14:01:47 JST 2007 00007 00008 Copyright (C) 2007 Martin Duerst 00009 00010 **********************************************************************/ 00011 00012 #include "ruby/ruby.h" 00013 00014 #ifndef RUBY_TRANSCODE_DATA_H 00015 #define RUBY_TRANSCODE_DATA_H 1 00016 00017 #if defined __GNUC__ && __GNUC__ >= 4 00018 #pragma GCC visibility push(default) 00019 #endif 00020 00021 #define WORDINDEX_SHIFT_BITS 2 00022 #define WORDINDEX2INFO(widx) ((widx) << WORDINDEX_SHIFT_BITS) 00023 #define INFO2WORDINDEX(info) ((info) >> WORDINDEX_SHIFT_BITS) 00024 #define BYTE_LOOKUP_BASE(bl) ((bl)[0]) 00025 #define BYTE_LOOKUP_INFO(bl) ((bl)[1]) 00026 00027 #define PType (unsigned int) 00028 00029 #define NOMAP (PType 0x01) /* direct map */ 00030 #define ONEbt (0x02) /* one byte payload */ 00031 #define TWObt (0x03) /* two bytes payload */ 00032 #define THREEbt (0x05) /* three bytes payload */ 00033 #define FOURbt (0x06) /* four bytes payload, UTF-8 only, macros start at getBT0 */ 00034 #define INVALID (PType 0x07) /* invalid byte sequence */ 00035 #define UNDEF (PType 0x09) /* legal but undefined */ 00036 #define ZERObt (PType 0x0A) /* zero bytes of payload, i.e. remove */ 00037 #define FUNii (PType 0x0B) /* function from info to info */ 00038 #define FUNsi (PType 0x0D) /* function from start to info */ 00039 #define FUNio (PType 0x0E) /* function from info to output */ 00040 #define FUNso (PType 0x0F) /* function from start to output */ 00041 #define STR1 (PType 0x11) /* string 4 <= len <= 259 bytes: 1byte length + content */ 00042 #define GB4bt (PType 0x12) /* GB18030 four bytes payload */ 00043 #define FUNsio (PType 0x13) /* function from start and info to output */ 00044 00045 #define STR1_LENGTH(byte_addr) (unsigned int)(*(byte_addr) + 4) 00046 #define STR1_BYTEINDEX(w) ((w) >> 6) 00047 #define makeSTR1(bi) (((bi) << 6) | STR1) 00048 #define makeSTR1LEN(len) ((len)-4) 00049 00050 #define o1(b1) (PType((((unsigned char)(b1))<<8)|ONEbt)) 00051 #define o2(b1,b2) (PType((((unsigned char)(b1))<<8)|(((unsigned char)(b2))<<16)|TWObt)) 00052 #define o3(b1,b2,b3) (PType(((((unsigned char)(b1))<<8)|(((unsigned char)(b2))<<16)|(((unsigned int)(unsigned char)(b3))<<24)|THREEbt)&0xffffffffU)) 00053 #define o4(b0,b1,b2,b3) (PType(((((unsigned char)(b1))<<8)|(((unsigned char)(b2))<<16)|(((unsigned char)(b3))<<24)|((((unsigned char)(b0))&0x07)<<5)|FOURbt)&0xffffffffU)) 00054 #define g4(b0,b1,b2,b3) (PType(((((unsigned char)(b0))<<8)|(((unsigned char)(b2))<<16)|((((unsigned char)(b1))&0x0f)<<24)|((((unsigned int)(unsigned char)(b3))&0x0f)<<28)|GB4bt)&0xffffffffU)) 00055 #define funsio(diff) (PType((((unsigned int)(diff))<<8)|FUNsio)) 00056 00057 #define getBT1(a) ((unsigned char)((a)>> 8)) 00058 #define getBT2(a) ((unsigned char)((a)>>16)) 00059 #define getBT3(a) ((unsigned char)((a)>>24)) 00060 #define getBT0(a) (((unsigned char)((a)>> 5)&0x07)|0xF0) /* for UTF-8 only!!! */ 00061 00062 #define getGB4bt0(a) ((unsigned char)((a)>> 8)) 00063 #define getGB4bt1(a) (((unsigned char)((a)>>24)&0x0F)|0x30) 00064 #define getGB4bt2(a) ((unsigned char)((a)>>16)) 00065 #define getGB4bt3(a) (((unsigned char)((a)>>28)&0x0F)|0x30) 00066 00067 #define o2FUNii(b1,b2) (PType((((unsigned char)(b1))<<8)|(((unsigned char)(b2))<<16)|FUNii)) 00068 00069 /* do we need these??? maybe not, can be done with simple tables */ 00070 #define ONETRAIL /* legal but undefined if one more trailing UTF-8 */ 00071 #define TWOTRAIL /* legal but undefined if two more trailing UTF-8 */ 00072 #define THREETRAIL /* legal but undefined if three more trailing UTF-8 */ 00073 00074 typedef enum { 00075 asciicompat_converter, /* ASCII-compatible -> ASCII-compatible */ 00076 asciicompat_decoder, /* ASCII-incompatible -> ASCII-compatible */ 00077 asciicompat_encoder /* ASCII-compatible -> ASCII-incompatible */ 00078 /* ASCII-incompatible -> ASCII-incompatible is intentionally omitted. */ 00079 } rb_transcoder_asciicompat_type_t; 00080 00081 typedef struct rb_transcoder rb_transcoder; 00082 00083 /* static structure, one per supported encoding pair */ 00084 struct rb_transcoder { 00085 const char *src_encoding; 00086 const char *dst_encoding; 00087 unsigned int conv_tree_start; 00088 const unsigned char *byte_array; 00089 unsigned int byte_array_length; 00090 const unsigned int *word_array; 00091 unsigned int word_array_length; 00092 int word_size; 00093 int input_unit_length; 00094 int max_input; 00095 int max_output; 00096 rb_transcoder_asciicompat_type_t asciicompat_type; 00097 size_t state_size; 00098 int (*state_init_func)(void*); /* ret==0:success ret!=0:failure(errno) */ 00099 int (*state_fini_func)(void*); /* ret==0:success ret!=0:failure(errno) */ 00100 VALUE (*func_ii)(void*, VALUE); /* info -> info */ 00101 VALUE (*func_si)(void*, const unsigned char*, size_t); /* start -> info */ 00102 ssize_t (*func_io)(void*, VALUE, const unsigned char*, size_t); /* info -> output */ 00103 ssize_t (*func_so)(void*, const unsigned char*, size_t, unsigned char*, size_t); /* start -> output */ 00104 ssize_t (*finish_func)(void*, unsigned char*, size_t); /* -> output */ 00105 ssize_t (*resetsize_func)(void*); /* -> len */ 00106 ssize_t (*resetstate_func)(void*, unsigned char*, size_t); /* -> output */ 00107 ssize_t (*func_sio)(void*, const unsigned char*, size_t, VALUE, unsigned char*, size_t); /* start -> output */ 00108 }; 00109 00110 void rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib); 00111 void rb_register_transcoder(const rb_transcoder *); 00112 00113 #if defined __GNUC__ && __GNUC__ >= 4 00114 #pragma GCC visibility pop 00115 #endif 00116 00117 #endif /* RUBY_TRANSCODE_DATA_H */ 00118