00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #ifndef _m_ctype_h
00023 #define _m_ctype_h
00024
00025 #ifdef __cplusplus
00026 extern "C" {
00027 #endif
00028
/*
  Size constants for charset names and byte-indexed lookup tables.
  NOTE(review): no use of these constants is visible in this header; which
  buffer each one governs is implied by its name only - confirm at the
  point of use.
*/
00029 #define MY_CS_NAME_SIZE 32
/*
  One entry more than the 256-entry tables. NOTE(review): presumably because
  the my_is*() macros in this header index (ctype + 1)[uchar] - confirm.
*/
00030 #define MY_CS_CTYPE_TABLE_SIZE 257
00031 #define MY_CS_TO_LOWER_TABLE_SIZE 256
00032 #define MY_CS_TO_UPPER_TABLE_SIZE 256
00033 #define MY_CS_SORT_ORDER_TABLE_SIZE 256
00034 #define MY_CS_TO_UNI_TABLE_SIZE 256
00035
/*
  Relative directory name (note the trailing slash) for charset files.
  NOTE(review): the base path it is appended to is not visible here.
*/
00036 #define CHARSET_DIR "charsets/"
00037
/*
  Wide-character type used by the mb_wc()/wc_mb() members of
  MY_CHARSET_HANDLER. It is a macro alias for ulong, not a typedef, so it
  has no scope and textually replaces the identifier everywhere.
  NOTE(review): a typedef would be safer if no user relies on the macro.
*/
00038 #define my_wc_t ulong
00039
/*
  One entry of a case/sort mapping table made of three 16-bit values.
  my_wildcmp_unicode() in this header receives such tables through its
  MY_UNICASE_INFO **weights argument.
  NOTE(review): presumably one entry per code point, holding its upper-case
  form, lower-case form and sort weight - confirm against the table data.
*/
00040 typedef struct unicase_info_st
00041 {
00042 uint16 toupper;
00043 uint16 tolower;
00044 uint16 sort;
00045 } MY_UNICASE_INFO;
00046
/*
  Integer status codes for character conversion routines.
  NOTE(review): presumably the values returned by the mb_wc()/wc_mb()
  handler members - confirm against their implementations.

  MY_CS_ILSEQ and MY_CS_ILUNI share the value 0.
  MY_CS_TOOFEW(n) evaluates to -1 - n, so MY_CS_TOOFEW(0) equals
  MY_CS_TOOSMALL.

  Every negative replacement list is wrapped in parentheses so the macro
  always expands to a single primary expression, whatever operators
  surround it at the point of use.
*/
#define MY_CS_ILSEQ     0
#define MY_CS_ILUNI     0
#define MY_CS_TOOSMALL  (-1)
#define MY_CS_TOOFEW(n) (-1-(n))
00051
/*
  Sequence-kind selectors with the values 1 and 2.
  NOTE(review): presumably passed as the `sq` argument of
  MY_CHARSET_HANDLER.scan() / my_scan_8bit() - confirm against the
  implementation; nothing in this header uses them.
*/
00052 #define MY_SEQ_INTTAIL 1
00053 #define MY_SEQ_SPACES 2
00054
00055
/*
  Bit flags; each value is a distinct power of two so they can be OR-ed
  together. In this header they are tested against CHARSET_INFO.state:
  my_binary_compare() checks MY_CS_BINSORT and use_strnxfrm() checks
  MY_CS_STRNXFRM.
  NOTE(review): the meaning of the remaining flags is implied by their
  names only - confirm where they are set and tested.
*/
00056 #define MY_CS_COMPILED 1
00057 #define MY_CS_CONFIG 2
00058 #define MY_CS_INDEX 4
00059 #define MY_CS_LOADED 8
00060 #define MY_CS_BINSORT 16 /* tested by my_binary_compare() */
00061 #define MY_CS_PRIMARY 32
00062 #define MY_CS_STRNXFRM 64 /* tested by use_strnxfrm() */
00063 #define MY_CS_UNICODE 128
00064 #define MY_CS_READY 256
00065 #define MY_CS_AVAILABLE 512
00066 #define MY_CS_CSSORT 1024
/* Not a flag bit: plain value 0. NOTE(review): presumably a "no charset" id - confirm. */
00067 #define MY_CHARSET_UNDEFINED 0
00068
00069
/*
  Index entry pairing a 16-bit [from, to] pair with a byte table.
  CHARSET_INFO.tab_from_uni points at an array of these.
  NOTE(review): presumably `tab` holds the charset byte for each Unicode
  value in the inclusive range from..to - confirm against the code that
  builds and walks tab_from_uni.
*/
00070 typedef struct my_uni_idx_st
00071 {
00072 uint16 from;
00073 uint16 to;
00074 uchar *tab;
00075 } MY_UNI_IDX;
00076
/*
  Match descriptor filled in by the instr() collation operation, which
  takes a my_match_t array together with its element count (nmatch).
  NOTE(review): presumably beg/end are offsets of the match and mblen its
  length in characters - confirm against my_instr_simple()/my_instr_mb().
*/
00077 typedef struct
00078 {
00079 uint beg;
00080 uint end;
00081 uint mblen;
00082 } my_match_t;
00083
/*
  Lexer state identifiers. No explicit values are given, so enumerators are
  numbered 0, 1, 2, ... in the order written; inserting or reordering
  entries changes every following value.
  NOTE(review): presumably the byte tables CHARSET_INFO.state_map and
  ident_map store these values per input byte - confirm in the lexer.
*/
00084 enum my_lex_states
00085 {
00086 MY_LEX_START, MY_LEX_CHAR, MY_LEX_IDENT,
00087 MY_LEX_IDENT_SEP, MY_LEX_IDENT_START,
00088 MY_LEX_REAL, MY_LEX_HEX_NUMBER,
00089 MY_LEX_CMP_OP, MY_LEX_LONG_CMP_OP, MY_LEX_STRING, MY_LEX_COMMENT, MY_LEX_END,
00090 MY_LEX_OPERATOR_OR_IDENT, MY_LEX_NUMBER_IDENT, MY_LEX_INT_OR_REAL,
00091 MY_LEX_REAL_OR_POINT, MY_LEX_BOOL, MY_LEX_EOL, MY_LEX_ESCAPE,
00092 MY_LEX_LONG_COMMENT, MY_LEX_END_LONG_COMMENT, MY_LEX_SEMICOLON,
00093 MY_LEX_SET_VAR, MY_LEX_USER_END, MY_LEX_HOSTNAME, MY_LEX_SKIP,
00094 MY_LEX_USER_VARIABLE_DELIMITER, MY_LEX_SYSTEM_VAR,
00095 MY_LEX_IDENT_OR_KEYWORD,
00096 MY_LEX_IDENT_OR_HEX, MY_LEX_IDENT_OR_BIN, MY_LEX_IDENT_OR_NCHAR,
00097 MY_LEX_STRING_OR_DELIMITER
00098 };
00099
/*
  Forward declaration: the handler tables are declared before the charset
  descriptor, yet every operation takes a pointer to it.
*/
00100 struct charset_info_st;
00101
/*
  Table of collation (comparison / ordering) operations. CHARSET_INFO.coll
  points at one of these, and the my_strnxfrm(), my_strnncoll(),
  my_like_range(), my_wildcmp() and my_strcasecmp() macros in this header
  dispatch through it. Every operation receives the charset descriptor as
  its first argument.
  Member order is part of the layout: do not reorder.
*/
00102 typedef struct my_collation_handler_st
00103 {
/* Set-up hook; receives an allocator callback that takes a uint. */
00104 my_bool (*init)(struct charset_info_st *, void *(*alloc)(uint));
00105
/*
  Compare two (pointer, length) byte strings. The my_strnncoll() macro
  always passes 0 for the trailing my_bool.
  NOTE(review): presumably a "second string is a prefix" switch - confirm.
*/
00106 int (*strnncoll)(struct charset_info_st *,
00107 const uchar *, uint, const uchar *, uint, my_bool);
/* NOTE(review): presumably the variant that ignores trailing spaces - confirm. */
00108 int (*strnncollsp)(struct charset_info_st *,
00109 const uchar *, uint, const uchar *, uint);
/* Transform a source (const uchar *, uint) into a destination (uchar *, uint) buffer. */
00110 int (*strnxfrm)(struct charset_info_st *,
00111 uchar *, uint, const uchar *, uint);
/*
  Takes a pattern plus three special characters and writes two strings
  (min_str / max_str) with their lengths through the output pointers.
  NOTE(review): presumably the lower/upper key bounds for a LIKE pattern.
*/
00112 my_bool (*like_range)(struct charset_info_st *,
00113 const char *s, uint s_length,
00114 pchar w_prefix, pchar w_one, pchar w_many,
00115 uint res_length,
00116 char *min_str, char *max_str,
00117 uint *min_len, uint *max_len);
/* Wildcard match of [str, str_end) against [wildstr, wildend) with the given escape / one / many characters. */
00118 int (*wildcmp)(struct charset_info_st *,
00119 const char *str,const char *str_end,
00120 const char *wildstr,const char *wildend,
00121 int escape,int w_one, int w_many);
00122
/* Takes two char pointers without lengths. NOTE(review): presumably NUL-terminated strings - confirm. */
00123 int (*strcasecmp)(struct charset_info_st *, const char *, const char *);
00124
/* Search for s inside b; fills up to nmatch entries of match. */
00125 uint (*instr)(struct charset_info_st *,
00126 const char *b, uint b_length,
00127 const char *s, uint s_length,
00128 my_match_t *match, uint nmatch);
00129
00130
/* Hash a key of len bytes; nr1 and nr2 are passed by pointer (NOTE(review): presumably in/out accumulators). */
00131 void (*hash_sort)(struct charset_info_st *cs, const uchar *key, uint len,
00132 ulong *nr1, ulong *nr2);
00133 } MY_COLLATION_HANDLER;
00134
/*
  Collation handler tables defined in other translation units; this header
  only declares them.
*/
00135 extern MY_COLLATION_HANDLER my_collation_mb_bin_handler;
00136 extern MY_COLLATION_HANDLER my_collation_8bit_bin_handler;
00137 extern MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler;
00138 extern MY_COLLATION_HANDLER my_collation_ucs2_uca_handler;
00139
00140
/*
  Table of character-set (encoding-level) operations. CHARSET_INFO.cset
  points at one of these, and the my_ismbchar(), my_mbcharlen(),
  my_charpos(), my_caseup*(), my_casedn*() and my_strnto*() macros in this
  header dispatch through it. Every operation receives the charset
  descriptor as its first argument.
  Member order is part of the layout: do not reorder.
*/
00141 typedef struct my_charset_handler_st
00142 {
/* Set-up hook; receives an allocator callback that takes a uint. */
00143 my_bool (*init)(struct charset_info_st *, void *(*alloc)(uint));
00144
/* Multi-byte support. use_mb() tests ismbchar against NULL, so a charset may leave this member unset. */
00145 int (*ismbchar)(struct charset_info_st *, const char *, const char *);
00146 int (*mbcharlen)(struct charset_info_st *, uint);
/* Length / position queries over the range [b, e). */
00147 uint (*numchars)(struct charset_info_st *, const char *b, const char *e);
00148 uint (*charpos)(struct charset_info_st *, const char *b, const char *e, uint pos);
00149 uint (*well_formed_len)(struct charset_info_st *,
00150 const char *b,const char *e,
00151 uint nchars, int *error);
00152 uint (*lengthsp)(struct charset_info_st *, const char *ptr, uint length);
00153 uint (*numcells)(struct charset_info_st *, const char *b, const char *e);
00154
00155
/*
  Conversion between the charset's bytes and my_wc_t values: mb_wc reads
  from [s, e) into *wc, wc_mb writes wc into [s, e).
  NOTE(review): presumably they return MY_CS_* status codes or a byte
  count - confirm against the implementations.
*/
00156 int (*mb_wc)(struct charset_info_st *cs,my_wc_t *wc,
00157 const unsigned char *s,const unsigned char *e);
00158 int (*wc_mb)(struct charset_info_st *cs,my_wc_t wc,
00159 unsigned char *s,unsigned char *e);
00160
00161
/* Case conversion through a non-const char *: the *_str forms take no length, the others take one. */
00162 void (*caseup_str)(struct charset_info_st *, char *);
00163 void (*casedn_str)(struct charset_info_st *, char *);
00164 void (*caseup)(struct charset_info_st *, char *, uint);
00165 void (*casedn)(struct charset_info_st *, char *, uint);
00166
00167
/* Formatting into a caller-supplied buffer `to` of size n. */
00168 int (*snprintf)(struct charset_info_st *, char *to, uint n, const char *fmt,
00169 ...);
00170 int (*long10_to_str)(struct charset_info_st *, char *to, uint n, int radix,
00171 long int val);
00172 int (*longlong10_to_str)(struct charset_info_st *, char *to, uint n,
00173 int radix, longlong val);
00174
/* Takes a destination, a length and an int fill value. */
00175 void (*fill)(struct charset_info_st *, char *to, uint len, int fill);
00176
00177
/*
  Length-bounded string-to-number conversions: input is (s, l); an end
  pointer is stored through e and an error code through err.
*/
00178 long (*strntol)(struct charset_info_st *, const char *s, uint l,
00179 int base, char **e, int *err);
00180 ulong (*strntoul)(struct charset_info_st *, const char *s, uint l,
00181 int base, char **e, int *err);
00182 longlong (*strntoll)(struct charset_info_st *, const char *s, uint l,
00183 int base, char **e, int *err);
00184 ulonglong (*strntoull)(struct charset_info_st *, const char *s, uint l,
00185 int base, char **e, int *err);
/* Unlike the integer variants, strntod takes a non-const char *s. */
00186 double (*strntod)(struct charset_info_st *, char *s, uint l, char **e,
00187 int *err);
00188 longlong (*my_strtoll10)(struct charset_info_st *cs,
00189 const char *nptr, char **endptr, int *error);
/* Scans [b, e); NOTE(review): sq is presumably one of the MY_SEQ_* selectors - confirm. */
00190 ulong (*scan)(struct charset_info_st *, const char *b, const char *e,
00191 int sq);
00192 } MY_CHARSET_HANDLER;
00193
/*
  Charset handler tables defined in other translation units; this header
  only declares them.
*/
00194 extern MY_CHARSET_HANDLER my_charset_8bit_handler;
00195 extern MY_CHARSET_HANDLER my_charset_ucs2_handler;
00196
00197
/*
  Descriptor of one character set / collation: identification numbers and
  names, lookup tables, length limits and the two operation tables (cset,
  coll) that the my_*() macros in this header dispatch through.
  Member order is part of the layout: do not reorder.
  NOTE(review): field meanings not referenced by a macro in this header are
  implied by their names only - confirm before relying on them.
*/
00198 typedef struct charset_info_st
00199 {
00200 uint number;
00201 uint primary_number;
00202 uint binary_number;
00203 uint state; /* tested against MY_CS_* bits by my_binary_compare() and use_strnxfrm() */
00204 const char *csname;
00205 const char *name;
00206 const char *comment;
00207 const char *tailoring;
00208 uchar *ctype; /* my_is*() macros read (ctype + 1)[uchar], i.e. entries 1..256 */
00209 uchar *to_lower; /* indexed by (uchar) c in my_tolower() */
00210 uchar *to_upper; /* indexed by (uchar) c in my_toupper() */
00211 uchar *sort_order;
00212 uint16 *contractions;
00213 uint16 **sort_order_big;
00214 uint16 *tab_to_uni;
00215 MY_UNI_IDX *tab_from_uni;
00216 uchar *state_map;
00217 uchar *ident_map;
00218 uint strxfrm_multiply;
00219 uint mbminlen;
00220 uint mbmaxlen;
00221 uint16 min_sort_char;
00222 uint16 max_sort_char;
00223
00224 MY_CHARSET_HANDLER *cset; /* encoding-level operations */
00225 MY_COLLATION_HANDLER *coll; /* comparison / ordering operations */
00226
00227 } CHARSET_INFO;
00228
00229
/*
  Charset descriptors defined in other translation units; this header only
  declares them. Names follow the pattern my_charset_<charset>_<collation>.
*/
00230 extern CHARSET_INFO my_charset_bin;
00231 extern CHARSET_INFO my_charset_big5_chinese_ci;
00232 extern CHARSET_INFO my_charset_big5_bin;
00233 extern CHARSET_INFO my_charset_cp932_japanese_ci;
00234 extern CHARSET_INFO my_charset_cp932_bin;
00235 extern CHARSET_INFO my_charset_euckr_korean_ci;
00236 extern CHARSET_INFO my_charset_euckr_bin;
00237 extern CHARSET_INFO my_charset_gb2312_chinese_ci;
00238 extern CHARSET_INFO my_charset_gb2312_bin;
00239 extern CHARSET_INFO my_charset_gbk_chinese_ci;
00240 extern CHARSET_INFO my_charset_gbk_bin;
00241 extern CHARSET_INFO my_charset_latin1;
00242 extern CHARSET_INFO my_charset_latin1_german2_ci;
00243 extern CHARSET_INFO my_charset_latin1_bin;
00244 extern CHARSET_INFO my_charset_latin2_czech_ci;
00245 extern CHARSET_INFO my_charset_sjis_japanese_ci;
00246 extern CHARSET_INFO my_charset_sjis_bin;
00247 extern CHARSET_INFO my_charset_tis620_thai_ci;
00248 extern CHARSET_INFO my_charset_tis620_bin;
00249 extern CHARSET_INFO my_charset_ucs2_general_ci;
00250 extern CHARSET_INFO my_charset_ucs2_bin;
00251 extern CHARSET_INFO my_charset_ucs2_general_uca;
00252 extern CHARSET_INFO my_charset_ujis_japanese_ci;
00253 extern CHARSET_INFO my_charset_ujis_bin;
00254 extern CHARSET_INFO my_charset_utf8_general_ci;
00255 extern CHARSET_INFO my_charset_utf8_bin;
00256 extern CHARSET_INFO my_charset_cp1250_czech_ci;
00257
00258
/*
  "simple" implementations whose signatures match the strnxfrm, strnncoll,
  strnncollsp, hash_sort and instr members of MY_COLLATION_HANDLER and the
  lengthsp member of MY_CHARSET_HANDLER, so they can be stored in those
  tables. Defined elsewhere.
*/
00259 extern int my_strnxfrm_simple(CHARSET_INFO *, uchar *, uint, const uchar *,
00260 uint);
00261 extern int my_strnncoll_simple(CHARSET_INFO *, const uchar *, uint,
00262 const uchar *, uint, my_bool);
00263
00264 extern int my_strnncollsp_simple(CHARSET_INFO *, const uchar *, uint,
00265 const uchar *, uint);
00266
00267 extern void my_hash_sort_simple(CHARSET_INFO *cs,
00268 const uchar *key, uint len,
00269 ulong *nr1, ulong *nr2);
00270
00271 extern uint my_lengthsp_8bit(CHARSET_INFO *cs, const char *ptr, uint length);
00272
00273 extern uint my_instr_simple(struct charset_info_st *,
00274 const char *b, uint b_length,
00275 const char *s, uint s_length,
00276 my_match_t *match, uint nmatch);
00277
00278
00279
/*
  8-bit (one byte per character) implementations. Their signatures match
  the same-named members of MY_CHARSET_HANDLER (my_strcasecmp_8bit matches
  MY_COLLATION_HANDLER.strcasecmp). Defined elsewhere.
*/
00280 extern void my_caseup_str_8bit(CHARSET_INFO *, char *);
00281 extern void my_casedn_str_8bit(CHARSET_INFO *, char *);
00282 extern void my_caseup_8bit(CHARSET_INFO *, char *, uint);
00283 extern void my_casedn_8bit(CHARSET_INFO *, char *, uint);
00284
00285 extern int my_strcasecmp_8bit(CHARSET_INFO * cs, const char *, const char *);
00286
/* Byte <-> my_wc_t conversion; shapes match the mb_wc / wc_mb members. */
00287 int my_mb_wc_8bit(CHARSET_INFO *cs,my_wc_t *wc, const uchar *s,const uchar *e);
00288 int my_wc_mb_8bit(CHARSET_INFO *cs,my_wc_t wc, uchar *s, uchar *e);
00289
00290 ulong my_scan_8bit(CHARSET_INFO *cs, const char *b, const char *e, int sq);
00291
00292 int my_snprintf_8bit(struct charset_info_st *, char *to, uint n,
00293 const char *fmt, ...);
00294
/* Length-bounded numeric parsing: input (s, l); end pointer via e, error code via err. */
00295 long my_strntol_8bit(CHARSET_INFO *, const char *s, uint l, int base,
00296 char **e, int *err);
00297 ulong my_strntoul_8bit(CHARSET_INFO *, const char *s, uint l, int base,
00298 char **e, int *err);
00299 longlong my_strntoll_8bit(CHARSET_INFO *, const char *s, uint l, int base,
00300 char **e, int *err);
00301 ulonglong my_strntoull_8bit(CHARSET_INFO *, const char *s, uint l, int base,
00302 char **e, int *err);
/* Takes a non-const char *s, unlike the integer parsers above. */
00303 double my_strntod_8bit(CHARSET_INFO *, char *s, uint l,char **e,
00304 int *err);
00305 int my_long10_to_str_8bit(CHARSET_INFO *, char *to, uint l, int radix,
00306 long int val);
00307 int my_longlong10_to_str_8bit(CHARSET_INFO *, char *to, uint l, int radix,
00308 longlong val);
00309
/* The ucs2 variant of my_strtoll10 is declared here next to the 8-bit one. */
00310 longlong my_strtoll10_8bit(CHARSET_INFO *cs,
00311 const char *nptr, char **endptr, int *error);
00312 longlong my_strtoll10_ucs2(CHARSET_INFO *cs,
00313 const char *nptr, char **endptr, int *error);
00314
00315 void my_fill_8bit(CHARSET_INFO *cs, char* to, uint l, int fill);
00316
/*
  like_range implementations for simple, multi-byte and ucs2 charsets. All
  three share one parameter list: a pattern (ptr, ptr_length), three
  special characters, a result length, and two output buffers with their
  length outputs.
  These prototypes declare the special characters as pbool, while the
  like_range member of MY_COLLATION_HANDLER declares them as pchar.
  NOTE(review): assumes pbool and pchar are compatible typedefs - confirm
  in the header that defines them.
*/
00317 my_bool my_like_range_simple(CHARSET_INFO *cs,
00318 const char *ptr, uint ptr_length,
00319 pbool escape, pbool w_one, pbool w_many,
00320 uint res_length,
00321 char *min_str, char *max_str,
00322 uint *min_length, uint *max_length);
00323
00324 my_bool my_like_range_mb(CHARSET_INFO *cs,
00325 const char *ptr, uint ptr_length,
00326 pbool escape, pbool w_one, pbool w_many,
00327 uint res_length,
00328 char *min_str, char *max_str,
00329 uint *min_length, uint *max_length);
00330
00331 my_bool my_like_range_ucs2(CHARSET_INFO *cs,
00332 const char *ptr, uint ptr_length,
00333 pbool escape, pbool w_one, pbool w_many,
00334 uint res_length,
00335 char *min_str, char *max_str,
00336 uint *min_length, uint *max_length);
00337
00338
/*
  8-bit wildcard matching and length/position helpers. Signatures match the
  wildcmp member of MY_COLLATION_HANDLER and the numchars, numcells,
  charpos, well_formed_len and mbcharlen members of MY_CHARSET_HANDLER.
  Defined elsewhere.
*/
00339 int my_wildcmp_8bit(CHARSET_INFO *,
00340 const char *str,const char *str_end,
00341 const char *wildstr,const char *wildend,
00342 int escape, int w_one, int w_many);
00343
00344 uint my_numchars_8bit(CHARSET_INFO *, const char *b, const char *e);
00345 uint my_numcells_8bit(CHARSET_INFO *, const char *b, const char *e);
00346 uint my_charpos_8bit(CHARSET_INFO *, const char *b, const char *e, uint pos);
00347 uint my_well_formed_len_8bit(CHARSET_INFO *, const char *b, const char *e,
00348 uint pos, int *error);
00349 int my_mbcharlen_8bit(CHARSET_INFO *, uint c);
00350
00351
00352
/*
  Multi-byte counterparts of the 8-bit helpers, with the same parameter
  lists. Defined elsewhere.
*/
00353 extern void my_caseup_str_mb(CHARSET_INFO *, char *);
00354 extern void my_casedn_str_mb(CHARSET_INFO *, char *);
00355 extern void my_caseup_mb(CHARSET_INFO *, char *, uint);
00356 extern void my_casedn_mb(CHARSET_INFO *, char *, uint);
00357 extern int my_strcasecmp_mb(CHARSET_INFO * cs,const char *, const char *);
00358
00359 int my_wildcmp_mb(CHARSET_INFO *,
00360 const char *str,const char *str_end,
00361 const char *wildstr,const char *wildend,
00362 int escape, int w_one, int w_many);
00363 uint my_numchars_mb(CHARSET_INFO *, const char *b, const char *e);
00364 uint my_numcells_mb(CHARSET_INFO *, const char *b, const char *e);
00365 uint my_charpos_mb(CHARSET_INFO *, const char *b, const char *e, uint pos);
00366 uint my_well_formed_len_mb(CHARSET_INFO *, const char *b, const char *e,
00367 uint pos, int *error);
00368 uint my_instr_mb(struct charset_info_st *,
00369 const char *b, uint b_length,
00370 const char *s, uint s_length,
00371 my_match_t *match, uint nmatch);
00372
/*
  Wildcard match with one extra trailing argument, a MY_UNICASE_INFO **
  table. Because of that argument its signature does not match the wildcmp
  handler member, so it cannot be stored there directly.
*/
00373 int my_wildcmp_unicode(CHARSET_INFO *cs,
00374 const char *str, const char *str_end,
00375 const char *wildstr, const char *wildend,
00376 int escape, int w_one, int w_many,
00377 MY_UNICASE_INFO **weights);
00378
/*
  Parse a charset description held in memory (pointer + length) and hand
  results to the `add` callback, which receives a CHARSET_INFO *.
  NOTE(review): the first parameter is named `bug`; since it is paired with
  `len` this is presumably a typo for `buf` - confirm against the
  definition. NOTE(review): return-value convention (which my_bool value
  means failure) is not visible here.
*/
00379 extern my_bool my_parse_charset_xml(const char *bug, uint len,
00380 int (*add)(CHARSET_INFO *cs));
00381
/*
  Character-class bits stored in a charset's ctype table. The values are
  octal literals, each a distinct single bit, so one table byte can carry
  several classes at once. The per-bit notes follow from how the my_is*()
  macros below combine them.
*/
#define _MY_U   01      /* tested by my_isupper() */
#define _MY_L   02      /* tested by my_islower() */
#define _MY_NMR 04      /* tested by my_isdigit() */
#define _MY_SPC 010     /* tested by my_isspace() */
#define _MY_PNT 020     /* tested by my_ispunct() */
#define _MY_CTR 040     /* tested by my_iscntrl() */
#define _MY_B   0100    /* counted as printable but not as graphic */
#define _MY_X   0200    /* tested by my_isxdigit() */

/*
  Charset-independent helpers working on the raw value of c.
  my_isascii(c) is non-zero when no bit above the low seven is set.
*/
#define my_isascii(c)   (!((c) & ~0177))
#define my_toascii(c)   ((c) & 0177)
#define my_tocntrl(c)   ((c) & 31)
#define my_toprint(c)   ((c) | 64)

/*
  Case mapping through the charset's 256-entry tables. `s` is a pointer to
  an object with to_upper / to_lower byte tables; c is converted to uchar
  before indexing, so negative char values are safe.

  The whole expansion is parenthesized: a bare "(char) expr" replacement
  list is a cast expression, which does not parse after `sizeof` and
  breaks the convention that a function-like macro behaves as one primary
  expression.
*/
#define my_toupper(s,c) ((char) ((s)->to_upper[(uchar) (c)]))
#define my_tolower(s,c) ((char) ((s)->to_lower[(uchar) (c)]))

/*
  Classification through the charset's ctype table. The table is read at
  offset +1, i.e. entry 0 is skipped and entries 1..256 describe bytes
  0..255. Each macro yields the masked bits (non-zero when the character
  belongs to the class), not a normalised 0/1.
*/
#define my_isalpha(s, c)  (((s)->ctype+1)[(uchar) (c)] & (_MY_U | _MY_L))
#define my_isupper(s, c)  (((s)->ctype+1)[(uchar) (c)] & _MY_U)
#define my_islower(s, c)  (((s)->ctype+1)[(uchar) (c)] & _MY_L)
#define my_isdigit(s, c)  (((s)->ctype+1)[(uchar) (c)] & _MY_NMR)
#define my_isxdigit(s, c) (((s)->ctype+1)[(uchar) (c)] & _MY_X)
#define my_isalnum(s, c)  (((s)->ctype+1)[(uchar) (c)] & (_MY_U | _MY_L | _MY_NMR))
#define my_isspace(s, c)  (((s)->ctype+1)[(uchar) (c)] & _MY_SPC)
#define my_ispunct(s, c)  (((s)->ctype+1)[(uchar) (c)] & _MY_PNT)
#define my_isprint(s, c)  (((s)->ctype+1)[(uchar) (c)] & (_MY_PNT | _MY_U | _MY_L | _MY_NMR | _MY_B))
#define my_isgraph(s, c)  (((s)->ctype+1)[(uchar) (c)] & (_MY_PNT | _MY_U | _MY_L | _MY_NMR))
#define my_iscntrl(s, c)  (((s)->ctype+1)[(uchar) (c)] & _MY_CTR)
00409
00410
/*
  Identifier-character tests: alphanumeric (my_isvar) or alphabetic
  (my_isvar_start) in charset s, or the underscore.
  Both macros expand c twice (once in the class test, once in the '_'
  comparison), so the argument must not have side effects.
*/
00411 #define my_isvar(s,c) (my_isalnum(s,c) || (c) == '_')
00412 #define my_isvar_start(s,c) (my_isalpha(s,c) || (c) == '_')
00413
/*
  State tests: each yields the masked bit of s->state (non-zero when set),
  not a normalised 0/1.
*/
00414 #define my_binary_compare(s) ((s)->state & MY_CS_BINSORT)
00415 #define use_strnxfrm(s) ((s)->state & MY_CS_STRNXFRM)
/*
  Dispatch wrappers: call the named operation through the charset's
  collation table (s->coll), passing s itself as the first argument.
  s is expanded twice, so it must not have side effects.
*/
00416 #define my_strnxfrm(s, a, b, c, d) ((s)->coll->strnxfrm((s), (a), (b), (c), (d)))
/* Always passes 0 as the trailing my_bool argument of strnncoll. */
00417 #define my_strnncoll(s, a, b, c, d) ((s)->coll->strnncoll((s), (a), (b), (c), (d), 0))
00418 #define my_like_range(s, a, b, c, d, e, f, g, h, i, j) \
00419 ((s)->coll->like_range((s), (a), (b), (c), (d), (e), (f), (g), (h), (i), (j)))
00420 #define my_wildcmp(cs,s,se,w,we,e,o,m) ((cs)->coll->wildcmp((cs),(s),(se),(w),(we),(e),(o),(m)))
00421 #define my_strcasecmp(s, a, b) ((s)->coll->strcasecmp((s), (a), (b)))
/*
  Dispatches through the charset table (cs->cset) and casts both range
  pointers to const char *. Unlike the wrappers above, the expansion has no
  outer parentheses.
*/
00422 #define my_charpos(cs, b, e, num) (cs)->cset->charpos((cs), (const char*) (b), (const char *)(e), (num))
00423
00424
/*
  Multi-byte helpers.
  use_mb(s) is true when the charset table provides an ismbchar function.
  my_ismbchar() calls that function without checking for NULL, so callers
  must test use_mb() first.
*/
00425 #define use_mb(s) ((s)->cset->ismbchar != NULL)
00426 #define my_ismbchar(s, a, b) ((s)->cset->ismbchar((s), (a), (b)))
/*
  With USE_MB defined the length is obtained from the charset table;
  otherwise the macro is the constant 1 and its arguments are not evaluated.
*/
00427 #ifdef USE_MB
00428 #define my_mbcharlen(s, a) ((s)->cset->mbcharlen((s),(a)))
00429 #else
00430 #define my_mbcharlen(s, a) 1
00431 #endif
00432
/*
  Dispatch wrappers: call the named operation through the charset table
  (s->cset), passing s itself as the first argument and forwarding the
  remaining arguments unchanged. s is expanded twice, so it must not have
  side effects.
*/
00433 #define my_caseup(s, a, l) ((s)->cset->caseup((s), (a), (l)))
00434 #define my_casedn(s, a, l) ((s)->cset->casedn((s), (a), (l)))
00435 #define my_caseup_str(s, a) ((s)->cset->caseup_str((s), (a)))
00436 #define my_casedn_str(s, a) ((s)->cset->casedn_str((s), (a)))
/* Argument order follows the handler members: (s, string, length, base, end-pointer, error). */
00437 #define my_strntol(s, a, b, c, d, e) ((s)->cset->strntol((s),(a),(b),(c),(d),(e)))
00438 #define my_strntoul(s, a, b, c, d, e) ((s)->cset->strntoul((s),(a),(b),(c),(d),(e)))
00439 #define my_strntoll(s, a, b, c, d, e) ((s)->cset->strntoll((s),(a),(b),(c),(d),(e)))
00440 #define my_strntoull(s, a, b, c,d, e) ((s)->cset->strntoull((s),(a),(b),(c),(d),(e)))
/* strntod has no base argument: (s, string, length, end-pointer, error). */
00441 #define my_strntod(s, a, b, c, d) ((s)->cset->strntod((s),(a),(b),(c),(d)))
00442
00443
00444
/*
  TIS620 support is deliberately disabled: defining MY_CHARSET_TIS620 hits
  the #error below, so a translation unit with that macro set cannot build
  and the following #define / #include never take effect in a successful
  compilation.
*/
00445 #ifdef MY_CHARSET_TIS620
00446 #error The TIS620 charset is broken at the moment. Tell tim to fix it.
00447 #define USE_TIS620
00448 #include "t_ctype.h"
00449 #endif
00450
00451 #ifdef __cplusplus
00452 }
00453 #endif
00454
00455 #endif