From 72e7f011b29998d8a3e15eb5b381ef962af5fe5b Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Fri, 5 Apr 2019 10:30:58 +0300 Subject: Upgrade to 8.0.15 --- mysql/strings/ctype-mb.c | 1502 ---------------------------------------------- 1 file changed, 1502 deletions(-) delete mode 100644 mysql/strings/ctype-mb.c (limited to 'mysql/strings/ctype-mb.c') diff --git a/mysql/strings/ctype-mb.c b/mysql/strings/ctype-mb.c deleted file mode 100644 index 784aab1..0000000 --- a/mysql/strings/ctype-mb.c +++ /dev/null @@ -1,1502 +0,0 @@ -/* Copyright (c) 2002, 2016, Oracle and/or its affiliates. All rights reserved. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; version 2 of the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ - -#include -#include "m_ctype.h" -#include "m_string.h" - - -size_t my_caseup_str_mb(const CHARSET_INFO *cs, char *str) -{ - uint32 l; - const uchar *map= cs->to_upper; - char *str_orig= str; - - while (*str) - { - /* Pointing after the '\0' is safe here. */ - if ((l= my_ismbchar(cs, str, str + cs->mbmaxlen))) - str+= l; - else - { - *str= (char) map[(uchar)*str]; - str++; - } - } - return (size_t) (str - str_orig); -} - - -size_t my_casedn_str_mb(const CHARSET_INFO *cs, char *str) -{ - uint32 l; - const uchar *map= cs->to_lower; - char *str_orig= str; - - while (*str) - { - /* Pointing after the '\0' is safe here. */ - if ((l= my_ismbchar(cs, str, str + cs->mbmaxlen))) - str+= l; - else - { - *str= (char) map[(uchar)*str]; - str++; - } - } - return (size_t) (str - str_orig); -} - - -static inline const MY_UNICASE_CHARACTER* -get_case_info_for_ch(const CHARSET_INFO *cs, uint page, uint offs) -{ - const MY_UNICASE_CHARACTER *p; - return cs->caseinfo ? ((p= cs->caseinfo->page[page]) ? &p[offs] : NULL) : NULL; -} - - -/* - For character sets which don't change octet length in case conversion. -*/ -size_t my_caseup_mb(const CHARSET_INFO *cs, char *src, size_t srclen, - char *dst MY_ATTRIBUTE((unused)), - size_t dstlen MY_ATTRIBUTE((unused))) -{ - uint32 l; - char *srcend= src + srclen; - const uchar *map= cs->to_upper; - - DBUG_ASSERT(cs->caseup_multiply == 1); - DBUG_ASSERT(src == dst && srclen == dstlen); - DBUG_ASSERT(cs->mbmaxlen == 2); - - while (src < srcend) - { - if ((l=my_ismbchar(cs, src, srcend))) - { - const MY_UNICASE_CHARACTER *ch; - if ((ch= get_case_info_for_ch(cs, (uchar) src[0], (uchar) src[1]))) - { - *src++= ch->toupper >> 8; - *src++= ch->toupper & 0xFF; - } - else - src+= l; - } - else - { - *src=(char) map[(uchar) *src]; - src++; - } - } - return srclen; -} - - -size_t my_casedn_mb(const CHARSET_INFO *cs, char *src, size_t srclen, - char *dst MY_ATTRIBUTE((unused)), - size_t dstlen MY_ATTRIBUTE((unused))) -{ - uint32 l; - char *srcend= src + srclen; - const uchar *map=cs->to_lower; - - DBUG_ASSERT(cs->casedn_multiply == 1); - DBUG_ASSERT(src == dst && srclen == dstlen); - DBUG_ASSERT(cs->mbmaxlen == 2); - - while (src < srcend) - { - if ((l= my_ismbchar(cs, src, srcend))) - { - const MY_UNICASE_CHARACTER *ch; - if ((ch= get_case_info_for_ch(cs, (uchar) src[0], (uchar) src[1]))) - { - *src++= ch->tolower >> 8; - *src++= ch->tolower & 0xFF; - } - else - src+= l; - } - else - { - *src= (char) map[(uchar)*src]; - src++; - } - } - return srclen; -} - - -/* - Case folding functions for character set - where case conversion can change string octet length. - For example, in EUCKR, - _euckr 0xA9A5 == "LATIN LETTER DOTLESS I" (Turkish letter) - is upper-cased to to - _euckr 0x49 "LATIN CAPITAL LETTER I" ('usual' letter I) - Length is reduced in this example from two bytes to one byte. -*/ -static size_t -my_casefold_mb_varlen(const CHARSET_INFO *cs, - char *src, size_t srclen, - char *dst, size_t dstlen MY_ATTRIBUTE((unused)), - const uchar *map, - size_t is_upper) -{ - char *srcend= src + srclen, *dst0= dst; - - DBUG_ASSERT(cs->mbmaxlen == 2); - - while (src < srcend) - { - size_t mblen= my_ismbchar(cs, src, srcend); - if (mblen) - { - const MY_UNICASE_CHARACTER *ch; - if ((ch= get_case_info_for_ch(cs, (uchar) src[0], (uchar) src[1]))) - { - int code= is_upper ? ch->toupper : ch->tolower; - src+= 2; - if (code > 0xFF) - *dst++= code >> 8; - *dst++= code & 0xFF; - } - else - { - *dst++= *src++; - *dst++= *src++; - } - } - else - { - *dst++= (char) map[(uchar) *src++]; - } - } - return (size_t) (dst - dst0); -} - - -size_t -my_casedn_mb_varlen(const CHARSET_INFO *cs, char *src, size_t srclen, - char *dst, size_t dstlen) -{ - DBUG_ASSERT(dstlen >= srclen * cs->casedn_multiply); - DBUG_ASSERT(src != dst || cs->casedn_multiply == 1); - return my_casefold_mb_varlen(cs, src, srclen, dst, dstlen, cs->to_lower, 0); -} - - -size_t -my_caseup_mb_varlen(const CHARSET_INFO *cs, char *src, size_t srclen, - char *dst, size_t dstlen) -{ - DBUG_ASSERT(dstlen >= srclen * cs->caseup_multiply); - DBUG_ASSERT(src != dst || cs->caseup_multiply == 1); - return my_casefold_mb_varlen(cs, src, srclen, dst, dstlen, cs->to_upper, 1); -} - - -/* - my_strcasecmp_mb() returns 0 if strings are equal, non-zero otherwise. - */ - -int my_strcasecmp_mb(const CHARSET_INFO *cs,const char *s, const char *t) -{ - uint32 l; - const uchar *map=cs->to_upper; - - while (*s && *t) - { - /* Pointing after the '\0' is safe here. */ - if ((l=my_ismbchar(cs, s, s + cs->mbmaxlen))) - { - while (l--) - if (*s++ != *t++) - return 1; - } - else if (my_mbcharlen(cs, *t) != 1 || - map[(uchar) *s++] != map[(uchar) *t++]) - return 1; - } - /* At least one of '*s' and '*t' is zero here. */ - DBUG_ASSERT(!*t || !*s); - return (*t != *s); -} - - -/* -** Compare string against string with wildcard -** 0 if matched -** -1 if not matched with wildcard -** 1 if matched with wildcard -*/ - -#define INC_PTR(cs,A,B) A+=(my_ismbchar(cs,A,B) ? my_ismbchar(cs,A,B) : 1) - -#define likeconv(s,A) (uchar) (s)->sort_order[(uchar) (A)] - -static -int my_wildcmp_mb_impl(const CHARSET_INFO *cs, - const char *str,const char *str_end, - const char *wildstr,const char *wildend, - int escape, int w_one, int w_many, int recurse_level) -{ - int result= -1; /* Not found, using wildcards */ - - if (my_string_stack_guard && my_string_stack_guard(recurse_level)) - return 1; - while (wildstr != wildend) - { - while (*wildstr != w_many && *wildstr != w_one) - { - int l; - if (*wildstr == escape && wildstr+1 != wildend) - wildstr++; - if ((l = my_ismbchar(cs, wildstr, wildend))) - { - if (str+l > str_end || memcmp(str, wildstr, l) != 0) - return 1; - str += l; - wildstr += l; - } - else - if (str == str_end || likeconv(cs,*wildstr++) != likeconv(cs,*str++)) - return(1); /* No match */ - if (wildstr == wildend) - return (str != str_end); /* Match if both are at end */ - result=1; /* Found an anchor char */ - } - if (*wildstr == w_one) - { - do - { - if (str == str_end) /* Skip one char if possible */ - return (result); - INC_PTR(cs,str,str_end); - } while (++wildstr < wildend && *wildstr == w_one); - if (wildstr == wildend) - break; - } - if (*wildstr == w_many) - { /* Found w_many */ - uchar cmp; - const char* mb = wildstr; - int mb_len=0; - - wildstr++; - /* Remove any '%' and '_' from the wild search string */ - for (; wildstr != wildend ; wildstr++) - { - if (*wildstr == w_many) - continue; - if (*wildstr == w_one) - { - if (str == str_end) - return (-1); - INC_PTR(cs,str,str_end); - continue; - } - break; /* Not a wild character */ - } - if (wildstr == wildend) - return(0); /* Ok if w_many is last */ - if (str == str_end) - return -1; - - if ((cmp= *wildstr) == escape && wildstr+1 != wildend) - cmp= *++wildstr; - - mb=wildstr; - mb_len= my_ismbchar(cs, wildstr, wildend); - INC_PTR(cs,wildstr,wildend); /* This is compared trough cmp */ - cmp=likeconv(cs,cmp); - do - { - for (;;) - { - if (str >= str_end) - return -1; - if (mb_len) - { - if (str+mb_len <= str_end && memcmp(str, mb, mb_len) == 0) - { - str += mb_len; - break; - } - } - else if (!my_ismbchar(cs, str, str_end) && - likeconv(cs,*str) == cmp) - { - str++; - break; - } - INC_PTR(cs,str, str_end); - } - { - int tmp=my_wildcmp_mb_impl(cs,str,str_end, - wildstr,wildend,escape,w_one, - w_many, recurse_level + 1); - if (tmp <= 0) - return (tmp); - } - } while (str != str_end && wildstr[0] != w_many); - return(-1); - } - } - return (str != str_end ? 1 : 0); -} - -int my_wildcmp_mb(const CHARSET_INFO *cs, - const char *str,const char *str_end, - const char *wildstr,const char *wildend, - int escape, int w_one, int w_many) -{ - return my_wildcmp_mb_impl(cs, str, str_end, - wildstr, wildend, - escape, w_one, w_many, 1); -} - - -size_t my_numchars_mb(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)), - const char *pos, const char *end) -{ - size_t count= 0; - while (pos < end) - { - uint mb_len; - pos+= (mb_len= my_ismbchar(cs,pos,end)) ? mb_len : 1; - count++; - } - return count; -} - - -size_t my_charpos_mb(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)), - const char *pos, const char *end, size_t length) -{ - const char *start= pos; - - while (length && pos < end) - { - uint mb_len; - pos+= (mb_len= my_ismbchar(cs, pos, end)) ? mb_len : 1; - length--; - } - return (size_t) (length ? end+2-start : pos-start); -} - - -size_t my_well_formed_len_mb(const CHARSET_INFO *cs, const char *b, - const char *e, size_t pos, int *error) -{ - const char *b_start= b; - *error= 0; - while (pos) - { - my_wc_t wc; - int mb_len; - - if ((mb_len= cs->cset->mb_wc(cs, &wc, (uchar*) b, (uchar*) e)) <= 0) - { - *error= b < e ? 1 : 0; - break; - } - b+= mb_len; - pos--; - } - return (size_t) (b - b_start); -} - - -uint my_instr_mb(const CHARSET_INFO *cs, - const char *b, size_t b_length, - const char *s, size_t s_length, - my_match_t *match, uint nmatch) -{ - const char *end, *b0; - int res= 0; - - if (s_length <= b_length) - { - if (!s_length) - { - if (nmatch) - { - match->beg= 0; - match->end= 0; - match->mb_len= 0; - } - return 1; /* Empty string is always found */ - } - - b0= b; - end= b+b_length-s_length+1; - - while (b < end) - { - int mb_len; - - if (!cs->coll->strnncoll(cs, (uchar*) b, s_length, - (uchar*) s, s_length, 0)) - { - if (nmatch) - { - match[0].beg= 0; - match[0].end= (uint) (b-b0); - match[0].mb_len= res; - if (nmatch > 1) - { - match[1].beg= match[0].end; - match[1].end= match[0].end + (uint)s_length; - match[1].mb_len= 0; /* Not computed */ - } - } - return 2; - } - mb_len= (mb_len= my_ismbchar(cs, b, end)) ? mb_len : 1; - b+= mb_len; - b_length-= mb_len; - res++; - } - } - return 0; -} - - -/* BINARY collations handlers for MB charsets */ - -int -my_strnncoll_mb_bin(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)), - const uchar *s, size_t slen, - const uchar *t, size_t tlen, - my_bool t_is_prefix) -{ - size_t len= MY_MIN(slen,tlen); - int cmp= memcmp(s,t,len); - return cmp ? cmp : (int) ((t_is_prefix ? len : slen) - tlen); -} - - -/* - Compare two strings. - - SYNOPSIS - my_strnncollsp_mb_bin() - cs Chararacter set - s String to compare - slen Length of 's' - t String to compare - tlen Length of 't' - diff_if_only_endspace_difference - Set to 1 if the strings should be regarded as different - if they only difference in end space - - NOTE - This function is used for character strings with binary collations. - The shorter string is extended with end space to be as long as the longer - one. - - RETURN - A negative number if s < t - A positive number if s > t - 0 if strings are equal -*/ - -int -my_strnncollsp_mb_bin(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)), - const uchar *a, size_t a_length, - const uchar *b, size_t b_length, - my_bool diff_if_only_endspace_difference) -{ - const uchar *end; - size_t length; - int res; - -#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE - diff_if_only_endspace_difference= 0; -#endif - - end= a + (length= MY_MIN(a_length, b_length)); - while (a < end) - { - if (*a++ != *b++) - return ((int) a[-1] - (int) b[-1]); - } - res= 0; - if (a_length != b_length) - { - int swap= 1; - if (diff_if_only_endspace_difference) - res= 1; /* Assume 'a' is bigger */ - /* - Check the next not space character of the longer key. If it's < ' ', - then it's smaller than the other key. - */ - if (a_length < b_length) - { - /* put shorter key in s */ - a_length= b_length; - a= b; - swap= -1; /* swap sign of result */ - res= -res; - } - for (end= a + a_length-length; a < end ; a++) - { - if (*a != ' ') - return (*a < ' ') ? -swap : swap; - } - } - return res; -} - - -/* - Copy one non-ascii character. - "dst" must have enough room for the character. - Note, we don't use sort_order[] in this macros. - This is correct even for case insensitive collations: - - basic Latin letters are processed outside this macros; - - for other characters sort_order[x] is equal to x. -*/ -#define my_strnxfrm_mb_non_ascii_char(cs, dst, src, se) \ -{ \ - switch (cs->cset->ismbchar(cs, (const char*) src, (const char*) se)) { \ - case 4: \ - *dst++= *src++; \ - /* fall through */ \ - case 3: \ - *dst++= *src++; \ - /* fall through */ \ - case 2: \ - *dst++= *src++; \ - /* fall through */ \ - case 0: \ - *dst++= *src++; /* byte in range 0x80..0xFF which is not MB head */ \ - } \ -} - - -/* - For character sets with two or three byte multi-byte - characters having multibyte weights *equal* to their codes: - cp932, euckr, gb2312, sjis, eucjpms, ujis. -*/ -size_t -my_strnxfrm_mb(const CHARSET_INFO *cs, - uchar *dst, size_t dstlen, uint nweights, - const uchar *src, size_t srclen, uint flags) -{ - uchar *d0= dst; - uchar *de= dst + dstlen; - const uchar *se= src + srclen; - const uchar *sort_order= cs->sort_order; - - DBUG_ASSERT(cs->mbmaxlen <= 4); - - /* - If "srclen" is smaller than both "dstlen" and "nweights" - then we can run a simplified loop - - without checking "nweights" and "de". - */ - if (dstlen >= srclen && nweights >= srclen) - { - if (sort_order) - { - /* Optimized version for a case insensitive collation */ - for (; src < se; nweights--) - { - if (*src < 128) /* quickly catch ASCII characters */ - *dst++= sort_order[*src++]; - else - my_strnxfrm_mb_non_ascii_char(cs, dst, src, se); - } - } - else - { - /* Optimized version for a case sensitive collation (no sort_order) */ - for (; src < se; nweights--) - { - if (*src < 128) /* quickly catch ASCII characters */ - *dst++= *src++; - else - my_strnxfrm_mb_non_ascii_char(cs, dst, src, se); - } - } - goto pad; - } - - /* - A thourough loop, checking all possible limits: - "se", "nweights" and "de". - */ - for (; src < se && nweights && dst < de; nweights--) - { - int chlen; - if (*src < 128 || - !(chlen= cs->cset->ismbchar(cs, (const char*) src, (const char*) se))) - { - /* Single byte character */ - *dst++= sort_order ? sort_order[*src++] : *src++; - } - else - { - /* Multi-byte character */ - size_t len= (dst + chlen <= de) ? chlen : de - dst; - memcpy(dst, src, len); - dst+= len; - src+= len; - } - } - -pad: - return my_strxfrm_pad_desc_and_reverse(cs, d0, dst, de, nweights, flags, 0); -} - - -int -my_strcasecmp_mb_bin(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)), - const char *s, const char *t) -{ - return strcmp(s,t); -} - - -void -my_hash_sort_mb_bin(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)), - const uchar *key, size_t len,ulong *nr1, ulong *nr2) -{ - const uchar *pos = key; - - /* - Remove trailing spaces. We have to do this to be able to compare - 'A ' and 'A' as identical - */ - key= skip_trailing_space(key, len); - - for (; pos < (uchar*) key ; pos++) - { - nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) * - ((uint)*pos)) + (nr1[0] << 8); - nr2[0]+=3; - } -} - - -/* - Fill the given buffer with 'maximum character' for given charset - SYNOPSIS - pad_max_char() - cs Character set - str Start of buffer to fill - end End of buffer to fill - - DESCRIPTION - Write max key: - - for non-Unicode character sets: - just memset using max_sort_char if max_sort_char is one byte. - In case when max_sort_char is two bytes, fill with double-byte pairs - and optionally pad with a single space character. - - for Unicode character set (utf-8): - create a buffer with multibyte representation of the max_sort_char - character, and copy it into max_str in a loop. -*/ -static void pad_max_char(const CHARSET_INFO *cs, char *str, char *end) -{ - char buf[10]; - char buflen; - - if (!(cs->state & MY_CS_UNICODE)) - { - if (cs->max_sort_char <= 255) - { - memset(str, cs->max_sort_char, end - str); - return; - } - else if (cs->max_sort_char <= 0xFFFF) - { - buf[0]= (char)(cs->max_sort_char >> 8); - buf[1]= cs->max_sort_char & 0xFF; - buflen= 2; - } - else - { - /* Currently, it's only for GB18030, so it must be a 4-byte char */ - DBUG_ASSERT(cs->max_sort_char > 0xFFFFFF); - buf[0]= cs->max_sort_char >> 24 & 0xFF; - buf[1]= cs->max_sort_char >> 16 & 0xFF; - buf[2]= cs->max_sort_char >> 8 & 0xFF; - buf[3]= cs->max_sort_char & 0xFF; - buflen= 4; - } - } - else - { - buflen= cs->cset->wc_mb(cs, cs->max_sort_char, (uchar*) buf, - (uchar*) buf + sizeof(buf)); - } - - DBUG_ASSERT(buflen > 0); - do - { - if ((str + buflen) <= end) - { - /* Enough space for the characer */ - memcpy(str, buf, buflen); - str+= buflen; - } - else - { - /* - There is no space for whole multibyte - character, then add trailing spaces. - */ - *str++= ' '; - } - } while (str < end); -} - -/* -** Calculate min_str and max_str that ranges a LIKE string. -** Arguments: -** ptr Pointer to LIKE string. -** ptr_length Length of LIKE string. -** escape Escape character in LIKE. (Normally '\'). -** All escape characters should be removed from min_str and max_str -** res_length Length of min_str and max_str. -** min_str Smallest case sensitive string that ranges LIKE. -** Should be space padded to res_length. -** max_str Largest case sensitive string that ranges LIKE. -** Normally padded with the biggest character sort value. -** -** The function should return 0 if ok and 1 if the LIKE string can't be -** optimized ! -*/ - -my_bool my_like_range_mb(const CHARSET_INFO *cs, - const char *ptr,size_t ptr_length, - pbool escape, pbool w_one, pbool w_many, - size_t res_length, - char *min_str,char *max_str, - size_t *min_length,size_t *max_length) -{ - uint mb_len; - const char *end= ptr + ptr_length; - char *min_org= min_str; - char *min_end= min_str + res_length; - char *max_end= max_str + res_length; - size_t maxcharlen= res_length / cs->mbmaxlen; - const MY_CONTRACTIONS *contractions= my_charset_get_contractions(cs, 0); - - for (; ptr != end && min_str != min_end && maxcharlen ; maxcharlen--) - { - /* We assume here that escape, w_any, w_namy are one-byte characters */ - if (*ptr == escape && ptr+1 != end) - ptr++; /* Skip escape */ - else if (*ptr == w_one || *ptr == w_many) /* '_' and '%' in SQL */ - { -fill_max_and_min: - /* - Calculate length of keys: - 'a\0\0... is the smallest possible string when we have space expand - a\ff\ff... is the biggest possible string - */ - *min_length= ((cs->state & MY_CS_BINSORT) ? (size_t) (min_str - min_org) : - res_length); - *max_length= res_length; - /* Create min key */ - do - { - *min_str++= (char) cs->min_sort_char; - } while (min_str != min_end); - - /* - Write max key: create a buffer with multibyte - representation of the max_sort_char character, - and copy it into max_str in a loop. - */ - *max_length= res_length; - pad_max_char(cs, max_str, max_end); - return 0; - } - if ((mb_len= my_ismbchar(cs, ptr, end)) > 1) - { - if (ptr+mb_len > end || min_str+mb_len > min_end) - break; - while (mb_len--) - *min_str++= *max_str++= *ptr++; - } - else - { - /* - Special case for collations with contractions. - For example, in Chezh, 'ch' is a separate letter - which is sorted between 'h' and 'i'. - If the pattern 'abc%', 'c' at the end can mean: - - letter 'c' itself, - - beginning of the contraction 'ch'. - - If we simply return this LIKE range: - - 'abc\min\min\min' and 'abc\max\max\max' - - then this query: SELECT * FROM t1 WHERE a LIKE 'abc%' - will only find values starting from 'abc[^h]', - but won't find values starting from 'abch'. - - We must ignore contraction heads followed by w_one or w_many. - ('Contraction head' means any letter which can be the first - letter in a contraction) - - For example, for Czech 'abc%', we will return LIKE range, - which is equal to LIKE range for 'ab%': - - 'ab\min\min\min\min' and 'ab\max\max\max\max'. - - */ - if (contractions && ptr + 1 < end && - my_uca_can_be_contraction_head(contractions, (uchar) *ptr)) - { - /* Ptr[0] is a contraction head. */ - - if (ptr[1] == w_one || ptr[1] == w_many) - { - /* Contraction head followed by a wildcard, quit. */ - goto fill_max_and_min; - } - - /* - Some letters can be both contraction heads and contraction tails. - For example, in Danish 'aa' is a separate single letter which - is sorted after 'z'. So 'a' can be both head and tail. - - If ptr[0]+ptr[1] is a contraction, - then put both letters together. - - If ptr[1] can be a contraction part, but ptr[0]+ptr[1] - is not a contraction, then we put only ptr[0], - and continue with ptr[1] on the next loop. - */ - if (my_uca_can_be_contraction_tail(contractions, (uchar) ptr[1]) && - my_uca_contraction2_weight(contractions, (uchar) ptr[0], ptr[1])) - { - /* Contraction found */ - if (maxcharlen == 1 || min_str + 1 >= min_end) - { - /* Both contraction parts don't fit, quit */ - goto fill_max_and_min; - } - - /* Put contraction head */ - *min_str++= *max_str++= *ptr++; - maxcharlen--; - } - } - /* Put contraction tail, or a single character */ - *min_str++= *max_str++= *ptr++; - } - } - - *min_length= *max_length = (size_t) (min_str - min_org); - while (min_str != min_end) - *min_str++= *max_str++= ' '; /* Because if key compression */ - return 0; -} - - -/** - Calculate min_str and max_str that ranges a LIKE string. - Generic function, currently used for ucs2, utf16, utf32, - but should be suitable for any other character sets with - cs->min_sort_char and cs->max_sort_char represented in - Unicode code points. - - @param cs Character set and collation pointer - @param ptr Pointer to LIKE pattern. - @param ptr_length Length of LIKE pattern. - @param escape Escape character pattern, typically '\'. - @param w_one 'One character' pattern, typically '_'. - @param w_many 'Many characters' pattern, typically '%'. - @param res_length Length of min_str and max_str. - - @param[out] min_str Smallest string that ranges LIKE. - @param[out] max_str Largest string that ranges LIKE. - @param[out] min_len Length of min_str - @param[out] max_len Length of max_str - - @return Optimization status. - @retval FALSE if LIKE pattern can be optimized - @rerval TRUE if LIKE can't be optimized. -*/ -my_bool -my_like_range_generic(const CHARSET_INFO *cs, - const char *ptr, size_t ptr_length, - pbool escape, pbool w_one, pbool w_many, - size_t res_length, - char *min_str,char *max_str, - size_t *min_length,size_t *max_length) -{ - const char *end= ptr + ptr_length; - const char *min_org= min_str; - const char *max_org= max_str; - char *min_end= min_str + res_length; - char *max_end= max_str + res_length; - size_t charlen= res_length / cs->mbmaxlen; - size_t res_length_diff; - const MY_CONTRACTIONS *contractions= my_charset_get_contractions(cs, 0); - - for ( ; charlen > 0; charlen--) - { - my_wc_t wc, wc2; - int res; - if ((res= cs->cset->mb_wc(cs, &wc, (uchar*) ptr, (uchar*) end)) <= 0) - { - if (res == MY_CS_ILSEQ) /* Bad sequence */ - return TRUE; /* min_length and max_length are not important */ - break; /* End of the string */ - } - ptr+= res; - - if (wc == (my_wc_t) escape) - { - if ((res= cs->cset->mb_wc(cs, &wc, (uchar*) ptr, (uchar*) end)) <= 0) - { - if (res == MY_CS_ILSEQ) - return TRUE; /* min_length and max_length are not important */ - /* - End of the string: Escape is the last character. - Put escape as a normal character. - We'll will leave the loop on the next iteration. - */ - } - else - ptr+= res; - - /* Put escape character to min_str and max_str */ - if ((res= cs->cset->wc_mb(cs, wc, - (uchar*) min_str, (uchar*) min_end)) <= 0) - goto pad_set_lengths; /* No space */ - min_str+= res; - - if ((res= cs->cset->wc_mb(cs, wc, - (uchar*) max_str, (uchar*) max_end)) <= 0) - goto pad_set_lengths; /* No space */ - max_str+= res; - continue; - } - else if (wc == (my_wc_t) w_one) - { - if ((res= cs->cset->wc_mb(cs, cs->min_sort_char, - (uchar*) min_str, (uchar*) min_end)) <= 0) - goto pad_set_lengths; - min_str+= res; - - if ((res= cs->cset->wc_mb(cs, cs->max_sort_char, - (uchar*) max_str, (uchar*) max_end)) <= 0) - goto pad_set_lengths; - max_str+= res; - continue; - } - else if (wc == (my_wc_t) w_many) - { - /* - Calculate length of keys: - a\min\min... is the smallest possible string - a\max\max... is the biggest possible string - */ - *min_length= ((cs->state & MY_CS_BINSORT) ? - (size_t) (min_str - min_org) : - res_length); - *max_length= res_length; - goto pad_min_max; - } - - if (contractions && - my_uca_can_be_contraction_head(contractions, wc) && - (res= cs->cset->mb_wc(cs, &wc2, (uchar*) ptr, (uchar*) end)) > 0) - { - uint16 *weight; - if ((wc2 == (my_wc_t) w_one || wc2 == (my_wc_t) w_many)) - { - /* Contraction head followed by a wildcard */ - *min_length= *max_length= res_length; - goto pad_min_max; - } - - if (my_uca_can_be_contraction_tail(contractions, wc2) && - (weight= my_uca_contraction2_weight(contractions, wc, wc2)) && weight[0]) - { - /* Contraction found */ - if (charlen == 1) - { - /* contraction does not fit to result */ - *min_length= *max_length= res_length; - goto pad_min_max; - } - - ptr+= res; - charlen--; - - /* Put contraction head */ - if ((res= cs->cset->wc_mb(cs, wc, - (uchar*) min_str, (uchar*) min_end)) <= 0) - goto pad_set_lengths; - min_str+= res; - - if ((res= cs->cset->wc_mb(cs, wc, - (uchar*) max_str, (uchar*) max_end)) <= 0) - goto pad_set_lengths; - max_str+= res; - wc= wc2; /* Prepare to put contraction tail */ - } - } - - /* Normal character, or contraction tail */ - if ((res= cs->cset->wc_mb(cs, wc, - (uchar*) min_str, (uchar*) min_end)) <= 0) - goto pad_set_lengths; - min_str+= res; - if ((res= cs->cset->wc_mb(cs, wc, - (uchar*) max_str, (uchar*) max_end)) <= 0) - goto pad_set_lengths; - max_str+= res; - } - -pad_set_lengths: - *min_length= (size_t) (min_str - min_org); - *max_length= (size_t) (max_str - max_org); - -pad_min_max: - /* - Fill up max_str and min_str to res_length. - fill() cannot set incomplete characters and - requires that "length" argument is divisible to mbminlen. - Make sure to call fill() with proper "length" argument. - */ - res_length_diff= res_length % cs->mbminlen; - cs->cset->fill(cs, min_str, min_end - min_str - res_length_diff, - cs->min_sort_char); - cs->cset->fill(cs, max_str, max_end - max_str - res_length_diff, - cs->max_sort_char); - - /* In case of incomplete characters set the remainder to 0x00's */ - if (res_length_diff) - { - /* Example: odd res_length for ucs2 */ - memset(min_end - res_length_diff, 0, res_length_diff); - memset(max_end - res_length_diff, 0, res_length_diff); - } - return FALSE; -} - - -static -int -my_wildcmp_mb_bin_impl(const CHARSET_INFO *cs, - const char *str,const char *str_end, - const char *wildstr,const char *wildend, - int escape, int w_one, int w_many, int recurse_level) -{ - int result= -1; /* Not found, using wildcards */ - - if (my_string_stack_guard && my_string_stack_guard(recurse_level)) - return 1; - while (wildstr != wildend) - { - while (*wildstr != w_many && *wildstr != w_one) - { - int l; - if (*wildstr == escape && wildstr+1 != wildend) - wildstr++; - if ((l = my_ismbchar(cs, wildstr, wildend))) - { - if (str+l > str_end || memcmp(str, wildstr, l) != 0) - return 1; - str += l; - wildstr += l; - } - else - if (str == str_end || *wildstr++ != *str++) - return(1); /* No match */ - if (wildstr == wildend) - return (str != str_end); /* Match if both are at end */ - result=1; /* Found an anchor char */ - } - if (*wildstr == w_one) - { - do - { - if (str == str_end) /* Skip one char if possible */ - return (result); - INC_PTR(cs,str,str_end); - } while (++wildstr < wildend && *wildstr == w_one); - if (wildstr == wildend) - break; - } - if (*wildstr == w_many) - { /* Found w_many */ - int cmp; - const char* mb = wildstr; - int mb_len=0; - - wildstr++; - /* Remove any '%' and '_' from the wild search string */ - for (; wildstr != wildend ; wildstr++) - { - if (*wildstr == w_many) - continue; - if (*wildstr == w_one) - { - if (str == str_end) - return (-1); - INC_PTR(cs,str,str_end); - continue; - } - break; /* Not a wild character */ - } - if (wildstr == wildend) - return(0); /* Ok if w_many is last */ - if (str == str_end) - return -1; - - if ((cmp= *wildstr) == escape && wildstr+1 != wildend) - cmp= *++wildstr; - - mb=wildstr; - mb_len= my_ismbchar(cs, wildstr, wildend); - INC_PTR(cs,wildstr,wildend); /* This is compared trough cmp */ - do - { - for (;;) - { - if (str >= str_end) - return -1; - if (mb_len) - { - if (str+mb_len <= str_end && memcmp(str, mb, mb_len) == 0) - { - str += mb_len; - break; - } - } - else if (!my_ismbchar(cs, str, str_end) && *str == cmp) - { - str++; - break; - } - INC_PTR(cs,str, str_end); - } - { - int tmp=my_wildcmp_mb_bin_impl(cs,str,str_end, - wildstr,wildend,escape, - w_one,w_many, recurse_level + 1); - if (tmp <= 0) - return (tmp); - } - } while (str != str_end && wildstr[0] != w_many); - return(-1); - } - } - return (str != str_end ? 1 : 0); -} - -int -my_wildcmp_mb_bin(const CHARSET_INFO *cs, - const char *str,const char *str_end, - const char *wildstr,const char *wildend, - int escape, int w_one, int w_many) -{ - return my_wildcmp_mb_bin_impl(cs, str, str_end, - wildstr, wildend, - escape, w_one, w_many, 1); -} - - -/* - Data was produced from EastAsianWidth.txt - using utt11-dump utility. -*/ -static const char pg11[256]= -{ -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,1, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -}; - -static const char pg23[256]= -{ -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -}; - -static const char pg2E[256]= -{ -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0 -}; - -static const char pg2F[256]= -{ -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0 -}; - -static const char pg30[256]= -{ -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0, -0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 -}; - -static const char pg31[256]= -{ -0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 -}; - -static const char pg32[256]= -{ -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0 -}; - -static const char pg4D[256]= -{ -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -}; - -static const char pg9F[256]= -{ -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -}; - -static const char pgA4[256]= -{ -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -}; - -static const char pgD7[256]= -{ -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -}; - -static const char pgFA[256]= -{ -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -}; - -static const char pgFE[256]= -{ -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -}; - -static const char pgFF[256]= -{ -0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -}; - -static const struct {int page; const char *p;} utr11_data[256]= -{ -{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL}, -{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL}, -{0,NULL},{0,pg11},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL}, -{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL}, -{0,NULL},{0,NULL},{0,NULL},{0,pg23},{0,NULL},{0,NULL},{0,NULL},{0,NULL}, -{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,pg2E},{0,pg2F}, -{0,pg30},{0,pg31},{0,pg32},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL}, -{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL}, -{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL}, -{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pg4D},{1,NULL},{1,NULL}, -{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL}, -{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL}, -{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL}, -{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL}, -{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL}, -{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL}, -{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL}, -{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL}, -{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL}, -{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pg9F}, -{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pgA4},{0,NULL},{0,NULL},{0,NULL}, -{0,NULL},{0,NULL},{0,NULL},{0,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL}, -{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL}, -{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL}, -{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL}, -{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL}, -{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pgD7}, -{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL}, -{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL}, -{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL}, -{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL}, -{0,NULL},{1,NULL},{0,pgFA},{0,NULL},{0,NULL},{0,NULL},{0,pgFE},{0,pgFF} -}; - - -size_t my_numcells_mb(const CHARSET_INFO *cs, const char *b, const char *e) -{ - my_wc_t wc; - size_t clen= 0; - - while (b < e) - { - int mb_len; - uint pg; - if ((mb_len= cs->cset->mb_wc(cs, &wc, (uchar*) b, (uchar*) e)) <= 0 || - wc > 0xFFFF) - { - /* - Let's think a wrong sequence takes 1 dysplay cell. - Also, consider supplementary characters as taking one cell. - */ - mb_len= 1; - b++; - continue; - } - b+= mb_len; - if (wc > 0xFFFF) - { - if (wc >= 0x20000 && wc <= 0x3FFFD) /* CJK Ideograph Extension B, C */ - clen+= 1; - } - else - { - pg= (wc >> 8) & 0xFF; - clen+= utr11_data[pg].p ? utr11_data[pg].p[wc & 0xFF] : utr11_data[pg].page; - } - clen++; - } - return clen; -} - - -int my_mb_ctype_mb(const CHARSET_INFO *cs, int *ctype, - const uchar *s, const uchar *e) -{ - my_wc_t wc; - int res= cs->cset->mb_wc(cs, &wc, s, e); - if (res <= 0 || wc > 0xFFFF) - *ctype= 0; - else - *ctype= my_uni_ctype[wc>>8].ctype ? - my_uni_ctype[wc>>8].ctype[wc&0xFF] : - my_uni_ctype[wc>>8].pctype; - return res; -} - - -MY_COLLATION_HANDLER my_collation_mb_bin_handler = -{ - NULL, /* init */ - my_strnncoll_mb_bin, - my_strnncollsp_mb_bin, - my_strnxfrm_mb, - my_strnxfrmlen_simple, - my_like_range_mb, - my_wildcmp_mb_bin, - my_strcasecmp_mb_bin, - my_instr_mb, - my_hash_sort_mb_bin, - my_propagate_simple -}; -- cgit v1.1