mysql:1790
From: <takeshi@xxxxxxxxxx>
Date: Tue, 21 Mar 2000 20:18:42 +0900
Subject: [mysql 01790] sjis & order by
MySQL を --with-charset=sjis で作成して、
char 型(binary 属性はつけない)に sjis データを入れた時、
order by が一部、正しく出なかったのを修正するパッチです。
検証してみてください。
3.22.32用のパッチです
現在の MySQL のコードでは、char 型に代入された SJIS 文字の
2byte 目が 0x41-0x5a (=[A-Z]), 0x61-0x7a (=[a-z]) の時に、
2byte 目が ケース非依存でソートされてしまいます。
これを正しく処理するパッチです
EUC は、2byte 目も 8bit 文字なので、この問題はおきません。
なお、本パッチは、2バイトコードの SJIS しか考慮していません。
半角かなの場合の処理は手つかずです。
ソースコードは、ほとんど GBK 用のソースのコピーです.
--
村上 毅 takeshi@xxxxxxxxxx
--- include/m_ctype.h.in.orig Tue Mar 21 16:43:54 2000
+++ include/m_ctype.h.in Tue Mar 21 15:03:51 2000
@@ -138,6 +138,8 @@
#define ismbhead(c) issjishead(c)
#define mbcharlen(c) (issjishead(c)? 2: 0)
#define MBMAXLEN 2
+#undef USE_STRCOLL
+#define USE_STRCOLL
#endif
/* Support for Chinese(BIG5) characters, by jou@xxxxxxxxxx
--- strings/ctype-sjis.c.orig Tue Mar 21 16:44:12 2000
+++ strings/ctype-sjis.c Tue Mar 21 17:55:23 2000
@@ -1,7 +1,15 @@
/* This file is for Shift JIS charset, and created by tommy@xxxxxxxxxx.
*/
+#include <stdio.h>
#include <global.h>
#include "m_string.h"
+#include "m_ctype.h"
+
+#define issjiscode(c,d) (issjishead(c) && issjistail(d)) /* c passes issjishead() and d passes issjistail() */
+#define gbkcode(c,d) (((uchar)(c) <<8) | (uchar)(d)) /* pack two bytes into one code: c is the high byte, d the low byte */
+#define gbkhead(e) ((uchar)((e)>>8)) /* high byte of a packed code; argument parenthesized so expressions are safe */
+#define gbktail(e) ((uchar)((e)&0xff)) /* low byte of a packed code; argument parenthesized so expressions are safe */
+
uchar NEAR ctype_sjis[257] =
{
@@ -37,7 +45,7 @@
0020, 0020, 0020, 0020, 0020, 0020, 0020, 0020,
0020, 0020, 0020, 0020, 0020, 0020, 0020, 0020,
0020, 0020, 0020, 0020, 0020, 0020, 0020, 0020,
- 0020, 0020, 0020, 0020, 0020, 0000, 0000, 0000,
+ 0020, 0020, 0020, 0020, 0020, 0000, 0000, 0000
};
uchar NEAR to_lower_sjis[]=
@@ -73,7 +81,7 @@
'\340','\341','\342','\343','\344','\345','\346','\347',
'\350','\351','\352','\353','\354','\355','\356','\357',
'\360','\361','\362','\363','\364','\365','\366','\367',
- '\370','\371','\372','\373','\374','\375','\376','\377',
+ '\370','\371','\372','\373','\374','\375','\376','\377'
};
uchar NEAR to_upper_sjis[]=
@@ -109,7 +117,7 @@
'\340','\341','\342','\343','\344','\345','\346','\347',
'\350','\351','\352','\353','\354','\355','\356','\357',
'\360','\361','\362','\363','\364','\365','\366','\367',
- '\370','\371','\372','\373','\374','\375','\376','\377',
+ '\370','\371','\372','\373','\374','\375','\376','\377'
};
uchar NEAR sort_order_sjis[]=
@@ -145,5 +153,146 @@
'\340','\341','\342','\343','\344','\345','\346','\347',
'\350','\351','\352','\353','\354','\355','\356','\357',
'\360','\361','\362','\363','\364','\365','\366','\367',
- '\370','\371','\372','\373','\374','\375','\376','\377',
+ '\370','\371','\372','\373','\374','\375','\376','\377'
};
+
+/*
+uint16 gbksortorder(uint16 i)
+{
+ uint index=gbktail(i);
+ if (index>0x7f) index-=0x41;
+ else index-=0x40;
+ index+=(gbkhead(i)-0x81)*0xbe;
+ return 0x8100+gbk_order[index];
+}
+*/
+
+uint16 sjissortorder(uint16 i) /* Sort weight for a packed double-byte code. */
+{
+ return (i); /* identity mapping: the packed code itself is used as the weight */
+}
+
+uint MY_STRXFRM_MULTIPLY=1; /* NOTE(review): presumably the output/input growth factor of my_strnxfrm (it writes one byte per input byte) - confirm against callers */
+
+int my_strnncoll(const uchar * s1, int len1, const uchar * s2, int len2) /* Compare two length-counted strings; result is <0, 0 or >0. */
+{
+ uint len,c1,c2;
+
+ len = min(len1,len2); /* only the common prefix is compared byte by byte */
+ while (len--)
+ {
+ if ((len > 0) && issjiscode(*s1,*(s1+1)) && issjiscode(*s2, *(s2+1))) /* len > 0: a second byte is still available on both sides */
+ {
+ c1=gbkcode(*s1,*(s1+1));
+ c2=gbkcode(*s2,*(s2+1));
+ if (c1!=c2)
+ return ((int) sjissortorder(c1) - (int) sjissortorder(c2)); /* double-byte pairs are ordered by sjissortorder(), not by my_sort_order */
+ s1+=2;
+ s2+=2;
+ --len; /* account for the second byte consumed */
+ } else if (my_sort_order[(uchar) *s1++] != my_sort_order[(uchar) *s2++]) /* otherwise compare one byte through my_sort_order */
+ return ((int) my_sort_order[(uchar) s1[-1]] - /* s1/s2 were already advanced, hence index -1 */
+ (int) my_sort_order[(uchar) s2[-1]]);
+ }
+ return (int) (len1-len2); /* common prefix is equal: the length difference decides */
+}
+
+int my_strcoll(const uchar * s1, const uchar * s2) /* Compare two NUL-terminated strings via my_strnncoll(). */
+{
+ return my_strnncoll(s1,strlen(s1),s2,strlen(s2)); /* NOTE(review): uchar* is passed to strlen(), which takes const char* - may warn on strict compilers */
+}
+
+int my_strnxfrm(uchar * dest, uchar * src, int len, int srclen) /* Write the sort key for the first srclen bytes of src into dest. */
+{
+ uint16 e;
+
+ len = srclen; /* NOTE(review): the incoming len is discarded here, so dest must hold at least srclen bytes - confirm callers guarantee this */
+ while (len--)
+ {
+ if ((len > 0) && issjiscode(*src, *(src+1))) /* len > 0: a second byte is still available */
+ {
+ e = sjissortorder(gbkcode(*src, *(src+1)));
+ *dest++ = gbkhead(e); /* double-byte weight is emitted high byte first */
+ *dest++ = gbktail(e);
+ src+=2;
+ len--; /* account for the second byte consumed */
+ } else
+ *dest++ = my_sort_order[(uchar) *src++]; /* single byte: mapped through my_sort_order */
+ }
+ return srclen; /* one output byte per input byte, so the key length equals srclen */
+}
+
+int my_strxfrm(uchar * dest, uchar * src, int len) /* NUL-terminated wrapper around my_strnxfrm(). */
+{
+ return my_strnxfrm(dest,src,len,strlen(src)); /* source length comes from strlen(); NOTE(review): uchar* passed to strlen() may warn */
+}
+
+/*
+** Calculate min_str and max_str that ranges a LIKE string.
+** Arguments:
+** ptr Pointer to LIKE string.
+** ptr_length Length of LIKE string.
+** escape Escape character in LIKE. (Normally '\').
+** All escape characters should be removed from min_str and max_str
+** res_length Length of min_str and max_str.
+** min_str Smallest case sensitive string that ranges LIKE.
+** Should be space padded to res_length.
+** max_str Largest case sensitive string that ranges LIKE.
+** Normally padded with the biggest character sort value.
+**
+** The function should return 0 if ok and 1 if the LIKE string can't be
+** optimized !
+*/
+
+#define max_sort_char 255 /* value written to max_str for wildcard positions */
+#define wild_one '_'
+#define wild_many '%'
+
+extern my_bool my_like_range(const char *ptr,uint ptr_length,pchar escape,
+ uint res_length, char *min_str,char *max_str,
+ uint *min_length,uint *max_length)
+{
+ const char *end=ptr+ptr_length;
+ char *min_org=min_str;
+ char *min_end=min_str+res_length; /* min_str and max_str advance in lock step, so one bound serves both */
+
+ for (; ptr != end && min_str != min_end ; ptr++)
+ {
+ if (ptr+1 != end && issjiscode(ptr[0],ptr[1])) /* double-byte char: tested first so its tail byte is never taken for escape or wildcard */
+ {
+ *min_str++= *max_str++ = *ptr++;
+ if (min_str != min_end) *min_str++= *max_str++ = *ptr; /* write the tail only if room remains: never run past res_length */
+ continue;
+ }
+ if (*ptr == escape && ptr+1 != end)
+ {
+ ptr++; /* Skip escape */
+ *min_str++= *max_str++ = *ptr;
+ continue;
+ }
+ if (*ptr == wild_one) /* '_' in SQL */
+ {
+ *min_str++='\0'; /* This should be min char */
+ *max_str++=max_sort_char;
+ continue;
+ }
+ if (*ptr == wild_many) /* '%' in SQL */
+ {
+ *min_length= (uint) (min_str - min_org);
+ *max_length= res_length;
+ do {
+ *min_str++ = '\0'; /* Because of key compression */
+ *max_str++ = max_sort_char;
+ } while (min_str != min_end);
+ return 0;
+ }
+ *min_str++= *max_str++ = *ptr;
+ }
+ *min_length= *max_length = (uint) (min_str - min_org);
+ while (min_str != min_end)
+ {
+ *min_str++ = ' '; /* Because of key compression */
+ *max_str++ = ' ';
+ }
+ return 0; /* this implementation always reports the range as usable */
+}
-> 1790 2000-03-21 20:18 [<takeshi@xxxxxxxxxx>] sjis & order by 1792 2000-03-22 01:52 ┗[とみたまさひろ <tomm] 1794 2000-03-22 04:23 ┗[<takeshi@xxxxxxxxxx>]