Adjust string comparison so that only bitwise-equal strings are considered
equal: if strcoll() claims two strings are equal, verify that result with strcmp(), and if the strings are not identical, sort them according to strcmp(). This fixes inconsistent behavior under glibc's hu_HU locale, and probably under some other locales as well. Also, take advantage of the now-well-defined behavior to speed up texteq, textne, bpchareq, and bpcharne: they may as well just do a bitwise comparison and not bother with strcoll() at all. NOTE: affected databases may need to REINDEX indexes on text columns to be sure they are self-consistent.
This commit is contained in:
parent
7b53b45a64
commit
656beff590
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/access/hash/hashfunc.c,v 1.45 2005/10/15 02:49:08 momjian Exp $
|
* $PostgreSQL: pgsql/src/backend/access/hash/hashfunc.c,v 1.46 2005/12/22 22:50:00 tgl Exp $
|
||||||
*
|
*
|
||||||
* NOTES
|
* NOTES
|
||||||
* These functions are stored in pg_amproc. For each operator class
|
* These functions are stored in pg_amproc. For each operator class
|
||||||
@ -138,9 +138,9 @@ hashtext(PG_FUNCTION_ARGS)
|
|||||||
Datum result;
|
Datum result;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Note: this is currently identical in behavior to hashvarlena, but it
|
* Note: this is currently identical in behavior to hashvarlena, but
|
||||||
* seems likely that we may need to do something different in non-C
|
* keep it as a separate function in case we someday want to do something
|
||||||
* locales. (See also hashbpchar, if so.)
|
* different in non-C locales. (See also hashbpchar, if so.)
|
||||||
*/
|
*/
|
||||||
result = hash_any((unsigned char *) VARDATA(key),
|
result = hash_any((unsigned char *) VARDATA(key),
|
||||||
VARSIZE(key) - VARHDRSZ);
|
VARSIZE(key) - VARHDRSZ);
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/utils/adt/varchar.c,v 1.113 2005/10/15 02:49:30 momjian Exp $
|
* $PostgreSQL: pgsql/src/backend/utils/adt/varchar.c,v 1.114 2005/12/22 22:50:00 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -614,11 +614,14 @@ bpchareq(PG_FUNCTION_ARGS)
|
|||||||
len1 = bcTruelen(arg1);
|
len1 = bcTruelen(arg1);
|
||||||
len2 = bcTruelen(arg2);
|
len2 = bcTruelen(arg2);
|
||||||
|
|
||||||
/* fast path for different-length inputs */
|
/*
|
||||||
|
* Since we only care about equality or not-equality, we can avoid all
|
||||||
|
* the expense of strcoll() here, and just do bitwise comparison.
|
||||||
|
*/
|
||||||
if (len1 != len2)
|
if (len1 != len2)
|
||||||
result = false;
|
result = false;
|
||||||
else
|
else
|
||||||
result = (varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2) == 0);
|
result = (strncmp(VARDATA(arg1), VARDATA(arg2), len1) == 0);
|
||||||
|
|
||||||
PG_FREE_IF_COPY(arg1, 0);
|
PG_FREE_IF_COPY(arg1, 0);
|
||||||
PG_FREE_IF_COPY(arg2, 1);
|
PG_FREE_IF_COPY(arg2, 1);
|
||||||
@ -638,11 +641,14 @@ bpcharne(PG_FUNCTION_ARGS)
|
|||||||
len1 = bcTruelen(arg1);
|
len1 = bcTruelen(arg1);
|
||||||
len2 = bcTruelen(arg2);
|
len2 = bcTruelen(arg2);
|
||||||
|
|
||||||
/* fast path for different-length inputs */
|
/*
|
||||||
|
* Since we only care about equality or not-equality, we can avoid all
|
||||||
|
* the expense of strcoll() here, and just do bitwise comparison.
|
||||||
|
*/
|
||||||
if (len1 != len2)
|
if (len1 != len2)
|
||||||
result = true;
|
result = true;
|
||||||
else
|
else
|
||||||
result = (varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2) != 0);
|
result = (strncmp(VARDATA(arg1), VARDATA(arg2), len1) != 0);
|
||||||
|
|
||||||
PG_FREE_IF_COPY(arg1, 0);
|
PG_FREE_IF_COPY(arg1, 0);
|
||||||
PG_FREE_IF_COPY(arg2, 1);
|
PG_FREE_IF_COPY(arg2, 1);
|
||||||
@ -789,7 +795,9 @@ bpchar_smaller(PG_FUNCTION_ARGS)
|
|||||||
* bpchar needs a specialized hash function because we want to ignore
|
* bpchar needs a specialized hash function because we want to ignore
|
||||||
* trailing blanks in comparisons.
|
* trailing blanks in comparisons.
|
||||||
*
|
*
|
||||||
* XXX is there any need for locale-specific behavior here?
|
* Note: currently there is no need for locale-specific behavior here,
|
||||||
|
* but if we ever change the semantics of bpchar comparison to trust
|
||||||
|
* strcoll() completely, we'd need to do something different in non-C locales.
|
||||||
*/
|
*/
|
||||||
Datum
|
Datum
|
||||||
hashbpchar(PG_FUNCTION_ARGS)
|
hashbpchar(PG_FUNCTION_ARGS)
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.141 2005/11/22 18:17:23 momjian Exp $
|
* $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.142 2005/12/22 22:50:00 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -938,6 +938,15 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2)
|
|||||||
|
|
||||||
result = strcoll(a1p, a2p);
|
result = strcoll(a1p, a2p);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* In some locales strcoll() can claim that nonidentical strings are
|
||||||
|
* equal. Believing that would be bad news for a number of reasons,
|
||||||
|
* so we follow Perl's lead and sort "equal" strings according to
|
||||||
|
* strcmp().
|
||||||
|
*/
|
||||||
|
if (result == 0)
|
||||||
|
result = strcmp(a1p, a2p);
|
||||||
|
|
||||||
if (a1p != a1buf)
|
if (a1p != a1buf)
|
||||||
pfree(a1p);
|
pfree(a1p);
|
||||||
if (a2p != a2buf)
|
if (a2p != a2buf)
|
||||||
@ -984,11 +993,15 @@ texteq(PG_FUNCTION_ARGS)
|
|||||||
text *arg2 = PG_GETARG_TEXT_P(1);
|
text *arg2 = PG_GETARG_TEXT_P(1);
|
||||||
bool result;
|
bool result;
|
||||||
|
|
||||||
/* fast path for different-length inputs */
|
/*
|
||||||
|
* Since we only care about equality or not-equality, we can avoid all
|
||||||
|
* the expense of strcoll() here, and just do bitwise comparison.
|
||||||
|
*/
|
||||||
if (VARSIZE(arg1) != VARSIZE(arg2))
|
if (VARSIZE(arg1) != VARSIZE(arg2))
|
||||||
result = false;
|
result = false;
|
||||||
else
|
else
|
||||||
result = (text_cmp(arg1, arg2) == 0);
|
result = (strncmp(VARDATA(arg1), VARDATA(arg2),
|
||||||
|
VARSIZE(arg1) - VARHDRSZ) == 0);
|
||||||
|
|
||||||
PG_FREE_IF_COPY(arg1, 0);
|
PG_FREE_IF_COPY(arg1, 0);
|
||||||
PG_FREE_IF_COPY(arg2, 1);
|
PG_FREE_IF_COPY(arg2, 1);
|
||||||
@ -1003,11 +1016,15 @@ textne(PG_FUNCTION_ARGS)
|
|||||||
text *arg2 = PG_GETARG_TEXT_P(1);
|
text *arg2 = PG_GETARG_TEXT_P(1);
|
||||||
bool result;
|
bool result;
|
||||||
|
|
||||||
/* fast path for different-length inputs */
|
/*
|
||||||
|
* Since we only care about equality or not-equality, we can avoid all
|
||||||
|
* the expense of strcoll() here, and just do bitwise comparison.
|
||||||
|
*/
|
||||||
if (VARSIZE(arg1) != VARSIZE(arg2))
|
if (VARSIZE(arg1) != VARSIZE(arg2))
|
||||||
result = true;
|
result = true;
|
||||||
else
|
else
|
||||||
result = (text_cmp(arg1, arg2) != 0);
|
result = (strncmp(VARDATA(arg1), VARDATA(arg2),
|
||||||
|
VARSIZE(arg1) - VARHDRSZ) != 0);
|
||||||
|
|
||||||
PG_FREE_IF_COPY(arg1, 0);
|
PG_FREE_IF_COPY(arg1, 0);
|
||||||
PG_FREE_IF_COPY(arg2, 1);
|
PG_FREE_IF_COPY(arg2, 1);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user