Fix contrib/pg_trgm's similarity() function for trigram-free strings.
Cases such as similarity('', '') produced a NaN result: neither argument yields any trigrams, so the similarity ratio was computed as 0/0. Per discussion, make the function return zero instead. This appears to be the basic cause of bug #7867 from Michele Baravalle, although it remains unclear why her installation doesn't think Cyrillic letters are letters. Back-patch to all active branches.
This commit is contained in:
parent
cd89965aab
commit
9728eda792
@ -53,6 +53,12 @@ select similarity('wow',' WOW ');
|
||||
1
|
||||
(1 row)
|
||||
|
||||
select similarity('---', '####---');
|
||||
similarity
|
||||
------------
|
||||
0
|
||||
(1 row)
|
||||
|
||||
CREATE TABLE test_trgm(t text);
|
||||
\copy test_trgm from 'data/trgm.data
|
||||
select t,similarity(t,'qwertyu0988') as sml from test_trgm where t % 'qwertyu0988' order by sml desc, t;
|
||||
|
@ -11,6 +11,8 @@ select show_trgm('a b C0*%^');
|
||||
select similarity('wow','WOWa ');
|
||||
select similarity('wow',' WOW ');
|
||||
|
||||
select similarity('---', '####---');
|
||||
|
||||
CREATE TABLE test_trgm(t text);
|
||||
|
||||
\copy test_trgm from 'data/trgm.data
|
||||
|
@ -553,6 +553,10 @@ cnt_sml(TRGM *trg1, TRGM *trg2)
|
||||
len1 = ARRNELEM(trg1);
|
||||
len2 = ARRNELEM(trg2);
|
||||
|
||||
/* explicit test is needed to avoid 0/0 division when both lengths are 0 */
|
||||
if (len1 <= 0 || len2 <= 0)
|
||||
return (float4) 0.0;
|
||||
|
||||
while (ptr1 - GETARR(trg1) < len1 && ptr2 - GETARR(trg2) < len2)
|
||||
{
|
||||
int res = CMPTRGM(ptr1, ptr2);
|
||||
@ -570,9 +574,9 @@ cnt_sml(TRGM *trg1, TRGM *trg2)
|
||||
}
|
||||
|
||||
#ifdef DIVUNION
|
||||
return ((((float4) count) / ((float4) (len1 + len2 - count))));
|
||||
return ((float4) count) / ((float4) (len1 + len2 - count));
|
||||
#else
|
||||
return (((float) count) / ((float) ((len1 > len2) ? len1 : len2)));
|
||||
return ((float4) count) / ((float4) ((len1 > len2) ? len1 : len2));
|
||||
#endif
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user