mirror of https://github.com/postgres/postgres
Fix assorted bugs in contrib/unaccent's configuration file parsing.
Make it use t_isspace() to identify whitespace, rather than relying on sscanf which is known to get it wrong on some platform/locale combinations. Get rid of fixed-size buffers. Make it actually continue to parse the file after ignoring a line with untranslatable characters, as was obviously intended. The first of these issues is per gripe from J Smith, though not exactly either of his proposed patches.
This commit is contained in:
parent
ffc703a891
commit
ced3a93ccb
|
@ -91,35 +91,83 @@ initSuffixTree(char *filename)
|
|||
|
||||
do
|
||||
{
|
||||
char src[4096];
|
||||
char trg[4096];
|
||||
int srclen;
|
||||
int trglen;
|
||||
char *line = NULL;
|
||||
|
||||
/*
|
||||
* pg_do_encoding_conversion() (called by tsearch_readline()) will
|
||||
* emit exception if it finds untranslatable characters in current
|
||||
* locale. We just skip such lines, continuing with the next.
|
||||
*/
|
||||
skip = true;
|
||||
|
||||
PG_TRY();
|
||||
{
|
||||
/*
|
||||
* pg_do_encoding_conversion() (called by tsearch_readline()) will
|
||||
* emit exception if it finds untranslatable characters in current
|
||||
* locale. We just skip such characters.
|
||||
*/
|
||||
char *line;
|
||||
|
||||
while ((line = tsearch_readline(&trst)) != NULL)
|
||||
{
|
||||
if (sscanf(line, "%s\t%s\n", src, trg) != 2)
|
||||
/*
|
||||
* The format of each line must be "src trg" where src and trg
|
||||
* are sequences of one or more non-whitespace characters,
|
||||
* separated by whitespace. Whitespace at start or end of
|
||||
* line is ignored.
|
||||
*/
|
||||
int state;
|
||||
char *ptr;
|
||||
char *src = NULL;
|
||||
char *trg = NULL;
|
||||
int ptrlen;
|
||||
int srclen = 0;
|
||||
int trglen = 0;
|
||||
|
||||
state = 0;
|
||||
for (ptr = line; *ptr; ptr += ptrlen)
|
||||
{
|
||||
ptrlen = pg_mblen(ptr);
|
||||
/* ignore whitespace, but end src or trg */
|
||||
if (t_isspace(ptr))
|
||||
{
|
||||
if (state == 1)
|
||||
state = 2;
|
||||
else if (state == 3)
|
||||
state = 4;
|
||||
continue;
|
||||
}
|
||||
switch (state)
|
||||
{
|
||||
case 0:
|
||||
/* start of src */
|
||||
src = ptr;
|
||||
srclen = ptrlen;
|
||||
state = 1;
|
||||
break;
|
||||
case 1:
|
||||
/* continue src */
|
||||
srclen += ptrlen;
|
||||
break;
|
||||
case 2:
|
||||
/* start of trg */
|
||||
trg = ptr;
|
||||
trglen = ptrlen;
|
||||
state = 3;
|
||||
break;
|
||||
case 3:
|
||||
/* continue trg */
|
||||
trglen += ptrlen;
|
||||
break;
|
||||
default:
|
||||
/* bogus line format */
|
||||
state = -1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
srclen = strlen(src);
|
||||
trglen = strlen(trg);
|
||||
|
||||
if (state >= 3)
|
||||
rootSuffixTree = placeChar(rootSuffixTree,
|
||||
(unsigned char *) src, srclen,
|
||||
trg, trglen);
|
||||
skip = false;
|
||||
|
||||
pfree(line);
|
||||
}
|
||||
skip = false;
|
||||
}
|
||||
PG_CATCH();
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue