Tom Lane 7351b5fa17 Cleanup for some problems in tsearch patch:
- ispell initialization crashed on empty dictionary file
- ispell initialization crashed on affix file with prefixes but no suffixes
- stop words file was run through pg_verify_mbstr, with database
  encoding, but it's supposed to be UTF-8; similar bug for synonym files
- bunch of comments added, typos fixed, and other cleanup

Introduced consistent encoding checking/conversion of data read from tsearch
configuration files, by doing this in a single t_readline() subroutine
(replacing direct usages of fgets).  Cleaned up API for readstopwords too.

Heikki Linnakangas
2007-08-25 00:03:59 +00:00

167 lines
3.5 KiB
C

/*-------------------------------------------------------------------------
*
* ts_utils.c
* various support functions
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/ts_utils.c,v 1.3 2007/08/25 00:03:59 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <ctype.h>
#include "miscadmin.h"
#include "storage/fd.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_public.h"
#include "tsearch/ts_utils.h"
#include "utils/builtins.h"
/*
 * Build the full path name of a tsearch configuration file from its base
 * name and extension.
 *
 * The base name is treated as user-supplied input: every character must
 * satisfy isalpha(), otherwise an ERROR (ERRCODE_INVALID_PARAMETER_VALUE)
 * is raised.  The extension is assumed to be safe and is not checked.
 *
 * The result has the form "<sharepath>/tsearch_data/<basename>.<extension>"
 * and is returned in a palloc'd buffer of MAXPGPATH bytes.
 */
char *
get_tsearch_config_filename(const char *basename,
                            const char *extension)
{
    char        share_dir[MAXPGPATH];
    char       *fullpath;
    const char *cp = basename;

    /*
     * Accept alphabetic characters only.  That may be stricter than really
     * necessary, but nothing outside the tsearch_data directory may become
     * reachable, so in particular '/' has to be refused.  The same test is
     * used for timezonesets names.
     *
     * Note that an empty base name contains no offending character and
     * therefore passes this check.
     */
    while (*cp != '\0')
    {
        if (isalpha((unsigned char) *cp) == 0)
            ereport(ERROR,
                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                     errmsg("invalid text search configuration file name \"%s\"",
                            basename)));
        cp++;
    }

    /* Assemble the path below the installation's share directory. */
    fullpath = palloc(MAXPGPATH);
    get_share_path(my_exec_path, share_dir);
    snprintf(fullpath, MAXPGPATH, "%s/tsearch_data/%s.%s",
             share_dir, basename, extension);

    return fullpath;
}
/*
 * qsort/bsearch comparator for arrays of C strings.
 *
 * Both arguments point at array elements, i.e. at "char *" values; the
 * strings they refer to are ordered with strcmp().
 */
static int
comparestr(const void *a, const void *b)
{
    const char *lhs = *(const char *const *) a;
    const char *rhs = *(const char *const *) b;

    return strcmp(lhs, rhs);
}
/*
 * Load a stopword file into *s.
 *
 * fname is the base name of the file; the path is resolved through
 * get_tsearch_config_filename() with the extension "stop".  When fname is
 * NULL or empty, no file is read and the list is left empty (len = 0,
 * stop = NULL).
 *
 * Every non-empty word is passed through 'wordop' when one is supplied.
 * wordop may either modify its input in place and return it, or return a
 * newly palloc'd string; in the latter case the input line is freed here.
 *
 * On return the word array is sorted with comparestr so that it can be
 * searched with bsearch.  An ERROR (ERRCODE_CONFIG_FILE_ERROR) is raised
 * when the file cannot be opened.
 */
void
readstoplist(const char *fname, StopList *s, char *(*wordop) (char *))
{
    char      **words = NULL;
    int         capacity = 0;
    char       *path;
    FILE       *fp;
    char       *raw;

    s->len = 0;

    /* No file requested: hand back an empty list. */
    if (fname == NULL || *fname == '\0')
    {
        s->stop = NULL;
        return;
    }

    path = get_tsearch_config_filename(fname, "stop");

    fp = AllocateFile(path, "r");
    if (fp == NULL)
        ereport(ERROR,
                (errcode(ERRCODE_CONFIG_FILE_ERROR),
                 errmsg("could not open stopword file \"%s\": %m",
                        path)));

    for (;;)
    {
        char       *end;
        char       *word;

        raw = t_readline(fp);
        if (raw == NULL)
            break;

        /*
         * Cut the line at the first whitespace character, which drops the
         * trailing newline and anything following the first word.
         *
         * NOTE(review): the scan advances one byte at a time while handing
         * the pointer to t_isspace(); if t_isspace() interprets multibyte
         * characters, stepping by character length may be required here --
         * confirm against its implementation.
         */
        end = raw;
        while (*end != '\0' && !t_isspace(end))
            end++;
        *end = '\0';

        /* Nothing left (blank line, or line starting with whitespace). */
        if (raw[0] == '\0')
        {
            pfree(raw);
            continue;
        }

        /* Make room for one more entry: start at 64 slots, then double. */
        if (s->len >= capacity)
        {
            if (capacity == 0)
            {
                capacity = 64;
                words = (char **) palloc(sizeof(char *) * capacity);
            }
            else
            {
                capacity *= 2;
                words = (char **) repalloc((void *) words,
                                           sizeof(char *) * capacity);
            }
        }

        /* Store the line itself, or whatever wordop makes of it. */
        word = raw;
        if (wordop)
        {
            word = wordop(raw);
            if (word != raw)
                pfree(raw);     /* wordop returned a fresh copy */
        }
        words[s->len] = word;
        s->len++;
    }

    FreeFile(fp);
    pfree(path);

    s->stop = words;

    /* Sort to allow binary searching */
    if (words != NULL && s->len > 0)
        qsort(s->stop, s->len, sizeof(char *), comparestr);
}
/*
 * Report whether 'key' is present in the stop list.
 *
 * The lookup is a bsearch using comparestr, so s->stop must be sorted in
 * that order (readstoplist sorts the array it builds).  An empty or
 * unallocated list never matches.
 */
bool
searchstoplist(StopList *s, char *key)
{
    /* Nothing to search. */
    if (s->stop == NULL || s->len <= 0)
        return false;

    /* The array holds "char *" elements, so the key is passed by address. */
    return bsearch(&key, s->stop, s->len,
                   sizeof(char *), comparestr) != NULL;
}
/*
 * Return a palloc'd, NUL-terminated copy of the first 'len' bytes of 'in'.
 *
 * Exactly 'len' bytes are copied with memcpy; the copy does not stop early
 * at a NUL byte inside that range.  'len' is used directly as the byte
 * count, so it must not be negative.
 */
char *
pnstrdup(const char *in, int len)
{
    char       *copy;

    copy = palloc(len + 1);     /* one extra byte for the terminator */
    memcpy(copy, in, len);
    copy[len] = '\0';

    return copy;
}