Add more strict check of stop and non-recognized words,

allow only recognized words in thesaurus configuration file.
This commit is contained in:
Teodor Sigaev 2006-06-02 15:35:42 +00:00
parent d77df813c9
commit c7faf45160

View File

@ -1,4 +1,4 @@
/* $PostgreSQL: pgsql/contrib/tsearch2/dict_thesaurus.c,v 1.1 2006/05/31 14:05:31 teodor Exp $ */
/* $PostgreSQL: pgsql/contrib/tsearch2/dict_thesaurus.c,v 1.2 2006/06/02 15:35:42 teodor Exp $ */
/*
* thesaurus
@ -330,8 +330,12 @@ compileTheLexeme(DictThesaurus *d) {
);
if ( !(ptr && ptr->lexeme) ) {
if ( !ptr )
elog(NOTICE,"Thesaurus: word '%s' isn't recognized by subdictionary", d->wrds[i].lexeme);
else
elog(ERROR,"Thesaurus: word '%s' is recognized as stop-word, assign any stop-word", d->wrds[i].lexeme);
newwrds = addCompiledLexeme( newwrds, &nnw, &tnm, NULL, d->wrds[i].entries, 0);
elog(NOTICE,"Thesaurus: word '%s' isn't recognized by subdictionary or it's a stop-word, assign any non-recognized word", d->wrds[i].lexeme);
} else {
while( ptr->lexeme ) {
TSLexeme *remptr = ptr+1;
@ -420,7 +424,7 @@ compileTheSubstitute(DictThesaurus *d) {
);
reml = lexized;
if ( lexized ) {
if ( lexized && lexized->lexeme ) {
int toset = (lexized->lexeme && outptr != d->subst[i].res ) ? (outptr - d->subst[i].res) : -1;
while( lexized->lexeme ) {
@ -443,6 +447,8 @@ compileTheSubstitute(DictThesaurus *d) {
if ( toset > 0)
d->subst[i].res[toset].flags |= TSL_ADDPOS;
} else {
elog(NOTICE,"Thesaurus: word '%s' isn't recognized by subdictionary or it's a stop-word, ignored", inptr->lexeme);
}
if ( inptr->lexeme )
@ -450,6 +456,9 @@ compileTheSubstitute(DictThesaurus *d) {
inptr++;
}
if ( outptr == d->subst[i].res )
elog(ERROR,"Thesaurus: all words in subsitution aren't recognized by subdictionary");
d->subst[i].reslen = outptr - d->subst[i].res;
free(rem);