Avoid unnecessary copying of source string when generating a cloned TParser.
For long source strings the copying results in O(N^2) behavior, and the multiplier can be significant if wide-char conversion is involved. Andres Freund, reviewed by Kevin Grittner.
This commit is contained in:
parent
a5495cd841
commit
21d11e7ee2
@ -7,7 +7,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.25 2009/11/15 13:57:01 petere Exp $
|
* $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.26 2009/12/15 20:37:17 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -328,6 +328,46 @@ TParserInit(char *str, int len)
|
|||||||
return prs;
|
return prs;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* As an alternative to a full TParserInit one can create a
|
||||||
|
* TParserCopy which basically is a regular TParser without a private
|
||||||
|
* copy of the string - instead it uses the one from another TParser.
|
||||||
|
* This is useful because at some places TParsers are created
|
||||||
|
* recursively and the repeated copying around of the strings can
|
||||||
|
* cause major inefficiency if the source string is long.
|
||||||
|
* The new parser starts parsing at the original's current position.
|
||||||
|
*
|
||||||
|
* Obviously one must not close the original TParser before the copy.
|
||||||
|
*/
|
||||||
|
static TParser *
|
||||||
|
TParserCopyInit(const TParser *orig)
|
||||||
|
{
|
||||||
|
TParser *prs = (TParser *) palloc0(sizeof(TParser));
|
||||||
|
|
||||||
|
prs->charmaxlen = orig->charmaxlen;
|
||||||
|
prs->str = orig->str + orig->state->posbyte;
|
||||||
|
prs->lenstr = orig->lenstr - orig->state->posbyte;
|
||||||
|
|
||||||
|
#ifdef USE_WIDE_UPPER_LOWER
|
||||||
|
prs->usewide = orig->usewide;
|
||||||
|
|
||||||
|
if (orig->pgwstr)
|
||||||
|
prs->pgwstr = orig->pgwstr + orig->state->poschar;
|
||||||
|
if (orig->wstr)
|
||||||
|
prs->wstr = orig->wstr + orig->state->poschar;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
prs->state = newTParserPosition(NULL);
|
||||||
|
prs->state->state = TPS_Base;
|
||||||
|
|
||||||
|
#ifdef WPARSER_TRACE
|
||||||
|
fprintf(stderr, "parsing copy of \"%.*s\"\n", prs->lenstr, prs->str);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return prs;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
TParserClose(TParser *prs)
|
TParserClose(TParser *prs)
|
||||||
{
|
{
|
||||||
@ -346,9 +386,33 @@ TParserClose(TParser *prs)
|
|||||||
pfree(prs->pgwstr);
|
pfree(prs->pgwstr);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef WPARSER_TRACE
|
||||||
|
fprintf(stderr, "closing parser");
|
||||||
|
#endif
|
||||||
pfree(prs);
|
pfree(prs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Close a parser created with TParserCopyInit
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
TParserCopyClose(TParser *prs)
|
||||||
|
{
|
||||||
|
while (prs->state)
|
||||||
|
{
|
||||||
|
TParserPosition *ptr = prs->state->prev;
|
||||||
|
|
||||||
|
pfree(prs->state);
|
||||||
|
prs->state = ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef WPARSER_TRACE
|
||||||
|
fprintf(stderr, "closing parser copy");
|
||||||
|
#endif
|
||||||
|
pfree(prs);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Character-type support functions, equivalent to is* macros, but
|
* Character-type support functions, equivalent to is* macros, but
|
||||||
* working with any possible encodings and locales. Notes:
|
* working with any possible encodings and locales. Notes:
|
||||||
@ -617,7 +681,7 @@ p_isignore(TParser *prs)
|
|||||||
static int
|
static int
|
||||||
p_ishost(TParser *prs)
|
p_ishost(TParser *prs)
|
||||||
{
|
{
|
||||||
TParser *tmpprs = TParserInit(prs->str + prs->state->posbyte, prs->lenstr - prs->state->posbyte);
|
TParser *tmpprs = TParserCopyInit(prs);
|
||||||
int res = 0;
|
int res = 0;
|
||||||
|
|
||||||
tmpprs->wanthost = true;
|
tmpprs->wanthost = true;
|
||||||
@ -631,7 +695,7 @@ p_ishost(TParser *prs)
|
|||||||
prs->state->charlen = tmpprs->state->charlen;
|
prs->state->charlen = tmpprs->state->charlen;
|
||||||
res = 1;
|
res = 1;
|
||||||
}
|
}
|
||||||
TParserClose(tmpprs);
|
TParserCopyClose(tmpprs);
|
||||||
|
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
@ -639,7 +703,7 @@ p_ishost(TParser *prs)
|
|||||||
static int
|
static int
|
||||||
p_isURLPath(TParser *prs)
|
p_isURLPath(TParser *prs)
|
||||||
{
|
{
|
||||||
TParser *tmpprs = TParserInit(prs->str + prs->state->posbyte, prs->lenstr - prs->state->posbyte);
|
TParser *tmpprs = TParserCopyInit(prs);
|
||||||
int res = 0;
|
int res = 0;
|
||||||
|
|
||||||
tmpprs->state = newTParserPosition(tmpprs->state);
|
tmpprs->state = newTParserPosition(tmpprs->state);
|
||||||
@ -654,7 +718,7 @@ p_isURLPath(TParser *prs)
|
|||||||
prs->state->charlen = tmpprs->state->charlen;
|
prs->state->charlen = tmpprs->state->charlen;
|
||||||
res = 1;
|
res = 1;
|
||||||
}
|
}
|
||||||
TParserClose(tmpprs);
|
TParserCopyClose(tmpprs);
|
||||||
|
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user