From 4f51429dd7f194e36af32b557ecdce555b5ab51b Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Sat, 8 Apr 2023 21:54:45 +1200 Subject: [PATCH] Update tsearch regex memory management. Now that our regex engine uses palloc(), it's not necessary to set up a special memory context callback to free compiled regexes. The regex has no resources other than the memory that is already going to be freed in bulk. Reviewed-by: Tom Lane Discussion: https://postgr.es/m/CA%2BhUKGK3PGKwcKqzoosamn36YW-fsuTdOPPF1i_rtEO%3DnEYKSg%40mail.gmail.com --- src/backend/tsearch/spell.c | 34 +++++++------------------------ src/include/tsearch/dicts/spell.h | 18 ++++++---------- 2 files changed, 13 insertions(+), 39 deletions(-) diff --git a/src/backend/tsearch/spell.c b/src/backend/tsearch/spell.c index 8d48cad251..fe4fd3a929 100644 --- a/src/backend/tsearch/spell.c +++ b/src/backend/tsearch/spell.c @@ -655,17 +655,6 @@ FindWord(IspellDict *Conf, const char *word, const char *affixflag, int flag) return 0; } -/* - * Context reset/delete callback for a regular expression used in an affix - */ -static void -regex_affix_deletion_callback(void *arg) -{ - aff_regex_struct *pregex = (aff_regex_struct *) arg; - - pg_regfree(&(pregex->regex)); -} - /* * Adds a new affix rule to the Affix field. * @@ -728,7 +717,6 @@ NIAddAffix(IspellDict *Conf, const char *flag, char flagflags, const char *mask, int err; pg_wchar *wmask; char *tmask; - aff_regex_struct *pregex; Affix->issimple = 0; Affix->isregis = 0; @@ -743,31 +731,23 @@ NIAddAffix(IspellDict *Conf, const char *flag, char flagflags, const char *mask, wmasklen = pg_mb2wchar_with_len(tmask, wmask, masklen); /* - * The regex engine stores its stuff using malloc not palloc, so we - * must arrange to explicitly clean up the regex when the dictionary's - * context is cleared. That means the regex_t has to stay in a fixed - * location within the context; we can't keep it directly in the AFFIX - * struct, since we may sort and resize the array of AFFIXes. + * The regex and all internal state created by pg_regcomp are + * allocated in the dictionary's memory context, and will be freed + * automatically when it is destroyed. */ - Affix->reg.pregex = pregex = palloc(sizeof(aff_regex_struct)); - - err = pg_regcomp(&(pregex->regex), wmask, wmasklen, + Affix->reg.pregex = palloc(sizeof(regex_t)); + err = pg_regcomp(Affix->reg.pregex, wmask, wmasklen, REG_ADVANCED | REG_NOSUB, DEFAULT_COLLATION_OID); if (err) { char errstr[100]; - pg_regerror(err, &(pregex->regex), errstr, sizeof(errstr)); + pg_regerror(err, Affix->reg.pregex, errstr, sizeof(errstr)); ereport(ERROR, (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION), errmsg("invalid regular expression: %s", errstr))); } - - pregex->mcallback.func = regex_affix_deletion_callback; - pregex->mcallback.arg = (void *) pregex; - MemoryContextRegisterResetCallback(CurrentMemoryContext, - &pregex->mcallback); } Affix->flagflags = flagflags; @@ -2161,7 +2141,7 @@ CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *neww data = (pg_wchar *) palloc((newword_len + 1) * sizeof(pg_wchar)); data_len = pg_mb2wchar_with_len(newword, data, newword_len); - if (pg_regexec(&(Affix->reg.pregex->regex), data, data_len, + if (pg_regexec(Affix->reg.pregex, data, data_len, 0, NULL, 0, NULL, 0) == REG_OKAY) { pfree(data); diff --git a/src/include/tsearch/dicts/spell.h b/src/include/tsearch/dicts/spell.h index 5c30af6ac6..0763f9ffe7 100644 --- a/src/include/tsearch/dicts/spell.h +++ b/src/include/tsearch/dicts/spell.h @@ -81,17 +81,6 @@ typedef struct spell_struct #define SPELLHDRSZ (offsetof(SPELL, word)) -/* - * If an affix uses a regex, we have to store that separately in a struct - * that won't move around when arrays of affixes are enlarged or sorted. - * This is so that it can be found to be cleaned up at context destruction. - */ -typedef struct aff_regex_struct -{ - regex_t regex; - MemoryContextCallback mcallback; -} aff_regex_struct; - /* * Represents an entry in an affix list. */ @@ -108,7 +97,12 @@ typedef struct aff_struct char *repl; union { - aff_regex_struct *pregex; + /* + * Arrays of AFFIX are moved and sorted. We'll use a pointer to + * regex_t to keep this struct small, and avoid assuming that regex_t + * is movable. + */ + regex_t *pregex; Regis regis; } reg; } AFFIX;