mirror of https://github.com/postgres/postgres
Use a hash table to de-duplicate column names in ruleutils.c.
Commit 8004953b5
added a hash table to avoid O(N^2) cost in choosing
unique relation aliases while deparsing a view or rule. It did
nothing about the similar O(N^2) (maybe worse) costs of choosing
unique column aliases within each RTE. However, that's now
demonstrably a bottleneck when deparsing CHECK constraints for wide
tables, so let's use a similar hash table to handle those.
The extra cost of setting up the hash table will not be repaid unless
the table has many columns. I've set this up so that we use the brute
force method if there are less than 32 columns. The exact cutoff is
not too critical, but this value seems good because it results in both
code paths getting exercised by existing regression-test cases.
Patch by me; thanks to David Rowley for review.
Discussion: https://postgr.es/m/2885468.1722291250@sss.pgh.pa.us
This commit is contained in:
parent
bccca780ee
commit
52c707483c
|
@ -224,6 +224,10 @@ typedef struct
|
||||||
* of aliases to columns of the right input. Thus, positions in the printable
|
* of aliases to columns of the right input. Thus, positions in the printable
|
||||||
* column alias list are not necessarily one-for-one with varattnos of the
|
* column alias list are not necessarily one-for-one with varattnos of the
|
||||||
* JOIN, so we need a separate new_colnames[] array for printing purposes.
|
* JOIN, so we need a separate new_colnames[] array for printing purposes.
|
||||||
|
*
|
||||||
|
* Finally, when dealing with wide tables we risk O(N^2) costs in assigning
|
||||||
|
* non-duplicate column names. We ameliorate that by using a hash table that
|
||||||
|
* holds all the strings appearing in colnames, new_colnames, and parentUsing.
|
||||||
*/
|
*/
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
|
@ -291,6 +295,15 @@ typedef struct
|
||||||
int *leftattnos; /* left-child varattnos of join cols, or 0 */
|
int *leftattnos; /* left-child varattnos of join cols, or 0 */
|
||||||
int *rightattnos; /* right-child varattnos of join cols, or 0 */
|
int *rightattnos; /* right-child varattnos of join cols, or 0 */
|
||||||
List *usingNames; /* names assigned to merged columns */
|
List *usingNames; /* names assigned to merged columns */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Hash table holding copies of all the strings appearing in this struct's
|
||||||
|
* colnames, new_colnames, and parentUsing. We use a hash table only for
|
||||||
|
* sufficiently wide relations, and only during the colname-assignment
|
||||||
|
* functions set_relation_column_names and set_join_column_names;
|
||||||
|
* otherwise, names_hash is NULL.
|
||||||
|
*/
|
||||||
|
HTAB *names_hash; /* entries are just strings */
|
||||||
} deparse_columns;
|
} deparse_columns;
|
||||||
|
|
||||||
/* This macro is analogous to rt_fetch(), but for deparse_columns structs */
|
/* This macro is analogous to rt_fetch(), but for deparse_columns structs */
|
||||||
|
@ -376,6 +389,9 @@ static bool colname_is_unique(const char *colname, deparse_namespace *dpns,
|
||||||
static char *make_colname_unique(char *colname, deparse_namespace *dpns,
|
static char *make_colname_unique(char *colname, deparse_namespace *dpns,
|
||||||
deparse_columns *colinfo);
|
deparse_columns *colinfo);
|
||||||
static void expand_colnames_array_to(deparse_columns *colinfo, int n);
|
static void expand_colnames_array_to(deparse_columns *colinfo, int n);
|
||||||
|
static void build_colinfo_names_hash(deparse_columns *colinfo);
|
||||||
|
static void add_to_names_hash(deparse_columns *colinfo, const char *name);
|
||||||
|
static void destroy_colinfo_names_hash(deparse_columns *colinfo);
|
||||||
static void identify_join_columns(JoinExpr *j, RangeTblEntry *jrte,
|
static void identify_join_columns(JoinExpr *j, RangeTblEntry *jrte,
|
||||||
deparse_columns *colinfo);
|
deparse_columns *colinfo);
|
||||||
static char *get_rtable_name(int rtindex, deparse_context *context);
|
static char *get_rtable_name(int rtindex, deparse_context *context);
|
||||||
|
@ -4133,6 +4149,10 @@ has_dangerous_join_using(deparse_namespace *dpns, Node *jtnode)
|
||||||
*
|
*
|
||||||
* parentUsing is a list of all USING aliases assigned in parent joins of
|
* parentUsing is a list of all USING aliases assigned in parent joins of
|
||||||
* the current jointree node. (The passed-in list must not be modified.)
|
* the current jointree node. (The passed-in list must not be modified.)
|
||||||
|
*
|
||||||
|
* Note that we do not use per-deparse_columns hash tables in this function.
|
||||||
|
* The number of names that need to be assigned should be small enough that
|
||||||
|
* we don't need to trouble with that.
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
set_using_names(deparse_namespace *dpns, Node *jtnode, List *parentUsing)
|
set_using_names(deparse_namespace *dpns, Node *jtnode, List *parentUsing)
|
||||||
|
@ -4408,6 +4428,9 @@ set_relation_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
|
||||||
colinfo->new_colnames = (char **) palloc(ncolumns * sizeof(char *));
|
colinfo->new_colnames = (char **) palloc(ncolumns * sizeof(char *));
|
||||||
colinfo->is_new_col = (bool *) palloc(ncolumns * sizeof(bool));
|
colinfo->is_new_col = (bool *) palloc(ncolumns * sizeof(bool));
|
||||||
|
|
||||||
|
/* If the RTE is wide enough, use a hash table to avoid O(N^2) costs */
|
||||||
|
build_colinfo_names_hash(colinfo);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Scan the columns, select a unique alias for each one, and store it in
|
* Scan the columns, select a unique alias for each one, and store it in
|
||||||
* colinfo->colnames and colinfo->new_colnames. The former array has NULL
|
* colinfo->colnames and colinfo->new_colnames. The former array has NULL
|
||||||
|
@ -4443,6 +4466,7 @@ set_relation_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
|
||||||
colname = make_colname_unique(colname, dpns, colinfo);
|
colname = make_colname_unique(colname, dpns, colinfo);
|
||||||
|
|
||||||
colinfo->colnames[i] = colname;
|
colinfo->colnames[i] = colname;
|
||||||
|
add_to_names_hash(colinfo, colname);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Put names of non-dropped columns in new_colnames[] too */
|
/* Put names of non-dropped columns in new_colnames[] too */
|
||||||
|
@ -4456,6 +4480,9 @@ set_relation_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
|
||||||
changed_any = true;
|
changed_any = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* We're now done needing the colinfo's names_hash */
|
||||||
|
destroy_colinfo_names_hash(colinfo);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Set correct length for new_colnames[] array. (Note: if columns have
|
* Set correct length for new_colnames[] array. (Note: if columns have
|
||||||
* been added, colinfo->num_cols includes them, which is not really quite
|
* been added, colinfo->num_cols includes them, which is not really quite
|
||||||
|
@ -4526,6 +4553,9 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
|
||||||
expand_colnames_array_to(colinfo, noldcolumns);
|
expand_colnames_array_to(colinfo, noldcolumns);
|
||||||
Assert(colinfo->num_cols == noldcolumns);
|
Assert(colinfo->num_cols == noldcolumns);
|
||||||
|
|
||||||
|
/* If the RTE is wide enough, use a hash table to avoid O(N^2) costs */
|
||||||
|
build_colinfo_names_hash(colinfo);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Scan the join output columns, select an alias for each one, and store
|
* Scan the join output columns, select an alias for each one, and store
|
||||||
* it in colinfo->colnames. If there are USING columns, set_using_names()
|
* it in colinfo->colnames. If there are USING columns, set_using_names()
|
||||||
|
@ -4563,6 +4593,7 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
|
||||||
if (rte->alias == NULL)
|
if (rte->alias == NULL)
|
||||||
{
|
{
|
||||||
colinfo->colnames[i] = real_colname;
|
colinfo->colnames[i] = real_colname;
|
||||||
|
add_to_names_hash(colinfo, real_colname);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4579,6 +4610,7 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
|
||||||
colname = make_colname_unique(colname, dpns, colinfo);
|
colname = make_colname_unique(colname, dpns, colinfo);
|
||||||
|
|
||||||
colinfo->colnames[i] = colname;
|
colinfo->colnames[i] = colname;
|
||||||
|
add_to_names_hash(colinfo, colname);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Remember if any assigned aliases differ from "real" name */
|
/* Remember if any assigned aliases differ from "real" name */
|
||||||
|
@ -4677,6 +4709,7 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
colinfo->new_colnames[j] = child_colname;
|
colinfo->new_colnames[j] = child_colname;
|
||||||
|
add_to_names_hash(colinfo, colinfo->new_colnames[j]);
|
||||||
}
|
}
|
||||||
|
|
||||||
colinfo->is_new_col[j] = leftcolinfo->is_new_col[jc];
|
colinfo->is_new_col[j] = leftcolinfo->is_new_col[jc];
|
||||||
|
@ -4726,6 +4759,7 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
colinfo->new_colnames[j] = child_colname;
|
colinfo->new_colnames[j] = child_colname;
|
||||||
|
add_to_names_hash(colinfo, colinfo->new_colnames[j]);
|
||||||
}
|
}
|
||||||
|
|
||||||
colinfo->is_new_col[j] = rightcolinfo->is_new_col[jc];
|
colinfo->is_new_col[j] = rightcolinfo->is_new_col[jc];
|
||||||
|
@ -4740,6 +4774,9 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
|
||||||
Assert(j == nnewcolumns);
|
Assert(j == nnewcolumns);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* We're now done needing the colinfo's names_hash */
|
||||||
|
destroy_colinfo_names_hash(colinfo);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* For a named join, print column aliases if we changed any from the child
|
* For a named join, print column aliases if we changed any from the child
|
||||||
* names. Unnamed joins cannot print aliases.
|
* names. Unnamed joins cannot print aliases.
|
||||||
|
@ -4762,28 +4799,58 @@ colname_is_unique(const char *colname, deparse_namespace *dpns,
|
||||||
int i;
|
int i;
|
||||||
ListCell *lc;
|
ListCell *lc;
|
||||||
|
|
||||||
/* Check against already-assigned column aliases within RTE */
|
/*
|
||||||
for (i = 0; i < colinfo->num_cols; i++)
|
* If we have a hash table, consult that instead of linearly scanning the
|
||||||
|
* colinfo's strings.
|
||||||
|
*/
|
||||||
|
if (colinfo->names_hash)
|
||||||
{
|
{
|
||||||
char *oldname = colinfo->colnames[i];
|
if (hash_search(colinfo->names_hash,
|
||||||
|
colname,
|
||||||
if (oldname && strcmp(oldname, colname) == 0)
|
HASH_FIND,
|
||||||
|
NULL) != NULL)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* Check against already-assigned column aliases within RTE */
|
||||||
|
for (i = 0; i < colinfo->num_cols; i++)
|
||||||
|
{
|
||||||
|
char *oldname = colinfo->colnames[i];
|
||||||
|
|
||||||
|
if (oldname && strcmp(oldname, colname) == 0)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If we're building a new_colnames array, check that too (this will
|
||||||
|
* be partially but not completely redundant with the previous checks)
|
||||||
|
*/
|
||||||
|
for (i = 0; i < colinfo->num_new_cols; i++)
|
||||||
|
{
|
||||||
|
char *oldname = colinfo->new_colnames[i];
|
||||||
|
|
||||||
|
if (oldname && strcmp(oldname, colname) == 0)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Also check against names already assigned for parent-join USING
|
||||||
|
* cols
|
||||||
|
*/
|
||||||
|
foreach(lc, colinfo->parentUsing)
|
||||||
|
{
|
||||||
|
char *oldname = (char *) lfirst(lc);
|
||||||
|
|
||||||
|
if (strcmp(oldname, colname) == 0)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If we're building a new_colnames array, check that too (this will be
|
* Also check against USING-column names that must be globally unique.
|
||||||
* partially but not completely redundant with the previous checks)
|
* These are not hashed, but there should be few of them.
|
||||||
*/
|
*/
|
||||||
for (i = 0; i < colinfo->num_new_cols; i++)
|
|
||||||
{
|
|
||||||
char *oldname = colinfo->new_colnames[i];
|
|
||||||
|
|
||||||
if (oldname && strcmp(oldname, colname) == 0)
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Also check against USING-column names that must be globally unique */
|
|
||||||
foreach(lc, dpns->using_names)
|
foreach(lc, dpns->using_names)
|
||||||
{
|
{
|
||||||
char *oldname = (char *) lfirst(lc);
|
char *oldname = (char *) lfirst(lc);
|
||||||
|
@ -4792,15 +4859,6 @@ colname_is_unique(const char *colname, deparse_namespace *dpns,
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Also check against names already assigned for parent-join USING cols */
|
|
||||||
foreach(lc, colinfo->parentUsing)
|
|
||||||
{
|
|
||||||
char *oldname = (char *) lfirst(lc);
|
|
||||||
|
|
||||||
if (strcmp(oldname, colname) == 0)
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4861,6 +4919,90 @@ expand_colnames_array_to(deparse_columns *colinfo, int n)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* build_colinfo_names_hash: optionally construct a hash table for colinfo
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
build_colinfo_names_hash(deparse_columns *colinfo)
|
||||||
|
{
|
||||||
|
HASHCTL hash_ctl;
|
||||||
|
int i;
|
||||||
|
ListCell *lc;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Use a hash table only for RTEs with at least 32 columns. (The cutoff
|
||||||
|
* is somewhat arbitrary, but let's choose it so that this code does get
|
||||||
|
* exercised in the regression tests.)
|
||||||
|
*/
|
||||||
|
if (colinfo->num_cols < 32)
|
||||||
|
return;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Set up the hash table. The entries are just strings with no other
|
||||||
|
* payload.
|
||||||
|
*/
|
||||||
|
hash_ctl.keysize = NAMEDATALEN;
|
||||||
|
hash_ctl.entrysize = NAMEDATALEN;
|
||||||
|
hash_ctl.hcxt = CurrentMemoryContext;
|
||||||
|
colinfo->names_hash = hash_create("deparse_columns names",
|
||||||
|
colinfo->num_cols + colinfo->num_new_cols,
|
||||||
|
&hash_ctl,
|
||||||
|
HASH_ELEM | HASH_STRINGS | HASH_CONTEXT);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Preload the hash table with any names already present (these would have
|
||||||
|
* come from set_using_names).
|
||||||
|
*/
|
||||||
|
for (i = 0; i < colinfo->num_cols; i++)
|
||||||
|
{
|
||||||
|
char *oldname = colinfo->colnames[i];
|
||||||
|
|
||||||
|
if (oldname)
|
||||||
|
add_to_names_hash(colinfo, oldname);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < colinfo->num_new_cols; i++)
|
||||||
|
{
|
||||||
|
char *oldname = colinfo->new_colnames[i];
|
||||||
|
|
||||||
|
if (oldname)
|
||||||
|
add_to_names_hash(colinfo, oldname);
|
||||||
|
}
|
||||||
|
|
||||||
|
foreach(lc, colinfo->parentUsing)
|
||||||
|
{
|
||||||
|
char *oldname = (char *) lfirst(lc);
|
||||||
|
|
||||||
|
add_to_names_hash(colinfo, oldname);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* add_to_names_hash: add a string to the names_hash, if we're using one
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
add_to_names_hash(deparse_columns *colinfo, const char *name)
|
||||||
|
{
|
||||||
|
if (colinfo->names_hash)
|
||||||
|
(void) hash_search(colinfo->names_hash,
|
||||||
|
name,
|
||||||
|
HASH_ENTER,
|
||||||
|
NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* destroy_colinfo_names_hash: destroy hash table when done with it
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
destroy_colinfo_names_hash(deparse_columns *colinfo)
|
||||||
|
{
|
||||||
|
if (colinfo->names_hash)
|
||||||
|
{
|
||||||
|
hash_destroy(colinfo->names_hash);
|
||||||
|
colinfo->names_hash = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* identify_join_columns: figure out where columns of a join come from
|
* identify_join_columns: figure out where columns of a join come from
|
||||||
*
|
*
|
||||||
|
|
Loading…
Reference in New Issue