diff --git a/src/backend/access/common/tupdesc.c b/src/backend/access/common/tupdesc.c index 86d704e8d0..37e895479e 100644 --- a/src/backend/access/common/tupdesc.c +++ b/src/backend/access/common/tupdesc.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/common/tupdesc.c,v 1.74 2001/05/07 00:43:15 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/common/tupdesc.c,v 1.75 2001/06/25 21:11:43 tgl Exp $ * * NOTES * some of the executor utility code such as "ExecTypeFromTL" should be @@ -20,6 +20,7 @@ #include "postgres.h" #include "catalog/pg_type.h" +#include "nodes/parsenodes.h" #include "parser/parse_type.h" #include "utils/builtins.h" #include "utils/syscache.h" diff --git a/src/backend/commands/comment.c b/src/backend/commands/comment.c index 39668f867e..2f28130f8b 100644 --- a/src/backend/commands/comment.c +++ b/src/backend/commands/comment.c @@ -7,14 +7,13 @@ * Copyright (c) 1999-2001, PostgreSQL Global Development Group * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/commands/comment.c,v 1.30 2001/06/13 21:44:40 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/commands/comment.c,v 1.31 2001/06/25 21:11:43 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" -#include "utils/builtins.h" #include "access/heapam.h" #include "catalog/catname.h" #include "catalog/indexing.h" @@ -26,11 +25,12 @@ #include "catalog/pg_class.h" #include "commands/comment.h" #include "miscadmin.h" -#include "parser/parse.h" #include "parser/parse_expr.h" #include "parser/parse_func.h" +#include "parser/parse.h" #include "rewrite/rewriteRemove.h" #include "utils/acl.h" +#include "utils/builtins.h" #include "utils/fmgroids.h" #include "utils/syscache.h" @@ -717,7 +717,7 @@ CommentOperator(char *opername, List *arguments, char *comment) /*** Get the procedure associated with the operator ***/ data = (Form_pg_operator) GETSTRUCT(optuple); - oid = RegprocToOid(data->oprcode); + oid = data->oprcode; if (oid == InvalidOid) elog(ERROR, "operator '%s' does not have an underlying function", opername); diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c index cafc01fc33..36d8f229f1 100644 --- a/src/backend/optimizer/path/clausesel.c +++ b/src/backend/optimizer/path/clausesel.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.45 2001/06/05 05:26:04 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.46 2001/06/25 21:11:43 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -24,6 +24,7 @@ #include "parser/parsetree.h" #include "utils/fmgroids.h" #include "utils/lsyscache.h" +#include "utils/selfuncs.h" /* note that pg_type.h hardwires size of bool as 1 ... duplicate it */ @@ -509,6 +510,16 @@ clause_selectivity(Query *root, */ s1 = (Selectivity) 0.5; } + else if (IsA(clause, NullTest)) + { + /* Use node specific selectivity calculation function */ + s1 = nulltestsel(root, (NullTest *) clause, varRelid); + } + else if (IsA(clause, BooleanTest)) + { + /* Use node specific selectivity calculation function */ + s1 = booltestsel(root, (BooleanTest *) clause, varRelid); + } else if (IsA(clause, RelabelType)) { /* Not sure this case is needed, but it can't hurt */ @@ -517,5 +528,9 @@ clause_selectivity(Query *root, varRelid); } +#ifdef SELECTIVITY_DEBUG + elog(NOTICE, "clause_selectivity: s1 %f", s1); +#endif /* SELECTIVITY_DEBUG */ + return s1; } diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index fc25058ab6..477fa6ed5c 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.107 2001/06/17 02:05:19 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.108 2001/06/25 21:11:43 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -37,6 +37,7 @@ #include "utils/builtins.h" #include "utils/fmgroids.h" #include "utils/lsyscache.h" +#include "utils/selfuncs.h" #include "utils/syscache.h" diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c index c0280c7b88..fc03173ce3 100644 --- a/src/backend/parser/analyze.c +++ b/src/backend/parser/analyze.c @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Header: /cvsroot/pgsql/src/backend/parser/analyze.c,v 1.190 2001/06/23 00:07:34 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/parser/analyze.c,v 1.191 2001/06/25 21:11:44 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -33,6 +33,7 @@ #include "rewrite/rewriteManip.h" #include "utils/builtins.h" #include "utils/fmgroids.h" +#include "utils/numeric.h" #include "utils/relcache.h" #include "utils/syscache.h" diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c index 1ad99d330e..ee92651f57 100644 --- a/src/backend/utils/adt/formatting.c +++ b/src/backend/utils/adt/formatting.c @@ -1,7 +1,7 @@ /* ----------------------------------------------------------------------- * formatting.c * - * $Header: /cvsroot/pgsql/src/backend/utils/adt/formatting.c,v 1.37 2001/05/03 22:53:07 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/adt/formatting.c,v 1.38 2001/06/25 21:11:44 tgl Exp $ * * * Portions Copyright (c) 1999-2000, PostgreSQL Global Development Group @@ -67,21 +67,23 @@ #define DEBUG_elog_output NOTICE ***/ -#include -#include +#include "postgres.h" + #include #include #include +#ifdef USE_LOCALE #include +#endif #include #include -#include "postgres.h" #include "utils/builtins.h" #include "utils/date.h" #include "utils/datetime.h" #include "utils/formatting.h" #include "utils/int8.h" +#include "utils/numeric.h" #include "utils/pg_locale.h" /* ---------- diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 099cc37e9d..3ab3881c25 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -15,7 +15,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.93 2001/06/09 22:16:18 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.94 2001/06/25 21:11:44 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -93,6 +93,7 @@ #include "utils/date.h" #include "utils/int8.h" #include "utils/lsyscache.h" +#include "utils/selfuncs.h" #include "utils/syscache.h" /* @@ -117,6 +118,10 @@ /* default number of distinct values in a table */ #define DEFAULT_NUM_DISTINCT 200 +/* default selectivity estimate for boolean and null test nodes */ +#define DEFAULT_UNK_SEL 0.005 +#define DEFAULT_NOT_UNK_SEL (1.0 - DEFAULT_UNK_SEL) +#define DEFAULT_BOOL_SEL 0.5 static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue, Datum lobound, Datum hibound, Oid boundstypid, @@ -933,6 +938,327 @@ icnlikesel(PG_FUNCTION_ARGS) PG_RETURN_FLOAT8(result); } +/* + * booltestsel - Selectivity of BooleanTest Node. + */ +Selectivity +booltestsel(Query *root, BooleanTest *clause, int varRelid) +{ + Var *var; + Node *arg; + Oid relid; + HeapTuple statsTuple; + Datum *values; + int nvalues; + float4 *numbers; + int nnumbers; + double selec; + + Assert(clause && IsA(clause, BooleanTest)); + + arg = (Node *) clause->arg; + + /* + * Ignore any binary-compatible relabeling (probably unnecessary, + * but can't hurt) + */ + if (IsA(arg, RelabelType)) + arg = ((RelabelType *) arg)->arg; + + if (IsA(arg, Var) && (varRelid == 0 || varRelid == ((Var *) arg)->varno)) + var = (Var *) arg; + else + { + /* + * If argument is not a Var, we can't get statistics for it, but + * perhaps clause_selectivity can do something with it. We ignore + * the possibility of a NULL value when using clause_selectivity, + * and just assume the value is either TRUE or FALSE. + */ + switch (clause->booltesttype) + { + case IS_UNKNOWN: + selec = DEFAULT_UNK_SEL; + break; + case IS_NOT_UNKNOWN: + selec = DEFAULT_NOT_UNK_SEL; + break; + case IS_TRUE: + case IS_NOT_FALSE: + selec = (double) clause_selectivity(root, arg, varRelid); + break; + case IS_FALSE: + case IS_NOT_TRUE: + selec = 1.0 - (double) clause_selectivity(root, arg, varRelid); + break; + default: + elog(ERROR, "booltestsel: unexpected booltesttype %d", + (int) clause->booltesttype); + selec = 0.0; /* Keep compiler quiet */ + break; + } + return (Selectivity) selec; + } + + /* get stats for the attribute, if available */ + relid = getrelid(var->varno, root->rtable); + if (relid == InvalidOid) + statsTuple = NULL; + else + statsTuple = SearchSysCache(STATRELATT, + ObjectIdGetDatum(relid), + Int16GetDatum(var->varattno), + 0, 0); + + if (HeapTupleIsValid(statsTuple)) + { + Form_pg_statistic stats; + double freq_null; + + stats = (Form_pg_statistic) GETSTRUCT(statsTuple); + + freq_null = stats->stanullfrac; + + if (get_attstatsslot(statsTuple, var->vartype, var->vartypmod, + STATISTIC_KIND_MCV, InvalidOid, + &values, &nvalues, + &numbers, &nnumbers) + && nnumbers > 0) + { + double freq_true; + double freq_false; + + /* + * Get first MCV frequency and derive frequency for true. + */ + if (DatumGetBool(values[0])) + freq_true = numbers[0]; + else + freq_true = 1.0 - numbers[0] - freq_null; + + /* + * Next derive freqency for false. + * Then use these as appropriate to derive frequency for each case. + */ + freq_false = 1.0 - freq_true - freq_null; + + switch (clause->booltesttype) + { + case IS_UNKNOWN: + /* select only NULL values */ + selec = freq_null; + break; + case IS_NOT_UNKNOWN: + /* select non-NULL values */ + selec = 1.0 - freq_null; + break; + case IS_TRUE: + /* select only TRUE values */ + selec = freq_true; + break; + case IS_NOT_TRUE: + /* select non-TRUE values */ + selec = 1.0 - freq_true; + break; + case IS_FALSE: + /* select only FALSE values */ + selec = freq_false; + break; + case IS_NOT_FALSE: + /* select non-FALSE values */ + selec = 1.0 - freq_false; + break; + default: + elog(ERROR, "booltestsel: unexpected booltesttype %d", + (int) clause->booltesttype); + selec = 0.0; /* Keep compiler quiet */ + break; + } + + free_attstatsslot(var->vartype, values, nvalues, + numbers, nnumbers); + } + else + { + /* + * No most-common-value info available. + * Still have null fraction information, + * so use it for IS [NOT] UNKNOWN. + * Otherwise adjust for null fraction and + * assume an even split for boolean tests. + */ + switch (clause->booltesttype) + { + case IS_UNKNOWN: + /* + * Use freq_null directly. + */ + selec = freq_null; + break; + case IS_NOT_UNKNOWN: + /* + * Select not unknown (not null) values. + * Calculate from freq_null. + */ + selec = 1.0 - freq_null; + break; + case IS_TRUE: + case IS_NOT_TRUE: + case IS_FALSE: + case IS_NOT_FALSE: + selec = (1.0 - freq_null) / 2.0; + break; + default: + elog(ERROR, "booltestsel: unexpected booltesttype %d", + (int) clause->booltesttype); + selec = 0.0; /* Keep compiler quiet */ + break; + } + } + + ReleaseSysCache(statsTuple); + } + else + { + /* + * No VACUUM ANALYZE stats available, so use a default value. + * (Note: not much point in recursing to clause_selectivity here.) + */ + switch (clause->booltesttype) + { + case IS_UNKNOWN: + selec = DEFAULT_UNK_SEL; + break; + case IS_NOT_UNKNOWN: + selec = DEFAULT_NOT_UNK_SEL; + break; + case IS_TRUE: + case IS_NOT_TRUE: + case IS_FALSE: + case IS_NOT_FALSE: + selec = DEFAULT_BOOL_SEL; + break; + default: + elog(ERROR, "booltestsel: unexpected booltesttype %d", + (int) clause->booltesttype); + selec = 0.0; /* Keep compiler quiet */ + break; + } + } + + /* result should be in range, but make sure... */ + if (selec < 0.0) + selec = 0.0; + else if (selec > 1.0) + selec = 1.0; + + return (Selectivity) selec; +} + +/* + * nulltestsel - Selectivity of NullTest Node. + */ +Selectivity +nulltestsel(Query *root, NullTest *clause, int varRelid) +{ + Var *var; + Node *arg; + Oid relid; + HeapTuple statsTuple; + double selec; + double defselec; + double freq_null; + + Assert(clause && IsA(clause, NullTest)); + + switch (clause->nulltesttype) + { + case IS_NULL: + defselec = DEFAULT_UNK_SEL; + break; + case IS_NOT_NULL: + defselec = DEFAULT_NOT_UNK_SEL; + break; + default: + elog(ERROR, "nulltestsel: unexpected nulltesttype %d", + (int) clause->nulltesttype); + return (Selectivity) 0; /* keep compiler quiet */ + } + + arg = (Node *) clause->arg; + + /* + * Ignore any binary-compatible relabeling + */ + if (IsA(arg, RelabelType)) + arg = ((RelabelType *) arg)->arg; + + if (IsA(arg, Var) && (varRelid == 0 || varRelid == ((Var *) arg)->varno)) + var = (Var *) arg; + else + { + /* + * punt if non-Var argument + */ + return (Selectivity) defselec; + } + + relid = getrelid(var->varno, root->rtable); + if (relid == InvalidOid) + return (Selectivity) defselec; + + /* get stats for the attribute, if available */ + statsTuple = SearchSysCache(STATRELATT, + ObjectIdGetDatum(relid), + Int16GetDatum(var->varattno), + 0, 0); + if (HeapTupleIsValid(statsTuple)) + { + Form_pg_statistic stats; + + stats = (Form_pg_statistic) GETSTRUCT(statsTuple); + freq_null = stats->stanullfrac; + + switch (clause->nulltesttype) + { + case IS_NULL: + /* + * Use freq_null directly. + */ + selec = freq_null; + break; + case IS_NOT_NULL: + /* + * Select not unknown (not null) values. + * Calculate from freq_null. + */ + selec = 1.0 - freq_null; + break; + default: + elog(ERROR, "nulltestsel: unexpected nulltesttype %d", + (int) clause->nulltesttype); + return (Selectivity) 0; /* keep compiler quiet */ + } + + ReleaseSysCache(statsTuple); + } + else + { + /* + * No VACUUM ANALYZE stats available, so make a guess + */ + selec = defselec; + } + + /* result should be in range, but make sure... */ + if (selec < 0.0) + selec = 0.0; + else if (selec > 1.0) + selec = 1.0; + + return (Selectivity) selec; +} + /* * eqjoinsel - Join selectivity of "=" */ diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index d02ccd1e80..d267946255 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: builtins.h,v 1.155 2001/06/17 02:05:20 tgl Exp $ + * $Id: builtins.h,v 1.156 2001/06/25 21:11:45 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -15,9 +15,8 @@ #define BUILTINS_H #include "fmgr.h" -#include "nodes/relation.h" /* for amcostestimate parameters */ -#include "storage/itemptr.h" -#include "utils/numeric.h" +#include "nodes/primnodes.h" + /* * Defined in adt/ @@ -342,57 +341,6 @@ extern char *deparse_expression(Node *expr, List *dpcontext, bool forceprefix); extern List *deparse_context_for(char *relname, Oid relid); -/* selfuncs.c */ -extern Datum eqsel(PG_FUNCTION_ARGS); -extern Datum neqsel(PG_FUNCTION_ARGS); -extern Datum scalarltsel(PG_FUNCTION_ARGS); -extern Datum scalargtsel(PG_FUNCTION_ARGS); -extern Datum regexeqsel(PG_FUNCTION_ARGS); -extern Datum icregexeqsel(PG_FUNCTION_ARGS); -extern Datum likesel(PG_FUNCTION_ARGS); -extern Datum iclikesel(PG_FUNCTION_ARGS); -extern Datum regexnesel(PG_FUNCTION_ARGS); -extern Datum icregexnesel(PG_FUNCTION_ARGS); -extern Datum nlikesel(PG_FUNCTION_ARGS); -extern Datum icnlikesel(PG_FUNCTION_ARGS); - -extern Datum eqjoinsel(PG_FUNCTION_ARGS); -extern Datum neqjoinsel(PG_FUNCTION_ARGS); -extern Datum scalarltjoinsel(PG_FUNCTION_ARGS); -extern Datum scalargtjoinsel(PG_FUNCTION_ARGS); -extern Datum regexeqjoinsel(PG_FUNCTION_ARGS); -extern Datum icregexeqjoinsel(PG_FUNCTION_ARGS); -extern Datum likejoinsel(PG_FUNCTION_ARGS); -extern Datum iclikejoinsel(PG_FUNCTION_ARGS); -extern Datum regexnejoinsel(PG_FUNCTION_ARGS); -extern Datum icregexnejoinsel(PG_FUNCTION_ARGS); -extern Datum nlikejoinsel(PG_FUNCTION_ARGS); -extern Datum icnlikejoinsel(PG_FUNCTION_ARGS); - -extern Datum btcostestimate(PG_FUNCTION_ARGS); -extern Datum rtcostestimate(PG_FUNCTION_ARGS); -extern Datum hashcostestimate(PG_FUNCTION_ARGS); -extern Datum gistcostestimate(PG_FUNCTION_ARGS); - -/* selfuncs.c supporting routines that are also used by optimizer code */ -typedef enum -{ - Pattern_Type_Like, Pattern_Type_Like_IC, - Pattern_Type_Regex, Pattern_Type_Regex_IC -} Pattern_Type; - -typedef enum -{ - Pattern_Prefix_None, Pattern_Prefix_Partial, Pattern_Prefix_Exact -} Pattern_Prefix_Status; - -extern Pattern_Prefix_Status pattern_fixed_prefix(char *patt, - Pattern_Type ptype, - char **prefix, - char **rest); -extern bool locale_is_like_safe(void); -extern char *make_greater_string(const char *str, Oid datatype); - /* tid.c */ extern Datum tidin(PG_FUNCTION_ARGS); extern Datum tidout(PG_FUNCTION_ARGS); diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h new file mode 100644 index 0000000000..9c0c7b3ac9 --- /dev/null +++ b/src/include/utils/selfuncs.h @@ -0,0 +1,77 @@ +/*------------------------------------------------------------------------- + * + * selfuncs.h + * Selectivity functions and index cost estimation functions for + * standard operators and index access methods. + * + * + * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * $Id: selfuncs.h,v 1.1 2001/06/25 21:11:45 tgl Exp $ + * + *------------------------------------------------------------------------- + */ +#ifndef SELFUNCS_H +#define SELFUNCS_H + +#include "fmgr.h" +#include "nodes/parsenodes.h" + + +typedef enum +{ + Pattern_Type_Like, Pattern_Type_Like_IC, + Pattern_Type_Regex, Pattern_Type_Regex_IC +} Pattern_Type; + +typedef enum +{ + Pattern_Prefix_None, Pattern_Prefix_Partial, Pattern_Prefix_Exact +} Pattern_Prefix_Status; + + +/* selfuncs.c */ + +extern Pattern_Prefix_Status pattern_fixed_prefix(char *patt, + Pattern_Type ptype, + char **prefix, + char **rest); +extern bool locale_is_like_safe(void); +extern char *make_greater_string(const char *str, Oid datatype); + +extern Datum eqsel(PG_FUNCTION_ARGS); +extern Datum neqsel(PG_FUNCTION_ARGS); +extern Datum scalarltsel(PG_FUNCTION_ARGS); +extern Datum scalargtsel(PG_FUNCTION_ARGS); +extern Datum regexeqsel(PG_FUNCTION_ARGS); +extern Datum icregexeqsel(PG_FUNCTION_ARGS); +extern Datum likesel(PG_FUNCTION_ARGS); +extern Datum iclikesel(PG_FUNCTION_ARGS); +extern Datum regexnesel(PG_FUNCTION_ARGS); +extern Datum icregexnesel(PG_FUNCTION_ARGS); +extern Datum nlikesel(PG_FUNCTION_ARGS); +extern Datum icnlikesel(PG_FUNCTION_ARGS); + +extern Datum eqjoinsel(PG_FUNCTION_ARGS); +extern Datum neqjoinsel(PG_FUNCTION_ARGS); +extern Datum scalarltjoinsel(PG_FUNCTION_ARGS); +extern Datum scalargtjoinsel(PG_FUNCTION_ARGS); +extern Datum regexeqjoinsel(PG_FUNCTION_ARGS); +extern Datum icregexeqjoinsel(PG_FUNCTION_ARGS); +extern Datum likejoinsel(PG_FUNCTION_ARGS); +extern Datum iclikejoinsel(PG_FUNCTION_ARGS); +extern Datum regexnejoinsel(PG_FUNCTION_ARGS); +extern Datum icregexnejoinsel(PG_FUNCTION_ARGS); +extern Datum nlikejoinsel(PG_FUNCTION_ARGS); +extern Datum icnlikejoinsel(PG_FUNCTION_ARGS); + +Selectivity booltestsel(Query *root, BooleanTest *clause, int varRelid); +Selectivity nulltestsel(Query *root, NullTest *clause, int varRelid); + +extern Datum btcostestimate(PG_FUNCTION_ARGS); +extern Datum rtcostestimate(PG_FUNCTION_ARGS); +extern Datum hashcostestimate(PG_FUNCTION_ARGS); +extern Datum gistcostestimate(PG_FUNCTION_ARGS); + +#endif /* SELFUNCS_H */