Increase the default value of default_statistics_target from 10 to 100,
and its maximum value from 1000 to 10000. ALTER TABLE SET STATISTICS similarly now allows a value up to 10000. Per discussion.
This commit is contained in:
parent
b69bde7749
commit
65e3ea7641
@ -1,4 +1,4 @@
|
||||
<!-- $PostgreSQL: pgsql/doc/src/sgml/config.sgml,v 1.199 2008/12/08 15:11:39 mha Exp $ -->
|
||||
<!-- $PostgreSQL: pgsql/doc/src/sgml/config.sgml,v 1.200 2008/12/13 19:13:43 tgl Exp $ -->
|
||||
|
||||
<chapter Id="runtime-config">
|
||||
<title>Server Configuration</title>
|
||||
@ -2133,7 +2133,7 @@ archive_command = 'copy "%p" "C:\\server\\archivedir\\%f"' # Windows
|
||||
not had a column-specific target set via <command>ALTER TABLE
|
||||
SET STATISTICS</>. Larger values increase the time needed to
|
||||
do <command>ANALYZE</>, but might improve the quality of the
|
||||
planner's estimates. The default is 10. For more information
|
||||
planner's estimates. The default is 100. For more information
|
||||
on the use of statistics by the <productname>PostgreSQL</>
|
||||
query planner, refer to <xref linkend="planner-stats">.
|
||||
</para>
|
||||
|
@ -1,4 +1,4 @@
|
||||
<!-- $PostgreSQL: pgsql/doc/src/sgml/perform.sgml,v 1.68 2007/12/28 21:03:31 tgl Exp $ -->
|
||||
<!-- $PostgreSQL: pgsql/doc/src/sgml/perform.sgml,v 1.69 2008/12/13 19:13:43 tgl Exp $ -->
|
||||
|
||||
<chapter id="performance-tips">
|
||||
<title>Performance Tips</title>
|
||||
@ -562,7 +562,7 @@ SELECT attname, n_distinct, most_common_vals FROM pg_stats WHERE tablename = 'ro
|
||||
column-by-column basis using the <command>ALTER TABLE SET STATISTICS</>
|
||||
command, or globally by setting the
|
||||
<xref linkend="guc-default-statistics-target"> configuration variable.
|
||||
The default limit is presently 10 entries. Raising the limit
|
||||
The default limit is presently 100 entries. Raising the limit
|
||||
might allow more accurate planner estimates to be made, particularly for
|
||||
columns with irregular data distributions, at the price of consuming
|
||||
more space in <structname>pg_statistic</structname> and slightly more
|
||||
|
@ -1,5 +1,5 @@
|
||||
<!--
|
||||
$PostgreSQL: pgsql/doc/src/sgml/ref/alter_table.sgml,v 1.101 2008/11/14 10:22:45 petere Exp $
|
||||
$PostgreSQL: pgsql/doc/src/sgml/ref/alter_table.sgml,v 1.102 2008/12/13 19:13:44 tgl Exp $
|
||||
PostgreSQL documentation
|
||||
-->
|
||||
|
||||
@ -142,7 +142,7 @@ where <replaceable class="PARAMETER">action</replaceable> is one of:
|
||||
This form
|
||||
sets the per-column statistics-gathering target for subsequent
|
||||
<xref linkend="sql-analyze" endterm="sql-analyze-title"> operations.
|
||||
The target can be set in the range 0 to 1000; alternatively, set it
|
||||
The target can be set in the range 0 to 10000; alternatively, set it
|
||||
to -1 to revert to using the system default statistics
|
||||
target (<xref linkend="guc-default-statistics-target">).
|
||||
For more information on the use of statistics by the
|
||||
|
@ -1,5 +1,5 @@
|
||||
<!--
|
||||
$PostgreSQL: pgsql/doc/src/sgml/ref/analyze.sgml,v 1.24 2008/11/14 10:22:45 petere Exp $
|
||||
$PostgreSQL: pgsql/doc/src/sgml/ref/analyze.sgml,v 1.25 2008/12/13 19:13:44 tgl Exp $
|
||||
PostgreSQL documentation
|
||||
-->
|
||||
|
||||
@ -132,10 +132,10 @@ ANALYZE [ VERBOSE ] [ <replaceable class="PARAMETER">table</replaceable> [ ( <re
|
||||
will change slightly each time <command>ANALYZE</command> is run,
|
||||
even if the actual table contents did not change. This might result
|
||||
in small changes in the planner's estimated costs shown by
|
||||
<xref linkend="sql-explain" endterm="sql-explain-title">. In rare situations, this
|
||||
non-determinism will cause the query optimizer to choose a
|
||||
different query plan between runs of <command>ANALYZE</command>. To
|
||||
avoid this, raise the amount of statistics collected by
|
||||
<xref linkend="sql-explain" endterm="sql-explain-title">.
|
||||
In rare situations, this non-determinism will cause the planner's
|
||||
choices of query plans to change after <command>ANALYZE</command> is run.
|
||||
To avoid this, raise the amount of statistics collected by
|
||||
<command>ANALYZE</command>, as described below.
|
||||
</para>
|
||||
|
||||
@ -148,7 +148,7 @@ ANALYZE [ VERBOSE ] [ <replaceable class="PARAMETER">table</replaceable> [ ( <re
|
||||
endterm="sql-altertable-title">). The target value sets the
|
||||
maximum number of entries in the most-common-value list and the
|
||||
maximum number of bins in the histogram. The default target value
|
||||
is 10, but this can be adjusted up or down to trade off accuracy of
|
||||
is 100, but this can be adjusted up or down to trade off accuracy of
|
||||
planner estimates against the time taken for
|
||||
<command>ANALYZE</command> and the amount of space occupied in
|
||||
<literal>pg_statistic</literal>. In particular, setting the
|
||||
|
@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.128 2008/11/10 00:49:37 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.129 2008/12/13 19:13:44 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -67,7 +67,7 @@ typedef struct AnlIndexData
|
||||
|
||||
|
||||
/* Default statistics target (GUC parameter) */
|
||||
int default_statistics_target = 10;
|
||||
int default_statistics_target = 100;
|
||||
|
||||
/* A few variables that don't seem worth passing around as parameters */
|
||||
static int elevel = -1;
|
||||
@ -1531,10 +1531,10 @@ std_typanalyze(VacAttrStats *stats)
|
||||
* error in bin size f, and error probability gamma, the minimum
|
||||
* random sample size is
|
||||
* r = 4 * k * ln(2*n/gamma) / f^2
|
||||
* Taking f = 0.5, gamma = 0.01, n = 1 million rows, we obtain
|
||||
* Taking f = 0.5, gamma = 0.01, n = 10^6 rows, we obtain
|
||||
* r = 305.82 * k
|
||||
* Note that because of the log function, the dependence on n is
|
||||
* quite weak; even at n = 1 billion, a 300*k sample gives <= 0.59
|
||||
* quite weak; even at n = 10^12, a 300*k sample gives <= 0.66
|
||||
* bin size error with probability 0.99. So there's no real need to
|
||||
* scale for n, which is a good thing because we don't necessarily
|
||||
* know it at this point.
|
||||
|
@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.272 2008/12/06 23:22:46 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.273 2008/12/13 19:13:44 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -3942,9 +3942,9 @@ ATExecSetStatistics(Relation rel, const char *colName, Node *newValue)
|
||||
errmsg("statistics target %d is too low",
|
||||
newtarget)));
|
||||
}
|
||||
else if (newtarget > 1000)
|
||||
else if (newtarget > 10000)
|
||||
{
|
||||
newtarget = 1000;
|
||||
newtarget = 10000;
|
||||
ereport(WARNING,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("lowering statistics target to %d",
|
||||
|
@ -7,7 +7,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/tsearch/ts_typanalyze.c,v 1.3 2008/11/27 21:17:39 heikki Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/tsearch/ts_typanalyze.c,v 1.4 2008/12/13 19:13:44 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -63,7 +63,7 @@ ts_typanalyze(PG_FUNCTION_ARGS)
|
||||
attr->attstattarget = default_statistics_target;
|
||||
|
||||
stats->compute_stats = compute_tsvector_stats;
|
||||
/* see comment about the choice of minrows from analyze.c */
|
||||
/* see comment about the choice of minrows in commands/analyze.c */
|
||||
stats->minrows = 300 * attr->attstattarget;
|
||||
|
||||
PG_RETURN_BOOL(true);
|
||||
@ -105,8 +105,8 @@ ts_typanalyze(PG_FUNCTION_ARGS)
|
||||
* is no more than a few times w.
|
||||
*
|
||||
* We use a hashtable for the D structure and a bucket width of
|
||||
* statistic_target * 100, where 100 is an arbitrarily chosen constant, meant
|
||||
* to approximate the number of lexemes in a single tsvector.
|
||||
* statistics_target * 100, where 100 is an arbitrarily chosen constant,
|
||||
* meant to approximate the number of lexemes in a single tsvector.
|
||||
*/
|
||||
static void
|
||||
compute_tsvector_stats(VacAttrStats *stats,
|
||||
@ -130,7 +130,7 @@ compute_tsvector_stats(VacAttrStats *stats,
|
||||
LexemeHashKey hash_key;
|
||||
TrackItem *item;
|
||||
|
||||
/* We want statistic_target * 100 lexemes in the MCELEM array */
|
||||
/* We want statistics_target * 100 lexemes in the MCELEM array */
|
||||
num_mcelem = stats->attr->attstattarget * 100;
|
||||
|
||||
/*
|
||||
|
@ -10,7 +10,7 @@
|
||||
* Written by Peter Eisentraut <peter_e@gmx.net>.
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.482 2008/12/02 02:00:32 alvherre Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.483 2008/12/13 19:13:44 tgl Exp $
|
||||
*
|
||||
*--------------------------------------------------------------------
|
||||
*/
|
||||
@ -1245,7 +1245,7 @@ static struct config_int ConfigureNamesInt[] =
|
||||
"column-specific target set via ALTER TABLE SET STATISTICS.")
|
||||
},
|
||||
&default_statistics_target,
|
||||
10, 1, 1000, NULL, NULL
|
||||
100, 1, 10000, NULL, NULL
|
||||
},
|
||||
{
|
||||
{"from_collapse_limit", PGC_USERSET, QUERY_TUNING_OTHER,
|
||||
|
@ -210,7 +210,7 @@
|
||||
|
||||
# - Other Planner Options -
|
||||
|
||||
#default_statistics_target = 10 # range 1-1000
|
||||
#default_statistics_target = 100 # range 1-10000
|
||||
#constraint_exclusion = off
|
||||
#cursor_tuple_fraction = 0.1 # range 0.0-1.0
|
||||
#from_collapse_limit = 8
|
||||
|
Loading…
x
Reference in New Issue
Block a user