diff --git a/contrib/pgstattuple/pgstatapprox.c b/contrib/pgstattuple/pgstatapprox.c
index 474c3bd517..ef33cacec6 100644
--- a/contrib/pgstattuple/pgstatapprox.c
+++ b/contrib/pgstattuple/pgstatapprox.c
@@ -68,7 +68,6 @@ statapprox_heap(Relation rel, output_type *stat)
 	Buffer		vmbuffer = InvalidBuffer;
 	BufferAccessStrategy bstrategy;
 	TransactionId OldestXmin;
-	uint64		misc_count = 0;
 
 	OldestXmin = GetOldestXmin(rel, PROCARRAY_FLAGS_VACUUM);
 	bstrategy = GetAccessStrategy(BAS_BULKREAD);
@@ -114,14 +113,15 @@ statapprox_heap(Relation rel, output_type *stat)
 		else
 			stat->free_space += BLCKSZ - SizeOfPageHeaderData;
 
+		/* We may count the page as scanned even if it's new/empty */
+		scanned++;
+
 		if (PageIsNew(page) || PageIsEmpty(page))
 		{
 			UnlockReleaseBuffer(buf);
 			continue;
 		}
 
-		scanned++;
-
 		/*
 		 * Look at each tuple on the page and decide whether it's live or
 		 * dead, then count it and its size.  Unlike lazy_scan_heap, we can
@@ -153,25 +153,23 @@ statapprox_heap(Relation rel, output_type *stat)
 			tuple.t_tableOid = RelationGetRelid(rel);
 
 			/*
-			 * We count live and dead tuples, but we also need to add up
-			 * others in order to feed vac_estimate_reltuples.
+			 * We follow VACUUM's lead in counting INSERT_IN_PROGRESS tuples
+			 * as "dead" while DELETE_IN_PROGRESS tuples are "live".  We don't
+			 * bother distinguishing tuples inserted/deleted by our own
+			 * transaction.
 			 */
 			switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
 			{
-				case HEAPTUPLE_RECENTLY_DEAD:
-					misc_count++;
-					/* Fall through */
-				case HEAPTUPLE_DEAD:
-					stat->dead_tuple_len += tuple.t_len;
-					stat->dead_tuple_count++;
-					break;
 				case HEAPTUPLE_LIVE:
+				case HEAPTUPLE_DELETE_IN_PROGRESS:
 					stat->tuple_len += tuple.t_len;
 					stat->tuple_count++;
 					break;
+				case HEAPTUPLE_DEAD:
+				case HEAPTUPLE_RECENTLY_DEAD:
 				case HEAPTUPLE_INSERT_IN_PROGRESS:
-				case HEAPTUPLE_DELETE_IN_PROGRESS:
-					misc_count++;
+					stat->dead_tuple_len += tuple.t_len;
+					stat->dead_tuple_count++;
 					break;
 				default:
 					elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
@@ -184,8 +182,16 @@ statapprox_heap(Relation rel, output_type *stat)
 
 	stat->table_len = (uint64) nblocks * BLCKSZ;
 
+	/*
+	 * We don't know how many tuples are in the pages we didn't scan, so
+	 * extrapolate the live-tuple count to the whole table in the same way
+	 * that VACUUM does.  (Like VACUUM, we're not taking a random sample, so
+	 * just extrapolating linearly seems unsafe.)  There should be no dead
+	 * tuples in all-visible pages, so no correction is needed for that, and
+	 * we already accounted for the space in those pages, too.
+	 */
 	stat->tuple_count = vac_estimate_reltuples(rel, nblocks, scanned,
-											   stat->tuple_count + misc_count);
+											   stat->tuple_count);
 
 	/*
 	 * Calculate percentages if the relation has one or more pages.
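The two pgstattuple_approx changes above interact: once the live-tuple count is extrapolated through vac_estimate_reltuples(), new/empty pages must be reported as scanned, or the estimator would assume they still hold tuples at the table's old density. A minimal standalone model of that arithmetic (illustrative names, not the PostgreSQL source; the real function also treats the relpages == 0 corner case separately):

#include <math.h>
#include <stdio.h>

/*
 * Model of the density-based extrapolation: unscanned pages are assumed
 * to hold live tuples at the old reltuples/relpages density, while
 * scanned pages contribute their exact live-tuple count.
 */
static double
estimate_reltuples_model(double total_pages, double scanned_pages,
						 double old_rel_pages, double old_rel_tuples,
						 double scanned_live_tuples)
{
	double		old_density;

	if (scanned_pages >= total_pages)
		return scanned_live_tuples; /* scanned everything: count is exact */
	if (scanned_pages <= 0)
		return old_rel_tuples;	/* no new information at all */

	old_density = old_rel_pages > 0 ? old_rel_tuples / old_rel_pages : 0.0;
	return floor(old_density * (total_pages - scanned_pages) +
				 scanned_live_tuples + 0.5);
}

int
main(void)
{
	/*
	 * Assumed numbers: a 1000-page table that used to hold 100 live
	 * tuples per page.  We scan 200 not-all-visible pages and find 10000
	 * live tuples; 50 of those pages are new/empty.  If the empty pages
	 * were not reported as scanned (the old behavior), the estimator
	 * would assume they still hold 100 tuples each.
	 */
	printf("empty pages counted as scanned: %.0f\n",
		   estimate_reltuples_model(1000, 200, 1000, 100000, 10000));
	printf("empty pages left uncounted:     %.0f\n",
		   estimate_reltuples_model(1000, 150, 1000, 100000, 10000));
	return 0;
}

With the empty pages counted as scanned this prints 90000 rather than 95000; the 50 empty pages no longer masquerade as unscanned full ones.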
diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
index fc81133f07..c030cdab4b 100644
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -1739,8 +1739,8 @@ SCRAM-SHA-256$<iteration count>:&l
      <entry><type>float4</type></entry>
      <entry></entry>
      <entry>
-      Number of rows in the table.  This is only an estimate used by the
-      planner.  It is updated by <command>VACUUM</command>,
+      Number of live rows in the table.  This is only an estimate used by
+      the planner.  It is updated by <command>VACUUM</command>,
       <command>ANALYZE</command>, and a few DDL commands such as
       <command>CREATE INDEX</command>.
      </entry>
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 9e2dd0e729..f4a1efbf54 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -2366,12 +2366,12 @@ index_build(Relation heapRelation,
  * things to add it to the new index.  After we return, the AM's index
  * build procedure does whatever cleanup it needs.
  *
- * The total count of heap tuples is returned.  This is for updating pg_class
- * statistics.  (It's annoying not to be able to do that here, but we want
- * to merge that update with others; see index_update_stats.)  Note that the
- * index AM itself must keep track of the number of index tuples; we don't do
- * so here because the AM might reject some of the tuples for its own reasons,
- * such as being unable to store NULLs.
+ * The total count of live heap tuples is returned.  This is for updating
+ * pg_class statistics.  (It's annoying not to be able to do that here, but we
+ * want to merge that update with others; see index_update_stats.)  Note that
+ * the index AM itself must keep track of the number of index tuples; we don't
+ * do so here because the AM might reject some of the tuples for its own
+ * reasons, such as being unable to store NULLs.
  *
  * A side effect is to set indexInfo->ii_BrokenHotChain to true if we detect
  * any potentially broken HOT chains.  Currently, we set this if there are
@@ -2402,8 +2402,8 @@ IndexBuildHeapScan(Relation heapRelation,
  * to scan cannot be done when requesting syncscan.
  *
  * When "anyvisible" mode is requested, all tuples visible to any transaction
- * are considered, including those inserted or deleted by transactions that are
- * still in progress.
+ * are indexed and counted as live, including those inserted or deleted by
+ * transactions that are still in progress.
  */
 double
 IndexBuildHeapRangeScan(Relation heapRelation,
@@ -2599,6 +2599,12 @@ IndexBuildHeapRangeScan(Relation heapRelation,
 			 */
 			LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
 
+			/*
+			 * The criteria for counting a tuple as live in this block need to
+			 * match what analyze.c's acquire_sample_rows() does, otherwise
+			 * CREATE INDEX and ANALYZE may produce wildly different reltuples
+			 * values, e.g. when there are many recently-dead tuples.
+			 */
 			switch (HeapTupleSatisfiesVacuum(heapTuple, OldestXmin,
 											 scan->rs_cbuf))
 			{
@@ -2611,6 +2617,8 @@ IndexBuildHeapRangeScan(Relation heapRelation,
 					/* Normal case, index and unique-check it */
 					indexIt = true;
 					tupleIsAlive = true;
+					/* Count it as live, too */
+					reltuples += 1;
 					break;
 				case HEAPTUPLE_RECENTLY_DEAD:
 
@@ -2624,6 +2632,9 @@ IndexBuildHeapRangeScan(Relation heapRelation,
 					 * the live tuple at the end of the HOT-chain.  Since this
 					 * breaks semantics for pre-existing snapshots, mark the
 					 * index as unusable for them.
+					 *
+					 * We don't count recently-dead tuples in reltuples, even
+					 * if we index them; see acquire_sample_rows().
 					 */
 					if (HeapTupleIsHotUpdated(heapTuple))
 					{
@@ -2646,6 +2657,7 @@ IndexBuildHeapRangeScan(Relation heapRelation,
 					{
 						indexIt = true;
 						tupleIsAlive = true;
+						reltuples += 1;
 						break;
 					}
 
@@ -2683,6 +2695,15 @@ IndexBuildHeapRangeScan(Relation heapRelation,
 							goto recheck;
 						}
 					}
+					else
+					{
+						/*
+						 * For consistency with acquire_sample_rows(), count
+						 * HEAPTUPLE_INSERT_IN_PROGRESS tuples as live only
+						 * when inserted by our own transaction.
+						 */
+						reltuples += 1;
+					}
 
 					/*
 					 * We must index such tuples, since if the index build
@@ -2702,6 +2723,7 @@ IndexBuildHeapRangeScan(Relation heapRelation,
 					{
 						indexIt = true;
 						tupleIsAlive = false;
+						reltuples += 1;
 						break;
 					}
 
@@ -2745,6 +2767,14 @@ IndexBuildHeapRangeScan(Relation heapRelation,
 						 * the same as a RECENTLY_DEAD tuple.
 						 */
 						indexIt = true;
+
+						/*
+						 * Count HEAPTUPLE_DELETE_IN_PROGRESS tuples as live,
+						 * if they were not deleted by the current
+						 * transaction.  That's what acquire_sample_rows()
+						 * does, and we want the behavior to be consistent.
+						 */
+						reltuples += 1;
 					}
 					else if (HeapTupleIsHotUpdated(heapTuple))
 					{
@@ -2762,8 +2792,8 @@ IndexBuildHeapRangeScan(Relation heapRelation,
 					{
 						/*
 						 * It's a regular tuple deleted by our own xact.  Index
-						 * it but don't check for uniqueness, the same as a
-						 * RECENTLY_DEAD tuple.
+						 * it, but don't check for uniqueness nor count in
+						 * reltuples, the same as a RECENTLY_DEAD tuple.
 						 */
 						indexIt = true;
 					}
@@ -2787,8 +2817,6 @@ IndexBuildHeapRangeScan(Relation heapRelation,
 				tupleIsAlive = true;
 			}
 
-			reltuples += 1;
-
 			MemoryContextReset(econtext->ecxt_per_tuple_memory);
 
 			/* Set up for predicate or expression evaluation */
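The per-case reltuples increments added above, taken together with analyze.c's acquire_sample_rows(), reduce to one shared rule per HeapTupleSatisfiesVacuum() result. The sketch below (a hypothetical helper with assumed names, not part of the patch; it ignores "anyvisible" mode, which indexes and counts all in-progress tuples) condenses that rule. VACUUM and pgstattuple_approx are the degenerate case where both "our transaction" flags are false, since VACUUM cannot run inside a transaction block and pgstatapprox deliberately skips the distinction:

#include <stdbool.h>
#include <stdio.h>

/* local enum standing in for the HTSV_Result values */
typedef enum
{
	TUPLE_DEAD,
	TUPLE_LIVE,
	TUPLE_RECENTLY_DEAD,
	TUPLE_INSERT_IN_PROGRESS,
	TUPLE_DELETE_IN_PROGRESS
} tuple_vacuum_state;

static bool
counts_toward_reltuples(tuple_vacuum_state state,
						bool inserted_by_our_xact,
						bool deleted_by_our_xact)
{
	switch (state)
	{
		case TUPLE_LIVE:
			return true;
		case TUPLE_INSERT_IN_PROGRESS:
			/* only our own uncommitted insert counts as live */
			return inserted_by_our_xact;
		case TUPLE_DELETE_IN_PROGRESS:

			/*
			 * Assume a concurrent deleter will commit and fix the counters
			 * only after we report ours, so the tuple still counts; our own
			 * uncommitted delete is treated like RECENTLY_DEAD.
			 */
			return !deleted_by_our_xact;
		case TUPLE_DEAD:
		case TUPLE_RECENTLY_DEAD:
		default:
			return false;
	}
}

int
main(void)
{
	/* the VACUUM special case: both "our xact" flags are false */
	printf("concurrent delete in progress -> %s\n",
		   counts_toward_reltuples(TUPLE_DELETE_IN_PROGRESS,
								   false, false) ? "live" : "not live");
	return 0;
}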
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 3985c4dd0e..a1782c2874 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -771,6 +771,9 @@ vacuum_set_xid_limits(Relation rel,
  * subset of the table.  When we have only partial information, we take
  * the old value of pg_class.reltuples as a measurement of the
  * tuple density in the unscanned pages.
+ *
+ * Note: scanned_tuples should count only *live* tuples, since
+ * pg_class.reltuples is defined that way.
  */
 double
 vac_estimate_reltuples(Relation relation,
@@ -852,6 +855,9 @@ vacuum_set_xid_limits(Relation rel,
  * transaction.  This is OK since postponing the flag maintenance is
  * always allowable.
  *
+ * Note: num_tuples should count only *live* tuples, since
+ * pg_class.reltuples is defined that way.
+ *
 * This routine is shared by VACUUM and ANALYZE.
 */
 void
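The two notes added above pin down the units of these interfaces: vac_estimate_reltuples() and vac_update_relstats() now deal in live tuples only. A toy calculation (assumed numbers, not from the patch) of the flip-flop this removes when many recently-dead tuples are retained, say by a long-running transaction:

#include <stdio.h>

int
main(void)
{
	double		live = 1000000.0;
	double		recently_dead = 900000.0;

	/* old convention: VACUUM stored all nonremovable tuples */
	printf("old-style VACUUM stored:  %.0f\n", live + recently_dead);
	/* ANALYZE has always stored only live tuples */
	printf("ANALYZE stored:           %.0f\n", live);
	/* patched VACUUM agrees with ANALYZE */
	printf("patched VACUUM stores:    %.0f\n", live);
	return 0;
}

Before the patch, alternating VACUUM and ANALYZE on such a table would swing pg_class.reltuples between 1900000 and 1000000, nearly a factor of two in the planner's input.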
diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c
index 9ac84e8293..f9da24c491 100644
--- a/src/backend/commands/vacuumlazy.c
+++ b/src/backend/commands/vacuumlazy.c
@@ -114,9 +114,9 @@ typedef struct LVRelStats
 	BlockNumber pinskipped_pages;	/* # of pages we skipped due to a pin */
 	BlockNumber frozenskipped_pages;	/* # of frozen pages we skipped */
 	BlockNumber tupcount_pages; /* pages whose tuples we counted */
-	double		scanned_tuples; /* counts only tuples on tupcount_pages */
-	double		old_rel_tuples; /* previous value of pg_class.reltuples */
+	double		old_live_tuples;	/* previous value of pg_class.reltuples */
 	double		new_rel_tuples; /* new estimated total # of tuples */
+	double		new_live_tuples;	/* new estimated total # of live tuples */
 	double		new_dead_tuples;	/* new estimated total # of dead tuples */
 	BlockNumber pages_removed;
 	double		tuples_deleted;
@@ -196,7 +196,6 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params,
 	TransactionId xidFullScanLimit;
 	MultiXactId mxactFullScanLimit;
 	BlockNumber new_rel_pages;
-	double		new_rel_tuples;
 	BlockNumber new_rel_allvisible;
 	double		new_live_tuples;
 	TransactionId new_frozen_xid;
@@ -245,7 +244,7 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params,
 	vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));
 
 	vacrelstats->old_rel_pages = onerel->rd_rel->relpages;
-	vacrelstats->old_rel_tuples = onerel->rd_rel->reltuples;
+	vacrelstats->old_live_tuples = onerel->rd_rel->reltuples;
 	vacrelstats->num_index_scans = 0;
 	vacrelstats->pages_removed = 0;
 	vacrelstats->lock_waiter_detected = false;
@@ -311,11 +310,11 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params,
 	 * since then we don't know for certain that all tuples have a newer xmin.
 	 */
 	new_rel_pages = vacrelstats->rel_pages;
-	new_rel_tuples = vacrelstats->new_rel_tuples;
+	new_live_tuples = vacrelstats->new_live_tuples;
 	if (vacrelstats->tupcount_pages == 0 && new_rel_pages > 0)
 	{
 		new_rel_pages = vacrelstats->old_rel_pages;
-		new_rel_tuples = vacrelstats->old_rel_tuples;
+		new_live_tuples = vacrelstats->old_live_tuples;
 	}
 
 	visibilitymap_count(onerel, &new_rel_allvisible, NULL);
@@ -327,7 +326,7 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params,
 
 	vac_update_relstats(onerel,
 						new_rel_pages,
-						new_rel_tuples,
+						new_live_tuples,
 						new_rel_allvisible,
 						vacrelstats->hasindex,
 						new_frozen_xid,
@@ -335,10 +334,6 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params,
 						false);
 
 	/* report results to the stats collector, too */
-	new_live_tuples = new_rel_tuples - vacrelstats->new_dead_tuples;
-	if (new_live_tuples < 0)
-		new_live_tuples = 0;	/* just in case */
-
 	pgstat_report_vacuum(RelationGetRelid(onerel),
 						 onerel->rd_rel->relisshared,
 						 new_live_tuples,
@@ -471,10 +466,11 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
 	TransactionId relminmxid = onerel->rd_rel->relminmxid;
 	BlockNumber empty_pages,
 				vacuumed_pages;
-	double		num_tuples,
-				tups_vacuumed,
-				nkeep,
-				nunused;
+	double		num_tuples,		/* total number of nonremovable tuples */
+				live_tuples,	/* live tuples (reltuples estimate) */
+				tups_vacuumed,	/* tuples cleaned up by vacuum */
+				nkeep,			/* dead-but-not-removable tuples */
+				nunused;		/* unused item pointers */
 	IndexBulkDeleteResult **indstats;
 	int			i;
 	PGRUsage	ru0;
@@ -505,7 +501,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
 					relname)));
 
 	empty_pages = vacuumed_pages = 0;
-	num_tuples = tups_vacuumed = nkeep = nunused = 0;
+	num_tuples = live_tuples = tups_vacuumed = nkeep = nunused = 0;
 
 	indstats = (IndexBulkDeleteResult **)
 		palloc0(nindexes * sizeof(IndexBulkDeleteResult *));
@@ -988,6 +984,17 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
 			tupgone = false;
 
+			/*
+			 * The criteria for counting a tuple as live in this block need to
+			 * match what analyze.c's acquire_sample_rows() does, otherwise
+			 * VACUUM and ANALYZE may produce wildly different reltuples
+			 * values, e.g. when there are many recently-dead tuples.
+			 *
+			 * The logic here is a bit simpler than acquire_sample_rows(), as
+			 * VACUUM can't run inside a transaction block, which makes some
+			 * cases impossible (e.g. in-progress insert from the same
+			 * transaction).
+			 */
 			switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
 			{
 				case HEAPTUPLE_DEAD:
 
@@ -1028,6 +1035,12 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
 						elog(WARNING, "relation \"%s\" TID %u/%u: OID is invalid",
 							 relname, blkno, offnum);
 
+					/*
+					 * Count it as live.  Not only is this natural, but it's
+					 * also what acquire_sample_rows() does.
+					 */
+					live_tuples += 1;
+
 					/*
 					 * Is the tuple definitely visible to all transactions?
 					 *
@@ -1073,12 +1086,29 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
 					all_visible = false;
 					break;
 				case HEAPTUPLE_INSERT_IN_PROGRESS:
-					/* This is an expected case during concurrent vacuum */
+
+					/*
+					 * This is an expected case during concurrent vacuum.
+					 *
+					 * We do not count these rows as live, because we expect
+					 * the inserting transaction to update the counters at
+					 * commit, and we assume that will happen only after we
+					 * report our results.  This assumption is a bit shaky,
+					 * but it is what acquire_sample_rows() does, so be
+					 * consistent.
+					 */
 					all_visible = false;
 					break;
 				case HEAPTUPLE_DELETE_IN_PROGRESS:
 					/* This is an expected case during concurrent vacuum */
 					all_visible = false;
+
+					/*
+					 * Count such rows as live.  As above, we assume the
+					 * deleting transaction will commit and update the
+					 * counters after we report.
+					 */
+					live_tuples += 1;
 					break;
 				default:
 					elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
@@ -1281,15 +1311,18 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
 	pfree(frozen);
 
 	/* save stats for use later */
-	vacrelstats->scanned_tuples = num_tuples;
 	vacrelstats->tuples_deleted = tups_vacuumed;
 	vacrelstats->new_dead_tuples = nkeep;
 
 	/* now we can compute the new value for pg_class.reltuples */
-	vacrelstats->new_rel_tuples = vac_estimate_reltuples(onerel,
-														 nblocks,
-														 vacrelstats->tupcount_pages,
-														 num_tuples);
+	vacrelstats->new_live_tuples = vac_estimate_reltuples(onerel,
+														  nblocks,
+														  vacrelstats->tupcount_pages,
+														  live_tuples);
+
+	/* also compute total number of surviving heap entries */
+	vacrelstats->new_rel_tuples =
+		vacrelstats->new_live_tuples + vacrelstats->new_dead_tuples;
 
 	/*
 	 * Release any remaining pin on visibility map page.
@@ -1625,7 +1658,8 @@ lazy_vacuum_index(Relation indrel,
 	ivinfo.analyze_only = false;
 	ivinfo.estimated_count = true;
 	ivinfo.message_level = elevel;
-	ivinfo.num_heap_tuples = vacrelstats->old_rel_tuples;
+	/* We can only provide an approximate value of num_heap_tuples here */
+	ivinfo.num_heap_tuples = vacrelstats->old_live_tuples;
 	ivinfo.strategy = vac_strategy;
 
 	/* Do bulk deletion */
@@ -1656,6 +1690,12 @@ lazy_cleanup_index(Relation indrel,
 	ivinfo.analyze_only = false;
 	ivinfo.estimated_count = (vacrelstats->tupcount_pages < vacrelstats->rel_pages);
 	ivinfo.message_level = elevel;
+
+	/*
+	 * Now we can provide a better estimate of total number of surviving
+	 * tuples (we assume indexes are more interested in that than in the
+	 * number of nominally live tuples).
+	 */
 	ivinfo.num_heap_tuples = vacrelstats->new_rel_tuples;
 	ivinfo.strategy = vac_strategy;
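To summarize the vacuumlazy.c flow after this patch: lazy_scan_heap() measures live tuples directly, vac_estimate_reltuples() extrapolates that figure to the whole table for pg_class and the stats collector, and index cleanup instead receives live + dead, the total number of surviving heap entries. A compressed sketch of the resulting arithmetic (illustrative variables only, not the patched source):

#include <stdio.h>

int
main(void)
{
	double		new_live_tuples = 90000.0;	/* from vac_estimate_reltuples() */
	double		new_dead_tuples = 5000.0;	/* nkeep: dead but unremovable */

	/*
	 * Previously the total came first and the live count was derived by
	 * subtraction, clamped at zero "just in case"; now the live count is
	 * primary and the total is derived by addition, so no clamp is needed.
	 */
	double		new_rel_tuples = new_live_tuples + new_dead_tuples;

	printf("pg_class.reltuples and pgstat live count: %.0f\n",
		   new_live_tuples);
	printf("ivinfo.num_heap_tuples for index cleanup: %.0f\n",
		   new_rel_tuples);
	return 0;
}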