From 64058429c54afb49a679c58e5ff699640bd42502 Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Thu, 17 May 2007 19:11:25 +0000 Subject: [PATCH] Add database page inspection /contrib module. Simon and Heikki --- contrib/Makefile | 3 +- contrib/README | 4 + contrib/pageinspect/Makefile | 24 + contrib/pageinspect/README.pageinspect | 94 ++++ contrib/pageinspect/btreefuncs.c | 499 +++++++++++++++++ contrib/pageinspect/heapfuncs.c | 229 ++++++++ contrib/pageinspect/pageinspect.sql.in | 109 ++++ contrib/pageinspect/rawpage.c | 164 ++++++ contrib/pageinspect/uninstall_pageinspect.sql | 20 + contrib/pgstattuple/README.pgstattuple | 51 +- contrib/pgstattuple/pgstatindex.c | 512 ++---------------- contrib/pgstattuple/pgstattuple.sql.in | 56 -- contrib/pgstattuple/uninstall_pgstattuple.sql | 9 - 13 files changed, 1186 insertions(+), 588 deletions(-) create mode 100644 contrib/pageinspect/Makefile create mode 100644 contrib/pageinspect/README.pageinspect create mode 100644 contrib/pageinspect/btreefuncs.c create mode 100644 contrib/pageinspect/heapfuncs.c create mode 100644 contrib/pageinspect/pageinspect.sql.in create mode 100644 contrib/pageinspect/rawpage.c create mode 100644 contrib/pageinspect/uninstall_pageinspect.sql diff --git a/contrib/Makefile b/contrib/Makefile index 9b2bdc5448..dfb65d68be 100644 --- a/contrib/Makefile +++ b/contrib/Makefile @@ -1,4 +1,4 @@ -# $PostgreSQL: pgsql/contrib/Makefile,v 1.75 2007/04/21 17:26:17 petere Exp $ +# $PostgreSQL: pgsql/contrib/Makefile,v 1.76 2007/05/17 19:11:24 momjian Exp $ subdir = contrib top_builddir = .. 
@@ -19,6 +19,7 @@ WANTED_DIRS = \ lo \ ltree \ oid2name \ + pageinspect \ pg_buffercache \ pg_freespacemap \ pg_standby \ diff --git a/contrib/README b/contrib/README index 7c5034d48c..5b2167ec97 100644 --- a/contrib/README +++ b/contrib/README @@ -80,6 +80,10 @@ oid2name - Maps numeric files to table names by B Palmer +pageinspect - + Allows inspection of database pages + Heikki Linnakangas + pg_buffercache - Real time queries on the shared buffer cache by Mark Kirkwood diff --git a/contrib/pageinspect/Makefile b/contrib/pageinspect/Makefile new file mode 100644 index 0000000000..5222e89e62 --- /dev/null +++ b/contrib/pageinspect/Makefile @@ -0,0 +1,24 @@ +#------------------------------------------------------------------------- +# +# pageinspect Makefile +# +# $PostgreSQL: pgsql/contrib/pageinspect/Makefile,v 1.1 2007/05/17 19:11:24 momjian Exp $ +# +#------------------------------------------------------------------------- + +MODULE_big = pageinspect +OBJS = rawpage.o heapfuncs.o btreefuncs.o +DOCS = README.pageinspect +DATA_built = pageinspect.sql +DATA = uninstall_pageinspect.sql + +ifdef USE_PGXS +PGXS := $(shell pg_config --pgxs) +include $(PGXS) +else +subdir = contrib/pageinspect +top_builddir = ../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif + diff --git a/contrib/pageinspect/README.pageinspect b/contrib/pageinspect/README.pageinspect new file mode 100644 index 0000000000..c9af5cdcb8 --- /dev/null +++ b/contrib/pageinspect/README.pageinspect @@ -0,0 +1,94 @@ +The functions in this module allow you to inspect the contents of data pages +at a low level, for debugging purposes. + +1. Installation + + $ make + $ make install + $ psql -e -f /usr/local/pgsql/share/contrib/pageinspect.sql test + +2. Functions included: + + get_raw_page + ------------ + get_raw_page reads one block of the named table and returns a copy as a + bytea field. 
This allows a single time-consistent copy of the block to be + made. Use of this function is restricted to superusers. + + page_header + ----------- + page_header shows fields which are common to all PostgreSQL heap and index + pages. Use of this function is restricted to superusers. + + A page image obtained with get_raw_page should be passed as argument: + + test=# SELECT * FROM page_header(get_raw_page('pg_class',0)); + lsn | tli | flags | lower | upper | special | pagesize | version + ----------+-----+-------+-------+-------+---------+----------+--------- + 0/3C5614 | 1 | 1 | 216 | 256 | 8192 | 8192 | 4 + (1 row) + + The returned columns correspond to the fields in the PageHeaderData-struct, + see src/include/storage/bufpage.h for more details. + + heap_page_items + --------------- + heap_page_items shows all line pointers on a heap page. For those line + pointers that are in use, tuple headers are also shown. All tuples are + shown, whether or not the tuples were visible to an MVCC snapshot at the + time the raw page was copied. Use of this function is restricted to + superusers. + + A heap page image obtained with get_raw_page should be passed as argument: + + test=# SELECT * FROM heap_page_items(get_raw_page('pg_class',0)); + + See src/include/storage/itemid.h and src/include/access/htup.h for + explanations of the fields returned. 
+ + bt_metap + -------- + bt_metap() returns information about the btree index metapage: + + test=> SELECT * FROM bt_metap('pg_cast_oid_index'); + -[ RECORD 1 ]----- + magic | 340322 + version | 2 + root | 1 + level | 0 + fastroot | 1 + fastlevel | 0 + + bt_page_stats + ------------- + bt_page_stats() shows information about single btree pages: + + test=> SELECT * FROM bt_page_stats('pg_cast_oid_index', 1); + -[ RECORD 1 ]-+----- + blkno | 1 + type | l + live_items | 256 + dead_items | 0 + avg_item_size | 12 + page_size | 8192 + free_size | 4056 + btpo_prev | 0 + btpo_next | 0 + btpo | 0 + btpo_flags | 3 + + bt_page_items + ------------- + bt_page_items() returns information about specific items on btree pages: + + test=> SELECT * FROM bt_page_items('pg_cast_oid_index', 1); + itemoffset | ctid | itemlen | nulls | vars | data + ------------+---------+---------+-------+------+------------- + 1 | (0,1) | 12 | f | f | 23 27 00 00 + 2 | (0,2) | 12 | f | f | 24 27 00 00 + 3 | (0,3) | 12 | f | f | 25 27 00 00 + 4 | (0,4) | 12 | f | f | 26 27 00 00 + 5 | (0,5) | 12 | f | f | 27 27 00 00 + 6 | (0,6) | 12 | f | f | 28 27 00 00 + 7 | (0,7) | 12 | f | f | 29 27 00 00 + 8 | (0,8) | 12 | f | f | 2a 27 00 00 diff --git a/contrib/pageinspect/btreefuncs.c b/contrib/pageinspect/btreefuncs.c new file mode 100644 index 0000000000..9b392d047b --- /dev/null +++ b/contrib/pageinspect/btreefuncs.c @@ -0,0 +1,499 @@ +/* + * btreefuncs.c + * + * Copyright (c) 2006 Satoshi Nagayasu + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose, without fee, and without a + * written agreement is hereby granted, provided that the above + * copyright notice and this paragraph and the following two + * paragraphs appear in all copies. 
+ * + * IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT, + * INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING + * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS + * DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * + * THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS + * IS" BASIS, AND THE AUTHOR HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, + * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +#include "postgres.h" + +#include "fmgr.h" +#include "funcapi.h" +#include "access/heapam.h" +#include "access/itup.h" +#include "access/nbtree.h" +#include "access/transam.h" +#include "catalog/namespace.h" +#include "catalog/pg_type.h" +#include "utils/builtins.h" +#include "utils/inval.h" + +PG_FUNCTION_INFO_V1(bt_metap); +PG_FUNCTION_INFO_V1(bt_page_items); +PG_FUNCTION_INFO_V1(bt_page_stats); + +extern Datum bt_metap(PG_FUNCTION_ARGS); +extern Datum bt_page_items(PG_FUNCTION_ARGS); +extern Datum bt_page_stats(PG_FUNCTION_ARGS); + +#define BTMETAP_TYPE "public.bt_metap_type" +#define BTMETAP_NCOLUMNS 6 + +#define BTPAGEITEMS_TYPE "public.bt_page_items_type" +#define BTPAGEITEMS_NCOLUMNS 6 + +#define BTPAGESTATS_TYPE "public.bt_page_stats_type" +#define BTPAGESTATS_NCOLUMNS 11 + + +#define IS_INDEX(r) ((r)->rd_rel->relkind == 'i') +#define IS_BTREE(r) ((r)->rd_rel->relam == BTREE_AM_OID) + +#define CHECK_PAGE_OFFSET_RANGE(page, offset) { \ + if ( !(FirstOffsetNumber<=(offset) && \ + (offset)<=PageGetMaxOffsetNumber(page)) ) \ + elog(ERROR, "Page offset number out of range."); } + +#define CHECK_RELATION_BLOCK_RANGE(rel, blkno) { /* error out unless blkno is an existing block of rel */ \ + if ( RelationGetNumberOfBlocks((rel)) <= (BlockNumber) (blkno) ) \ + elog(ERROR, "Block number out of range."); } + +/* ------------------------------------------------ +
* structure for single btree page statistics + * ------------------------------------------------ + */ +typedef struct BTPageStat +{ + uint32 blkno; + uint32 live_items; + uint32 dead_items; + uint32 page_size; + uint32 max_avail; + uint32 free_size; + uint32 avg_item_size; + char type; + + /* opaque data */ + BlockNumber btpo_prev; + BlockNumber btpo_next; + union + { + uint32 level; + TransactionId xact; + } btpo; + uint16 btpo_flags; + BTCycleId btpo_cycleid; +} BTPageStat; + +/* ------------------------------------------------ + * A structure for a whole btree index statistics + * used by pgstatindex(). + * ------------------------------------------------ + */ +typedef struct BTIndexStat +{ + uint32 magic; + uint32 version; + BlockNumber root_blkno; + uint32 level; + + BlockNumber fastroot; + uint32 fastlevel; + + uint32 live_items; + uint32 dead_items; + + uint32 root_pages; + uint32 internal_pages; + uint32 leaf_pages; + uint32 empty_pages; + uint32 deleted_pages; + + uint32 page_size; + uint32 avg_item_size; + + uint32 max_avail; + uint32 free_space; +} BTIndexStat; + + +/* ------------------------------------------------- + * GetBTPageStatistics() + * + * Collect statistics of single b-tree leaf page + * ------------------------------------------------- + */ +static void +GetBTPageStatistics(BlockNumber blkno, Buffer buffer, BTPageStat * stat) +{ + Page page = BufferGetPage(buffer); + PageHeader phdr = (PageHeader) page; + OffsetNumber maxoff = PageGetMaxOffsetNumber(page); + BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page); + int item_size = 0; + int off; + + stat->blkno = blkno; + + stat->max_avail = BLCKSZ - (BLCKSZ - phdr->pd_special + SizeOfPageHeaderData); + + stat->dead_items = stat->live_items = 0; + + stat->page_size = PageGetPageSize(page); + + /* page type (flags) */ + if (P_ISDELETED(opaque)) + { + stat->type = 'd'; + stat->btpo.xact = opaque->btpo.xact; + return; + } + else if (P_IGNORE(opaque)) + stat->type = 'e'; + else if 
(P_ISLEAF(opaque)) + stat->type = 'l'; + else if (P_ISROOT(opaque)) + stat->type = 'r'; + else + stat->type = 'i'; + + /* btpage opaque data */ + stat->btpo_prev = opaque->btpo_prev; + stat->btpo_next = opaque->btpo_next; + stat->btpo.level = opaque->btpo.level; + stat->btpo_flags = opaque->btpo_flags; + stat->btpo_cycleid = opaque->btpo_cycleid; + + /* count live and dead tuples, and free space */ + for (off = FirstOffsetNumber; off <= maxoff; off++) + { + IndexTuple itup; + + ItemId id = PageGetItemId(page, off); + + itup = (IndexTuple) PageGetItem(page, id); + + item_size += IndexTupleSize(itup); + + if (!ItemIdDeleted(id)) + stat->live_items++; + else + stat->dead_items++; + } + stat->free_size = PageGetFreeSpace(page); + + if ((stat->live_items + stat->dead_items) > 0) + stat->avg_item_size = item_size / (stat->live_items + stat->dead_items); + else + stat->avg_item_size = 0; +} + +/* ----------------------------------------------- + * bt_page() + * + * Usage: SELECT * FROM bt_page('t1_pkey', 0); + * ----------------------------------------------- + */ +Datum +bt_page_stats(PG_FUNCTION_ARGS) +{ + text *relname = PG_GETARG_TEXT_P(0); + uint32 blkno = PG_GETARG_UINT32(1); + Buffer buffer; + + Relation rel; + RangeVar *relrv; + Datum result; + + relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); + rel = relation_openrv(relrv, AccessShareLock); + + CHECK_RELATION_BLOCK_RANGE(rel, blkno); + + buffer = ReadBuffer(rel, blkno); + + if (!IS_INDEX(rel) || !IS_BTREE(rel)) + elog(ERROR, "bt_page_stats() can be used only on b-tree index."); + + if (blkno == 0) + elog(ERROR, "Block 0 is a meta page."); + + { + HeapTuple tuple; + TupleDesc tupleDesc; + int j; + char *values[BTPAGESTATS_NCOLUMNS]; + + BTPageStat stat; + + /* keep compiler quiet */ + stat.btpo_prev = stat.btpo_next = InvalidBlockNumber; + stat.btpo_flags = stat.free_size = stat.avg_item_size = 0; + + GetBTPageStatistics(blkno, buffer, &stat); + + tupleDesc = 
RelationNameGetTupleDesc(BTPAGESTATS_TYPE); + + j = 0; + values[j] = palloc(32); + snprintf(values[j++], 32, "%d", stat.blkno); + + values[j] = palloc(32); + snprintf(values[j++], 32, "%c", stat.type); + values[j] = palloc(32); + snprintf(values[j++], 32, "%d", stat.live_items); + values[j] = palloc(32); + snprintf(values[j++], 32, "%d", stat.dead_items); + values[j] = palloc(32); + snprintf(values[j++], 32, "%d", stat.avg_item_size); + values[j] = palloc(32); + snprintf(values[j++], 32, "%d", stat.page_size); + values[j] = palloc(32); + snprintf(values[j++], 32, "%d", stat.free_size); + values[j] = palloc(32); + snprintf(values[j++], 32, "%d", stat.btpo_prev); + values[j] = palloc(32); + snprintf(values[j++], 32, "%d", stat.btpo_next); + + values[j] = palloc(32); + if (stat.type == 'd') + snprintf(values[j++], 32, "%d", stat.btpo.xact); + else + snprintf(values[j++], 32, "%d", stat.btpo.level); + + values[j] = palloc(32); + snprintf(values[j++], 32, "%d", stat.btpo_flags); + + tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc), + values); + + result = TupleGetDatum(TupleDescGetSlot(tupleDesc), tuple); + } + + ReleaseBuffer(buffer); + + relation_close(rel, AccessShareLock); + + PG_RETURN_DATUM(result); +} + +/*------------------------------------------------------- + * bt_page_items() + * + * Get IndexTupleData set in a leaf page + * + * Usage: SELECT * FROM bt_page_items('t1_pkey', 0); + *------------------------------------------------------- + */ +/* --------------------------------------------------- + * data structure for SRF to hold a scan information + * --------------------------------------------------- + */ +struct user_args +{ + TupleDesc tupd; + Relation rel; + Buffer buffer; + Page page; + uint16 offset; +}; + +Datum +bt_page_items(PG_FUNCTION_ARGS) +{ + text *relname = PG_GETARG_TEXT_P(0); + uint32 blkno = PG_GETARG_UINT32(1); + + RangeVar *relrv; + Datum result; + char *values[BTPAGEITEMS_NCOLUMNS]; + BTPageOpaque opaque; + HeapTuple 
tuple; + ItemId id; + + FuncCallContext *fctx; + MemoryContext mctx; + struct user_args *uargs = NULL; + + if (blkno == 0) + elog(ERROR, "Block 0 is a meta page."); + + if (SRF_IS_FIRSTCALL()) + { + fctx = SRF_FIRSTCALL_INIT(); + mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx); + + uargs = palloc(sizeof(struct user_args)); + + uargs->tupd = RelationNameGetTupleDesc(BTPAGEITEMS_TYPE); + uargs->offset = FirstOffsetNumber; + + relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); + uargs->rel = relation_openrv(relrv, AccessShareLock); + + CHECK_RELATION_BLOCK_RANGE(uargs->rel, blkno); + + uargs->buffer = ReadBuffer(uargs->rel, blkno); + + if (!IS_INDEX(uargs->rel) || !IS_BTREE(uargs->rel)) + elog(ERROR, "bt_page_items() can be used only on b-tree index."); + + uargs->page = BufferGetPage(uargs->buffer); + + opaque = (BTPageOpaque) PageGetSpecialPointer(uargs->page); + + if (P_ISDELETED(opaque)) + elog(NOTICE, "bt_page_items(): this page is deleted."); + + fctx->max_calls = PageGetMaxOffsetNumber(uargs->page); + fctx->user_fctx = uargs; + + MemoryContextSwitchTo(mctx); + } + + fctx = SRF_PERCALL_SETUP(); + uargs = fctx->user_fctx; + + if (fctx->call_cntr < fctx->max_calls) + { + IndexTuple itup; + + id = PageGetItemId(uargs->page, uargs->offset); + + if (!ItemIdIsValid(id)) + elog(ERROR, "Invalid ItemId."); + + itup = (IndexTuple) PageGetItem(uargs->page, id); + + { + int j = 0; + + BlockNumber blkno = BlockIdGetBlockNumber(&(itup->t_tid.ip_blkid)); + + values[j] = palloc(32); + snprintf(values[j++], 32, "%d", uargs->offset); + values[j] = palloc(32); + snprintf(values[j++], 32, "(%u,%u)", blkno, itup->t_tid.ip_posid); + values[j] = palloc(32); + snprintf(values[j++], 32, "%d", (int) IndexTupleSize(itup)); + values[j] = palloc(32); + snprintf(values[j++], 32, "%c", IndexTupleHasNulls(itup) ? 't' : 'f'); + values[j] = palloc(32); + snprintf(values[j++], 32, "%c", IndexTupleHasVarwidths(itup) ? 
't' : 'f'); + + { + int off; + char *dump; + char *ptr = (char *) itup + IndexInfoFindDataOffset(itup->t_info); + + dump = palloc(IndexTupleSize(itup) * 3); + memset(dump, 0, IndexTupleSize(itup) * 3); + + for (off = 0; + off < IndexTupleSize(itup) - IndexInfoFindDataOffset(itup->t_info); + off++) + { + if (dump[0] == '\0') + sprintf(dump, "%02x", *(ptr + off) & 0xff); + else + { + char buf[4]; + + sprintf(buf, " %02x", *(ptr + off) & 0xff); + strcat(dump, buf); + } + } + values[j] = dump; + } + + tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(uargs->tupd), values); + result = TupleGetDatum(TupleDescGetSlot(uargs->tupd), tuple); + } + + uargs->offset = uargs->offset + 1; + + SRF_RETURN_NEXT(fctx, result); + } + else + { + ReleaseBuffer(uargs->buffer); + relation_close(uargs->rel, AccessShareLock); + + SRF_RETURN_DONE(fctx); + } +} + + +/* ------------------------------------------------ + * bt_metap() + * + * Get a btree meta-page information + * + * Usage: SELECT * FROM bt_metap('t1_pkey') + * ------------------------------------------------ + */ +Datum +bt_metap(PG_FUNCTION_ARGS) +{ + text *relname = PG_GETARG_TEXT_P(0); + Buffer buffer; + + Relation rel; + RangeVar *relrv; + Datum result; + + relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); + rel = relation_openrv(relrv, AccessShareLock); + + if (!IS_INDEX(rel) || !IS_BTREE(rel)) + elog(ERROR, "bt_metap() can be used only on b-tree index."); + + buffer = ReadBuffer(rel, 0); + + { + BTMetaPageData *metad; + + TupleDesc tupleDesc; + int j; + char *values[BTMETAP_NCOLUMNS]; + HeapTuple tuple; + + Page page = BufferGetPage(buffer); + + metad = BTPageGetMeta(page); + + tupleDesc = RelationNameGetTupleDesc(BTMETAP_TYPE); + + j = 0; + values[j] = palloc(32); + snprintf(values[j++], 32, "%d", metad->btm_magic); + values[j] = palloc(32); + snprintf(values[j++], 32, "%d", metad->btm_version); + values[j] = palloc(32); + snprintf(values[j++], 32, "%d", metad->btm_root); + values[j] = 
palloc(32); + snprintf(values[j++], 32, "%d", metad->btm_level); + values[j] = palloc(32); + snprintf(values[j++], 32, "%d", metad->btm_fastroot); + values[j] = palloc(32); + snprintf(values[j++], 32, "%d", metad->btm_fastlevel); + + tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc), + values); + + result = TupleGetDatum(TupleDescGetSlot(tupleDesc), tuple); + } + + ReleaseBuffer(buffer); + + relation_close(rel, AccessShareLock); + + PG_RETURN_DATUM(result); +} diff --git a/contrib/pageinspect/heapfuncs.c b/contrib/pageinspect/heapfuncs.c new file mode 100644 index 0000000000..abf50de5fa --- /dev/null +++ b/contrib/pageinspect/heapfuncs.c @@ -0,0 +1,229 @@ +/*------------------------------------------------------------------------- + * + * heapfuncs.c + * Functions to investigate heap pages + * + * We check the input to these functions for corrupt pointers etc. that + * might cause crashes, but at the same time we try to print out as much + * information as possible, even if it's nonsense. That's because if a + * page is corrupt, we don't know why and how exactly it is corrupt, so we + * let the user to judge it. + * + * These functions are restricted to superusers for the fear of introducing + * security holes if the input checking isn't as water-tight as it should. + * You'd need to be superuser to obtain a raw page image anyway, so + * there's hardly any use case for using these without superuser-rights + * anyway. 
+ * + * Copyright (c) 2007, PostgreSQL Global Development Group + * + * IDENTIFICATION + * $PostgreSQL: pgsql/contrib/pageinspect/heapfuncs.c,v 1.1 2007/05/17 19:11:24 momjian Exp $ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "fmgr.h" +#include "funcapi.h" +#include "access/heapam.h" +#include "access/transam.h" +#include "catalog/namespace.h" +#include "catalog/pg_type.h" +#include "utils/builtins.h" +#include "miscadmin.h" + +Datum heap_page_items(PG_FUNCTION_ARGS); + +#define GET_TEXT(str_) \ + DirectFunctionCall1(textin, CStringGetDatum(str_)) + +/* + * bits_to_text + * + * Converts a bits8-array of 'len' bits to a human-readable + * c-string representation. + */ +static char * +bits_to_text(bits8 *bits, int len) +{ + int i; + char *str; + + str = palloc(len + 1); + + for(i = 0; i < len; i++) + str[i] = (bits[(i / 8)] & (1 << (i % 8))) ? '1' : '0'; + + str[i] = '\0'; + + return str; +} + + +/* + * heap_page_items + * + * Allows inspection of line pointers and tuple headers of a heap page. 
+ */ +PG_FUNCTION_INFO_V1(heap_page_items); + +typedef struct heap_page_items_state +{ + TupleDesc tupd; + Page page; + uint16 offset; +} heap_page_items_state; + +Datum +heap_page_items(PG_FUNCTION_ARGS) +{ + bytea *raw_page = PG_GETARG_BYTEA_P(0); + heap_page_items_state *inter_call_data = NULL; + FuncCallContext *fctx; + int raw_page_size; + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + (errmsg("must be superuser to use raw page functions")))); + + raw_page_size = VARSIZE(raw_page) - VARHDRSZ; + + if (SRF_IS_FIRSTCALL()) + { + TupleDesc tupdesc; + MemoryContext mctx; + + if(raw_page_size < SizeOfPageHeaderData) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("input page too small (%d bytes)", raw_page_size))); + + fctx = SRF_FIRSTCALL_INIT(); + mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx); + + inter_call_data = palloc(sizeof(heap_page_items_state)); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + inter_call_data->tupd = tupdesc; + + inter_call_data->offset = FirstOffsetNumber; + inter_call_data->page = VARDATA(raw_page); + + fctx->max_calls = PageGetMaxOffsetNumber(inter_call_data->page); + fctx->user_fctx = inter_call_data; + + MemoryContextSwitchTo(mctx); + } + + fctx = SRF_PERCALL_SETUP(); + inter_call_data = fctx->user_fctx; + + if (fctx->call_cntr < fctx->max_calls) + { + Page page = inter_call_data->page; + HeapTuple resultTuple; + Datum result; + ItemId id; + Datum values[13]; + bool nulls[13]; + uint16 lp_offset; + uint16 lp_flags; + uint16 lp_len; + + memset(nulls, 0, sizeof(nulls)); + + /* Extract information from the line pointer */ + + id = PageGetItemId(page, inter_call_data->offset); + + lp_offset = ItemIdGetOffset(id); + lp_flags = ItemIdGetFlags(id); + lp_len = ItemIdGetLength(id); + + values[0] = UInt16GetDatum(inter_call_data->offset); + 
values[1] = UInt16GetDatum(lp_offset); + values[2] = UInt16GetDatum(lp_flags); + values[3] = UInt16GetDatum(lp_len); + + /* We do just enough validity checking to make sure we don't + * reference data outside the page passed to us. The page + * could be corrupt in many other ways, but at least we won't + * crash. + */ + if ((lp_len >= offsetof(HeapTupleHeaderData, t_bits)) && + (lp_offset == MAXALIGN(lp_offset)) && + (lp_offset + lp_len <= raw_page_size) && + ItemIdIsUsed(id)) + { + HeapTupleHeader tuphdr; + int bits_len; + + /* Extract information from the tuple header */ + + tuphdr = (HeapTupleHeader) PageGetItem(page, id); + + values[4] = UInt32GetDatum(HeapTupleHeaderGetXmin(tuphdr)); + values[5] = UInt32GetDatum(HeapTupleHeaderGetXmax(tuphdr)); + values[6] = UInt32GetDatum(HeapTupleHeaderGetRawCommandId(tuphdr)); /* shared with xvac */ + values[7] = PointerGetDatum(&tuphdr->t_ctid); + values[8] = UInt16GetDatum(tuphdr->t_infomask2); + values[9] = UInt16GetDatum(tuphdr->t_infomask); + values[10] = UInt8GetDatum(tuphdr->t_hoff); + + /* We already checked that the item is completely within + * the raw page passed to us, with the length given in the line + * pointer. Let's check that t_hoff doesn't point over lp_len, + * before using it to access t_bits and oid. + */ + if (tuphdr->t_hoff >= offsetof(HeapTupleHeaderData, t_bits) && + tuphdr->t_hoff <= lp_len) + { + if (tuphdr->t_infomask & HEAP_HASNULL) + { + bits_len = tuphdr->t_hoff - + (((char *)tuphdr->t_bits) - ((char *)tuphdr)); + + values[11] = GET_TEXT( + bits_to_text(tuphdr->t_bits, bits_len * 8)); + } + else + nulls[11] = true; + + if (tuphdr->t_infomask & HEAP_HASOID) + values[12] = HeapTupleHeaderGetOid(tuphdr); + else + nulls[12] = true; + } + else + { + nulls[11] = true; + nulls[12] = true; + } + } + else + { + /* The line pointer is not used, or it's invalid. Set the rest of + * the fields to NULL */ + int i; + + for(i = 4; i <= 12; i++) + nulls[i] = true; + } + + /* Build and return the result tuple. 
*/ + resultTuple = heap_form_tuple(inter_call_data->tupd, values, nulls); + result = HeapTupleGetDatum(resultTuple); + + inter_call_data->offset++; + + SRF_RETURN_NEXT(fctx, result); + } + else + SRF_RETURN_DONE(fctx); +} diff --git a/contrib/pageinspect/pageinspect.sql.in b/contrib/pageinspect/pageinspect.sql.in new file mode 100644 index 0000000000..3bd2825f22 --- /dev/null +++ b/contrib/pageinspect/pageinspect.sql.in @@ -0,0 +1,109 @@ +-- Adjust this setting to control where the objects get created. +SET search_path = public; + +-- +-- get_raw_page() +-- +CREATE OR REPLACE FUNCTION get_raw_page(text, int4) +RETURNS bytea +AS 'MODULE_PATHNAME', 'get_raw_page' +LANGUAGE C STRICT; + +-- +-- page_header() +-- +CREATE TYPE page_header_type AS ( + lsn text, + tli smallint, + flags smallint, + lower smallint, + upper smallint, + special smallint, + pagesize smallint, + version smallint +); + +CREATE OR REPLACE FUNCTION page_header(bytea) +RETURNS page_header_type +AS 'MODULE_PATHNAME', 'page_header' +LANGUAGE C STRICT; + +-- +-- heap_page_items() +-- +CREATE TYPE heap_page_items_type AS ( + lp smallint, + lp_off smallint, + lp_flags smallint, + lp_len smallint, + t_xmin xid, + t_xmax xid, + t_field3 int4, + t_ctid tid, + t_infomask2 smallint, + t_infomask smallint, + t_hoff smallint, + t_bits text, + t_oid oid +); + +CREATE OR REPLACE FUNCTION heap_page_items(bytea) +RETURNS SETOF heap_page_items_type +AS 'MODULE_PATHNAME', 'heap_page_items' +LANGUAGE C STRICT; + +-- +-- bt_metap() +-- +CREATE TYPE bt_metap_type AS ( + magic int4, + version int4, + root int4, + level int4, + fastroot int4, + fastlevel int4 +); + +CREATE OR REPLACE FUNCTION bt_metap(text) +RETURNS bt_metap_type +AS 'MODULE_PATHNAME', 'bt_metap' +LANGUAGE 'C' STRICT; + +-- +-- bt_page_stats() +-- +CREATE TYPE bt_page_stats_type AS ( + blkno int4, + type char, + live_items int4, + dead_items int4, + avg_item_size float, + page_size int4, + free_size int4, + btpo_prev int4, + btpo_next int4, + btpo int4, + 
btpo_flags int4 +); + +CREATE OR REPLACE FUNCTION bt_page_stats(text, int4) +RETURNS bt_page_stats_type +AS 'MODULE_PATHNAME', 'bt_page_stats' +LANGUAGE 'C' STRICT; + +-- +-- bt_page_items() +-- +CREATE TYPE bt_page_items_type AS ( + itemoffset smallint, + ctid tid, + itemlen smallint, + nulls bool, + vars bool, + data text +); + +CREATE OR REPLACE FUNCTION bt_page_items(text, int4) +RETURNS SETOF bt_page_items_type +AS 'MODULE_PATHNAME', 'bt_page_items' +LANGUAGE 'C' STRICT; diff --git a/contrib/pageinspect/rawpage.c b/contrib/pageinspect/rawpage.c new file mode 100644 index 0000000000..4aba08e780 --- /dev/null +++ b/contrib/pageinspect/rawpage.c @@ -0,0 +1,164 @@ +/*------------------------------------------------------------------------- + * + * rawpage.c + * Functions to extract a raw page as bytea and inspect it + * + * Access-method specific inspection functions are in separate files. + * + * Copyright (c) 2007, PostgreSQL Global Development Group + * + * IDENTIFICATION + * $PostgreSQL: pgsql/contrib/pageinspect/rawpage.c,v 1.1 2007/05/17 19:11:24 momjian Exp $ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "fmgr.h" +#include "funcapi.h" +#include "access/heapam.h" +#include "access/transam.h" +#include "catalog/namespace.h" +#include "catalog/pg_type.h" +#include "utils/builtins.h" +#include "miscadmin.h" + +PG_MODULE_MAGIC; + +Datum get_raw_page(PG_FUNCTION_ARGS); +Datum page_header(PG_FUNCTION_ARGS); + +/* + * get_raw_page + * + * Returns a copy of a page from shared buffers as a bytea + */ +PG_FUNCTION_INFO_V1(get_raw_page); + +Datum +get_raw_page(PG_FUNCTION_ARGS) +{ + text *relname = PG_GETARG_TEXT_P(0); + uint32 blkno = PG_GETARG_UINT32(1); + + Relation rel; + RangeVar *relrv; + bytea *raw_page; + char *raw_page_data; + Buffer buf; + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + (errmsg("must be superuser to use raw functions")))); + + relrv 
= makeRangeVarFromNameList(textToQualifiedNameList(relname)); + rel = relation_openrv(relrv, AccessShareLock); + + /* Check that this relation has storage */ + if (rel->rd_rel->relkind == RELKIND_VIEW) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot get raw page from view \"%s\"", + RelationGetRelationName(rel)))); + if (rel->rd_rel->relkind == RELKIND_COMPOSITE_TYPE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot get raw page from composite type \"%s\"", + RelationGetRelationName(rel)))); + + if (blkno >= RelationGetNumberOfBlocks(rel)) + elog(ERROR, "block number %u is out of range for relation \"%s\"", + blkno, RelationGetRelationName(rel)); + + /* Initialize buffer to copy to */ + raw_page = (bytea *) palloc(BLCKSZ + VARHDRSZ); + SET_VARSIZE(raw_page, BLCKSZ + VARHDRSZ); + raw_page_data = VARDATA(raw_page); + + /* Take a verbatim copy of the page */ + + buf = ReadBuffer(rel, blkno); + LockBuffer(buf, BUFFER_LOCK_SHARE); + + memcpy(raw_page_data, BufferGetPage(buf), BLCKSZ); + + LockBuffer(buf, BUFFER_LOCK_UNLOCK); + ReleaseBuffer(buf); + + relation_close(rel, AccessShareLock); + + PG_RETURN_BYTEA_P(raw_page); +} + +/* + * page_header + * + * Allows inspection of page header fields of a raw page + */ + +PG_FUNCTION_INFO_V1(page_header); + +Datum +page_header(PG_FUNCTION_ARGS) +{ + bytea *raw_page = PG_GETARG_BYTEA_P(0); + int raw_page_size; + + TupleDesc tupdesc; + + Datum result; + HeapTuple tuple; + Datum values[8]; + bool nulls[8]; + + PageHeader page; + XLogRecPtr lsn; + char lsnchar[64]; + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + (errmsg("must be superuser to use raw page functions")))); + + raw_page_size = VARSIZE(raw_page) - VARHDRSZ; + + /* + * Check that enough data was supplied, so that we don't try to access + * fields outside the supplied buffer. 
+ */ + if(raw_page_size < sizeof(PageHeaderData)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("input page too small (%d bytes)", raw_page_size))); + + page = (PageHeader) VARDATA(raw_page); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + /* Extract information from the page header */ + + lsn = PageGetLSN(page); + snprintf(lsnchar, sizeof(lsnchar), "%X/%X", lsn.xlogid, lsn.xrecoff); + + values[0] = DirectFunctionCall1(textin, CStringGetDatum(lsnchar)); + values[1] = UInt16GetDatum(PageGetTLI(page)); + values[2] = UInt16GetDatum(page->pd_flags); + values[3] = UInt16GetDatum(page->pd_lower); + values[4] = UInt16GetDatum(page->pd_upper); + values[5] = UInt16GetDatum(page->pd_special); + values[6] = UInt16GetDatum(PageGetPageSize(page)); + values[7] = UInt16GetDatum(PageGetPageLayoutVersion(page)); + + /* Build and return the tuple. */ + + memset(nulls, 0, sizeof(nulls)); + + tuple = heap_form_tuple(tupdesc, values, nulls); + result = HeapTupleGetDatum(tuple); + + PG_RETURN_DATUM(result); +} diff --git a/contrib/pageinspect/uninstall_pageinspect.sql b/contrib/pageinspect/uninstall_pageinspect.sql new file mode 100644 index 0000000000..ecf9095073 --- /dev/null +++ b/contrib/pageinspect/uninstall_pageinspect.sql @@ -0,0 +1,20 @@ +-- Adjust this setting to control where the objects get created. 
+SET search_path = public; + +DROP FUNCTION get_raw_page(text, int4); + +DROP FUNCTION page_header(bytea); +DROP TYPE page_header_type; + +DROP FUNCTION heap_page_items(bytea); +DROP TYPE heap_page_items_type; + +DROP FUNCTION bt_metap(text); +DROP TYPE bt_metap_type; + +DROP FUNCTION bt_page_stats(text, int4); +DROP TYPE bt_page_stats_type; + +DROP FUNCTION bt_page_items(text, int4); +DROP TYPE bt_page_items_type; + diff --git a/contrib/pgstattuple/README.pgstattuple b/contrib/pgstattuple/README.pgstattuple index 235de72559..c47f6ad0e5 100644 --- a/contrib/pgstattuple/README.pgstattuple +++ b/contrib/pgstattuple/README.pgstattuple @@ -56,53 +56,6 @@ pgstattuple README 2002/08/29 Tatsuo Ishii avg_leaf_density | 50.27 leaf_fragmentation | 0 - bt_metap - -------- - bt_metap() returns information about the btree index metapage: - - test=> SELECT * FROM bt_metap('pg_cast_oid_index'); - -[ RECORD 1 ]----- - magic | 340322 - version | 2 - root | 1 - level | 0 - fastroot | 1 - fastlevel | 0 - - bt_page_stats - ------------- - bt_page_stats() shows information about single btree pages: - - test=> SELECT * FROM bt_page_stats('pg_cast_oid_index', 1); - -[ RECORD 1 ]-+----- - blkno | 1 - type | l - live_items | 256 - dead_items | 0 - avg_item_size | 12 - page_size | 8192 - free_size | 4056 - btpo_prev | 0 - btpo_next | 0 - btpo | 0 - btpo_flags | 3 - - bt_page_items - ------------- - bt_page_items() returns information about specific items on btree pages: - - test=> SELECT * FROM bt_page_items('pg_cast_oid_index', 1); - itemoffset | ctid | itemlen | nulls | vars | data - ------------+---------+---------+-------+------+------------- - 1 | (0,1) | 12 | f | f | 23 27 00 00 - 2 | (0,2) | 12 | f | f | 24 27 00 00 - 3 | (0,3) | 12 | f | f | 25 27 00 00 - 4 | (0,4) | 12 | f | f | 26 27 00 00 - 5 | (0,5) | 12 | f | f | 27 27 00 00 - 6 | (0,6) | 12 | f | f | 28 27 00 00 - 7 | (0,7) | 12 | f | f | 29 27 00 00 - 8 | (0,8) | 12 | f | f | 2a 27 00 00 - 2. 
Installing pgstattuple @@ -140,6 +93,10 @@ pgstattuple README 2002/08/29 Tatsuo Ishii 5. History + 2007/05/17 + + Moved page-level functions to contrib/pageinspect + 2006/06/28 Extended to work against indexes. diff --git a/contrib/pgstattuple/pgstatindex.c b/contrib/pgstattuple/pgstatindex.c index 2982ceaf50..838fd9e525 100644 --- a/contrib/pgstattuple/pgstatindex.c +++ b/contrib/pgstattuple/pgstatindex.c @@ -36,30 +36,14 @@ #include "utils/inval.h" PG_FUNCTION_INFO_V1(pgstatindex); -PG_FUNCTION_INFO_V1(bt_metap); -PG_FUNCTION_INFO_V1(bt_page_items); -PG_FUNCTION_INFO_V1(bt_page_stats); PG_FUNCTION_INFO_V1(pg_relpages); extern Datum pgstatindex(PG_FUNCTION_ARGS); -extern Datum bt_metap(PG_FUNCTION_ARGS); -extern Datum bt_page_items(PG_FUNCTION_ARGS); -extern Datum bt_page_stats(PG_FUNCTION_ARGS); extern Datum pg_relpages(PG_FUNCTION_ARGS); #define PGSTATINDEX_TYPE "public.pgstatindex_type" #define PGSTATINDEX_NCOLUMNS 10 -#define BTMETAP_TYPE "public.bt_metap_type" -#define BTMETAP_NCOLUMNS 6 - -#define BTPAGEITEMS_TYPE "public.bt_page_items_type" -#define BTPAGEITEMS_NCOLUMNS 6 - -#define BTPAGESTATS_TYPE "public.bt_page_stats_type" -#define BTPAGESTATS_NCOLUMNS 11 - - #define IS_INDEX(r) ((r)->rd_rel->relkind == 'i') #define IS_BTREE(r) ((r)->rd_rel->relam == BTREE_AM_OID) @@ -72,34 +56,6 @@ extern Datum pg_relpages(PG_FUNCTION_ARGS); if ( (blkno)<0 && RelationGetNumberOfBlocks((rel))<=(blkno) ) \ elog(ERROR, "Block number out of range."); } -/* ------------------------------------------------ - * structure for single btree page statistics - * ------------------------------------------------ - */ -typedef struct BTPageStat -{ - uint32 blkno; - uint32 live_items; - uint32 dead_items; - uint32 page_size; - uint32 max_avail; - uint32 free_size; - uint32 avg_item_size; - uint32 fragments; - char type; - - /* opaque data */ - BlockNumber btpo_prev; - BlockNumber btpo_next; - union - { - uint32 level; - TransactionId xact; - } btpo; - uint16 btpo_flags; - BTCycleId 
btpo_cycleid; -} BTPageStat; - /* ------------------------------------------------ * A structure for a whole btree index statistics * used by pgstatindex(). @@ -107,116 +63,22 @@ typedef struct BTPageStat */ typedef struct BTIndexStat { - uint32 magic; uint32 version; BlockNumber root_blkno; uint32 level; - BlockNumber fastroot; - uint32 fastlevel; - - uint32 live_items; - uint32 dead_items; - uint32 root_pages; uint32 internal_pages; uint32 leaf_pages; uint32 empty_pages; uint32 deleted_pages; - uint32 page_size; - uint32 avg_item_size; - uint32 max_avail; uint32 free_space; uint32 fragments; } BTIndexStat; -/* ------------------------------------------------- - * GetBTPageStatistics() - * - * Collect statistics of single b-tree leaf page - * ------------------------------------------------- - */ -static void -GetBTPageStatistics(BlockNumber blkno, Buffer buffer, BTPageStat * stat) -{ - Page page = BufferGetPage(buffer); - PageHeader phdr = (PageHeader) page; - OffsetNumber maxoff = PageGetMaxOffsetNumber(page); - BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page); - int item_size = 0; - int off; - - stat->blkno = blkno; - - stat->max_avail = BLCKSZ - (BLCKSZ - phdr->pd_special + SizeOfPageHeaderData); - - stat->dead_items = stat->live_items = 0; - stat->fragments = 0; - - stat->page_size = PageGetPageSize(page); - - /* page type (flags) */ - if (P_ISDELETED(opaque)) - { - stat->type = 'd'; - stat->btpo.xact = opaque->btpo.xact; - return; - } - else if (P_IGNORE(opaque)) - stat->type = 'e'; - else if (P_ISLEAF(opaque)) - stat->type = 'l'; - else if (P_ISROOT(opaque)) - stat->type = 'r'; - else - stat->type = 'i'; - - /* btpage opaque data */ - stat->btpo_prev = opaque->btpo_prev; - stat->btpo_next = opaque->btpo_next; - stat->btpo.level = opaque->btpo.level; - stat->btpo_flags = opaque->btpo_flags; - stat->btpo_cycleid = opaque->btpo_cycleid; - - /*---------------------------------------------- - * If a next leaf is on the previous block, - * it 
means a fragmentation. - *---------------------------------------------- - */ - if (stat->type == 'l') - { - if (opaque->btpo_next != P_NONE && opaque->btpo_next < blkno) - stat->fragments++; - } - - /* count live and dead tuples, and free space */ - for (off = FirstOffsetNumber; off <= maxoff; off++) - { - IndexTuple itup; - - ItemId id = PageGetItemId(page, off); - - itup = (IndexTuple) PageGetItem(page, id); - - item_size += IndexTupleSize(itup); - - if (!ItemIdDeleted(id)) - stat->live_items++; - else - stat->dead_items++; - } - stat->free_size = PageGetFreeSpace(page); - - if ((stat->live_items + stat->dead_items) > 0) - stat->avg_item_size = item_size / (stat->live_items + stat->dead_items); - else - stat->avg_item_size = 0; -} - - /* ------------------------------------------------------ * pgstatindex() * @@ -249,12 +111,9 @@ pgstatindex(PG_FUNCTION_ARGS) Page page = BufferGetPage(buffer); BTMetaPageData *metad = BTPageGetMeta(page); - indexStat.magic = metad->btm_magic; indexStat.version = metad->btm_version; indexStat.root_blkno = metad->btm_root; indexStat.level = metad->btm_level; - indexStat.fastroot = metad->btm_fastroot; - indexStat.fastlevel = metad->btm_fastlevel; ReleaseBuffer(buffer); } @@ -279,47 +138,49 @@ pgstatindex(PG_FUNCTION_ARGS) */ for (blkno = 1; blkno < nblocks; blkno++) { - Buffer buffer = ReadBuffer(rel, blkno); - BTPageStat stat; + Buffer buffer; + Page page; + BTPageOpaque opaque; - /* scan one page */ - stat.blkno = blkno; - GetBTPageStatistics(blkno, buffer, &stat); + /* Read and lock buffer */ + buffer = ReadBuffer(rel, blkno); + LockBuffer(buffer, BUFFER_LOCK_SHARE); - /*--------------------- - * page status (type) - *--------------------- - */ - switch (stat.type) + page = BufferGetPage(buffer); + opaque = (BTPageOpaque) PageGetSpecialPointer(page); + + /* Determine page type, and update totals */ + + if (P_ISDELETED(opaque)) + indexStat.deleted_pages++; + + else if (P_IGNORE(opaque)) + indexStat.empty_pages++; + + else if 
(P_ISLEAF(opaque)) { - case 'd': - indexStat.deleted_pages++; - break; - case 'l': - indexStat.leaf_pages++; - break; - case 'i': - indexStat.internal_pages++; - break; - case 'e': - indexStat.empty_pages++; - break; - case 'r': - indexStat.root_pages++; - break; - default: - elog(ERROR, "unknown page status."); + int max_avail; + max_avail = BLCKSZ - (BLCKSZ - ((PageHeader)page)->pd_special + SizeOfPageHeaderData); + indexStat.max_avail += max_avail; + indexStat.free_space += PageGetFreeSpace(page); + + indexStat.leaf_pages++; + + /* + * If the next leaf is on an earlier block, it + * means a fragmentation. + */ + if (opaque->btpo_next != P_NONE && opaque->btpo_next < blkno) + indexStat.fragments++; } + else if (P_ISROOT(opaque)) + indexStat.root_pages++; - /* -- leaf fragmentation -- */ - indexStat.fragments += stat.fragments; - - if (stat.type == 'l') - { - indexStat.max_avail += stat.max_avail; - indexStat.free_space += stat.free_size; - } + else + indexStat.internal_pages++; + /* Unlock and release buffer */ + LockBuffer(buffer, BUFFER_LOCK_UNLOCK); ReleaseBuffer(buffer); } @@ -373,305 +234,6 @@ pgstatindex(PG_FUNCTION_ARGS) PG_RETURN_DATUM(result); } -/* ----------------------------------------------- - * bt_page() - * - * Usage: SELECT * FROM bt_page('t1_pkey', 0); - * ----------------------------------------------- - */ -Datum -bt_page_stats(PG_FUNCTION_ARGS) -{ - text *relname = PG_GETARG_TEXT_P(0); - uint32 blkno = PG_GETARG_UINT32(1); - Buffer buffer; - - Relation rel; - RangeVar *relrv; - Datum result; - - relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); - rel = relation_openrv(relrv, AccessShareLock); - - CHECK_RELATION_BLOCK_RANGE(rel, blkno); - - buffer = ReadBuffer(rel, blkno); - - if (!IS_INDEX(rel) || !IS_BTREE(rel)) - elog(ERROR, "bt_page_stats() can be used only on b-tree index."); - - if (blkno == 0) - elog(ERROR, "Block 0 is a meta page."); - - { - HeapTuple tuple; - TupleDesc tupleDesc; - int j; - char 
*values[BTPAGESTATS_NCOLUMNS]; - - BTPageStat stat; - - GetBTPageStatistics(blkno, buffer, &stat); - - tupleDesc = RelationNameGetTupleDesc(BTPAGESTATS_TYPE); - - j = 0; - values[j] = palloc(32); - snprintf(values[j++], 32, "%d", stat.blkno); - - values[j] = palloc(32); - snprintf(values[j++], 32, "%c", stat.type); - values[j] = palloc(32); - snprintf(values[j++], 32, "%d", stat.live_items); - values[j] = palloc(32); - snprintf(values[j++], 32, "%d", stat.dead_items); - values[j] = palloc(32); - snprintf(values[j++], 32, "%d", stat.avg_item_size); - values[j] = palloc(32); - snprintf(values[j++], 32, "%d", stat.page_size); - values[j] = palloc(32); - snprintf(values[j++], 32, "%d", stat.free_size); - values[j] = palloc(32); - snprintf(values[j++], 32, "%d", stat.btpo_prev); - values[j] = palloc(32); - snprintf(values[j++], 32, "%d", stat.btpo_next); - - values[j] = palloc(32); - if (stat.type == 'd') - snprintf(values[j++], 32, "%d", stat.btpo.xact); - else - snprintf(values[j++], 32, "%d", stat.btpo.level); - - values[j] = palloc(32); - snprintf(values[j++], 32, "%d", stat.btpo_flags); - - tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc), - values); - - result = TupleGetDatum(TupleDescGetSlot(tupleDesc), tuple); - } - - ReleaseBuffer(buffer); - - relation_close(rel, AccessShareLock); - - PG_RETURN_DATUM(result); -} - -/*------------------------------------------------------- - * bt_page_items() - * - * Get IndexTupleData set in a leaf page - * - * Usage: SELECT * FROM bt_page_items('t1_pkey', 0); - *------------------------------------------------------- - */ -/* --------------------------------------------------- - * data structure for SRF to hold a scan information - * --------------------------------------------------- - */ -struct user_args -{ - TupleDesc tupd; - Relation rel; - Buffer buffer; - Page page; - uint16 offset; -}; - -Datum -bt_page_items(PG_FUNCTION_ARGS) -{ - text *relname = PG_GETARG_TEXT_P(0); - uint32 blkno = 
PG_GETARG_UINT32(1); - - RangeVar *relrv; - Datum result; - char *values[BTPAGEITEMS_NCOLUMNS]; - BTPageOpaque opaque; - HeapTuple tuple; - ItemId id; - - FuncCallContext *fctx; - MemoryContext mctx; - struct user_args *uargs = NULL; - - if (blkno == 0) - elog(ERROR, "Block 0 is a meta page."); - - if (SRF_IS_FIRSTCALL()) - { - fctx = SRF_FIRSTCALL_INIT(); - mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx); - - uargs = palloc(sizeof(struct user_args)); - - uargs->tupd = RelationNameGetTupleDesc(BTPAGEITEMS_TYPE); - uargs->offset = FirstOffsetNumber; - - relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); - uargs->rel = relation_openrv(relrv, AccessShareLock); - - CHECK_RELATION_BLOCK_RANGE(uargs->rel, blkno); - - uargs->buffer = ReadBuffer(uargs->rel, blkno); - - if (!IS_INDEX(uargs->rel) || !IS_BTREE(uargs->rel)) - elog(ERROR, "bt_page_items() can be used only on b-tree index."); - - uargs->page = BufferGetPage(uargs->buffer); - - opaque = (BTPageOpaque) PageGetSpecialPointer(uargs->page); - - if (P_ISDELETED(opaque)) - elog(NOTICE, "bt_page_items(): this page is deleted."); - - fctx->max_calls = PageGetMaxOffsetNumber(uargs->page); - fctx->user_fctx = uargs; - - MemoryContextSwitchTo(mctx); - } - - fctx = SRF_PERCALL_SETUP(); - uargs = fctx->user_fctx; - - if (fctx->call_cntr < fctx->max_calls) - { - IndexTuple itup; - - id = PageGetItemId(uargs->page, uargs->offset); - - if (!ItemIdIsValid(id)) - elog(ERROR, "Invalid ItemId."); - - itup = (IndexTuple) PageGetItem(uargs->page, id); - - { - int j = 0; - - BlockNumber blkno = BlockIdGetBlockNumber(&(itup->t_tid.ip_blkid)); - - values[j] = palloc(32); - snprintf(values[j++], 32, "%d", uargs->offset); - values[j] = palloc(32); - snprintf(values[j++], 32, "(%u,%u)", blkno, itup->t_tid.ip_posid); - values[j] = palloc(32); - snprintf(values[j++], 32, "%d", (int) IndexTupleSize(itup)); - values[j] = palloc(32); - snprintf(values[j++], 32, "%c", IndexTupleHasNulls(itup) ? 
't' : 'f'); - values[j] = palloc(32); - snprintf(values[j++], 32, "%c", IndexTupleHasVarwidths(itup) ? 't' : 'f'); - - { - int off; - char *dump; - char *ptr = (char *) itup + IndexInfoFindDataOffset(itup->t_info); - - dump = palloc(IndexTupleSize(itup) * 3); - memset(dump, 0, IndexTupleSize(itup) * 3); - - for (off = 0; - off < IndexTupleSize(itup) - IndexInfoFindDataOffset(itup->t_info); - off++) - { - if (dump[0] == '\0') - sprintf(dump, "%02x", *(ptr + off) & 0xff); - else - { - char buf[4]; - - sprintf(buf, " %02x", *(ptr + off) & 0xff); - strcat(dump, buf); - } - } - values[j] = dump; - } - - tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(uargs->tupd), values); - result = TupleGetDatum(TupleDescGetSlot(uargs->tupd), tuple); - } - - uargs->offset = uargs->offset + 1; - - SRF_RETURN_NEXT(fctx, result); - } - else - { - ReleaseBuffer(uargs->buffer); - relation_close(uargs->rel, AccessShareLock); - - SRF_RETURN_DONE(fctx); - } -} - - -/* ------------------------------------------------ - * bt_metap() - * - * Get a btree meta-page information - * - * Usage: SELECT * FROM bt_metap('t1_pkey') - * ------------------------------------------------ - */ -Datum -bt_metap(PG_FUNCTION_ARGS) -{ - text *relname = PG_GETARG_TEXT_P(0); - Buffer buffer; - - Relation rel; - RangeVar *relrv; - Datum result; - - relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); - rel = relation_openrv(relrv, AccessShareLock); - - if (!IS_INDEX(rel) || !IS_BTREE(rel)) - elog(ERROR, "bt_metap() can be used only on b-tree index."); - - buffer = ReadBuffer(rel, 0); - - { - BTMetaPageData *metad; - - TupleDesc tupleDesc; - int j; - char *values[BTMETAP_NCOLUMNS]; - HeapTuple tuple; - - Page page = BufferGetPage(buffer); - - metad = BTPageGetMeta(page); - - tupleDesc = RelationNameGetTupleDesc(BTMETAP_TYPE); - - j = 0; - values[j] = palloc(32); - snprintf(values[j++], 32, "%d", metad->btm_magic); - values[j] = palloc(32); - snprintf(values[j++], 32, "%d", metad->btm_version); 
- values[j] = palloc(32); - snprintf(values[j++], 32, "%d", metad->btm_root); - values[j] = palloc(32); - snprintf(values[j++], 32, "%d", metad->btm_level); - values[j] = palloc(32); - snprintf(values[j++], 32, "%d", metad->btm_fastroot); - values[j] = palloc(32); - snprintf(values[j++], 32, "%d", metad->btm_fastlevel); - - tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc), - values); - - result = TupleGetDatum(TupleDescGetSlot(tupleDesc), tuple); - } - - ReleaseBuffer(buffer); - - relation_close(rel, AccessShareLock); - - PG_RETURN_DATUM(result); -} - /* -------------------------------------------------------- * pg_relpages() * diff --git a/contrib/pgstattuple/pgstattuple.sql.in b/contrib/pgstattuple/pgstattuple.sql.in index 39220f3536..77a5e2d4b2 100644 --- a/contrib/pgstattuple/pgstattuple.sql.in +++ b/contrib/pgstattuple/pgstattuple.sql.in @@ -44,62 +44,6 @@ RETURNS pgstatindex_type AS 'MODULE_PATHNAME', 'pgstatindex' LANGUAGE 'C' STRICT; --- --- bt_metap() --- -CREATE TYPE bt_metap_type AS ( - magic int4, - version int4, - root int4, - level int4, - fastroot int4, - fastlevel int4 -); - -CREATE OR REPLACE FUNCTION bt_metap(text) -RETURNS bt_metap_type -AS 'MODULE_PATHNAME', 'bt_metap' -LANGUAGE 'C' STRICT; - --- --- bt_page_stats() --- -CREATE TYPE bt_page_stats_type AS ( - blkno int4, - type char, - live_items int4, - dead_items int4, - avg_item_size float, - page_size int4, - free_size int4, - btpo_prev int4, - btpo_next int4, - btpo int4, - btpo_flags int4 -); - -CREATE OR REPLACE FUNCTION bt_page_stats(text, int4) -RETURNS bt_page_stats_type -AS 'MODULE_PATHNAME', 'bt_page_stats' -LANGUAGE 'C' STRICT; - --- --- bt_page_items() --- -CREATE TYPE bt_page_items_type AS ( - itemoffset int4, - ctid tid, - itemlen int4, - nulls bool, - vars bool, - data text -); - -CREATE OR REPLACE FUNCTION bt_page_items(text, int4) -RETURNS SETOF bt_page_items_type -AS 'MODULE_PATHNAME', 'bt_page_items' -LANGUAGE 'C' STRICT; - -- -- pg_relpages() -- diff --git 
a/contrib/pgstattuple/uninstall_pgstattuple.sql b/contrib/pgstattuple/uninstall_pgstattuple.sql index 5b857bb868..16f3d9aa32 100644 --- a/contrib/pgstattuple/uninstall_pgstattuple.sql +++ b/contrib/pgstattuple/uninstall_pgstattuple.sql @@ -8,13 +8,4 @@ DROP TYPE pgstattuple_type; DROP FUNCTION pgstatindex(text); DROP TYPE pgstatindex_type; -DROP FUNCTION bt_metap(text); -DROP TYPE bt_metap_type; - -DROP FUNCTION bt_page_stats(text, int4); -DROP TYPE bt_page_stats_type; - -DROP FUNCTION bt_page_items(text, int4); -DROP TYPE bt_page_items_type; - DROP FUNCTION pg_relpages(text);