diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c index 6de10d16a1..3c121b1bba 100644 --- a/src/backend/access/transam/clog.c +++ b/src/backend/access/transam/clog.c @@ -10,29 +10,34 @@ * looked up again. Now we use specialized access code so that the commit * log can be broken into relatively small, independent segments. * + * XLOG interactions: this module generates an XLOG record whenever a new + * CLOG page is initialized to zeroes. Other writes of CLOG come from + * recording of transaction commit or abort in xact.c, which generates its + * own XLOG records for these events and will re-perform the status update + * on redo; so we need make no additional XLOG entry here. Also, the XLOG + * is guaranteed flushed through the XLOG commit record before we are called + * to log a commit, so the WAL rule "write xlog before data" is satisfied + * automatically for commits, and we don't really care for aborts. Therefore, + * we don't need to mark CLOG pages with LSN information; we have enough + * synchronization already. + * * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/clog.c,v 1.22 2004/07/03 02:55:56 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/clog.c,v 1.23 2004/08/23 23:22:44 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" -#include -#include -#include -#include - #include "access/clog.h" #include "access/slru.h" -#include "miscadmin.h" -#include "storage/lwlock.h" +#include "postmaster/bgwriter.h" /* - * Defines for CLOG page and segment sizes. A page is the same BLCKSZ - * as is used everywhere else in Postgres. + * Defines for CLOG page sizes. A page is the same BLCKSZ as is used + * everywhere else in Postgres. * * Note: because TransactionIds are 32 bits and wrap around at 0xFFFFFFFF, * CLOG page numbering also wraps around at 0xFFFFFFFF/CLOG_XACTS_PER_PAGE, @@ -53,25 +58,11 @@ #define TransactionIdToBIndex(xid) ((xid) % (TransactionId) CLOG_XACTS_PER_BYTE) -/*---------- - * Shared-memory data structures for CLOG control - * - * XLOG interactions: this module generates an XLOG record whenever a new - * CLOG page is initialized to zeroes. Other writes of CLOG come from - * recording of transaction commit or abort in xact.c, which generates its - * own XLOG records for these events and will re-perform the status update - * on redo; so we need make no additional XLOG entry here. Also, the XLOG - * is guaranteed flushed through the XLOG commit record before we are called - * to log a commit, so the WAL rule "write xlog before data" is satisfied - * automatically for commits, and we don't really care for aborts. Therefore, - * we don't need to mark CLOG pages with LSN information; we have enough - * synchronization already. - *---------- +/* + * Link to shared-memory data structures for CLOG control */ - - static SlruCtlData ClogCtlData; -static SlruCtl ClogCtl = &ClogCtlData; +#define ClogCtl (&ClogCtlData) static int ZeroCLOGPage(int pageno, bool writeXlog); @@ -91,6 +82,7 @@ TransactionIdSetStatus(TransactionId xid, XidStatus status) int pageno = TransactionIdToPage(xid); int byteno = TransactionIdToByte(xid); int bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT; + int slotno; char *byteptr; char byteval; @@ -98,10 +90,10 @@ TransactionIdSetStatus(TransactionId xid, XidStatus status) status == TRANSACTION_STATUS_ABORTED || status == TRANSACTION_STATUS_SUB_COMMITTED); - LWLockAcquire(ClogCtl->ControlLock, LW_EXCLUSIVE); + LWLockAcquire(CLogControlLock, LW_EXCLUSIVE); - byteptr = SimpleLruReadPage(ClogCtl, pageno, xid, true); - byteptr += byteno; + slotno = SimpleLruReadPage(ClogCtl, pageno, xid); + byteptr = ClogCtl->shared->page_buffer[slotno] + byteno; /* Current state should be 0, subcommitted or target state */ Assert(((*byteptr >> bshift) & CLOG_XACT_BITMASK) == 0 || @@ -114,9 +106,9 @@ TransactionIdSetStatus(TransactionId xid, XidStatus status) byteval |= (status << bshift); *byteptr = byteval; - /* ...->page_status[slotno] = SLRU_PAGE_DIRTY; already done */ + ClogCtl->shared->page_status[slotno] = SLRU_PAGE_DIRTY; - LWLockRelease(ClogCtl->ControlLock); + LWLockRelease(CLogControlLock); } /* @@ -131,17 +123,18 @@ TransactionIdGetStatus(TransactionId xid) int pageno = TransactionIdToPage(xid); int byteno = TransactionIdToByte(xid); int bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT; + int slotno; char *byteptr; XidStatus status; - LWLockAcquire(ClogCtl->ControlLock, LW_EXCLUSIVE); + LWLockAcquire(CLogControlLock, LW_EXCLUSIVE); - byteptr = SimpleLruReadPage(ClogCtl, pageno, xid, false); - byteptr += byteno; + slotno = SimpleLruReadPage(ClogCtl, pageno, xid); + byteptr = ClogCtl->shared->page_buffer[slotno] + byteno; status = (*byteptr >> bshift) & CLOG_XACT_BITMASK; - LWLockRelease(ClogCtl->ControlLock); + LWLockRelease(CLogControlLock); return status; } @@ -160,8 +153,8 @@ CLOGShmemSize(void) void CLOGShmemInit(void) { - SimpleLruInit(ClogCtl, "CLOG Ctl", "pg_clog"); ClogCtl->PagePrecedes = CLOGPagePrecedes; + SimpleLruInit(ClogCtl, "CLOG Ctl", CLogControlLock, "pg_clog"); } /* @@ -175,16 +168,16 @@ BootStrapCLOG(void) { int slotno; - LWLockAcquire(ClogCtl->ControlLock, LW_EXCLUSIVE); + LWLockAcquire(CLogControlLock, LW_EXCLUSIVE); /* Create and zero the first page of the commit log */ slotno = ZeroCLOGPage(0, false); /* Make sure it's written out */ SimpleLruWritePage(ClogCtl, slotno, NULL); - /* Assert(ClogCtl->page_status[slotno] == SLRU_PAGE_CLEAN); */ + Assert(ClogCtl->shared->page_status[slotno] == SLRU_PAGE_CLEAN); - LWLockRelease(ClogCtl->ControlLock); + LWLockRelease(CLogControlLock); } /* @@ -199,7 +192,9 @@ BootStrapCLOG(void) static int ZeroCLOGPage(int pageno, bool writeXlog) { - int slotno = SimpleLruZeroPage(ClogCtl, pageno); + int slotno; + + slotno = SimpleLruZeroPage(ClogCtl, pageno); if (writeXlog) WriteZeroPageXlogRec(pageno); @@ -217,8 +212,7 @@ StartupCLOG(void) /* * Initialize our idea of the latest page number. */ - SimpleLruSetLatestPage(ClogCtl, - TransactionIdToPage(ShmemVariableCache->nextXid)); + ClogCtl->shared->latest_page_number = TransactionIdToPage(ShmemVariableCache->nextXid); } /* @@ -227,6 +221,7 @@ StartupCLOG(void) void ShutdownCLOG(void) { + /* Flush dirty CLOG pages to disk */ SimpleLruFlush(ClogCtl, false); } @@ -236,6 +231,7 @@ ShutdownCLOG(void) void CheckPointCLOG(void) { + /* Flush dirty CLOG pages to disk */ SimpleLruFlush(ClogCtl, true); } @@ -263,12 +259,12 @@ ExtendCLOG(TransactionId newestXact) pageno = TransactionIdToPage(newestXact); - LWLockAcquire(ClogCtl->ControlLock, LW_EXCLUSIVE); + LWLockAcquire(CLogControlLock, LW_EXCLUSIVE); /* Zero the page and make an XLOG entry about it */ ZeroCLOGPage(pageno, true); - LWLockRelease(ClogCtl->ControlLock); + LWLockRelease(CLogControlLock); } @@ -296,6 +292,15 @@ TruncateCLOG(TransactionId oldestXact) * We pass the *page* containing oldestXact to SimpleLruTruncate. */ cutoffPage = TransactionIdToPage(oldestXact); + + /* Check to see if there's any files that could be removed */ + if (!SlruScanDirectory(ClogCtl, cutoffPage, false)) + return; /* nothing to remove */ + + /* Perform a CHECKPOINT */ + RequestCheckpoint(true); + + /* Now we can remove the old CLOG segment(s) */ SimpleLruTruncate(ClogCtl, cutoffPage); } @@ -340,20 +345,51 @@ WriteZeroPageXlogRec(int pageno) rdata.data = (char *) (&pageno); rdata.len = sizeof(int); rdata.next = NULL; - (void) XLogInsert(RM_SLRU_ID, CLOG_ZEROPAGE | XLOG_NO_TRAN, &rdata); + (void) XLogInsert(RM_CLOG_ID, CLOG_ZEROPAGE | XLOG_NO_TRAN, &rdata); } -/* Redo a ZEROPAGE action during WAL replay */ +/* + * CLOG resource manager's routines + */ void -clog_zeropage_redo(int pageno) +clog_redo(XLogRecPtr lsn, XLogRecord *record) { - int slotno; + uint8 info = record->xl_info & ~XLR_INFO_MASK; - LWLockAcquire(ClogCtl->ControlLock, LW_EXCLUSIVE); + if (info == CLOG_ZEROPAGE) + { + int pageno; + int slotno; - slotno = ZeroCLOGPage(pageno, false); - SimpleLruWritePage(ClogCtl, slotno, NULL); - /* Assert(ClogCtl->page_status[slotno] == SLRU_PAGE_CLEAN); */ + memcpy(&pageno, XLogRecGetData(record), sizeof(int)); - LWLockRelease(ClogCtl->ControlLock); + LWLockAcquire(CLogControlLock, LW_EXCLUSIVE); + + slotno = ZeroCLOGPage(pageno, false); + SimpleLruWritePage(ClogCtl, slotno, NULL); + Assert(ClogCtl->shared->page_status[slotno] == SLRU_PAGE_CLEAN); + + LWLockRelease(CLogControlLock); + } +} + +void +clog_undo(XLogRecPtr lsn, XLogRecord *record) +{ +} + +void +clog_desc(char *buf, uint8 xl_info, char *rec) +{ + uint8 info = xl_info & ~XLR_INFO_MASK; + + if (info == CLOG_ZEROPAGE) + { + int pageno; + + memcpy(&pageno, rec, sizeof(int)); + sprintf(buf + strlen(buf), "zeropage: %d", pageno); + } + else + strcat(buf, "UNKNOWN"); } diff --git a/src/backend/access/transam/rmgr.c b/src/backend/access/transam/rmgr.c index ad68e4c99b..575ad7a089 100644 --- a/src/backend/access/transam/rmgr.c +++ b/src/backend/access/transam/rmgr.c @@ -3,7 +3,7 @@ * * Resource managers definition * - * $PostgreSQL: pgsql/src/backend/access/transam/rmgr.c,v 1.14 2004/07/21 22:31:20 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/rmgr.c,v 1.15 2004/08/23 23:22:44 tgl Exp $ */ #include "postgres.h" @@ -12,7 +12,7 @@ #include "access/heapam.h" #include "access/nbtree.h" #include "access/rtree.h" -#include "access/slru.h" +#include "access/clog.h" #include "access/xact.h" #include "access/xlog_internal.h" #include "storage/smgr.h" @@ -23,7 +23,7 @@ const RmgrData RmgrTable[RM_MAX_ID + 1] = { {"XLOG", xlog_redo, xlog_undo, xlog_desc, NULL, NULL}, {"Transaction", xact_redo, xact_undo, xact_desc, NULL, NULL}, {"Storage", smgr_redo, smgr_undo, smgr_desc, NULL, NULL}, - {"SLRU", slru_redo, slru_undo, slru_desc, NULL, NULL}, + {"CLOG", clog_redo, clog_undo, clog_desc, NULL, NULL}, {"Reserved 4", NULL, NULL, NULL, NULL, NULL}, {"Reserved 5", NULL, NULL, NULL, NULL, NULL}, {"Reserved 6", NULL, NULL, NULL, NULL, NULL}, diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c index d45a7d9f61..5d51f69a53 100644 --- a/src/backend/access/transam/slru.c +++ b/src/backend/access/transam/slru.c @@ -3,49 +3,6 @@ * slru.c * Simple LRU buffering for transaction status logfiles * - * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * $PostgreSQL: pgsql/src/backend/access/transam/slru.c,v 1.18 2004/07/21 22:31:20 tgl Exp $ - * - *------------------------------------------------------------------------- - */ -#include "postgres.h" - -#include -#include -#include - -#include "access/clog.h" -#include "access/slru.h" -#include "access/subtrans.h" -#include "postmaster/bgwriter.h" -#include "storage/fd.h" -#include "storage/lwlock.h" -#include "storage/shmem.h" -#include "miscadmin.h" - - -/* - * Define segment size. A page is the same BLCKSZ as is used everywhere - * else in Postgres. The segment size can be chosen somewhat arbitrarily; - * we make it 32 pages by default, or 256Kb, i.e. 1M transactions for CLOG - * or 64K transactions for SUBTRANS. - * - * Note: because TransactionIds are 32 bits and wrap around at 0xFFFFFFFF, - * page numbering also wraps around at 0xFFFFFFFF/xxxx_XACTS_PER_PAGE (where - * xxxx is CLOG or SUBTRANS, respectively), and segment numbering at - * 0xFFFFFFFF/xxxx_XACTS_PER_PAGE/SLRU_PAGES_PER_SEGMENT. We need - * take no explicit notice of that fact in this module, except when comparing - * segment and page numbers in SimpleLruTruncate (see PagePrecedes()). - */ - -#define SLRU_PAGES_PER_SEGMENT 32 - - -/*---------- - * Shared-memory data structures for SLRU control - * * We use a simple least-recently-used scheme to manage a pool of page * buffers. Under ordinary circumstances we expect that write * traffic will occur mostly to the latest page (and to the just-prior @@ -86,44 +43,46 @@ * to re-dirty a page that is currently being written out. This is handled * by setting the page's state from WRITE_IN_PROGRESS to DIRTY. The writing * process must notice this and not mark the page CLEAN when it's done. - *---------- + * + * + * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * $PostgreSQL: pgsql/src/backend/access/transam/slru.c,v 1.19 2004/08/23 23:22:44 tgl Exp $ + * + *------------------------------------------------------------------------- */ +#include "postgres.h" + +#include +#include +#include + +#include "access/slru.h" +#include "access/xlog.h" +#include "storage/fd.h" +#include "storage/shmem.h" +#include "miscadmin.h" -typedef enum -{ - SLRU_PAGE_EMPTY, /* buffer is not in use */ - SLRU_PAGE_READ_IN_PROGRESS, /* page is being read in */ - SLRU_PAGE_CLEAN, /* page is valid and not dirty */ - SLRU_PAGE_DIRTY, /* page is valid but needs write */ - SLRU_PAGE_WRITE_IN_PROGRESS /* page is being written out */ -} SlruPageStatus; /* - * Shared-memory state + * Define segment size. A page is the same BLCKSZ as is used everywhere + * else in Postgres. The segment size can be chosen somewhat arbitrarily; + * we make it 32 pages by default, or 256Kb, i.e. 1M transactions for CLOG + * or 64K transactions for SUBTRANS. + * + * Note: because TransactionIds are 32 bits and wrap around at 0xFFFFFFFF, + * page numbering also wraps around at 0xFFFFFFFF/xxxx_XACTS_PER_PAGE (where + * xxxx is CLOG or SUBTRANS, respectively), and segment numbering at + * 0xFFFFFFFF/xxxx_XACTS_PER_PAGE/SLRU_PAGES_PER_SEGMENT. We need + * take no explicit notice of that fact in this module, except when comparing + * segment and page numbers in SimpleLruTruncate (see PagePrecedes()). + * + * Note: this file currently assumes that segment file names will be four + * hex digits. This sets a lower bound on the segment size (64K transactions + * for 32-bit TransactionIds). */ -typedef struct SlruSharedData -{ - LWLockId ControlLock; - - /* - * Info for each buffer slot. Page number is undefined when status is - * EMPTY. lru_count is essentially the number of page switches since - * last use of this page; the page with highest lru_count is the best - * candidate to replace. - */ - char *page_buffer[NUM_CLOG_BUFFERS]; - SlruPageStatus page_status[NUM_CLOG_BUFFERS]; - int page_number[NUM_CLOG_BUFFERS]; - unsigned int page_lru_count[NUM_CLOG_BUFFERS]; - LWLockId BufferLocks[NUM_CLOG_BUFFERS]; /* Per-buffer I/O locks */ - - /* - * latest_page_number is the page number of the current end of the - * CLOG; this is not critical data, since we use it only to avoid - * swapping out the latest page. - */ - int latest_page_number; -} SlruSharedData; +#define SLRU_PAGES_PER_SEGMENT 32 #define SlruFileName(ctl, path, seg) \ snprintf(path, MAXPGPATH, "%s/%04X", (ctl)->Dir, seg) @@ -138,8 +97,8 @@ typedef struct SlruSharedData typedef struct SlruFlushData { int num_files; /* # files actually open */ - int fd[NUM_CLOG_BUFFERS]; /* their FD's */ - int segno[NUM_CLOG_BUFFERS]; /* their clog seg#s */ + int fd[NUM_SLRU_BUFFERS]; /* their FD's */ + int segno[NUM_SLRU_BUFFERS]; /* their log seg#s */ } SlruFlushData; /* @@ -149,7 +108,7 @@ typedef struct SlruFlushData do { \ if ((shared)->page_lru_count[slotno] != 0) { \ int iilru; \ - for (iilru = 0; iilru < NUM_CLOG_BUFFERS; iilru++) \ + for (iilru = 0; iilru < NUM_SLRU_BUFFERS; iilru++) \ (shared)->page_lru_count[iilru]++; \ (shared)->page_lru_count[slotno] = 0; \ } \ @@ -176,7 +135,6 @@ static bool SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata); static void SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid); static int SlruSelectLRUPage(SlruCtl ctl, int pageno); -static bool SlruScanDirectory(SlruCtl ctl, int cutoffPage, bool doDeletions); /* @@ -186,11 +144,12 @@ static bool SlruScanDirectory(SlruCtl ctl, int cutoffPage, bool doDeletions); int SimpleLruShmemSize(void) { - return MAXALIGN(sizeof(SlruSharedData)) + BLCKSZ * NUM_CLOG_BUFFERS; + return BUFFERALIGN(sizeof(SlruSharedData)) + BLCKSZ * NUM_SLRU_BUFFERS; } void -SimpleLruInit(SlruCtl ctl, const char *name, const char *subdir) +SimpleLruInit(SlruCtl ctl, const char *name, + LWLockId ctllock, const char *subdir) { SlruShared shared; bool found; @@ -207,16 +166,16 @@ SimpleLruInit(SlruCtl ctl, const char *name, const char *subdir) memset(shared, 0, sizeof(SlruSharedData)); - shared->ControlLock = LWLockAssign(); + shared->ControlLock = ctllock; - bufptr = (char *) shared + MAXALIGN(sizeof(SlruSharedData)); + bufptr = (char *) shared + BUFFERALIGN(sizeof(SlruSharedData)); - for (slotno = 0; slotno < NUM_CLOG_BUFFERS; slotno++) + for (slotno = 0; slotno < NUM_SLRU_BUFFERS; slotno++) { shared->page_buffer[slotno] = bufptr; shared->page_status[slotno] = SLRU_PAGE_EMPTY; shared->page_lru_count[slotno] = 1; - shared->BufferLocks[slotno] = LWLockAssign(); + shared->buffer_locks[slotno] = LWLockAssign(); bufptr += BLCKSZ; } @@ -225,11 +184,12 @@ SimpleLruInit(SlruCtl ctl, const char *name, const char *subdir) else Assert(found); - /* Initialize the unshared control struct */ + /* + * Initialize the unshared control struct, including directory path. + * We assume caller set PagePrecedes. + */ ctl->shared = shared; - ctl->ControlLock = shared->ControlLock; - - /* Initialize unshared copy of directory path */ + ctl->do_fsync = true; /* default behavior */ snprintf(ctl->Dir, MAXPGPATH, "%s/%s", DataDir, subdir); } @@ -244,8 +204,8 @@ SimpleLruInit(SlruCtl ctl, const char *name, const char *subdir) int SimpleLruZeroPage(SlruCtl ctl, int pageno) { - int slotno; SlruShared shared = ctl->shared; + int slotno; /* Find a suitable buffer slot for the page */ slotno = SlruSelectLRUPage(ctl, pageno); @@ -274,14 +234,13 @@ SimpleLruZeroPage(SlruCtl ctl, int pageno) * The passed-in xid is used only for error reporting, and may be * InvalidTransactionId if no specific xid is associated with the action. * - * Return value is the shared-buffer address of the page. + * Return value is the shared-buffer slot number now holding the page. * The buffer's LRU access info is updated. - * If forwrite is true, the buffer is marked as dirty. * * Control lock must be held at entry, and will be held at exit. */ -char * -SimpleLruReadPage(SlruCtl ctl, int pageno, TransactionId xid, bool forwrite) +int +SimpleLruReadPage(SlruCtl ctl, int pageno, TransactionId xid) { SlruShared shared = ctl->shared; @@ -303,9 +262,7 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, TransactionId xid, bool forwrite) { /* otherwise, it's ready to use */ SlruRecentlyUsed(shared, slotno); - if (forwrite) - shared->page_status[slotno] = SLRU_PAGE_DIRTY; - return shared->page_buffer[slotno]; + return slotno; } } else @@ -327,7 +284,7 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, TransactionId xid, bool forwrite) /* Release shared lock, grab per-buffer lock instead */ LWLockRelease(shared->ControlLock); - LWLockAcquire(shared->BufferLocks[slotno], LW_EXCLUSIVE); + LWLockAcquire(shared->buffer_locks[slotno], LW_EXCLUSIVE); /* * Check to see if someone else already did the read, or took the @@ -336,7 +293,7 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, TransactionId xid, bool forwrite) if (shared->page_number[slotno] != pageno || shared->page_status[slotno] != SLRU_PAGE_READ_IN_PROGRESS) { - LWLockRelease(shared->BufferLocks[slotno]); + LWLockRelease(shared->buffer_locks[slotno]); LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE); continue; } @@ -352,16 +309,14 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, TransactionId xid, bool forwrite) shared->page_status[slotno] = ok ? SLRU_PAGE_CLEAN : SLRU_PAGE_EMPTY; - LWLockRelease(shared->BufferLocks[slotno]); + LWLockRelease(shared->buffer_locks[slotno]); /* Now it's okay to ereport if we failed */ if (!ok) SlruReportIOError(ctl, pageno, xid); SlruRecentlyUsed(shared, slotno); - if (forwrite) - shared->page_status[slotno] = SLRU_PAGE_DIRTY; - return shared->page_buffer[slotno]; + return slotno; } } @@ -379,9 +334,9 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, TransactionId xid, bool forwrite) void SimpleLruWritePage(SlruCtl ctl, int slotno, SlruFlush fdata) { + SlruShared shared = ctl->shared; int pageno; bool ok; - SlruShared shared = ctl->shared; /* Do nothing if page does not need writing */ if (shared->page_status[slotno] != SLRU_PAGE_DIRTY && @@ -392,7 +347,7 @@ SimpleLruWritePage(SlruCtl ctl, int slotno, SlruFlush fdata) /* Release shared lock, grab per-buffer lock instead */ LWLockRelease(shared->ControlLock); - LWLockAcquire(shared->BufferLocks[slotno], LW_EXCLUSIVE); + LWLockAcquire(shared->buffer_locks[slotno], LW_EXCLUSIVE); /* * Check to see if someone else already did the write, or took the @@ -405,7 +360,7 @@ SimpleLruWritePage(SlruCtl ctl, int slotno, SlruFlush fdata) (shared->page_status[slotno] != SLRU_PAGE_DIRTY && shared->page_status[slotno] != SLRU_PAGE_WRITE_IN_PROGRESS)) { - LWLockRelease(shared->BufferLocks[slotno]); + LWLockRelease(shared->buffer_locks[slotno]); LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE); return; } @@ -447,7 +402,7 @@ SimpleLruWritePage(SlruCtl ctl, int slotno, SlruFlush fdata) if (shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS) shared->page_status[slotno] = ok ? SLRU_PAGE_CLEAN : SLRU_PAGE_DIRTY; - LWLockRelease(shared->BufferLocks[slotno]); + LWLockRelease(shared->buffer_locks[slotno]); /* Now it's okay to ereport if we failed */ if (!ok) @@ -640,7 +595,7 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata) */ if (!fdata) { - if (pg_fsync(fd)) + if (ctl->do_fsync && pg_fsync(fd)) { slru_errcause = SLRU_FSYNC_FAILED; slru_errno = errno; @@ -758,7 +713,7 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno) unsigned int bestcount = 0; /* See if page already has a buffer assigned */ - for (slotno = 0; slotno < NUM_CLOG_BUFFERS; slotno++) + for (slotno = 0; slotno < NUM_SLRU_BUFFERS; slotno++) { if (shared->page_number[slotno] == pageno && shared->page_status[slotno] != SLRU_PAGE_EMPTY) @@ -769,7 +724,7 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno) * If we find any EMPTY slot, just select that one. Else locate * the least-recently-used slot that isn't the latest page. */ - for (slotno = 0; slotno < NUM_CLOG_BUFFERS; slotno++) + for (slotno = 0; slotno < NUM_SLRU_BUFFERS; slotno++) { if (shared->page_status[slotno] == SLRU_PAGE_EMPTY) return slotno; @@ -795,7 +750,7 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno) */ if (shared->page_status[bestslot] == SLRU_PAGE_READ_IN_PROGRESS) (void) SimpleLruReadPage(ctl, shared->page_number[bestslot], - InvalidTransactionId, false); + InvalidTransactionId); else SimpleLruWritePage(ctl, bestslot, NULL); @@ -808,18 +763,7 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno) } /* - * This must be called ONCE during postmaster or standalone-backend startup - */ -void -SimpleLruSetLatestPage(SlruCtl ctl, int pageno) -{ - SlruShared shared = ctl->shared; - - shared->latest_page_number = pageno; -} - -/* - * This is called during checkpoint and postmaster/standalone-backend shutdown + * Flush dirty pages to disk during checkpoint or database shutdown */ void SimpleLruFlush(SlruCtl ctl, bool checkpoint) @@ -831,11 +775,14 @@ SimpleLruFlush(SlruCtl ctl, bool checkpoint) int i; bool ok; + /* + * Find and write dirty pages + */ fdata.num_files = 0; LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE); - for (slotno = 0; slotno < NUM_CLOG_BUFFERS; slotno++) + for (slotno = 0; slotno < NUM_SLRU_BUFFERS; slotno++) { SimpleLruWritePage(ctl, slotno, &fdata); @@ -857,7 +804,7 @@ SimpleLruFlush(SlruCtl ctl, bool checkpoint) ok = true; for (i = 0; i < fdata.num_files; i++) { - if (pg_fsync(fdata.fd[i])) + if (ctl->do_fsync && pg_fsync(fdata.fd[i])) { slru_errcause = SLRU_FSYNC_FAILED; slru_errno = errno; @@ -879,40 +826,23 @@ SimpleLruFlush(SlruCtl ctl, bool checkpoint) /* * Remove all segments before the one holding the passed page number - * - * When this is called, we know that the database logically contains no - * reference to transaction IDs older than oldestXact. However, we must - * not remove any segment until we have performed a checkpoint, to ensure - * that no such references remain on disk either; else a crash just after - * the truncation might leave us with a problem. Since CLOG segments hold - * a large number of transactions, the opportunity to actually remove a - * segment is fairly rare, and so it seems best not to do the checkpoint - * unless we have confirmed that there is a removable segment. Therefore - * we issue the checkpoint command here, not in higher-level code as might - * seem cleaner. */ void SimpleLruTruncate(SlruCtl ctl, int cutoffPage) { - int slotno; SlruShared shared = ctl->shared; + int slotno; /* * The cutoff point is the start of the segment containing cutoffPage. */ cutoffPage -= cutoffPage % SLRU_PAGES_PER_SEGMENT; - if (!SlruScanDirectory(ctl, cutoffPage, false)) - return; /* nothing to remove */ - - /* Perform a CHECKPOINT */ - RequestCheckpoint(true); - /* * Scan shared memory and remove any pages preceding the cutoff page, - * to ensure we won't rewrite them later. (Any dirty pages should - * have been flushed already during the checkpoint, we're just being - * extra careful here.) + * to ensure we won't rewrite them later. (Since this is normally + * called in or just after a checkpoint, any dirty pages should + * have been flushed already ... we're just being extra careful here.) */ LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE); @@ -933,7 +863,7 @@ restart:; return; } - for (slotno = 0; slotno < NUM_CLOG_BUFFERS; slotno++) + for (slotno = 0; slotno < NUM_SLRU_BUFFERS; slotno++) { if (shared->page_status[slotno] == SLRU_PAGE_EMPTY) continue; @@ -956,7 +886,7 @@ restart:; */ if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS) (void) SimpleLruReadPage(ctl, shared->page_number[slotno], - InvalidTransactionId, false); + InvalidTransactionId); else SimpleLruWritePage(ctl, slotno, NULL); goto restart; @@ -969,11 +899,13 @@ restart:; } /* - * SlruTruncate subroutine: scan directory for removable segments. + * SimpleLruTruncate subroutine: scan directory for removable segments. * Actually remove them iff doDeletions is true. Return TRUE iff any * removable segments were found. Note: no locking is needed. + * + * This can be called directly from clog.c, for reasons explained there. */ -static bool +bool SlruScanDirectory(SlruCtl ctl, int cutoffPage, bool doDeletions) { bool found = false; @@ -983,6 +915,13 @@ SlruScanDirectory(SlruCtl ctl, int cutoffPage, bool doDeletions) int segpage; char path[MAXPGPATH]; + /* + * The cutoff point is the start of the segment containing cutoffPage. + * (This is redundant when called from SimpleLruTruncate, but not when + * called directly from clog.c.) + */ + cutoffPage -= cutoffPage % SLRU_PAGES_PER_SEGMENT; + cldir = AllocateDir(ctl->Dir); if (cldir == NULL) ereport(ERROR, @@ -1003,10 +942,9 @@ SlruScanDirectory(SlruCtl ctl, int cutoffPage, bool doDeletions) found = true; if (doDeletions) { - ereport(LOG, - (errmsg("removing file \"%s/%s\"", - ctl->Dir, clde->d_name))); snprintf(path, MAXPGPATH, "%s/%s", ctl->Dir, clde->d_name); + ereport(LOG, + (errmsg("removing file \"%s\"", path))); unlink(path); } } @@ -1027,55 +965,3 @@ SlruScanDirectory(SlruCtl ctl, int cutoffPage, bool doDeletions) return found; } - -/* - * SLRU resource manager's routines - */ -void -slru_redo(XLogRecPtr lsn, XLogRecord *record) -{ - uint8 info = record->xl_info & ~XLR_INFO_MASK; - int pageno; - - memcpy(&pageno, XLogRecGetData(record), sizeof(int)); - - switch (info) - { - case CLOG_ZEROPAGE: - clog_zeropage_redo(pageno); - break; - case SUBTRANS_ZEROPAGE: - subtrans_zeropage_redo(pageno); - break; - default: - elog(PANIC, "slru_redo: unknown op code %u", info); - } -} - -void -slru_undo(XLogRecPtr lsn, XLogRecord *record) -{ -} - -void -slru_desc(char *buf, uint8 xl_info, char *rec) -{ - uint8 info = xl_info & ~XLR_INFO_MASK; - - if (info == CLOG_ZEROPAGE) - { - int pageno; - - memcpy(&pageno, rec, sizeof(int)); - sprintf(buf + strlen(buf), "clog zeropage: %d", pageno); - } - else if (info == SUBTRANS_ZEROPAGE) - { - int pageno; - - memcpy(&pageno, rec, sizeof(int)); - sprintf(buf + strlen(buf), "subtrans zeropage: %d", pageno); - } - else - strcat(buf, "UNKNOWN"); -} diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c index ace1bb1434..539dee9875 100644 --- a/src/backend/access/transam/subtrans.c +++ b/src/backend/access/transam/subtrans.c @@ -1,48 +1,49 @@ /*------------------------------------------------------------------------- * * subtrans.c - * PostgreSQL subtrans-log manager + * PostgreSQL subtransaction-log manager * - * The pg_subtrans manager is a pg_clog-like manager which stores the parent + * The pg_subtrans manager is a pg_clog-like manager that stores the parent * transaction Id for each transaction. It is a fundamental part of the * nested transactions implementation. A main transaction has a parent * of InvalidTransactionId, and each subtransaction has its immediate parent. * The tree can easily be walked from child to parent, but not in the * opposite direction. * - * This code is mostly derived from clog.c. + * This code is based on clog.c, but the robustness requirements + * are completely different from pg_clog, because we only need to remember + * pg_subtrans information for currently-open transactions. Thus, there is + * no need to preserve data over a crash and restart. + * + * There are no XLOG interactions since we do not care about preserving + * data across crashes. During database startup, we simply force the + * currently-active page of SUBTRANS to zeroes. * * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/subtrans.c,v 1.2 2004/08/22 02:41:57 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/subtrans.c,v 1.3 2004/08/23 23:22:44 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" -#include -#include -#include -#include - #include "access/slru.h" #include "access/subtrans.h" -#include "miscadmin.h" -#include "storage/lwlock.h" +#include "storage/sinval.h" #include "utils/tqual.h" /* - * Defines for SubTrans page and segment sizes. A page is the same BLCKSZ - * as is used everywhere else in Postgres. + * Defines for SubTrans page sizes. A page is the same BLCKSZ as is used + * everywhere else in Postgres. * * Note: because TransactionIds are 32 bits and wrap around at 0xFFFFFFFF, * SubTrans page numbering also wraps around at * 0xFFFFFFFF/SUBTRANS_XACTS_PER_PAGE, and segment numbering at * 0xFFFFFFFF/SUBTRANS_XACTS_PER_PAGE/SLRU_SEGMENTS_PER_PAGE. We need take no * explicit notice of that fact in this module, except when comparing segment - * and page numbers in TruncateSubTrans (see SubTransPagePrecedes). + * and page numbers in TruncateSUBTRANS (see SubTransPagePrecedes). */ /* We need four bytes per xact */ @@ -52,30 +53,15 @@ #define TransactionIdToEntry(xid) ((xid) % (TransactionId) SUBTRANS_XACTS_PER_PAGE) -/*---------- - * Shared-memory data structures for SUBTRANS control - * - * XLOG interactions: this module generates an XLOG record whenever a new - * SUBTRANS page is initialized to zeroes. Other writes of SUBTRANS come from - * recording of transaction commit or abort in xact.c, which generates its - * own XLOG records for these events and will re-perform the status update - * on redo; so we need make no additional XLOG entry here. Also, the XLOG - * is guaranteed flushed through the XLOG commit record before we are called - * to log a commit, so the WAL rule "write xlog before data" is satisfied - * automatically for commits, and we don't really care for aborts. Therefore, - * we don't need to mark SUBTRANS pages with LSN information; we have enough - * synchronization already. - *---------- +/* + * Link to shared-memory data structures for SUBTRANS control */ - - static SlruCtlData SubTransCtlData; -static SlruCtl SubTransCtl = &SubTransCtlData; +#define SubTransCtl (&SubTransCtlData) -static int ZeroSUBTRANSPage(int pageno, bool writeXlog); +static int ZeroSUBTRANSPage(int pageno); static bool SubTransPagePrecedes(int page1, int page2); -static void WriteZeroPageXlogRec(int pageno); /* @@ -86,21 +72,23 @@ SubTransSetParent(TransactionId xid, TransactionId parent) { int pageno = TransactionIdToPage(xid); int entryno = TransactionIdToEntry(xid); + int slotno; TransactionId *ptr; - LWLockAcquire(SubTransCtl->ControlLock, LW_EXCLUSIVE); + LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE); - ptr = (TransactionId *) SimpleLruReadPage(SubTransCtl, pageno, xid, true); + slotno = SimpleLruReadPage(SubTransCtl, pageno, xid); + ptr = (TransactionId *) SubTransCtl->shared->page_buffer[slotno]; ptr += entryno; - /* Current state should be 0 or target state */ - Assert(*ptr == InvalidTransactionId || *ptr == parent); + /* Current state should be 0 */ + Assert(*ptr == InvalidTransactionId); *ptr = parent; - /* ...->page_status[slotno] = SLRU_PAGE_DIRTY; already done */ + SubTransCtl->shared->page_status[slotno] = SLRU_PAGE_DIRTY; - LWLockRelease(SubTransCtl->ControlLock); + LWLockRelease(SubtransControlLock); } /* @@ -111,6 +99,7 @@ SubTransGetParent(TransactionId xid) { int pageno = TransactionIdToPage(xid); int entryno = TransactionIdToEntry(xid); + int slotno; TransactionId *ptr; TransactionId parent; @@ -121,14 +110,15 @@ SubTransGetParent(TransactionId xid) if (!TransactionIdIsNormal(xid)) return InvalidTransactionId; - LWLockAcquire(SubTransCtl->ControlLock, LW_EXCLUSIVE); + LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE); - ptr = (TransactionId *) SimpleLruReadPage(SubTransCtl, pageno, xid, false); + slotno = SimpleLruReadPage(SubTransCtl, pageno, xid); + ptr = (TransactionId *) SubTransCtl->shared->page_buffer[slotno]; ptr += entryno; parent = *ptr; - LWLockRelease(SubTransCtl->ControlLock); + LWLockRelease(SubtransControlLock); return parent; } @@ -169,7 +159,7 @@ SubTransGetTopmostTransaction(TransactionId xid) /* - * Initialization of shared memory for Subtrans + * Initialization of shared memory for SUBTRANS */ int @@ -181,36 +171,42 @@ SUBTRANSShmemSize(void) void SUBTRANSShmemInit(void) { - SimpleLruInit(SubTransCtl, "SUBTRANS Ctl", "pg_subtrans"); SubTransCtl->PagePrecedes = SubTransPagePrecedes; + SimpleLruInit(SubTransCtl, "SUBTRANS Ctl", + SubtransControlLock, "pg_subtrans"); + /* Override default assumption that writes should be fsync'd */ + SubTransCtl->do_fsync = false; } /* * This func must be called ONCE on system install. It creates - * the initial SubTrans segment. (The SubTrans directory is assumed to - * have been created by initdb, and SubTransShmemInit must have been called - * already.) + * the initial SUBTRANS segment. (The SUBTRANS directory is assumed to + * have been created by the initdb shell script, and SUBTRANSShmemInit + * must have been called already.) + * + * Note: it's not really necessary to create the initial segment now, + * since slru.c would create it on first write anyway. But we may as well + * do it to be sure the directory is set up correctly. */ void BootStrapSUBTRANS(void) { int slotno; - LWLockAcquire(SubTransCtl->ControlLock, LW_EXCLUSIVE); + LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE); - /* Create and zero the first page of the commit log */ - slotno = ZeroSUBTRANSPage(0, false); + /* Create and zero the first page of the subtrans log */ + slotno = ZeroSUBTRANSPage(0); /* Make sure it's written out */ SimpleLruWritePage(SubTransCtl, slotno, NULL); - /* Assert(SubTransCtl->page_status[slotno] == SLRU_PAGE_CLEAN); */ + Assert(SubTransCtl->shared->page_status[slotno] == SLRU_PAGE_CLEAN); - LWLockRelease(SubTransCtl->ControlLock); + LWLockRelease(SubtransControlLock); } /* - * Initialize (or reinitialize) a page of SubTrans to zeroes. - * If writeXlog is TRUE, also emit an XLOG record saying we did this. + * Initialize (or reinitialize) a page of SUBTRANS to zeroes. * * The page is not actually written, just set up in shared memory. * The slot number of the new page is returned. @@ -218,14 +214,9 @@ BootStrapSUBTRANS(void) * Control lock must be held at entry, and will be held at exit. */ static int -ZeroSUBTRANSPage(int pageno, bool writeXlog) +ZeroSUBTRANSPage(int pageno) { - int slotno = SimpleLruZeroPage(SubTransCtl, pageno); - - if (writeXlog) - WriteZeroPageXlogRec(pageno); - - return slotno; + return SimpleLruZeroPage(SubTransCtl, pageno); } /* @@ -235,11 +226,20 @@ ZeroSUBTRANSPage(int pageno, bool writeXlog) void StartupSUBTRANS(void) { + int startPage; + /* - * Initialize our idea of the latest page number. + * Since we don't expect pg_subtrans to be valid across crashes, + * we initialize the currently-active page to zeroes during startup. + * Whenever we advance into a new page, ExtendSUBTRANS will likewise + * zero the new page without regard to whatever was previously on disk. */ - SimpleLruSetLatestPage(SubTransCtl, - TransactionIdToPage(ShmemVariableCache->nextXid)); + LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE); + + startPage = TransactionIdToPage(ShmemVariableCache->nextXid); + (void) ZeroSUBTRANSPage(startPage); + + LWLockRelease(SubtransControlLock); } /* @@ -248,6 +248,12 @@ StartupSUBTRANS(void) void ShutdownSUBTRANS(void) { + /* + * Flush dirty SUBTRANS pages to disk + * + * This is not actually necessary from a correctness point of view. + * We do it merely as a debugging aid. + */ SimpleLruFlush(SubTransCtl, false); } @@ -257,16 +263,23 @@ ShutdownSUBTRANS(void) void CheckPointSUBTRANS(void) { + /* + * Flush dirty SUBTRANS pages to disk + * + * This is not actually necessary from a correctness point of view. + * We do it merely to improve the odds that writing of dirty pages is done + * by the checkpoint process and not by backends. + */ SimpleLruFlush(SubTransCtl, true); } /* - * Make sure that SubTrans has room for a newly-allocated XID. + * Make sure that SUBTRANS has room for a newly-allocated XID. * * NB: this is called while holding XidGenLock. We want it to be very fast * most of the time; even when it's not so fast, no actual I/O need happen - * unless we're forced to write out a dirty subtrans or xlog page to make room + * unless we're forced to write out a dirty subtrans page to make room * in shared memory. */ void @@ -284,28 +297,20 @@ ExtendSUBTRANS(TransactionId newestXact) pageno = TransactionIdToPage(newestXact); - LWLockAcquire(SubTransCtl->ControlLock, LW_EXCLUSIVE); + LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE); - /* Zero the page and make an XLOG entry about it */ - ZeroSUBTRANSPage(pageno, true); + /* Zero the page */ + ZeroSUBTRANSPage(pageno); - LWLockRelease(SubTransCtl->ControlLock); + LWLockRelease(SubtransControlLock); } /* - * Remove all SubTrans segments before the one holding the passed transaction ID + * Remove all SUBTRANS segments before the one holding the passed transaction ID * - * When this is called, we know that the database logically contains no - * reference to transaction IDs older than oldestXact. However, we must - * not truncate the SubTrans until we have performed a checkpoint, to ensure - * that no such references remain on disk either; else a crash just after - * the truncation might leave us with a problem. Since SubTrans segments hold - * a large number of transactions, the opportunity to actually remove a - * segment is fairly rare, and so it seems best not to do the checkpoint - * unless we have confirmed that there is a removable segment. Therefore - * we issue the checkpoint command here, not in higher-level code as might - * seem cleaner. + * This is normally called during checkpoint, with oldestXact being the + * oldest XMIN of any running transaction. */ void TruncateSUBTRANS(TransactionId oldestXact) @@ -317,12 +322,13 @@ TruncateSUBTRANS(TransactionId oldestXact) * We pass the *page* containing oldestXact to SimpleLruTruncate. */ cutoffPage = TransactionIdToPage(oldestXact); + SimpleLruTruncate(SubTransCtl, cutoffPage); } /* - * Decide which of two SubTrans page numbers is "older" for truncation purposes. + * Decide which of two SUBTRANS page numbers is "older" for truncation purposes. * * We need to use comparison of TransactionIds here in order to do the right * thing with wraparound XID arithmetic. However, if we are asked about @@ -343,38 +349,3 @@ SubTransPagePrecedes(int page1, int page2) return TransactionIdPrecedes(xid1, xid2); } - - -/* - * Write a ZEROPAGE xlog record - * - * Note: xlog record is marked as outside transaction control, since we - * want it to be redone whether the invoking transaction commits or not. - * (Besides which, this is normally done just before entering a transaction.) - */ -static void -WriteZeroPageXlogRec(int pageno) -{ - XLogRecData rdata; - - rdata.buffer = InvalidBuffer; - rdata.data = (char *) (&pageno); - rdata.len = sizeof(int); - rdata.next = NULL; - (void) XLogInsert(RM_SLRU_ID, SUBTRANS_ZEROPAGE | XLOG_NO_TRAN, &rdata); -} - -/* Redo a ZEROPAGE action during WAL replay */ -void -subtrans_zeropage_redo(int pageno) -{ - int slotno; - - LWLockAcquire(SubTransCtl->ControlLock, LW_EXCLUSIVE); - - slotno = ZeroSUBTRANSPage(pageno, false); - SimpleLruWritePage(SubTransCtl, slotno, NULL); - /* Assert(SubTransCtl->page_status[slotno] == SLRU_PAGE_CLEAN); */ - - LWLockRelease(SubTransCtl->ControlLock); -} diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index b9d0398b62..5c07795c8a 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.162 2004/08/12 19:03:23 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.163 2004/08/23 23:22:44 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -4918,6 +4918,14 @@ CreateCheckPoint(bool shutdown, bool force) if (!shutdown) PreallocXlogFiles(recptr); + /* + * Truncate pg_subtrans if possible. We can throw away all data before + * the oldest XMIN of any running transaction. No future transaction will + * attempt to reference any pg_subtrans entry older than that (see Asserts + * in subtrans.c). + */ + TruncateSUBTRANS(GetOldestXmin(true)); + LWLockRelease(CheckpointLock); } diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index aa2708fcab..fc80f8efcf 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -13,7 +13,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.286 2004/08/06 04:15:07 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.287 2004/08/23 23:22:45 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -810,9 +810,8 @@ vac_truncate_clog(TransactionId vacuumXID, TransactionId frozenXID) return; } - /* Truncate CLOG and SUBTRANS to the oldest vacuumxid */ + /* Truncate CLOG to the oldest vacuumxid */ TruncateCLOG(vacuumXID); - TruncateSUBTRANS(vacuumXID); /* Give warning about impending wraparound problems */ if (frozenAlreadyWrapped) diff --git a/src/backend/storage/ipc/sinval.c b/src/backend/storage/ipc/sinval.c index f28a883572..dd9ca8244f 100644 --- a/src/backend/storage/ipc/sinval.c +++ b/src/backend/storage/ipc/sinval.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/ipc/sinval.c,v 1.69 2004/08/22 02:41:57 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/ipc/sinval.c,v 1.70 2004/08/23 23:22:45 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -661,6 +661,9 @@ result_known: * FALSE is sufficient for non-shared relations, since only backends in my * own database could ever see the tuples in them. * + * This is also used to determine where to truncate pg_subtrans. allDbs + * must be TRUE for that case. + * * Note: we include the currently running xids in the set of considered xids. * This ensures that if a just-started xact has not yet set its snapshot, * when it does set the snapshot it cannot set xmin less than what we compute. @@ -673,7 +676,17 @@ GetOldestXmin(bool allDbs) TransactionId result; int index; - result = GetTopTransactionId(); + /* + * Normally we start the min() calculation with our own XID. But + * if called by checkpointer, we will not be inside a transaction, + * so use next XID as starting point for min() calculation. (Note + * that if there are no xacts running at all, that will be the subtrans + * truncation point!) + */ + if (IsTransactionState()) + result = GetTopTransactionId(); + else + result = ReadNewTransactionId(); LWLockAcquire(SInvalLock, LW_SHARED); diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c index e48531c10a..f3ee1173a5 100644 --- a/src/backend/storage/lmgr/lwlock.c +++ b/src/backend/storage/lmgr/lwlock.c @@ -15,14 +15,13 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/lmgr/lwlock.c,v 1.21 2004/07/01 00:50:59 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/lmgr/lwlock.c,v 1.22 2004/08/23 23:22:45 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" -#include "access/clog.h" -#include "access/subtrans.h" +#include "access/slru.h" #include "storage/lwlock.h" #include "storage/proc.h" #include "storage/spin.h" @@ -109,11 +108,11 @@ NumLWLocks(void) /* bufmgr.c needs two for each shared buffer */ numLocks += 2 * NBuffers; - /* clog.c needs one per CLOG buffer + one control lock */ - numLocks += NUM_CLOG_BUFFERS + 1; + /* clog.c needs one per CLOG buffer */ + numLocks += NUM_SLRU_BUFFERS; - /* subtrans.c needs one per SubTrans buffer + one control lock */ - numLocks += NUM_SUBTRANS_BUFFERS + 1; + /* subtrans.c needs one per SubTrans buffer */ + numLocks += NUM_SLRU_BUFFERS; /* Perhaps create a few more for use by user-defined modules? */ diff --git a/src/include/access/clog.h b/src/include/access/clog.h index 2df1cedc1c..0b8fa12075 100644 --- a/src/include/access/clog.h +++ b/src/include/access/clog.h @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/clog.h,v 1.9 2004/07/01 00:51:38 tgl Exp $ + * $PostgreSQL: pgsql/src/include/access/clog.h,v 1.10 2004/08/23 23:22:45 tgl Exp $ */ #ifndef CLOG_H #define CLOG_H @@ -27,9 +27,6 @@ typedef int XidStatus; #define TRANSACTION_STATUS_ABORTED 0x02 #define TRANSACTION_STATUS_SUB_COMMITTED 0x03 -/* exported because lwlock.c needs it */ -#define NUM_CLOG_BUFFERS 8 - extern void TransactionIdSetStatus(TransactionId xid, XidStatus status); extern XidStatus TransactionIdGetStatus(TransactionId xid); @@ -42,6 +39,12 @@ extern void ShutdownCLOG(void); extern void CheckPointCLOG(void); extern void ExtendCLOG(TransactionId newestXact); extern void TruncateCLOG(TransactionId oldestXact); -extern void clog_zeropage_redo(int pageno); + +/* XLOG stuff */ +#define CLOG_ZEROPAGE 0x00 + +extern void clog_redo(XLogRecPtr lsn, XLogRecord *record); +extern void clog_undo(XLogRecPtr lsn, XLogRecord *record); +extern void clog_desc(char *buf, uint8 xl_info, char *rec); #endif /* CLOG_H */ diff --git a/src/include/access/rmgr.h b/src/include/access/rmgr.h index 7ea3134031..d43f6fdcaf 100644 --- a/src/include/access/rmgr.h +++ b/src/include/access/rmgr.h @@ -3,7 +3,7 @@ * * Resource managers definition * - * $PostgreSQL: pgsql/src/include/access/rmgr.h,v 1.11 2004/07/01 00:51:38 tgl Exp $ + * $PostgreSQL: pgsql/src/include/access/rmgr.h,v 1.12 2004/08/23 23:22:45 tgl Exp $ */ #ifndef RMGR_H #define RMGR_H @@ -16,7 +16,7 @@ typedef uint8 RmgrId; #define RM_XLOG_ID 0 #define RM_XACT_ID 1 #define RM_SMGR_ID 2 -#define RM_SLRU_ID 3 +#define RM_CLOG_ID 3 #define RM_HEAP_ID 10 #define RM_BTREE_ID 11 #define RM_HASH_ID 12 diff --git a/src/include/access/slru.h b/src/include/access/slru.h index e3245fac65..79abb2899c 100644 --- a/src/include/access/slru.h +++ b/src/include/access/slru.h @@ -1,23 +1,66 @@ -/* +/*------------------------------------------------------------------------- + * * slru.h + * Simple LRU buffering for transaction status logfiles * - * Simple LRU - * - * Portions Copyright (c) 2003, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/slru.h,v 1.7 2004/07/01 00:51:38 tgl Exp $ + * $PostgreSQL: pgsql/src/include/access/slru.h,v 1.8 2004/08/23 23:22:45 tgl Exp $ + * + *------------------------------------------------------------------------- */ #ifndef SLRU_H #define SLRU_H -#include "access/xlog.h" #include "storage/lwlock.h" -/* Opaque structs known only in slru.c */ -typedef struct SlruSharedData *SlruShared; -typedef struct SlruFlushData *SlruFlush; +/* + * Number of page buffers. Ideally this could be different for CLOG and + * SUBTRANS, but the benefit doesn't seem to be worth any additional + * notational cruft. + */ +#define NUM_SLRU_BUFFERS 8 + +/* Page status codes */ +typedef enum +{ + SLRU_PAGE_EMPTY, /* buffer is not in use */ + SLRU_PAGE_READ_IN_PROGRESS, /* page is being read in */ + SLRU_PAGE_CLEAN, /* page is valid and not dirty */ + SLRU_PAGE_DIRTY, /* page is valid but needs write */ + SLRU_PAGE_WRITE_IN_PROGRESS /* page is being written out */ +} SlruPageStatus; + +/* + * Shared-memory state + */ +typedef struct SlruSharedData +{ + LWLockId ControlLock; + + /* + * Info for each buffer slot. Page number is undefined when status is + * EMPTY. lru_count is essentially the number of page switches since + * last use of this page; the page with highest lru_count is the best + * candidate to replace. + */ + char *page_buffer[NUM_SLRU_BUFFERS]; + SlruPageStatus page_status[NUM_SLRU_BUFFERS]; + int page_number[NUM_SLRU_BUFFERS]; + unsigned int page_lru_count[NUM_SLRU_BUFFERS]; + LWLockId buffer_locks[NUM_SLRU_BUFFERS]; + + /* + * latest_page_number is the page number of the current end of the + * log; this is not critical data, since we use it only to avoid + * swapping out the latest page. + */ + int latest_page_number; +} SlruSharedData; + +typedef SlruSharedData *SlruShared; /* * SlruCtlData is an unshared structure that points to the active information @@ -27,13 +70,11 @@ typedef struct SlruCtlData { SlruShared shared; - LWLockId ControlLock; - /* - * Dir is set during SimpleLruInit and does not change thereafter. - * Since it's always the same, it doesn't need to be in shared memory. + * This flag tells whether to fsync writes (true for pg_clog, + * false for pg_subtrans). */ - char Dir[MAXPGPATH]; + bool do_fsync; /* * Decide which of two page numbers is "older" for truncation purposes. @@ -42,27 +83,27 @@ typedef struct SlruCtlData */ bool (*PagePrecedes) (int, int); + /* + * Dir is set during SimpleLruInit and does not change thereafter. + * Since it's always the same, it doesn't need to be in shared memory. + */ + char Dir[MAXPGPATH]; } SlruCtlData; typedef SlruCtlData *SlruCtl; +/* Opaque struct known only in slru.c */ +typedef struct SlruFlushData *SlruFlush; + extern int SimpleLruShmemSize(void); -extern void SimpleLruInit(SlruCtl ctl, const char *name, const char *subdir); +extern void SimpleLruInit(SlruCtl ctl, const char *name, + LWLockId ctllock, const char *subdir); extern int SimpleLruZeroPage(SlruCtl ctl, int pageno); -extern char *SimpleLruReadPage(SlruCtl ctl, int pageno, - TransactionId xid, bool forwrite); +extern int SimpleLruReadPage(SlruCtl ctl, int pageno, TransactionId xid); extern void SimpleLruWritePage(SlruCtl ctl, int slotno, SlruFlush fdata); -extern void SimpleLruSetLatestPage(SlruCtl ctl, int pageno); extern void SimpleLruFlush(SlruCtl ctl, bool checkpoint); extern void SimpleLruTruncate(SlruCtl ctl, int cutoffPage); - -/* XLOG stuff */ -#define CLOG_ZEROPAGE 0x00 -#define SUBTRANS_ZEROPAGE 0x10 - -extern void slru_redo(XLogRecPtr lsn, XLogRecord *record); -extern void slru_undo(XLogRecPtr lsn, XLogRecord *record); -extern void slru_desc(char *buf, uint8 xl_info, char *rec); +extern bool SlruScanDirectory(SlruCtl ctl, int cutoffPage, bool doDeletions); #endif /* SLRU_H */ diff --git a/src/include/access/subtrans.h b/src/include/access/subtrans.h index bf6cec64ec..28a16fbee5 100644 --- a/src/include/access/subtrans.h +++ b/src/include/access/subtrans.h @@ -1,22 +1,16 @@ /* * subtrans.h * - * PostgreSQL subtrans-log manager + * PostgreSQL subtransaction-log manager * * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/subtrans.h,v 1.2 2004/08/22 02:41:58 tgl Exp $ + * $PostgreSQL: pgsql/src/include/access/subtrans.h,v 1.3 2004/08/23 23:22:45 tgl Exp $ */ #ifndef SUBTRANS_H #define SUBTRANS_H -#include "access/xlog.h" - -/* exported because lwlock.c needs it */ -/* cannot be different from NUM_CLOG_BUFFERS without slru.c changes */ -#define NUM_SUBTRANS_BUFFERS NUM_CLOG_BUFFERS - extern void SubTransSetParent(TransactionId xid, TransactionId parent); extern TransactionId SubTransGetParent(TransactionId xid); extern TransactionId SubTransGetTopmostTransaction(TransactionId xid); @@ -29,6 +23,5 @@ extern void ShutdownSUBTRANS(void); extern void CheckPointSUBTRANS(void); extern void ExtendSUBTRANS(TransactionId newestXact); extern void TruncateSUBTRANS(TransactionId oldestXact); -extern void subtrans_zeropage_redo(int pageno); #endif /* SUBTRANS_H */ diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h index 7b08231e51..cef886c384 100644 --- a/src/include/storage/lwlock.h +++ b/src/include/storage/lwlock.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/lwlock.h,v 1.13 2004/08/11 04:07:16 tgl Exp $ + * $PostgreSQL: pgsql/src/include/storage/lwlock.h,v 1.14 2004/08/23 23:22:45 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -37,6 +37,8 @@ typedef enum LWLockId ControlFileLock, CheckpointLock, CheckpointStartLock, + CLogControlLock, + SubtransControlLock, RelCacheInitLock, BgWriterCommLock,