:-) (CVS 217)

FossilOrigin-Name: ee6760fb62e81af95796c0fcf1e65e5dc0701194
This commit is contained in:
drh 2001-05-15 00:39:25 +00:00
parent 365d68f7f1
commit 2af926b95a
3 changed files with 322 additions and 148 deletions

View File

@ -1,5 +1,5 @@
C :-)\s(CVS\s216)
D 2001-05-11T11:02:47
C :-)\s(CVS\s217)
D 2001-05-15T00:39:25
F COPYRIGHT 74a8a6531a42e124df07ab5599aad63870fa0bd4
F Makefile.in acef0f0275a5ca8e68bda165f7f05d810a207664
F README 51f6a4e7408b34afa5bc1c0485f61b6a4efb6958
@ -12,7 +12,7 @@ F notes/notes1.txt b7c0812b704a022e88c621146ae50955c923d464
F notes/notes2.txt 49b4d7ba35f183feb1fb098a27272b82f5c76eca
F notes/notes3.txt cd5e7bd2167d7ef89b1077abdfa68f0af6337744
F src/TODO 38a68a489e56e9fd4a96263e0ff9404a47368ad4
F src/btree.c 9b985eede929ce3f1a6ad510ecd04c197ce092f7
F src/btree.c f6d724fccaf108c4452dba6589e6027ba8f3e88a
F src/btree.h f21c240d0c95f93e2a128106d04a6c448ed0eb94
F src/build.c 4f6a2d551c56342cd4a0420654835be3ad179651
F src/dbbe.c b18259f99d87240cbe751021cf14dd3aa83a48af
@ -106,7 +106,7 @@ F www/opcode.tcl cb3a1abf8b7b9be9f3a228d097d6bf8b742c2b6f
F www/sqlite.tcl cb0d23d8f061a80543928755ec7775da6e4f362f
F www/tclsqlite.tcl 06f81c401f79a04f2c5ebfb97e7c176225c0aef2
F www/vdbe.tcl 0c8aaa529dd216ccbf7daaabd80985e413d5f9ad
P 624ccbca98be33a26c2af72d2d91c78f8a06ae9c
R dd7612c12a95a523d5ea5cc83b2e1f75
P c3e521190f02120a34f1e9244fe1ea3a975a6caa
R d3ffc66c90d6f5774a5954932275df58
U drh
Z 7bf915d3af7096b69289ffd2ecea3428
Z 0f43bc31472b172caca9953860c2e8b6

View File

@ -1 +1 @@
c3e521190f02120a34f1e9244fe1ea3a975a6caa
ee6760fb62e81af95796c0fcf1e65e5dc0701194

View File

@ -21,13 +21,43 @@
** http://www.hwaci.com/drh/
**
*************************************************************************
** $Id: btree.c,v 1.4 2001/05/11 11:02:47 drh Exp $
** $Id: btree.c,v 1.5 2001/05/15 00:39:25 drh Exp $
*/
#include "sqliteInt.h"
#include "pager.h"
#include "btree.h"
#include <assert.h>
/*
** The maximum number of database entries that can be held in a single
** page of the database.
*/
#define MX_CELL ((SQLITE_PAGE_SIZE-sizeof(PageHdr))/sizeof(Cell))
/*
** The maximum amount of data (in bytes) that can be stored locally for a
** database entry. If the entry contains more data than this, the
** extra goes onto overflow pages.
*/
#define MX_LOCAL_PAYLOAD ((SQLITE_PAGE_SIZE-sizeof(PageHdr)-4*sizeof(Cell))/4)
/*
** The in-memory image of a disk page has the auxiliary information appended
** to the end. EXTRA_SIZE is the number of bytes of space needed to hold
** that extra information.
*/
#define EXTRA_SIZE (sizeof(MemPage)-SQLITE_PAGE_SIZE)
/*
** Number of bytes on a single overflow page.
*/
#define OVERFLOW_SIZE (SQLITE_PAGE_SIZE-sizeof(Pgno))
/*
** Primitive data types. u32 must be 4 bytes and u16 must be 2 bytes.
*/
typedef unsigned int u32;
typedef unsigned short int u16;
@ -35,37 +65,43 @@ typedef unsigned short int u16;
** Forward declarations of structures used only in this file.
*/
typedef struct Page1Header Page1Header;
typedef struct MemPage MemPage;
typedef struct PageHdr PageHdr;
typedef struct Cell Cell;
typedef struct FreeBlk FreeBlk;
typedef struct OverflowPage OverflowPage;
/*
** All structures on a database page are aligned to 4-byte boundries.
** This routine rounds up a number of bytes to the next multiple of 4.
*/
#define ROUNDUP(X) ((X+3) & ~3)
/*
** The first page contains the following additional information:
**
** MAGIC-1
** MAGIC-2
** First free block
** The first pages of the database file contains some additional
** information used for housekeeping and sanity checking. Otherwise,
** the first page is just like any other. The additional information
** found on the first page is described by the following structure.
*/
#define EXTRA_PAGE_1_CELLS 3
struct Page1Header {
u32 magic1; /* A magic number for sanity checking */
u32 magic2; /* A second magic number for sanity checking */
Pgno firstList; /* First free page in a list of all free pages */
};
#define MAGIC_1 0x7264dc61
#define MAGIC_2 0x54e55d9e
struct Page1Header {
u32 magic1;
u32 magic2;
Pgno firstList;
};
/*
** Each database page has a header as follows:
**
** page1_header Extra numbers found on page 1 only.
** leftmost_pgno Page number of the leftmost child
** rightmost_pgno Page number of the right-most child page
** first_cell Index into MemPage.aPage of first cell
** first_free Index of first free block
**
** MemPage.pStart always points to the leftmost_pgno. First_free is
** MemPage.pStart always points to the rightmost_pgno. First_free is
** 0 if there is no free space on this page. Otherwise it points to
** an area like this:
**
@ -88,42 +124,10 @@ struct FreeBlk {
u16 iSize; /* Number of u32-sized slots in the block of free space */
u16 iNext; /* Index in MemPage.aPage[] of the next free block */
};
/*
** The maximum number of database entries that can be held in a single
** page of the database. Each entry has a 16-byte header consisting of
** 4 unsigned 32-bit numbers, as follows:
**
** nKey Number of byte in the key
** nData Number of byte in the data
** pgno Page number of the right child block
** next index in MemPage.aPage[] of the next entry in sorted order
**
** The key and data follow this header. The key and data are packed together
** and the total rounded up to the next multiple of 4 bytes. There must
** be at least 4 bytes in the key/data packet, so each entry consumes at
** least 20 bytes of space on the page.
*/
#define MX_CELL ((SQLITE_PAGE_SIZE-sizeof(PageHdr))/sizeof(Cell))
/*
** The maximum amount of data (in bytes) that can be stored locally for a
** database entry. If the entry contains more data than this, the
** extra goes onto overflow pages.
*/
#define MX_LOCAL_PAYLOAD ((SQLITE_PAGE_SIZE-20-4*24)/4)
/*
** On a single disk page, there are sections of the page that are used
** to hold data and sections that are unused and available for holding
** new data. A single instance of this structure describes a contiguous
** block of free space on a disk page.
*/
struct FreeBlk {
int idx; /* Index into MemPage.aPage[] of the start of freeblock */
int size; /* Number of MemPage.aPage[] slots used by this block */
struct OverflowPage {
Pgno next;
char aData[SQLITE_PAGE_SIZE-sizeof(Pgno)];
};
typedef struct FreeBlk;
/*
** For every page in the database file, an instance of the following structure
@ -131,27 +135,20 @@ typedef struct FreeBlk;
** the disk. The rest is auxiliary data that held in memory only.
*/
struct MemPage {
u32 aPage[SQLITE_PAGE_SIZE/sizeof(u32)]; /* Page data stored on disk */
unsigned char isInit; /* True if sequel is initialized */
unsigned char validUp; /* True if MemPage.up is valid */
unsigned char validLeft; /* True if MemPage.left is valid */
unsigned char validRight; /* True if MemPage.right is valid */
char aPage[SQLITE_PAGE_SIZE]; /* Page data stored on disk */
unsigned char isInit; /* True if sequel is initialized */
unsigned char validUp; /* True if MemPage.up is valid */
unsigned char validLeft; /* True if MemPage.left is valid */
unsigned char validRight; /* True if MemPage.right is valid */
Pgno up; /* The parent page. 0 means this is the root */
Pgno left; /* Left sibling page. 0==none */
Pgno right; /* Right sibling page. 0==none */
int idxStart; /* Index in aPage[] of real data */
int nFree; /* Number of free slots of aPage[] */
PageHdr *pStart; /* Points to aPage[idxStart] */
int nFree; /* Number of free bytes in aPage[] */
int nCell; /* Number of entries on this page */
u32 *aCell[MX_CELL]; /* All entires in sorted order */
}
typedef struct MemPage;
/*
** The in-memory image of a disk page has the auxiliary information appended
** to the end. EXTRA_SIZE is the number of bytes of space needed to hold
** that extra information.
*/
#define EXTRA_SIZE (sizeof(MemPage)-SQLITE_PAGE_SIZE)
/*
** Everything we need to know about an open database
@ -170,96 +167,163 @@ typedef Btree Bt;
** MemPage.aCell[] of the entry.
*/
struct Cursor {
Btree *pBt; /* The pointer back to the BTree */
MemPage *pPage; /* Page that contains the entry */
int idx; /* Index of the entry in pPage->aCell[] */
int skip_incr; /* */
Btree *pBt; /* The pointer back to the BTree */
Cursor *pPrev, *pNext; /* List of all cursors */
MemPage *pPage; /* Page that contains the entry */
int idx; /* Index of the entry in pPage->aCell[] */
int skip_incr; /* */
};
/*
** The maximum depth of a cursor
*/
#define MX_LEVEL 20
/*
** Within a cursor, each level off the search tree is an instance of
** this structure.
*/
typedef struct BtIdxpt BtIdxpt;
struct BtIdxpt {
Pgno pgno; /* The page number */
u32 *aPage; /* The page data */
int idx; /* Index into pPage[] */
u32 *aIdx; /* Pointer to pPage[idx] */
};
/*
** Everything we need to know about a cursor
*/
struct BtCursor {
Btree *pBt; /* The whole database */
BtCursor *pPrev, *pNext; /* Linked list of all cursors */
int valid; /* True if the cursor points to something */
int nLevel; /* Number of levels of indexing used */
BtIdxpt *pLevel; /* Pointer to aLevel[nLevel] */
BtIdxpt aLevel[MX_LEVEL]; /* The index levels */
};
/*
** Defragment the page given. All of the free space
** is collected into one big block at the end of the
** page.
*/
static void defragmentPage(MemPage *pPage){
int pc;
int i, n;
FreeBlk *pFBlk;
char newPage[SQLITE_PAGE_SIZE];
pc = ROUNDUP(pPage->idxStart + sizeof(PageHdr));
pPage->pStart->firstCell = pc;
memcpy(newPage, pPage->aPage, pc);
for(i=0; i<pPage->nCell; i++){
Cell *pCell = &pPage->aCell[i];
n = pCell->nKey + pCell->nData;
if( n>MAX_LOCAL_PAYLOAD ) n = MAX_LOCAL_PAYLOAD + sizeof(Pgno);
n = ROUNDUP(n);
n += sizeof(Cell) - sizeof(pCell->aData);
pCell->iNext = i<pPage->nCell ? pc + n : 0;
memcpy(&newPage[pc], pCell, n);
pPage->aCell[i] = (Cell*)&pPage->aPage[pc];
pc += n;
}
assert( pPage->nFree==pc );
memcpy(pPage->aPage, newPage, pc);
pFBlk = &pPage->aPage[pc];
pFBlk->iSize = SQLITE_PAGE_SIZE - pc;
pFBlk->iNext = 0;
pPage->pStart->firstFree = pc;
memset(&pFBlk[1], 0, SQLITE_PAGE_SIZE - pc - sizeof(FreeBlk));
}
/*
** Mark a section of the memory block as in-use.
** Allocate space on a page. The space needs to be at least
** nByte bytes in size. (Actually, all allocations are rounded
** up to the next even multiple of 4.) Return the index into
** pPage->aPage[] of the first byte of the new allocation.
** Or return 0 if there is not enough space.
**
** This routine will call defragmentPage if necessary to consolidate
** free space.
*/
static void useSpace(MemPage *pPage, int start, int size){
static int allocSpace(MemPage *pPage, int nByte){
FreeBlk *p;
u16 *pIdx;
int start;
nByte = ROUNDUP(nByte);
if( pPage->nFree<nByte ) return 0;
pIdx = &pPage->pStart->firstFree;
p = (FreeBlk*)&pPage->aPage[*pIdx];
while( p->iSize<nByte ){
if( p->iNext==0 ){
defragmentPage(pPage);
pIdx = &pPage->pStart->firstFree;
}else{
pIdx = &p->iNext;
}
p = (FreeBlk*)&pPage->aPage[*pIdx];
}
if( p->iSize==nByte ){
start = *pIdx;
*pIdx = p->iNext;
}else{
p->iSize -= nByte;
start = *pIdx + p->iSize;
}
pPage->nFree -= nByte;
return start;
}
/*
** Return a section of the MemPage.aPage[] to the freelist.
** The first byte of the new free block is pPage->aPage[start]
** and there are a told of size bytes to be freed.
*/
static void freeSpace(MemPage *pPage, int start, int size){
int end = start + size;
u16 *pIdx, idx;
FreeBlk *pFBlk;
FreeBlk *pNew;
FreeBlk *pNext;
assert( size == ROUNDUP(size) );
assert( start == ROUNDUP(start) );
pIdx = &pPage->pStart->firstFree;
idx = *pIdx;
while( idx!=0 && idx<start ){
pFBlk = (FreeBlk*)&pPage->aPage[idx];
if( idx + pFBlk->iSize == start ){
pFBlk->iSize += size;
if( idx + pFBlk->iSize == pFBlk->iNext ){
pNext = (FreeBlk*)&pPage->aPage[pFblk->iNext];
pFBlk->iSize += pNext->iSize;
pFBlk->iNext = pNext->iNext;
}
pPage->nFree += size;
return;
}
pIdx = &pFBlk->iNext;
idx = *pIdx;
}
pNew = (FreeBlk*)&pPage->aPage[start];
if( idx != end ){
pNew->iSize = size;
pNew->iNext = idx;
}else{
pNext = (FreeBlk*)&pPage->aPage[idx];
pNew->iSize = size + pNext->iSize;
pNew->iNext = pNext->iNext;
}
*pIdx = start;
pPage->nFree += size;
}
/*
** Initialize the auxiliary information for a disk block.
*/
static int initPage(MemPage *pPage, Pgno pgnoThis, Pgno pgnoParent){
u32 idx;
int idx;
Cell *pCell;
FreeBlk *pFBlk;
pPage->idxStart = (pgnoThis==1) ? sizeof(Page1Header) : 0;
pPage->pStart = (PageHdr*)&pPage->aPage[pPage->idxStart];
pPage->isInit = 1;
pPage->validUp = 1;
pPage->up = pgnoParent;
pPage->nFreeSlot = SQLITE_PAGE_SIZE/sizeof(pPage->aPage[0]) - 2;
pPage->nFree = 1;
if( pgnoThis==1 ){
pPage->idxStart = EXTRA_PAGE_1_CELLS;
pPage->nFreeByte -= EXTRA_PAGE_1_CELLS;
}
pPage->aFree[0].idx = pPage->idxStart + 2;
pPage->aFree[0].size = pPage->nFreeByte;
pPage->nCell = 0;
idx = pPage->aPage[pPage->idxStart+1];
idx = pPage->pStart->firstCell;
while( idx!=0 ){
int size;
pPage->aCell[pPage->nCell++] = idx;
size = pPage->aPage[idx] + pPage->aPage[idx+1];
if( size>MX_LOCAL_PAYLOAD ){
if( size>MX_DIRECT_PAYLOAD ){
size = MX_LOCAL_PAYLOAD + 2*sizeof(u32);
}else{
size = MX_LOCAL_PAYLOAD + sizeof(u32);
}
}
size = (size + sizeof(u32) - 1)/sizeof(u32) + 4;
useSpace(pPage, idx, size);
idx = pPage->aPage[idx+3];
if( idx>SQLITE_PAGE_SIZE-sizeof(Cell) ) goto page_format_error;
pCell = (Cell*)&pPage->aPage[idx];
pPage->aCell[pPage->nCell++] = pCell;
idx = pCell->iNext;
}
pPage->nFree = 0;
idx = pPage->pStart->firstFree;
while( idx!=0 ){
if( idx>SQLITE_PAGE_SIZE-sizeof(FreeBlk) ) goto page_format_error;
pFBlk = (FreeBlk*)&pPage->aPage[idx];
pPage->nFree += pFBlk->iSize;
if( pFBlk->iNext <= idx ) goto page_format_error;
idx = pFBlk->iNext;
}
return SQLITE_OK;
page_format_error:
return SQLITE_CORRUPT;
}
/*
@ -324,7 +388,7 @@ static int lockBtree(Btree *pBt){
if( pBt->page1 ) return SQLITE_OK;
rc = sqlitepager_get(pBt->pPager, 1, &pBt->page1);
if( rc!=SQLITE_OK ) return rc;
rc = initPage(pBt->page1);
rc = initPage(pBt->page1, 1, 0);
if( rc!=SQLITE_OK ){
sqlitepager_unref(pBt->page1);
pBt->page1 = 0;
@ -397,10 +461,18 @@ int sqliteBtreeCursor(Btree *pBt, BtCursor **ppCur){
}
pBt->pCursor = pCur;
pCur->pBt = pBt;
pCur->nLevel = 1;
pCur->aLevel[0].pgno = 1;
pCur->aLevel[0].aPage = pBt->page1;
pCur->aLevel[0].idx = 0;
rc = sqlitepager_get(pBt->pPager, 1, &pCur->pPage);
if( rc!=SQLITE_OK ){
sqliteFree(pCur);
*ppCur = 0;
return rc;
}
if( !pCur->pPage->isInit ){
initPage(pCur->pPage);
}
pCur->idx = 0;
*ppCur = pCur;
return SQLITE_OK;
}
/*
@ -417,9 +489,7 @@ int sqliteBtreeCloseCursor(BtCursor *pCur){
if( pCur->pNext ){
pCur->pNext->pPrev = pCur->pPrev;
}
for(i=pCur->nLevel-1; i>0; i--){
sqlitepager_unref(pCur->aLevel[i].aPage);
}
sqlitepager_unref(pCur->pPage);
if( pBt->pCursor==0 && pBt->inTrans==0 ){
unlockBtree(pBt);
}
@ -432,23 +502,127 @@ int sqliteBtreeCloseCursor(BtCursor *pCur){
** initialized or is pointed to a deleted entry, then return 0.
*/
int sqliteBtreeKeySize(BtCursor *pCur){
int nEntry;
u32 *aPage;
BtIdxpt *pIdx;
int offset;
if( !pCur->valid ) return 0;
pIdx = &pCur->aLevel[pCur->nLevel-1];
aPage = pIdx->aPage;
offset = (pIdx->pgno==1)*EXTRA_PAGE_1_CELLS;
nEntry = aPage[offset];
if( pIdx->idx<nEntry ){
Cell *pCell;
MemPage *pPage;
pPage = pCur->pPage;
if( pCur->idx >= pPage->nCell ) return 0;
pCell = pPage->aCell[pCur->idx];
return pCell->nKey;
}
static int getPayload(BtCursor *pCur, int offset, int amt, char *zBuf){
char *aData;
Pgno nextPage;
aData = pCur->pPage->aCell[pCur->idx].aData;
if( offset<MX_LOCAL_PAYLOAD ){
int a = amt;
if( a+offset>MX_LOCAL_PAYLOAD ){
a = MX_LOCAL_PAYLOAD - offset;
}
memcpy(zBuf, &aData[offset], a);
if( a==amt ){
return SQLITE_OK;
}
offset += a;
zBuf += a;
amt -= a;
if( amt>0 ){
assert( a==ROUNDUP(a) );
nextPage = *(Pgno*)&aData[a];
}
}
while( amt>0 && nextPage ){
OverflowPage *pOvfl;
rc = sqlitepager_get(pCur->pBt->pPager, nextPage, &pOvfl);
if( rc!=0 ){
return rc;
}
nextPage = pOvfl->next;
if( offset<OVERFLOW_SIZE ){
int a = amt;
if( a + offset > OVERFLOW_SIZE ){
a = OVERFLOW_SIZE - offset;
}
memcpy(zBuf, &pOvfl->aData[offset], a);
offset += a;
amt -= a;
zBuf += a;
}
sqlitepager_unref(pOvfl);
}
return amt==0 ? SQLITE_OK : SQLITE_CORRUPT;
}
int sqliteBtreeKey(BtCursor*, int offset, int amt, char *zBuf);
int sqliteBtreeDataSize(BtCursor*);
int sqliteBtreeData(BtCursor*, int offset, int amt, char *zBuf);
/*
** Compare the key for the entry that pCur points to against the
** given key (pKey,nKeyOrig). Put the comparison result in *pResult.
** The result is negative if pCur<pKey, zero if they are equal and
** positive if pCur>pKey.
**
** SQLITE_OK is returned on success. If part of the cursor key
** is on overflow pages and we are unable to access those overflow
** pages, then some other value might be returned to indicate the
** reason for the error.
*/
static int compareKey(BtCursor *pCur, char *pKey, int nKeyOrig, int *pResult){
Pgno nextPage;
int nKey = nKeyOrig;
int n;
Cell *pCell;
assert( pCur->pPage );
assert( pCur->idx>=0 && pCur->idx<pCur->pPage->nCell );
pCell = &pCur->pPage->aCell[pCur->idx];
if( nKey > pCell->nKey ){
nKey = pCell->nKey;
}
n = nKey;
if( n>MX_LOCAL_PAYLOAD ){
n = MX_LOCAL_PAYLOAD;
}
c = memcmp(pCell->aData, pKey, n);
if( c!=0 ){
*pResult = c;
return SQLITE_OK;
}
pKey += n;
nKey -= n;
nextPage = *(Pgno*)&pCell->aData[MX_LOCAL_PAYLOAD];
while( nKey>0 ){
OverflowPage *pOvfl;
if( nextPage==0 ){
return SQLITE_CORRUPT;
}
rc = sqlitepager_get(pCur->pBt->pPager, nextPage, &pOvfl);
if( rc!=0 ){
return rc;
}
nextPage = pOvfl->next;
n = nKey;
if( n>OVERFLOW_SIZE ){
n = OVERFLOW_SIZE;
}
c = memcmp(pOvfl->aData, pKey, n);
sqlitepager_unref(pOvfl);
if( c!=0 ){
*pResult = c;
return SQLITE_OK;
}
nKey -= n;
pKey += n;
}
c = pCell->nKey - nKeyOrig;
*pResult = c;
return SQLITE_OK;
}
/* Move the cursor so that it points to an entry near pKey.
** Return 0 if the cursor is left pointing exactly at pKey.
** Return -1 if the cursor points to the largest entry less than pKey.