Store minimal terms in interior nodes. Whenever there's a break
between adjacent leaf nodes, instead of storing the entire leftmost term of the right child, store only the shortest prefix of that term necessary to distinguish it from the rightmost term of the left child (e.g. if the left child ends with "something" and the right child begins with "wicked", only "w" is stored). (CVS 3513) FossilOrigin-Name: f6e0b080dcfaf554b2c05df5e7d4db69d012fba3
This commit is contained in:
parent
9e6a561554
commit
f72442be68
@ -152,7 +152,10 @@
|
||||
** than the first term encoded (or all terms if no term is encoded).
|
||||
** Otherwise, for terms greater than or equal to pTerm[i] but less
|
||||
** than pTerm[i+1], the subtree for that term will be rooted at
|
||||
** iBlockid+i.
|
||||
** iBlockid+i. Interior nodes only store enough term data to
|
||||
** distinguish adjacent children (if the rightmost term of the left
|
||||
** child is "something", and the leftmost term of the right child is
|
||||
** "wicked", only "w" is stored).
|
||||
**
|
||||
** New data is spilled to a new interior node at the same height when
|
||||
** the current node exceeds INTERIOR_MAX bytes (default 2048).
|
||||
@ -3961,6 +3964,11 @@ typedef struct LeafWriter {
|
||||
DataBuffer term; /* previous encoded term */
|
||||
DataBuffer data; /* encoding buffer */
|
||||
|
||||
/* bytes of first term in the current node which distinguishes that
|
||||
** term from the last term of the previous node.
|
||||
*/
|
||||
int nTermDistinct;
|
||||
|
||||
InteriorWriter parentWriter; /* if we overflow */
|
||||
int has_parent;
|
||||
} LeafWriter;
|
||||
@ -4072,6 +4080,9 @@ static int leafWriterInternalFlush(fulltext_vtab *v, LeafWriter *pWriter,
|
||||
n = getVarint32(pWriter->data.pData+iData+1, &nStartingTerm);
|
||||
pStartingTerm = pWriter->data.pData+iData+1+n;
|
||||
assert( pWriter->data.nData>iData+1+n+nStartingTerm );
|
||||
assert( pWriter->nTermDistinct>0 );
|
||||
assert( pWriter->nTermDistinct<=nStartingTerm );
|
||||
nStartingTerm = pWriter->nTermDistinct;
|
||||
|
||||
if( pWriter->has_parent ){
|
||||
interiorWriterAppend(&pWriter->parentWriter,
|
||||
@ -4166,11 +4177,23 @@ static void leafWriterDestroy(LeafWriter *pWriter){
|
||||
dataBufferDestroy(&pWriter->data);
|
||||
}
|
||||
|
||||
/* Encode a term into the leafWriter, delta-encoding as appropriate. */
|
||||
static void leafWriterEncodeTerm(LeafWriter *pWriter,
|
||||
const char *pTerm, int nTerm){
|
||||
/* Encode a term into the leafWriter, delta-encoding as appropriate.
|
||||
** Returns the length of the new term which distinguishes it from the
|
||||
** previous term, which can be used to set nTermDistinct when a node
|
||||
** boundary is crossed.
|
||||
*/
|
||||
static int leafWriterEncodeTerm(LeafWriter *pWriter,
|
||||
const char *pTerm, int nTerm){
|
||||
char c[VARINT_MAX+VARINT_MAX];
|
||||
int n;
|
||||
int n, nPrefix = 0;
|
||||
|
||||
assert( nTerm>0 );
|
||||
while( nPrefix<pWriter->term.nData &&
|
||||
pTerm[nPrefix]==pWriter->term.pData[nPrefix] ){
|
||||
nPrefix++;
|
||||
/* Failing this implies that the terms weren't in order. */
|
||||
assert( nPrefix<nTerm );
|
||||
}
|
||||
|
||||
if( pWriter->data.nData==0 ){
|
||||
/* Encode the node header and leading term as:
|
||||
@ -4187,21 +4210,13 @@ static void leafWriterEncodeTerm(LeafWriter *pWriter,
|
||||
** varint(nSuffix)
|
||||
** char pTermSuffix[nSuffix]
|
||||
*/
|
||||
int nPrefix = 0;
|
||||
|
||||
assert( nTerm>0 );
|
||||
while( nPrefix<pWriter->term.nData &&
|
||||
pTerm[nPrefix]==pWriter->term.pData[nPrefix] ){
|
||||
nPrefix++;
|
||||
/* Failing this implies that the terms weren't in order. */
|
||||
assert( nPrefix<nTerm );
|
||||
}
|
||||
|
||||
n = putVarint(c, nPrefix);
|
||||
n += putVarint(c+n, nTerm-nPrefix);
|
||||
dataBufferAppend2(&pWriter->data, c, n, pTerm+nPrefix, nTerm-nPrefix);
|
||||
}
|
||||
dataBufferReplace(&pWriter->term, pTerm, nTerm);
|
||||
|
||||
return nPrefix+1;
|
||||
}
|
||||
|
||||
/* Used to avoid a memmove when a large amount of doclist data is in
|
||||
@ -4238,10 +4253,13 @@ static int leafWriterStepMerge(fulltext_vtab *v, LeafWriter *pWriter,
|
||||
DLReader *pReaders, int nReaders){
|
||||
char c[VARINT_MAX+VARINT_MAX];
|
||||
int iTermData = pWriter->data.nData, iDoclistData;
|
||||
int i, nData, n, nActualData, nActual, rc;
|
||||
int i, nData, n, nActualData, nActual, rc, nTermDistinct;
|
||||
|
||||
ASSERT_VALID_LEAF_NODE(pWriter->data.pData, pWriter->data.nData);
|
||||
leafWriterEncodeTerm(pWriter, pTerm, nTerm);
|
||||
nTermDistinct = leafWriterEncodeTerm(pWriter, pTerm, nTerm);
|
||||
|
||||
/* Remember nTermDistinct if opening a new node. */
|
||||
if( iTermData==0 ) pWriter->nTermDistinct = nTermDistinct;
|
||||
|
||||
iDoclistData = pWriter->data.nData;
|
||||
|
||||
@ -4283,6 +4301,8 @@ static int leafWriterStepMerge(fulltext_vtab *v, LeafWriter *pWriter,
|
||||
if( iTermData>0 ){
|
||||
rc = leafWriterInternalFlush(v, pWriter, 0, iTermData);
|
||||
if( rc!=SQLITE_OK ) return rc;
|
||||
|
||||
pWriter->nTermDistinct = nTermDistinct;
|
||||
}
|
||||
|
||||
/* Fix the encoded doclist length. */
|
||||
@ -4323,6 +4343,8 @@ static int leafWriterStepMerge(fulltext_vtab *v, LeafWriter *pWriter,
|
||||
rc = leafWriterInternalFlush(v, pWriter, 0, iTermData);
|
||||
if( rc!=SQLITE_OK ) return rc;
|
||||
|
||||
pWriter->nTermDistinct = nTermDistinct;
|
||||
|
||||
/* Rebuild header using the current term */
|
||||
n = putVarint(pWriter->data.pData, 0);
|
||||
n += putVarint(pWriter->data.pData+n, nTerm);
|
||||
|
12
manifest
12
manifest
@ -1,5 +1,5 @@
|
||||
C Refactoring\sgroundwork\sfor\scoming\swork\son\sinterior\snodes.\s\sChange\nLeafWriter\sto\suse\sempty\sdata\sbuffer\s(instead\sof\sempty\sterm)\sto\sdetect\nan\sempty\sblock.\s\sCode\sto\svalidate\sinterior\snodes.\s\sModerate\srevisions\nto\sleaf-node\sand\sdoclist\svalidation.\s\sRecast\sleafWriterStep()\sin\sterms\nof\sLeafWriterStepMerge().\s(CVS\s3512)
|
||||
D 2006-11-17T21:12:16
|
||||
C Store\sminimal\sterms\sin\sinterior\snodes.\s\sWhenever\sthere's\sa\sbreak\nbetween\sleaf\snodes,\sinstead\sof\sstoring\sthe\sentire\sleftmost\sterm\sof\sthe\nrightmost\schild,\sstore\sonly\sthat\sportion\sof\sthe\sleftmost\sterm\nnecessary\sto\sdistinguish\sit\sfrom\sthe\srightmost\sterm\sof\sthe\sleftmost\nchild.\s(CVS\s3513)
|
||||
D 2006-11-18T00:12:45
|
||||
F Makefile.in 8e14898d41a53033ecb687d93c9cd5d109fb9ae3
|
||||
F Makefile.linux-gcc 2d8574d1ba75f129aba2019f0b959db380a90935
|
||||
F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028
|
||||
@ -33,7 +33,7 @@ F ext/fts1/fulltext.h 08525a47852d1d62a0be81d3fc3fe2d23b094efd
|
||||
F ext/fts1/simple_tokenizer.c 1844d72f7194c3fd3d7e4173053911bf0661b70d
|
||||
F ext/fts1/tokenizer.h 0c53421b832366d20d720d21ea3e1f6e66a36ef9
|
||||
F ext/fts2/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d
|
||||
F ext/fts2/fts2.c 57d8cd57ce18c3ce7b194b4810fe7e119ec7e6a3
|
||||
F ext/fts2/fts2.c 74a5db3f7f8e49dfa2a5d40e5fdece09bf23e5a8
|
||||
F ext/fts2/fts2.h bbdab26d34f91974d5b9ade8b7836c140a7c4ce1
|
||||
F ext/fts2/fts2_hash.c b3f22116d4ef0bc8f2da6e3fdc435c86d0951a9b
|
||||
F ext/fts2/fts2_hash.h e283308156018329f042816eb09334df714e105e
|
||||
@ -421,7 +421,7 @@ F www/tclsqlite.tcl bb0d1357328a42b1993d78573e587c6dcbc964b9
|
||||
F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0
|
||||
F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b
|
||||
F www/whentouse.tcl 97e2b5cd296f7d8057e11f44427dea8a4c2db513
|
||||
P 9b6d413d751d962b67cb4e3a208efe61581cb822
|
||||
R ff81ed1c8b4721212823c87e00f2e6b9
|
||||
P f30771d5c7ef2b502af95d81a18796b75271ada4
|
||||
R ef562280fdedbce7ff0fcbb49811b62c
|
||||
U shess
|
||||
Z 82e324f504b7a8a8d9f0a515f8d329aa
|
||||
Z 674bc601b7eaf4b058181b0cf2143d33
|
||||
|
@ -1 +1 @@
|
||||
f30771d5c7ef2b502af95d81a18796b75271ada4
|
||||
f6e0b080dcfaf554b2c05df5e7d4db69d012fba3
|
Loading…
x
Reference in New Issue
Block a user