diff --git a/ext/fts2/fts2.c b/ext/fts2/fts2.c index 3cd1ad59bf..4689db65c9 100644 --- a/ext/fts2/fts2.c +++ b/ext/fts2/fts2.c @@ -159,9 +159,11 @@ ** iBlockid+i. ** ** New data is spilled to a new interior node at the same height when -** the current node exceeds INTERIOR_MAX bytes (default 2048). The -** interior nodes at a given height are naturally tracked by interior -** nodes at height+1, and so on. +** the current node exceeds INTERIOR_MAX bytes (default 2048). +** INTERIOR_MIN_TERMS (default 7) keeps large terms from monopolizing +** interior nodes and making the tree too skinny. The interior nodes +** at a given height are naturally tracked by interior nodes at +** height+1, and so on. ** ** **** Segment directory **** @@ -3598,6 +3600,16 @@ static int index_update(fulltext_vtab *v, sqlite_int64 iRow, /* How large interior nodes can grow. */ #define INTERIOR_MAX 2048 +/* Minimum number of terms per interior node (except the root). This +** prevents large terms from making the tree too skinny - must be >0 +** so that the tree always makes progress. Note that the min tree +** fanout will be INTERIOR_MIN_TERMS+1. +*/ +#define INTERIOR_MIN_TERMS 7 +#if INTERIOR_MIN_TERMS<1 +# error INTERIOR_MIN_TERMS must be greater than 0. +#endif + /* ROOT_MAX controls how much data is stored inline in the segment ** directory. */ @@ -3642,6 +3654,7 @@ typedef struct InteriorWriter { InteriorBlock *first, *last; struct InteriorWriter *parentWriter; + sqlite_int64 iOpeningChildBlock; /* First child block in block "last". */ #ifndef NDEBUG sqlite_int64 iLastChildBlock; /* for consistency checks. */ #endif @@ -3659,6 +3672,7 @@ static void interiorWriterInit(int iHeight, const char *pTerm, int nTerm, CLEAR(pWriter); pWriter->iHeight = iHeight; + pWriter->iOpeningChildBlock = iChildBlock; #ifndef NDEBUG pWriter->iLastChildBlock = iChildBlock; #endif @@ -3680,11 +3694,15 @@ static void interiorWriterAppend(InteriorWriter *pWriter, #endif assert( pWriter->iLastChildBlock==iChildBlock ); - if( pWriter->last->data.nData+n+nTerm>INTERIOR_MAX ){ - /* Overflow to a new block. */ + /* Overflow to a new block if the new term makes the current block + ** too big, and the current block already has enough terms. + */ + if( pWriter->last->data.nData+n+nTerm>INTERIOR_MAX && + iChildBlock-pWriter->iOpeningChildBlock>INTERIOR_MIN_TERMS ){ pWriter->last->next = interiorBlockNew(pWriter->iHeight, iChildBlock, pTerm, nTerm); pWriter->last = pWriter->last->next; + pWriter->iOpeningChildBlock = iChildBlock; }else{ dataBufferAppend2(&pWriter->last->data, c, n, pTerm, nTerm); } diff --git a/manifest b/manifest index 6f947555e1..787fdc05bc 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Allow\sbacking\stables\sto\sbe\smissing\son\sdropping\sfts\stable.\s\sFixes\nhttp://www.sqlite.org/cvstrac/tktview?tn=1992,35\s.\s(CVS\s3509) -D 2006-11-13T20:15:27 +C Require\sa\sminimum\sfanout\sfor\sinterior\snodes.\s\sThis\sprevents\scases\nwhere\sexcessively\slarge\sterms\skeep\sthe\stree\sfrom\sfinding\sa\ssingle\nroot.\s\sA\sdownside\sis\sthat\sthis\scould\sresult\sin\slarge\sinterior\snodes\sin\nthe\spresence\sof\slarge\sterms,\swhich\smay\sbe\sprone\sto\sfragmentation,\nthough\sif\sthe\snodes\swere\ssmaller\sthat\swould\stranslate\sinto\smore\slevels\nin\sthe\stree,\swhich\swould\salso\shave\sthat\sproblem.\s(CVS\s3510) +D 2006-11-13T21:00:55 F Makefile.in 8e14898d41a53033ecb687d93c9cd5d109fb9ae3 F Makefile.linux-gcc 2d8574d1ba75f129aba2019f0b959db380a90935 F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028 @@ -33,7 +33,7 @@ F ext/fts1/fulltext.h 08525a47852d1d62a0be81d3fc3fe2d23b094efd F ext/fts1/simple_tokenizer.c 1844d72f7194c3fd3d7e4173053911bf0661b70d F ext/fts1/tokenizer.h 0c53421b832366d20d720d21ea3e1f6e66a36ef9 F ext/fts2/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d -F ext/fts2/fts2.c 9b28f218c003319c7cddc374e4eae62229ab4f83 +F ext/fts2/fts2.c 667a93b3fe079f20870a3042bd4b4c3841925c01 F ext/fts2/fts2.h bbdab26d34f91974d5b9ade8b7836c140a7c4ce1 F ext/fts2/fts2_hash.c b3f22116d4ef0bc8f2da6e3fdc435c86d0951a9b F ext/fts2/fts2_hash.h e283308156018329f042816eb09334df714e105e @@ -421,7 +421,7 @@ F www/tclsqlite.tcl bb0d1357328a42b1993d78573e587c6dcbc964b9 F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0 F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b F www/whentouse.tcl 97e2b5cd296f7d8057e11f44427dea8a4c2db513 -P b9dd16ef3df0785d8d0eea9a6f2ad580a7289834 -R fe80027f82af0692a9ff86e7596c1cb4 +P 9628a61a6f33b7bec3455086534b76437d2622b4 +R 2b75980ff8122f283fe2f8c11a712490 U shess -Z fc29ce5f806e2ffd152c1e84f8a98a96 +Z 5a39d4513967a7196d065949839c93cd diff --git a/manifest.uuid b/manifest.uuid index 9d1bc15954..bb7665bdd3 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -9628a61a6f33b7bec3455086534b76437d2622b4 \ No newline at end of file +64b7e3406134ac4891113b9bb432ad97504268bb \ No newline at end of file