Merge trunk enhancements, and especially the fix for allowing strings as column identifiers in CREATE INDEX statements.

FossilOrigin-Name: 5ff855293865c244ac632c630e8e7e8d7c05a5f6
drh 2015-09-04 13:02:21 +00:00
commit aac39e1ded
34 changed files with 1848 additions and 733 deletions

View File

@ -108,13 +108,15 @@ proc get_tokenizer_docs {data} {
append res "<dt><b>$line</b></dt><dd><p style=margin-top:0>\n"
continue
}
if {[regexp {SYNONYM SUPPORT} $line]} {
set line "</dl><h3>Synonym Support</h3>"
}
if {[string trim $line] == ""} {
append res "<p>\n"
} else {
append res "$line\n"
}
}
append res "</dl>\n"
set res
}
@ -208,6 +210,10 @@ proc main {data} {
fts5_tokenizer {
output [get_fts5_struct $data "typedef struct Fts5Tokenizer" "^\};"]
output [get_fts5_struct $data \
"Flags that may be passed as the third argument to xTokenize()" \
"#define FTS5_TOKEN_COLOCATED"
]
}
fts5_extension {

View File

@ -217,7 +217,7 @@ struct Fts5ExtensionApi {
int (*xTokenize)(Fts5Context*,
const char *pText, int nText, /* Text to tokenize */
void *pCtx, /* Context passed to xToken() */
int (*xToken)(void*, const char*, int, int, int) /* Callback */
int (*xToken)(void*, int, const char*, int, int, int) /* Callback */
);
int (*xPhraseCount)(Fts5Context*);
@ -278,18 +278,46 @@ struct Fts5ExtensionApi {
**
** xTokenize:
** This function is expected to tokenize the nText byte string indicated
** by argument pText. pText may not be nul-terminated. The first argument
** passed to this function is a pointer to an Fts5Tokenizer object returned
** by an earlier call to xCreate().
** by argument pText. pText may or may not be nul-terminated. The first
** argument passed to this function is a pointer to an Fts5Tokenizer object
** returned by an earlier call to xCreate().
**
** The second argument indicates the reason that FTS5 is requesting
** tokenization of the supplied text. This is always one of the following
** four values:
**
** <ul><li> <b>FTS5_TOKENIZE_DOCUMENT</b> - A document is being inserted into
** or removed from the FTS table. The tokenizer is being invoked to
** determine the set of tokens to add to (or delete from) the
** FTS index.
**
** <li> <b>FTS5_TOKENIZE_QUERY</b> - A MATCH query is being executed
** against the FTS index. The tokenizer is being called to tokenize
** a bareword or quoted string specified as part of the query.
**
** <li> <b>(FTS5_TOKENIZE_QUERY | FTS5_TOKENIZE_PREFIX)</b> - Same as
** FTS5_TOKENIZE_QUERY, except that the bareword or quoted string is
** followed by a "*" character, indicating that the last token
** returned by the tokenizer will be treated as a token prefix.
**
** <li> <b>FTS5_TOKENIZE_AUX</b> - The tokenizer is being invoked to
** satisfy an fts5_api.xTokenize() request made by an auxiliary
** function. Or an fts5_api.xColumnSize() request made by the same
** on a columnsize=0 database.
** </ul>
**
** For each token in the input string, the supplied callback xToken() must
** be invoked. The first argument to it should be a copy of the pointer
** passed as the second argument to xTokenize(). The next two arguments
** are a pointer to a buffer containing the token text, and the size of
** the token in bytes. The 4th and 5th arguments are the byte offsets of
** the first byte of and first byte immediately following the text from
** passed as the second argument to xTokenize(). The third and fourth
** arguments are a pointer to a buffer containing the token text, and the
** size of the token in bytes. The fifth and sixth arguments are the byte
** offsets of the first byte of and first byte immediately following the text from
** which the token is derived within the input.
**
** The second argument passed to the xToken() callback ("tflags") should
** normally be set to 0. The exception is if the tokenizer supports
** synonyms. In this case see the discussion below for details.
**
** FTS5 assumes the xToken() callback is invoked for each token in the
** order that they occur within the input text.
**
@ -301,6 +329,112 @@ struct Fts5ExtensionApi {
** may abandon the tokenization and return any error code other than
** SQLITE_OK or SQLITE_DONE.
**
** SYNONYM SUPPORT
**
** Custom tokenizers may also support synonyms. Consider a case in which a
** user wishes to query for a phrase such as "first place". Using the
** built-in tokenizers, the FTS5 query 'first + place' will match instances
** of "first place" within the document set, but not alternative forms
** such as "1st place". In some applications, it would be better to match
** all instances of "first place" or "1st place" regardless of which form
** the user specified in the MATCH query text.
**
** There are several ways to approach this in FTS5:
**
** <ol><li> By mapping all synonyms to a single token. In this case, the
** In the above example, this means that the tokenizer returns the
** same token for inputs "first" and "1st". Say that token is in
** fact "first", so that when the user inserts the document "I won
** 1st place" entries are added to the index for tokens "i", "won",
** "first" and "place". If the user then queries for '1st + place',
** the tokenizer substitutes "first" for "1st" and the query works
** as expected.
**
** <li> By adding multiple synonyms for a single term to the FTS index.
** In this case, when tokenizing query text, the tokenizer may
** provide multiple synonyms for a single term within the document.
** FTS5 then queries the index for each synonym individually. For
** example, faced with the query:
**
** <codeblock>
** ... MATCH 'first place'</codeblock>
**
** the tokenizer offers both "1st" and "first" as synonyms for the
** first token in the MATCH query and FTS5 effectively runs a query
** similar to:
**
** <codeblock>
** ... MATCH '(first OR 1st) place'</codeblock>
**
** except that, for the purposes of auxiliary functions, the query
** still appears to contain just two phrases - "(first OR 1st)"
** being treated as a single phrase.
**
** <li> By adding multiple synonyms for a single term to the FTS index.
** Using this method, when tokenizing document text, the tokenizer
** provides multiple synonyms for each token. So that when a
** document such as "I won first place" is tokenized, entries are
** added to the FTS index for "i", "won", "first", "1st" and
** "place".
**
** This way, even if the tokenizer does not provide synonyms
** when tokenizing query text (it should not - to do so would be
** inefficient), it doesn't matter if the user queries for
** 'first + place' or '1st + place', as there are entries in the
** FTS index corresponding to both forms of the first token.
** </ol>
**
** Whether it is parsing document or query text, any call to xToken that
** specifies a <i>tflags</i> argument with the FTS5_TOKEN_COLOCATED bit
** set is considered to supply a synonym for the previous token. For example,
** when parsing the document "I won first place", a tokenizer that supports
** synonyms would call xToken() 5 times, as follows:
**
** <codeblock>
** xToken(pCtx, 0, "i", 1, 0, 1);
** xToken(pCtx, 0, "won", 3, 2, 5);
** xToken(pCtx, 0, "first", 5, 6, 11);
** xToken(pCtx, FTS5_TOKEN_COLOCATED, "1st", 3, 6, 11);
** xToken(pCtx, 0, "place", 5, 12, 17);
**</codeblock>
**
** It is an error to specify the FTS5_TOKEN_COLOCATED flag the first time
** xToken() is called. Multiple synonyms may be specified for a single token
** by making multiple calls to xToken(FTS5_TOKEN_COLOCATED) in sequence.
** There is no limit to the number of synonyms that may be provided for a
** single token.
**
** In many cases, method (1) above is the best approach. It does not add
** extra data to the FTS index or require FTS5 to query for multiple terms,
** so it is efficient in terms of disk space and query speed. However, it
** does not support prefix queries very well. If, as suggested above, the
** token "first" is subsituted for "1st" by the tokenizer, then the query:
**
** <codeblock>
** ... MATCH '1s*'</codeblock>
**
** will not match documents that contain the token "1st" (as the tokenizer
** will probably not map "1s" to any prefix of "first").
**
** For full prefix support, method (3) may be preferred. In this case,
** because the index contains entries for both "first" and "1st", prefix
** queries such as 'fi*' or '1s*' will match correctly. However, because
** extra entries are added to the FTS index, this method uses more space
** within the database.
**
** Method (2) offers a midpoint between (1) and (3). Using this method,
** a query such as '1s*' will match documents that contain the literal
** token "1st", but not "first" (assuming the tokenizer is not able to
** provide synonyms for prefixes). However, a non-prefix query like '1st'
** will match against "1st" and "first". This method does not require
** extra disk space, as no extra entries are added to the FTS index.
** On the other hand, it may require more CPU cycles to run MATCH queries,
** as separate queries of the FTS index are required for each synonym.
**
** When using methods (2) or (3), it is important that the tokenizer only
** provide synonyms when tokenizing query text (method (2)) or document
** text (method (3)), not both. Doing so will not cause any errors, but is
** inefficient.
*/
typedef struct Fts5Tokenizer Fts5Tokenizer;
typedef struct fts5_tokenizer fts5_tokenizer;
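For illustration only (not part of this change): a hypothetical helper that a tokenizer might call after scanning each token, showing how FTS5_TOKEN_COLOCATED interacts with the FTS5_TOKENIZE_* flags. It hard-codes a single "first"/"1st" synonym pair and, by testing FTS5_TOKENIZE_DOCUMENT, supplies the synonym only while indexing documents (method (3) above); testing FTS5_TOKENIZE_QUERY instead would give method (2). It assumes the usual fts5 headers for memcmp() and the FTS5_* constants.

static int emitWithSynonym(
  int flags,                         /* Copy of the 3rd argument to xTokenize() */
  void *pCtx,                        /* Copy of the 2nd argument to xTokenize() */
  const char *pTok, int nTok,        /* Token just scanned from the input */
  int iStart, int iEnd,              /* Byte offsets of the token in the input */
  int (*xToken)(void*, int, const char*, int, int, int)
){
  int rc = xToken(pCtx, 0, pTok, nTok, iStart, iEnd);
  if( rc==SQLITE_OK
   && (flags & FTS5_TOKENIZE_DOCUMENT)         /* index-time only: method (3) */
   && nTok==5 && 0==memcmp(pTok, "first", 5)
  ){
    /* Same iStart/iEnd as the call above: the synonym occupies the same
    ** position within the column as the token it is colocated with. */
    rc = xToken(pCtx, FTS5_TOKEN_COLOCATED, "1st", 3, iStart, iEnd);
  }
  return rc;
}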
@ -309,9 +443,11 @@ struct fts5_tokenizer {
void (*xDelete)(Fts5Tokenizer*);
int (*xTokenize)(Fts5Tokenizer*,
void *pCtx,
int flags, /* Mask of FTS5_TOKENIZE_* flags */
const char *pText, int nText,
int (*xToken)(
void *pCtx, /* Copy of 2nd argument to xTokenize() */
int tflags, /* Mask of FTS5_TOKEN_* flags */
const char *pToken, /* Pointer to buffer containing token */
int nToken, /* Size of token in bytes */
int iStart, /* Byte offset of token within input text */
@ -320,6 +456,16 @@ struct fts5_tokenizer {
);
};
/* Flags that may be passed as the third argument to xTokenize() */
#define FTS5_TOKENIZE_QUERY 0x0001
#define FTS5_TOKENIZE_PREFIX 0x0002
#define FTS5_TOKENIZE_DOCUMENT 0x0004
#define FTS5_TOKENIZE_AUX 0x0008
/* Flags that may be passed by the tokenizer implementation back to FTS5
** as the third argument to the supplied xToken callback. */
#define FTS5_TOKEN_COLOCATED 0x0001 /* Same position as prev. token */
/*
** END OF CUSTOM TOKENIZERS
*************************************************************************/
@ -329,7 +475,7 @@ struct fts5_tokenizer {
*/
typedef struct fts5_api fts5_api;
struct fts5_api {
int iVersion; /* Currently always set to 1 */
int iVersion; /* Currently always set to 2 */
/* Create a new tokenizer */
int (*xCreateTokenizer)(

View File

@ -166,9 +166,10 @@ int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig);
int sqlite3Fts5Tokenize(
Fts5Config *pConfig, /* FTS5 Configuration object */
int flags, /* FTS5_TOKENIZE_* flags */
const char *pText, int nText, /* Text to tokenize */
void *pCtx, /* Context passed to xToken() */
int (*xToken)(void*, const char*, int, int, int) /* Callback */
int (*xToken)(void*, int, const char*, int, int, int) /* Callback */
);
void sqlite3Fts5Dequote(char *z);
@ -234,8 +235,10 @@ struct Fts5PoslistReader {
int n; /* Size of buffer at a[] in bytes */
int i; /* Current offset in a[] */
u8 bFlag; /* For client use (any custom purpose) */
/* Output variables */
int bEof; /* Set to true at EOF */
u8 bEof; /* Set to true at EOF */
i64 iPos; /* (iCol<<32) + iPos */
};
int sqlite3Fts5PoslistReaderInit(
@ -381,9 +384,9 @@ int sqlite3Fts5IndexErrcode(Fts5Index*);
void sqlite3Fts5IndexReset(Fts5Index*);
/*
** Get or set the "averages" record.
** Get or set the "averages" values.
*/
int sqlite3Fts5IndexGetAverages(Fts5Index *p, Fts5Buffer *pBuf);
int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize);
int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8*, int);
/*
@ -596,7 +599,7 @@ int sqlite3Fts5ExprPhraseCount(Fts5Expr*);
int sqlite3Fts5ExprPhraseSize(Fts5Expr*, int iPhrase);
int sqlite3Fts5ExprPoslist(Fts5Expr*, int, const u8 **);
int sqlite3Fts5ExprPhraseExpr(Fts5Config*, Fts5Expr*, int, Fts5Expr**);
int sqlite3Fts5ExprClonePhrase(Fts5Config*, Fts5Expr*, int, Fts5Expr**);
/*******************************************
** The fts5_expr.c API above this point is used by the other hand-written

View File

@ -148,6 +148,7 @@ static void fts5HighlightAppend(
*/
static int fts5HighlightCb(
void *pContext, /* Pointer to HighlightContext object */
int tflags, /* Mask of FTS5_TOKEN_* flags */
const char *pToken, /* Buffer containing token */
int nToken, /* Size of token in bytes */
int iStartOff, /* Start offset of token */
@ -155,7 +156,10 @@ static int fts5HighlightCb(
){
HighlightContext *p = (HighlightContext*)pContext;
int rc = SQLITE_OK;
int iPos = p->iPos++;
int iPos;
if( tflags & FTS5_TOKEN_COLOCATED ) return SQLITE_OK;
iPos = p->iPos++;
if( p->iRangeEnd>0 ){
if( iPos<p->iRangeStart || iPos>p->iRangeEnd ) return SQLITE_OK;

View File

@ -645,12 +645,15 @@ int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig){
*/
int sqlite3Fts5Tokenize(
Fts5Config *pConfig, /* FTS5 Configuration object */
int flags, /* FTS5_TOKENIZE_* flags */
const char *pText, int nText, /* Text to tokenize */
void *pCtx, /* Context passed to xToken() */
int (*xToken)(void*, const char*, int, int, int) /* Callback */
int (*xToken)(void*, int, const char*, int, int, int) /* Callback */
){
if( pText==0 ) return SQLITE_OK;
return pConfig->pTokApi->xTokenize(pConfig->pTok, pCtx, pText, nText, xToken);
return pConfig->pTokApi->xTokenize(
pConfig->pTok, pCtx, flags, pText, nText, xToken
);
}
/*

View File

@ -22,6 +22,8 @@
*/
#define FTS5_EOF 0
#define FTS5_LARGEST_INT64 (0xffffffff|(((i64)0x7fffffff)<<32))
typedef struct Fts5ExprTerm Fts5ExprTerm;
/*
@ -73,6 +75,7 @@ struct Fts5ExprTerm {
int bPrefix; /* True for a prefix term */
char *zTerm; /* nul-terminated term */
Fts5IndexIter *pIter; /* Iterator for this term */
Fts5ExprTerm *pSynonym; /* Pointer to first in list of synonyms */
};
/*
@ -181,6 +184,10 @@ static int fts5ExprGetToken(
default: {
const char *z2;
if( sqlite3Fts5IsBareword(z[0])==0 ){
sqlite3Fts5ParseError(pParse, "fts5: syntax error near \"%.1s\"", z);
return FTS5_EOF;
}
tok = FTS5_STRING;
for(z2=&z[1]; sqlite3Fts5IsBareword(*z2); z2++);
pToken->n = (z2 - z);
@ -244,79 +251,6 @@ int sqlite3Fts5ExprNew(
return sParse.rc;
}
/*
** Create a new FTS5 expression by cloning phrase iPhrase of the
** expression passed as the second argument.
*/
int sqlite3Fts5ExprPhraseExpr(
Fts5Config *pConfig,
Fts5Expr *pExpr,
int iPhrase,
Fts5Expr **ppNew
){
int rc = SQLITE_OK; /* Return code */
Fts5ExprPhrase *pOrig; /* The phrase extracted from pExpr */
Fts5ExprPhrase *pCopy; /* Copy of pOrig */
Fts5Expr *pNew = 0; /* Expression to return via *ppNew */
pOrig = pExpr->apExprPhrase[iPhrase];
pCopy = (Fts5ExprPhrase*)sqlite3Fts5MallocZero(&rc,
sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * pOrig->nTerm
);
if( pCopy ){
int i; /* Used to iterate through phrase terms */
Fts5ExprPhrase **apPhrase;
Fts5ExprNode *pNode;
Fts5ExprNearset *pNear;
pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr));
apPhrase = (Fts5ExprPhrase**)sqlite3Fts5MallocZero(&rc,
sizeof(Fts5ExprPhrase*)
);
pNode = (Fts5ExprNode*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5ExprNode));
pNear = (Fts5ExprNearset*)sqlite3Fts5MallocZero(&rc,
sizeof(Fts5ExprNearset) + sizeof(Fts5ExprPhrase*)
);
for(i=0; i<pOrig->nTerm; i++){
pCopy->aTerm[i].zTerm = sqlite3Fts5Strndup(&rc, pOrig->aTerm[i].zTerm,-1);
pCopy->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix;
}
if( rc==SQLITE_OK ){
/* All the allocations succeeded. Put the expression object together. */
pNew->pIndex = pExpr->pIndex;
pNew->pRoot = pNode;
pNew->nPhrase = 1;
pNew->apExprPhrase = apPhrase;
pNew->apExprPhrase[0] = pCopy;
pNode->eType = (pOrig->nTerm==1 ? FTS5_TERM : FTS5_STRING);
pNode->pNear = pNear;
pNear->nPhrase = 1;
pNear->apPhrase[0] = pCopy;
pCopy->nTerm = pOrig->nTerm;
pCopy->pNode = pNode;
}else{
/* At least one allocation failed. Free them all. */
for(i=0; i<pOrig->nTerm; i++){
sqlite3_free(pCopy->aTerm[i].zTerm);
}
sqlite3_free(pCopy);
sqlite3_free(pNear);
sqlite3_free(pNode);
sqlite3_free(apPhrase);
sqlite3_free(pNew);
pNew = 0;
}
}
*ppNew = pNew;
return rc;
}
/*
** Free the expression node object passed as the only argument.
*/
@ -350,6 +284,115 @@ static int fts5ExprColsetTest(Fts5ExprColset *pColset, int iCol){
return 0;
}
/*
** Argument pTerm must be a synonym iterator. Return the current rowid
** that it points to.
*/
static i64 fts5ExprSynonymRowid(Fts5ExprTerm *pTerm, int bDesc, int *pbEof){
i64 iRet = 0;
int bRetValid = 0;
Fts5ExprTerm *p;
assert( pTerm->pSynonym );
assert( bDesc==0 || bDesc==1 );
for(p=pTerm; p; p=p->pSynonym){
if( 0==sqlite3Fts5IterEof(p->pIter) ){
i64 iRowid = sqlite3Fts5IterRowid(p->pIter);
if( bRetValid==0 || (bDesc!=(iRowid<iRet)) ){
iRet = iRowid;
bRetValid = 1;
}
}
}
if( pbEof && bRetValid==0 ) *pbEof = 1;
return iRet;
}
/*
** Argument pTerm must be a synonym iterator.
*/
static int fts5ExprSynonymPoslist(
Fts5ExprTerm *pTerm,
i64 iRowid,
int *pbDel, /* OUT: Caller should sqlite3_free(*pa) */
u8 **pa, int *pn
){
Fts5PoslistWriter writer = {0};
Fts5PoslistReader aStatic[4];
Fts5PoslistReader *aIter = aStatic;
int nIter = 0;
int nAlloc = 4;
int rc = SQLITE_OK;
Fts5ExprTerm *p;
assert( pTerm->pSynonym );
for(p=pTerm; p; p=p->pSynonym){
Fts5IndexIter *pIter = p->pIter;
if( sqlite3Fts5IterEof(pIter)==0 && sqlite3Fts5IterRowid(pIter)==iRowid ){
const u8 *a;
int n;
i64 dummy;
rc = sqlite3Fts5IterPoslist(pIter, &a, &n, &dummy);
if( rc!=SQLITE_OK ) goto synonym_poslist_out;
if( nIter==nAlloc ){
int nByte = sizeof(Fts5PoslistReader) * nAlloc * 2;
Fts5PoslistReader *aNew = (Fts5PoslistReader*)sqlite3_malloc(nByte);
if( aNew==0 ){
rc = SQLITE_NOMEM;
goto synonym_poslist_out;
}
memcpy(aNew, aIter, sizeof(Fts5PoslistReader) * nIter);
nAlloc = nAlloc*2;
if( aIter!=aStatic ) sqlite3_free(aIter);
aIter = aNew;
}
sqlite3Fts5PoslistReaderInit(-1, a, n, &aIter[nIter]);
assert( aIter[nIter].bEof==0 );
nIter++;
}
}
assert( *pbDel==0 );
if( nIter==1 ){
*pa = (u8*)aIter[0].a;
*pn = aIter[0].n;
}else{
Fts5PoslistWriter writer = {0};
Fts5Buffer buf = {0,0,0};
i64 iPrev = -1;
while( 1 ){
int i;
i64 iMin = FTS5_LARGEST_INT64;
for(i=0; i<nIter; i++){
if( aIter[i].bEof==0 ){
if( aIter[i].iPos==iPrev ){
if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) continue;
}
if( aIter[i].iPos<iMin ){
iMin = aIter[i].iPos;
}
}
}
if( iMin==FTS5_LARGEST_INT64 || rc!=SQLITE_OK ) break;
rc = sqlite3Fts5PoslistWriterAppend(&buf, &writer, iMin);
iPrev = iMin;
}
if( rc ){
sqlite3_free(buf.p);
}else{
*pa = buf.p;
*pn = buf.n;
*pbDel = 1;
}
}
synonym_poslist_out:
if( aIter!=aStatic ) sqlite3_free(aIter);
return rc;
}
/*
** All individual term iterators in pPhrase are guaranteed to be valid and
** pointing to the same rowid when this function is called. This function
@ -362,7 +405,7 @@ static int fts5ExprColsetTest(Fts5ExprColset *pColset, int iCol){
** not a match.
*/
static int fts5ExprPhraseIsMatch(
Fts5Expr *pExpr, /* Expression pPhrase belongs to */
Fts5ExprNode *pNode, /* Node pPhrase belongs to */
Fts5ExprColset *pColset, /* Restrict matches to these columns */
Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */
int *pbMatch /* OUT: Set to true if really a match */
@ -388,16 +431,24 @@ static int fts5ExprPhraseIsMatch(
aIter = (Fts5PoslistReader*)sqlite3_malloc(nByte);
if( !aIter ) return SQLITE_NOMEM;
}
memset(aIter, 0, sizeof(Fts5PoslistReader) * pPhrase->nTerm);
/* Initialize a term iterator for each term in the phrase */
for(i=0; i<pPhrase->nTerm; i++){
Fts5ExprTerm *pTerm = &pPhrase->aTerm[i];
i64 dummy;
int n;
const u8 *a;
rc = sqlite3Fts5IterPoslist(pPhrase->aTerm[i].pIter, &a, &n, &dummy);
if( rc || sqlite3Fts5PoslistReaderInit(iCol, a, n, &aIter[i]) ){
goto ismatch_out;
int n = 0;
int bFlag = 0;
const u8 *a = 0;
if( pTerm->pSynonym ){
rc = fts5ExprSynonymPoslist(pTerm, pNode->iRowid, &bFlag, (u8**)&a, &n);
}else{
rc = sqlite3Fts5IterPoslist(pTerm->pIter, &a, &n, &dummy);
}
if( rc!=SQLITE_OK ) goto ismatch_out;
sqlite3Fts5PoslistReaderInit(iCol, a, n, &aIter[i]);
aIter[i].bFlag = bFlag;
if( aIter[i].bEof ) goto ismatch_out;
}
while( 1 ){
@ -431,6 +482,9 @@ static int fts5ExprPhraseIsMatch(
ismatch_out:
*pbMatch = (pPhrase->poslist.n>0);
for(i=0; i<pPhrase->nTerm; i++){
if( aIter[i].bFlag ) sqlite3_free((u8*)aIter[i].a);
}
if( aIter!=aStatic ) sqlite3_free(aIter);
return rc;
}
@ -598,17 +652,55 @@ static int fts5ExprNearAdvanceFirst(
int bFromValid,
i64 iFrom
){
Fts5IndexIter *pIter = pNode->pNear->apPhrase[0]->aTerm[0].pIter;
Fts5ExprTerm *pTerm = &pNode->pNear->apPhrase[0]->aTerm[0];
int rc;
assert( Fts5NodeIsString(pNode) );
if( bFromValid ){
rc = sqlite3Fts5IterNextFrom(pIter, iFrom);
if( pTerm->pSynonym ){
int bEof = 1;
Fts5ExprTerm *p;
/* Find the firstest rowid any synonym points to. */
i64 iRowid = fts5ExprSynonymRowid(pTerm, pExpr->bDesc, 0);
/* Advance each iterator that currently points to iRowid. Or, if iFrom
** is valid - each iterator that points to a rowid before iFrom. */
for(p=pTerm; p; p=p->pSynonym){
if( sqlite3Fts5IterEof(p->pIter)==0 ){
i64 ii = sqlite3Fts5IterRowid(p->pIter);
if( ii==iRowid
|| (bFromValid && ii!=iFrom && (ii>iFrom)==pExpr->bDesc)
){
if( bFromValid ){
rc = sqlite3Fts5IterNextFrom(p->pIter, iFrom);
}else{
rc = sqlite3Fts5IterNext(p->pIter);
}
if( rc!=SQLITE_OK ) break;
if( sqlite3Fts5IterEof(p->pIter)==0 ){
bEof = 0;
}
}else{
bEof = 0;
}
}
}
/* Set the EOF flag if either all synonym iterators are at EOF or an
** error has occurred. */
pNode->bEof = (rc || bEof);
}else{
rc = sqlite3Fts5IterNext(pIter);
Fts5IndexIter *pIter = pTerm->pIter;
assert( Fts5NodeIsString(pNode) );
if( bFromValid ){
rc = sqlite3Fts5IterNextFrom(pIter, iFrom);
}else{
rc = sqlite3Fts5IterNext(pIter);
}
pNode->bEof = (rc || sqlite3Fts5IterEof(pIter));
}
pNode->bEof = (rc || sqlite3Fts5IterEof(pIter));
return rc;
}
@ -647,6 +739,35 @@ static int fts5ExprAdvanceto(
return 0;
}
static int fts5ExprSynonymAdvanceto(
Fts5ExprTerm *pTerm, /* Term iterator to advance */
int bDesc, /* True if iterator is "rowid DESC" */
i64 *piLast, /* IN/OUT: Lastest rowid seen so far */
int *pRc /* OUT: Error code */
){
int rc = SQLITE_OK;
i64 iLast = *piLast;
Fts5ExprTerm *p;
int bEof = 0;
for(p=pTerm; rc==SQLITE_OK && p; p=p->pSynonym){
if( sqlite3Fts5IterEof(p->pIter)==0 ){
i64 iRowid = sqlite3Fts5IterRowid(p->pIter);
if( (bDesc==0 && iLast>iRowid) || (bDesc && iLast<iRowid) ){
rc = sqlite3Fts5IterNextFrom(p->pIter, iLast);
}
}
}
if( rc!=SQLITE_OK ){
*pRc = rc;
bEof = 1;
}else{
*piLast = fts5ExprSynonymRowid(pTerm, bDesc, &bEof);
}
return bEof;
}
/*
** IN/OUT parameter (*pa) points to a position list n bytes in size. If
** the position list contains entries for column iCol, then (*pa) is set
@ -717,9 +838,9 @@ static int fts5ExprNearTest(
** phrase is not a match, break out of the loop early. */
for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){
Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
if( pPhrase->nTerm>1 || pNear->pColset ){
if( pPhrase->nTerm>1 || pPhrase->aTerm[0].pSynonym || pNear->pColset ){
int bMatch = 0;
rc = fts5ExprPhraseIsMatch(pExpr, pNear->pColset, pPhrase, &bMatch);
rc = fts5ExprPhraseIsMatch(pNode, pNear->pColset, pPhrase, &bMatch);
if( bMatch==0 ) break;
}else{
rc = sqlite3Fts5IterPoslistBuffer(
@ -755,6 +876,7 @@ static int fts5ExprTokenTest(
assert( pNode->eType==FTS5_TERM );
assert( pNear->nPhrase==1 && pPhrase->nTerm==1 );
assert( pPhrase->aTerm[0].pSynonym==0 );
rc = sqlite3Fts5IterPoslist(pIter, &pPos, &nPos, &pNode->iRowid);
@ -801,69 +923,99 @@ static int fts5ExprNearNextMatch(
i64 iLast; /* Lastest rowid any iterator points to */
int i, j; /* Phrase and token index, respectively */
int bMatch; /* True if all terms are at the same rowid */
const int bDesc = pExpr->bDesc;
assert( pNear->nPhrase>1 || pNear->apPhrase[0]->nTerm>1 );
/* Check that this node should not be FTS5_TERM */
assert( pNear->nPhrase>1
|| pNear->apPhrase[0]->nTerm>1
|| pNear->apPhrase[0]->aTerm[0].pSynonym
);
/* Initialize iLast, the "lastest" rowid any iterator points to. If the
** iterator skips through rowids in the default ascending order, this means
** the maximum rowid. Or, if the iterator is "ORDER BY rowid DESC", then it
** means the minimum rowid. */
iLast = sqlite3Fts5IterRowid(pLeft->aTerm[0].pIter);
if( pLeft->aTerm[0].pSynonym ){
iLast = fts5ExprSynonymRowid(&pLeft->aTerm[0], bDesc, 0);
}else{
iLast = sqlite3Fts5IterRowid(pLeft->aTerm[0].pIter);
}
do {
bMatch = 1;
for(i=0; i<pNear->nPhrase; i++){
Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
for(j=0; j<pPhrase->nTerm; j++){
Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter;
i64 iRowid = sqlite3Fts5IterRowid(pIter);
if( iRowid!=iLast ) bMatch = 0;
if( fts5ExprAdvanceto(pIter, pExpr->bDesc, &iLast,&rc,&pNode->bEof) ){
return rc;
Fts5ExprTerm *pTerm = &pPhrase->aTerm[j];
if( pTerm->pSynonym ){
Fts5ExprTerm *p;
int bEof = 1;
i64 iRowid = fts5ExprSynonymRowid(pTerm, bDesc, 0);
if( iRowid==iLast ) continue;
bMatch = 0;
if( fts5ExprSynonymAdvanceto(pTerm, bDesc, &iLast, &rc) ){
pNode->bEof = 1;
return rc;
}
}else{
Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter;
i64 iRowid = sqlite3Fts5IterRowid(pIter);
if( iRowid==iLast ) continue;
bMatch = 0;
if( fts5ExprAdvanceto(pIter, bDesc, &iLast, &rc, &pNode->bEof) ){
return rc;
}
}
}
}
}while( bMatch==0 );
pNode->bNomatch = (0==fts5ExprNearTest(&rc, pExpr, pNode));
pNode->iRowid = iLast;
pNode->bNomatch = (0==fts5ExprNearTest(&rc, pExpr, pNode));
return rc;
}
/*
** Initialize all term iterators in the pNear object. If any term is found
** to match no documents at all, set *pbEof to true and return immediately,
** without initializing any further iterators.
** to match no documents at all, return immediately without initializing any
** further iterators.
*/
static int fts5ExprNearInitAll(
Fts5Expr *pExpr,
Fts5ExprNode *pNode
){
Fts5ExprNearset *pNear = pNode->pNear;
Fts5ExprTerm *pTerm;
Fts5ExprPhrase *pPhrase;
int i, j;
int rc = SQLITE_OK;
for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){
pPhrase = pNear->apPhrase[i];
Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
for(j=0; j<pPhrase->nTerm; j++){
pTerm = &pPhrase->aTerm[j];
if( pTerm->pIter ){
sqlite3Fts5IterClose(pTerm->pIter);
pTerm->pIter = 0;
Fts5ExprTerm *pTerm = &pPhrase->aTerm[j];
Fts5ExprTerm *p;
int bEof = 1;
for(p=pTerm; p && rc==SQLITE_OK; p=p->pSynonym){
if( p->pIter ){
sqlite3Fts5IterClose(p->pIter);
p->pIter = 0;
}
rc = sqlite3Fts5IndexQuery(
pExpr->pIndex, p->zTerm, strlen(p->zTerm),
(pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) |
(pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0),
&p->pIter
);
assert( rc==SQLITE_OK || p->pIter==0 );
if( p->pIter && 0==sqlite3Fts5IterEof(p->pIter) ){
bEof = 0;
}
}
rc = sqlite3Fts5IndexQuery(
pExpr->pIndex, pTerm->zTerm, strlen(pTerm->zTerm),
(pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) |
(pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0),
&pTerm->pIter
);
assert( rc==SQLITE_OK || pTerm->pIter==0 );
if( pTerm->pIter==0 || sqlite3Fts5IterEof(pTerm->pIter) ){
if( bEof ){
pNode->bEof = 1;
break;
return rc;
}
}
}
@ -1029,10 +1181,17 @@ static int fts5ExprNodeNext(
};
case FTS5_TERM: {
rc = fts5ExprNearAdvanceFirst(pExpr, pNode, bFromValid, iFrom);
if( pNode->bEof==0 ){
Fts5IndexIter *pIter = pNode->pNear->apPhrase[0]->aTerm[0].pIter;
if( bFromValid ){
rc = sqlite3Fts5IterNextFrom(pIter, iFrom);
}else{
rc = sqlite3Fts5IterNext(pIter);
}
if( rc==SQLITE_OK && sqlite3Fts5IterEof(pIter)==0 ){
assert( rc==SQLITE_OK );
rc = fts5ExprTokenTest(pExpr, pNode);
}else{
pNode->bEof = 1;
}
return rc;
};
@ -1266,10 +1425,16 @@ static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){
if( pPhrase ){
int i;
for(i=0; i<pPhrase->nTerm; i++){
Fts5ExprTerm *pSyn;
Fts5ExprTerm *pNext;
Fts5ExprTerm *pTerm = &pPhrase->aTerm[i];
sqlite3_free(pTerm->zTerm);
if( pTerm->pIter ){
sqlite3Fts5IterClose(pTerm->pIter);
sqlite3Fts5IterClose(pTerm->pIter);
for(pSyn=pTerm->pSynonym; pSyn; pSyn=pNext){
pNext = pSyn->pSynonym;
sqlite3Fts5IterClose(pSyn->pIter);
sqlite3_free(pSyn);
}
}
if( pPhrase->poslist.nSpace>0 ) fts5BufferFree(&pPhrase->poslist);
@ -1331,6 +1496,7 @@ Fts5ExprNearset *sqlite3Fts5ParseNearset(
typedef struct TokenCtx TokenCtx;
struct TokenCtx {
Fts5ExprPhrase *pPhrase;
int rc;
};
/*
@ -1338,34 +1504,60 @@ struct TokenCtx {
*/
static int fts5ParseTokenize(
void *pContext, /* Pointer to Fts5InsertCtx object */
int tflags, /* Mask of FTS5_TOKEN_* flags */
const char *pToken, /* Buffer containing token */
int nToken, /* Size of token in bytes */
int iStart, /* Start offset of token */
int iEnd /* End offset of token */
int iUnused1, /* Start offset of token */
int iUnused2 /* End offset of token */
){
int rc = SQLITE_OK;
const int SZALLOC = 8;
TokenCtx *pCtx = (TokenCtx*)pContext;
Fts5ExprPhrase *pPhrase = pCtx->pPhrase;
Fts5ExprTerm *pTerm;
if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){
Fts5ExprPhrase *pNew;
int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0);
/* If an error has already occurred, this is a no-op */
if( pCtx->rc!=SQLITE_OK ) return pCtx->rc;
pNew = (Fts5ExprPhrase*)sqlite3_realloc(pPhrase,
sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew
);
if( pNew==0 ) return SQLITE_NOMEM;
if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase));
pCtx->pPhrase = pPhrase = pNew;
pNew->nTerm = nNew - SZALLOC;
assert( pPhrase==0 || pPhrase->nTerm>0 );
if( pPhrase && (tflags & FTS5_TOKEN_COLOCATED) ){
Fts5ExprTerm *pSyn;
int nByte = sizeof(Fts5ExprTerm) + nToken+1;
pSyn = (Fts5ExprTerm*)sqlite3_malloc(nByte);
if( pSyn==0 ){
rc = SQLITE_NOMEM;
}else{
memset(pSyn, 0, nByte);
pSyn->zTerm = (char*)&pSyn[1];
memcpy(pSyn->zTerm, pToken, nToken);
pSyn->pSynonym = pPhrase->aTerm[pPhrase->nTerm-1].pSynonym;
pPhrase->aTerm[pPhrase->nTerm-1].pSynonym = pSyn;
}
}else{
Fts5ExprTerm *pTerm;
if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){
Fts5ExprPhrase *pNew;
int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0);
pNew = (Fts5ExprPhrase*)sqlite3_realloc(pPhrase,
sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew
);
if( pNew==0 ){
rc = SQLITE_NOMEM;
}else{
if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase));
pCtx->pPhrase = pPhrase = pNew;
pNew->nTerm = nNew - SZALLOC;
}
}
if( rc==SQLITE_OK ){
pTerm = &pPhrase->aTerm[pPhrase->nTerm++];
memset(pTerm, 0, sizeof(Fts5ExprTerm));
pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken);
}
}
pTerm = &pPhrase->aTerm[pPhrase->nTerm++];
memset(pTerm, 0, sizeof(Fts5ExprTerm));
pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken);
pCtx->rc = rc;
return rc;
}
@ -1417,11 +1609,14 @@ Fts5ExprPhrase *sqlite3Fts5ParseTerm(
rc = fts5ParseStringFromToken(pToken, &z);
if( rc==SQLITE_OK ){
int flags = FTS5_TOKENIZE_QUERY | (bPrefix ? FTS5_TOKENIZE_PREFIX : 0);
int n;
sqlite3Fts5Dequote(z);
rc = sqlite3Fts5Tokenize(pConfig, z, strlen(z), &sCtx, fts5ParseTokenize);
n = strlen(z);
rc = sqlite3Fts5Tokenize(pConfig, flags, z, n, &sCtx, fts5ParseTokenize);
}
sqlite3_free(z);
if( rc ){
if( rc || (rc = sCtx.rc) ){
pParse->rc = rc;
fts5ExprPhraseFree(sCtx.pPhrase);
sCtx.pPhrase = 0;
@ -1450,6 +1645,83 @@ Fts5ExprPhrase *sqlite3Fts5ParseTerm(
return sCtx.pPhrase;
}
/*
** Create a new FTS5 expression by cloning phrase iPhrase of the
** expression passed as the second argument.
*/
int sqlite3Fts5ExprClonePhrase(
Fts5Config *pConfig,
Fts5Expr *pExpr,
int iPhrase,
Fts5Expr **ppNew
){
int rc = SQLITE_OK; /* Return code */
Fts5ExprPhrase *pOrig; /* The phrase extracted from pExpr */
Fts5ExprPhrase *pCopy; /* Copy of pOrig */
int i; /* Used to iterate through phrase terms */
Fts5Expr *pNew = 0; /* Expression to return via *ppNew */
Fts5ExprPhrase **apPhrase; /* pNew->apPhrase */
Fts5ExprNode *pNode; /* pNew->pRoot */
Fts5ExprNearset *pNear; /* pNew->pRoot->pNear */
TokenCtx sCtx = {0,0}; /* Context object for fts5ParseTokenize */
pOrig = pExpr->apExprPhrase[iPhrase];
pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr));
if( rc==SQLITE_OK ){
pNew->apExprPhrase = (Fts5ExprPhrase**)sqlite3Fts5MallocZero(&rc,
sizeof(Fts5ExprPhrase*));
}
if( rc==SQLITE_OK ){
pNew->pRoot = (Fts5ExprNode*)sqlite3Fts5MallocZero(&rc,
sizeof(Fts5ExprNode));
}
if( rc==SQLITE_OK ){
pNew->pRoot->pNear = (Fts5ExprNearset*)sqlite3Fts5MallocZero(&rc,
sizeof(Fts5ExprNearset) + sizeof(Fts5ExprPhrase*));
}
for(i=0; rc==SQLITE_OK && i<pOrig->nTerm; i++){
int tflags = 0;
Fts5ExprTerm *p;
for(p=&pOrig->aTerm[i]; p && rc==SQLITE_OK; p=p->pSynonym){
const char *zTerm = p->zTerm;
rc = fts5ParseTokenize((void*)&sCtx, tflags, zTerm, strlen(zTerm), 0, 0);
tflags = FTS5_TOKEN_COLOCATED;
}
if( rc==SQLITE_OK ){
sCtx.pPhrase->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix;
}
}
if( rc==SQLITE_OK ){
/* All the allocations succeeded. Put the expression object together. */
pNew->pIndex = pExpr->pIndex;
pNew->nPhrase = 1;
pNew->apExprPhrase[0] = sCtx.pPhrase;
pNew->pRoot->pNear->apPhrase[0] = sCtx.pPhrase;
pNew->pRoot->pNear->nPhrase = 1;
sCtx.pPhrase->pNode = pNew->pRoot;
if( pOrig->nTerm==1 && pOrig->aTerm[0].pSynonym==0 ){
pNew->pRoot->eType = FTS5_TERM;
}else{
pNew->pRoot->eType = FTS5_STRING;
}
}else{
sqlite3Fts5ExprFree(pNew);
fts5ExprPhraseFree(sCtx.pPhrase);
pNew = 0;
}
*ppNew = pNew;
return rc;
}
/*
** Token pTok has appeared in a MATCH expression where the NEAR operator
** is expected. If token pTok does not contain "NEAR", store an error
@ -1630,7 +1902,10 @@ Fts5ExprNode *sqlite3Fts5ParseNode(
for(iPhrase=0; iPhrase<pNear->nPhrase; iPhrase++){
pNear->apPhrase[iPhrase]->pNode = pRet;
}
if( pNear->nPhrase==1 && pNear->apPhrase[0]->nTerm==1 ){
if( pNear->nPhrase==1
&& pNear->apPhrase[0]->nTerm==1
&& pNear->apPhrase[0]->aTerm[0].pSynonym==0
){
pRet->eType = FTS5_TERM;
}
}else{
@ -1650,16 +1925,28 @@ Fts5ExprNode *sqlite3Fts5ParseNode(
}
static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){
char *zQuoted = sqlite3_malloc(strlen(pTerm->zTerm) * 2 + 3 + 2);
int nByte = 0;
Fts5ExprTerm *p;
char *zQuoted;
/* Determine the maximum amount of space required. */
for(p=pTerm; p; p=p->pSynonym){
nByte += strlen(p->zTerm) * 2 + 3 + 2;
}
zQuoted = sqlite3_malloc(nByte);
if( zQuoted ){
int i = 0;
char *zIn = pTerm->zTerm;
zQuoted[i++] = '"';
while( *zIn ){
if( *zIn=='"' ) zQuoted[i++] = '"';
zQuoted[i++] = *zIn++;
for(p=pTerm; p; p=p->pSynonym){
char *zIn = p->zTerm;
zQuoted[i++] = '"';
while( *zIn ){
if( *zIn=='"' ) zQuoted[i++] = '"';
zQuoted[i++] = *zIn++;
}
zQuoted[i++] = '"';
if( p->pSynonym ) zQuoted[i++] = '|';
}
zQuoted[i++] = '"';
if( pTerm->bPrefix ){
zQuoted[i++] = ' ';
zQuoted[i++] = '*';

View File

@ -293,7 +293,6 @@ typedef struct Fts5Data Fts5Data;
typedef struct Fts5DlidxIter Fts5DlidxIter;
typedef struct Fts5DlidxLvl Fts5DlidxLvl;
typedef struct Fts5DlidxWriter Fts5DlidxWriter;
typedef struct Fts5NodeIter Fts5NodeIter;
typedef struct Fts5PageWriter Fts5PageWriter;
typedef struct Fts5SegIter Fts5SegIter;
typedef struct Fts5DoclistIter Fts5DoclistIter;
@ -526,24 +525,6 @@ struct Fts5IndexIter {
};
/*
** Object for iterating through the conents of a single internal node in
** memory.
*/
struct Fts5NodeIter {
/* Internal. Set and managed by fts5NodeIterXXX() functions. Except,
** the EOF test for the iterator is (Fts5NodeIter.aData==0). */
const u8 *aData;
int nData;
int iOff;
/* Output variables */
Fts5Buffer term;
int nEmpty;
int iChild;
int bDlidx;
};
/*
** An instance of the following type is used to iterate through the contents
** of a doclist-index record.
@ -573,23 +554,6 @@ struct Fts5DlidxIter {
Fts5DlidxLvl aLvl[1];
};
/*
** The first argument passed to this macro is a pointer to an Fts5Buffer
** object.
*/
#define fts5BufferSize(pBuf,n) { \
if( pBuf->nSpace<n ) { \
u8 *pNew = sqlite3_realloc(pBuf->p, n); \
if( pNew==0 ){ \
sqlite3_free(pBuf->p); \
} \
pBuf->nSpace = n; \
pBuf->p = pNew; \
} \
}
static void fts5PutU16(u8 *aOut, u16 iVal){
aOut[0] = (iVal>>8);
aOut[1] = (iVal&0xFF);
@ -617,6 +581,7 @@ static void *fts5IdxMalloc(Fts5Index *p, int nByte){
**
** res = *pLeft - *pRight
*/
#ifdef SQLITE_DEBUG
static int fts5BufferCompareBlob(
Fts5Buffer *pLeft, /* Left hand side of comparison */
const u8 *pRight, int nRight /* Right hand side of comparison */
@ -625,7 +590,7 @@ static int fts5BufferCompareBlob(
int res = memcmp(pLeft->p, pRight, nCmp);
return (res==0 ? (pLeft->n - nRight) : res);
}
#endif
/*
** Compare the contents of the two buffers using memcmp(). If one buffer
@ -665,11 +630,14 @@ static void fts5CloseReader(Fts5Index *p){
}
}
static Fts5Data *fts5DataReadOrBuffer(
Fts5Index *p,
Fts5Buffer *pBuf,
i64 iRowid
){
/*
** Retrieve a record from the %_data table.
**
** If an error occurs, NULL is returned and an error left in the
** Fts5Index object.
*/
static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){
Fts5Data *pRet = 0;
if( p->rc==SQLITE_OK ){
int rc = SQLITE_OK;
@ -689,8 +657,8 @@ static Fts5Data *fts5DataReadOrBuffer(
if( rc==SQLITE_ABORT ) rc = SQLITE_OK;
}
/* If the blob handle is not yet open, open and seek it. Otherwise, use
** the blob_reopen() API to reseek the existing blob handle. */
/* If the blob handle is not open at this point, open it and seek
** to the requested entry. */
if( p->pReader==0 && rc==SQLITE_OK ){
Fts5Config *pConfig = p->pConfig;
rc = sqlite3_blob_open(pConfig->db,
@ -708,22 +676,13 @@ static Fts5Data *fts5DataReadOrBuffer(
if( rc==SQLITE_OK ){
u8 *aOut = 0; /* Read blob data into this buffer */
int nByte = sqlite3_blob_bytes(p->pReader);
if( pBuf ){
fts5BufferSize(pBuf, MAX(nByte, p->pConfig->pgsz) + 20);
pBuf->n = nByte;
aOut = pBuf->p;
if( aOut==0 ){
rc = SQLITE_NOMEM;
}
int nAlloc = sizeof(Fts5Data) + nByte + FTS5_DATA_PADDING;
pRet = (Fts5Data*)sqlite3_malloc(nAlloc);
if( pRet ){
pRet->n = nByte;
aOut = pRet->p = (u8*)&pRet[1];
}else{
int nSpace = nByte + FTS5_DATA_PADDING;
pRet = (Fts5Data*)sqlite3_malloc(nSpace+sizeof(Fts5Data));
if( pRet ){
pRet->n = nByte;
aOut = pRet->p = (u8*)&pRet[1];
}else{
rc = SQLITE_NOMEM;
}
rc = SQLITE_NOMEM;
}
if( rc==SQLITE_OK ){
@ -738,33 +697,10 @@ static Fts5Data *fts5DataReadOrBuffer(
p->nRead++;
}
return pRet;
}
/*
** Retrieve a record from the %_data table.
**
** If an error occurs, NULL is returned and an error left in the
** Fts5Index object.
*/
static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){
Fts5Data *pRet = fts5DataReadOrBuffer(p, 0, iRowid);
assert( (pRet==0)==(p->rc!=SQLITE_OK) );
return pRet;
}
/*
** Read a record from the %_data table into the buffer supplied as the
** second argument.
**
** If an error occurs, an error is left in the Fts5Index object. If an
** error has already occurred when this function is called, it is a
** no-op.
*/
static void fts5DataBuffer(Fts5Index *p, Fts5Buffer *pBuf, i64 iRowid){
(void)fts5DataReadOrBuffer(p, pBuf, iRowid);
}
/*
** Release a reference to data record returned by an earlier call to
** fts5DataRead().
@ -1033,19 +969,18 @@ static Fts5Structure *fts5StructureRead(Fts5Index *p){
Fts5Config *pConfig = p->pConfig;
Fts5Structure *pRet = 0; /* Object to return */
int iCookie; /* Configuration cookie */
Fts5Data *pData;
Fts5Buffer buf = {0, 0, 0};
fts5DataBuffer(p, &buf, FTS5_STRUCTURE_ROWID);
if( buf.p==0 ) return 0;
assert( buf.nSpace>=(buf.n + FTS5_DATA_ZERO_PADDING) );
memset(&buf.p[buf.n], 0, FTS5_DATA_ZERO_PADDING);
p->rc = fts5StructureDecode(buf.p, buf.n, &iCookie, &pRet);
pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID);
if( p->rc ) return 0;
memset(&pData->p[pData->n], 0, FTS5_DATA_PADDING);
p->rc = fts5StructureDecode(pData->p, pData->n, &iCookie, &pRet);
if( p->rc==SQLITE_OK && pConfig->iCookie!=iCookie ){
p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie);
}
fts5BufferFree(&buf);
fts5DataRelease(pData);
if( p->rc!=SQLITE_OK ){
fts5StructureRelease(pRet);
pRet = 0;
@ -1228,62 +1163,6 @@ static void fts5StructurePromote(
}
/*
** If the pIter->iOff offset currently points to an entry indicating one
** or more term-less nodes, advance past it and set pIter->nEmpty to
** the number of empty child nodes.
*/
static void fts5NodeIterGobbleNEmpty(Fts5NodeIter *pIter){
if( pIter->iOff<pIter->nData && 0==(pIter->aData[pIter->iOff] & 0xfe) ){
pIter->bDlidx = pIter->aData[pIter->iOff] & 0x01;
pIter->iOff++;
pIter->iOff += fts5GetVarint32(&pIter->aData[pIter->iOff], pIter->nEmpty);
}else{
pIter->nEmpty = 0;
pIter->bDlidx = 0;
}
}
/*
** Advance to the next entry within the node.
*/
static void fts5NodeIterNext(int *pRc, Fts5NodeIter *pIter){
if( pIter->iOff>=pIter->nData ){
pIter->aData = 0;
pIter->iChild += pIter->nEmpty;
}else{
int nPre, nNew;
pIter->iOff += fts5GetVarint32(&pIter->aData[pIter->iOff], nPre);
pIter->iOff += fts5GetVarint32(&pIter->aData[pIter->iOff], nNew);
pIter->term.n = nPre-2;
fts5BufferAppendBlob(pRc, &pIter->term, nNew, pIter->aData+pIter->iOff);
pIter->iOff += nNew;
pIter->iChild += (1 + pIter->nEmpty);
fts5NodeIterGobbleNEmpty(pIter);
if( *pRc ) pIter->aData = 0;
}
}
/*
** Initialize the iterator object pIter to iterate through the internal
** segment node in pData.
*/
static void fts5NodeIterInit(const u8 *aData, int nData, Fts5NodeIter *pIter){
memset(pIter, 0, sizeof(*pIter));
pIter->aData = aData;
pIter->nData = nData;
pIter->iOff = fts5GetVarint32(aData, pIter->iChild);
fts5NodeIterGobbleNEmpty(pIter);
}
/*
** Free any memory allocated by the iterator object.
*/
static void fts5NodeIterFree(Fts5NodeIter *pIter){
fts5BufferFree(&pIter->term);
}
/*
** Advance the iterator passed as the only argument. If the end of the
** doclist-index page is reached, return non-zero.
@ -2041,119 +1920,6 @@ static void fts5SegIterLoadDlidx(Fts5Index *p, Fts5SegIter *pIter){
pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno);
}
#ifdef SQLITE_DEBUG
static void fts5AssertNodeSeekOk(
Fts5Buffer *pNode,
const u8 *pTerm, int nTerm, /* Term to search for */
int iExpectPg,
int bExpectDlidx
){
int bDlidx;
int iPg;
int rc = SQLITE_OK;
Fts5NodeIter node;
fts5NodeIterInit(pNode->p, pNode->n, &node);
assert( node.term.n==0 );
iPg = node.iChild;
bDlidx = node.bDlidx;
for(fts5NodeIterNext(&rc, &node);
node.aData && fts5BufferCompareBlob(&node.term, pTerm, nTerm)<=0;
fts5NodeIterNext(&rc, &node)
){
iPg = node.iChild;
bDlidx = node.bDlidx;
}
fts5NodeIterFree(&node);
assert( rc!=SQLITE_OK || iPg==iExpectPg );
assert( rc!=SQLITE_OK || bDlidx==bExpectDlidx );
}
#else
#define fts5AssertNodeSeekOk(v,w,x,y,z)
#endif
/*
** Argument pNode is an internal b-tree node. This function searches
** within the node for the largest term that is smaller than or equal
** to (pTerm/nTerm).
**
** It returns the associated page number. Or, if (pTerm/nTerm) is smaller
** than all terms within the node, the leftmost child page number.
**
** Before returning, (*pbDlidx) is set to true if the last term on the
** returned child page number has a doclist-index. Or left as is otherwise.
*/
static int fts5NodeSeek(
Fts5Buffer *pNode, /* Node to search */
const u8 *pTerm, int nTerm, /* Term to search for */
int *pbDlidx /* OUT: True if dlidx flag is set */
){
int iPg;
u8 *pPtr = pNode->p;
u8 *pEnd = &pPtr[pNode->n];
int nMatch = 0; /* Number of bytes of pTerm already matched */
assert( *pbDlidx==0 );
pPtr += fts5GetVarint32(pPtr, iPg);
while( pPtr<pEnd ){
int nEmpty = 0;
int nKeep;
int nNew;
/* If there is a "no terms" record at pPtr, read it now. Store the
** number of termless pages in nEmpty. If it indicates a doclist-index,
** set (*pbDlidx) to true.*/
if( *pPtr<2 ){
*pbDlidx = (*pPtr==0x01);
pPtr++;
pPtr += fts5GetVarint32(pPtr, nEmpty);
if( pPtr>=pEnd ) break;
}
/* Read the next "term" pointer. Set nKeep to the number of bytes to
** keep from the previous term, and nNew to the number of bytes of
** new data that will be appended to it. */
nKeep = (int)*pPtr++;
nNew = (int)*pPtr++;
if( (nKeep | nNew) & 0x0080 ){
pPtr -= 2;
pPtr += fts5GetVarint32(pPtr, nKeep);
pPtr += fts5GetVarint32(pPtr, nNew);
}
nKeep -= 2;
/* Compare (pTerm/nTerm) to the current term on the node (the one described
** by nKeep/nNew). If the node term is larger, break out of the while()
** loop.
**
** Otherwise, if (pTerm/nTerm) is larger or the two terms are equal,
** leave variable nMatch set to the size of the largest prefix common to
** both terms in bytes. */
if( nKeep==nMatch ){
int nTst = MIN(nNew, nTerm-nMatch);
int i;
for(i=0; i<nTst; i++){
if( pTerm[nKeep+i]!=pPtr[i] ) break;
}
nMatch += i;
assert( nMatch<=nTerm );
if( i<nNew && (nMatch==nTerm || pPtr[i] > pTerm[nMatch]) ) break;
}else if( nKeep<nMatch ){
break;
}
iPg += 1 + nEmpty;
*pbDlidx = 0;
pPtr += nNew;
}
fts5AssertNodeSeekOk(pNode, pTerm, nTerm, iPg, *pbDlidx);
return iPg;
}
#define fts5IndexGetVarint32(a, iOff, nVal) { \
nVal = a[iOff++]; \
if( nVal & 0x80 ){ \
@ -2677,13 +2443,13 @@ static void fts5SegIterNextFrom(
}
}
while( p->rc==SQLITE_OK ){
do{
if( bMove ) fts5SegIterNext(p, pIter, 0);
if( pIter->pLeaf==0 ) break;
if( bRev==0 && pIter->iRowid>=iMatch ) break;
if( bRev!=0 && pIter->iRowid<=iMatch ) break;
bMove = 1;
}
}while( p->rc==SQLITE_OK );
}
@ -4459,13 +4225,9 @@ int sqlite3Fts5IndexRollback(Fts5Index *p){
*/
int sqlite3Fts5IndexReinit(Fts5Index *p){
Fts5Structure s;
assert( p->rc==SQLITE_OK );
p->rc = sqlite3Fts5IndexSetAverages(p, (const u8*)"", 0);
memset(&s, 0, sizeof(Fts5Structure));
fts5DataWrite(p, FTS5_AVERAGES_ROWID, (const u8*)"", 0);
fts5StructureWrite(p, &s);
return fts5IndexReturn(p);
}
@ -4787,13 +4549,28 @@ void sqlite3Fts5IterClose(Fts5IndexIter *pIter){
}
/*
** Read the "averages" record into the buffer supplied as the second
** argument. Return SQLITE_OK if successful, or an SQLite error code
** if an error occurs.
** Read and decode the "averages" record from the database.
**
** Parameter anSize must point to an array of size nCol, where nCol is
** the number of user defined columns in the FTS table.
*/
int sqlite3Fts5IndexGetAverages(Fts5Index *p, Fts5Buffer *pBuf){
assert( p->rc==SQLITE_OK );
fts5DataReadOrBuffer(p, pBuf, FTS5_AVERAGES_ROWID);
int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize){
int nCol = p->pConfig->nCol;
Fts5Data *pData;
*pnRow = 0;
memset(anSize, 0, sizeof(i64) * nCol);
pData = fts5DataRead(p, FTS5_AVERAGES_ROWID);
if( p->rc==SQLITE_OK && pData->n ){
int i = 0;
int iCol;
i += fts5GetVarint(&pData->p[i], (u64*)pnRow);
for(iCol=0; i<pData->n && iCol<nCol; iCol++){
i += fts5GetVarint(&pData->p[i], (u64*)&anSize[iCol]);
}
}
fts5DataRelease(pData);
return fts5IndexReturn(p);
}
@ -5327,13 +5104,13 @@ static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){
if( iSegid==0 ){
if( iKey==FTS5_AVERAGES_ROWID ){
sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(averages) ");
sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{averages} ");
}else{
sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(structure)");
sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{structure}");
}
}
else{
sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(%ssegid=%d h=%d pgno=%d)",
sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{%ssegid=%d h=%d pgno=%d}",
bDlidx ? "dlidx " : "", iSegid, iHeight, iPgno
);
}
@ -5487,73 +5264,53 @@ static void fts5DecodeFunction(
fts5DecodeStructure(&rc, &s, a, n);
}
}else{
Fts5Buffer term;
int iTermOff = 0;
int iRowidOff = 0;
int iOff;
int nKeep = 0;
memset(&term, 0, sizeof(Fts5Buffer));
if( iHeight==0 ){
int iTermOff = 0;
int iRowidOff = 0;
int iOff;
int nKeep = 0;
if( n>=4 ){
iRowidOff = fts5GetU16(&a[0]);
iTermOff = fts5GetU16(&a[2]);
}else{
sqlite3Fts5BufferSet(&rc, &s, 8, (const u8*)"corrupt");
goto decode_out;
}
if( iRowidOff ){
iOff = iRowidOff;
}else if( iTermOff ){
iOff = iTermOff;
}else{
iOff = n;
}
fts5DecodePoslist(&rc, &s, &a[4], iOff-4);
assert( iRowidOff==0 || iOff==iRowidOff );
if( iRowidOff ){
iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], n-iOff);
}
assert( iTermOff==0 || iOff==iTermOff );
while( iOff<n ){
int nByte;
iOff += fts5GetVarint32(&a[iOff], nByte);
term.n= nKeep;
fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]);
iOff += nByte;
sqlite3Fts5BufferAppendPrintf(
&rc, &s, " term=%.*s", term.n, (const char*)term.p
);
iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], n-iOff);
if( iOff<n ){
iOff += fts5GetVarint32(&a[iOff], nKeep);
}
}
fts5BufferFree(&term);
if( n>=4 ){
iRowidOff = fts5GetU16(&a[0]);
iTermOff = fts5GetU16(&a[2]);
}else{
Fts5NodeIter ss;
for(fts5NodeIterInit(a, n, &ss); ss.aData; fts5NodeIterNext(&rc, &ss)){
if( ss.term.n==0 ){
sqlite3Fts5BufferAppendPrintf(&rc, &s, " left=%d", ss.iChild);
}else{
sqlite3Fts5BufferAppendPrintf(&rc,&s, " \"%.*s\"",
ss.term.n, ss.term.p
);
}
if( ss.nEmpty ){
sqlite3Fts5BufferAppendPrintf(&rc, &s, " empty=%d%s", ss.nEmpty,
ss.bDlidx ? "*" : ""
);
}
}
fts5NodeIterFree(&ss);
sqlite3Fts5BufferSet(&rc, &s, 8, (const u8*)"corrupt");
goto decode_out;
}
if( iRowidOff ){
iOff = iRowidOff;
}else if( iTermOff ){
iOff = iTermOff;
}else{
iOff = n;
}
fts5DecodePoslist(&rc, &s, &a[4], iOff-4);
assert( iRowidOff==0 || iOff==iRowidOff );
if( iRowidOff ){
iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], n-iOff);
}
assert( iTermOff==0 || iOff==iTermOff );
while( iOff<n ){
int nByte;
iOff += fts5GetVarint32(&a[iOff], nByte);
term.n= nKeep;
fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]);
iOff += nByte;
sqlite3Fts5BufferAppendPrintf(
&rc, &s, " term=%.*s", term.n, (const char*)term.p
);
iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], n-iOff);
if( iOff<n ){
iOff += fts5GetVarint32(&a[iOff], nKeep);
}
}
fts5BufferFree(&term);
}
decode_out:

View File

@ -1498,11 +1498,13 @@ static int fts5ApiTokenize(
Fts5Context *pCtx,
const char *pText, int nText,
void *pUserData,
int (*xToken)(void*, const char*, int, int, int)
int (*xToken)(void*, int, const char*, int, int, int)
){
Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
return sqlite3Fts5Tokenize(pTab->pConfig, pText, nText, pUserData, xToken);
return sqlite3Fts5Tokenize(
pTab->pConfig, FTS5_TOKENIZE_AUX, pText, nText, pUserData, xToken
);
}
static int fts5ApiPhraseCount(Fts5Context *pCtx){
@ -1655,13 +1657,16 @@ static int fts5ApiColumnText(
static int fts5ColumnSizeCb(
void *pContext, /* Pointer to int */
int tflags,
const char *pToken, /* Buffer containing token */
int nToken, /* Size of token in bytes */
int iStart, /* Start offset of token */
int iEnd /* End offset of token */
){
int *pCnt = (int*)pContext;
*pCnt = *pCnt + 1;
if( (tflags & FTS5_TOKEN_COLOCATED)==0 ){
(*pCnt)++;
}
return SQLITE_OK;
}
@ -1691,7 +1696,9 @@ static int fts5ApiColumnSize(Fts5Context *pCtx, int iCol, int *pnToken){
pCsr->aColumnSize[i] = 0;
rc = fts5ApiColumnText(pCtx, i, &z, &n);
if( rc==SQLITE_OK ){
rc = sqlite3Fts5Tokenize(pConfig, z, n, p, fts5ColumnSizeCb);
rc = sqlite3Fts5Tokenize(
pConfig, FTS5_TOKENIZE_AUX, z, n, p, fts5ColumnSizeCb
);
}
}
}
@ -1853,7 +1860,7 @@ static int fts5ApiQueryPhrase(
pNew->iFirstRowid = SMALLEST_INT64;
pNew->iLastRowid = LARGEST_INT64;
pNew->base.pVtab = (sqlite3_vtab*)pTab;
rc = sqlite3Fts5ExprPhraseExpr(pConf, pCsr->pExpr, iPhrase, &pNew->pExpr);
rc = sqlite3Fts5ExprClonePhrase(pConf, pCsr->pExpr, iPhrase, &pNew->pExpr);
}
if( rc==SQLITE_OK ){
@ -2344,7 +2351,7 @@ int sqlite3_fts5_init(
void *p = (void*)pGlobal;
memset(pGlobal, 0, sizeof(Fts5Global));
pGlobal->db = db;
pGlobal->api.iVersion = 1;
pGlobal->api.iVersion = 2;
pGlobal->api.xCreateFunction = fts5CreateAux;
pGlobal->api.xCreateTokenizer = fts5CreateTokenizer;
pGlobal->api.xFindTokenizer = fts5FindTokenizer;

View File

@ -359,6 +359,7 @@ struct Fts5InsertCtx {
*/
static int fts5StorageInsertCallback(
void *pContext, /* Pointer to Fts5InsertCtx object */
int tflags,
const char *pToken, /* Buffer containing token */
int nToken, /* Size of token in bytes */
int iStart, /* Start offset of token */
@ -366,8 +367,10 @@ static int fts5StorageInsertCallback(
){
Fts5InsertCtx *pCtx = (Fts5InsertCtx*)pContext;
Fts5Index *pIdx = pCtx->pStorage->pIndex;
int iPos = pCtx->szCol++;
return sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, iPos, pToken, nToken);
if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){
pCtx->szCol++;
}
return sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, pCtx->szCol-1, pToken, nToken);
}
/*
@ -394,6 +397,7 @@ static int fts5StorageDeleteFromIndex(Fts5Storage *p, i64 iDel){
if( pConfig->abUnindexed[iCol-1] ) continue;
ctx.szCol = 0;
rc = sqlite3Fts5Tokenize(pConfig,
FTS5_TOKENIZE_DOCUMENT,
(const char*)sqlite3_column_text(pSeek, iCol),
sqlite3_column_bytes(pSeek, iCol),
(void*)&ctx,
@ -451,22 +455,7 @@ static int fts5StorageInsertDocsize(
static int fts5StorageLoadTotals(Fts5Storage *p, int bCache){
int rc = SQLITE_OK;
if( p->bTotalsValid==0 ){
int nCol = p->pConfig->nCol;
Fts5Buffer buf;
memset(&buf, 0, sizeof(buf));
memset(p->aTotalSize, 0, sizeof(i64) * nCol);
p->nTotalRow = 0;
rc = sqlite3Fts5IndexGetAverages(p->pIndex, &buf);
if( rc==SQLITE_OK && buf.n ){
int i = 0;
int iCol;
i += fts5GetVarint(&buf.p[i], (u64*)&p->nTotalRow);
for(iCol=0; i<buf.n && iCol<nCol; iCol++){
i += fts5GetVarint(&buf.p[i], (u64*)&p->aTotalSize[iCol]);
}
}
sqlite3_free(buf.p);
rc = sqlite3Fts5IndexGetAverages(p->pIndex, &p->nTotalRow, p->aTotalSize);
p->bTotalsValid = bCache;
}
return rc;
@ -565,6 +554,7 @@ int sqlite3Fts5StorageSpecialDelete(
if( pConfig->abUnindexed[iCol] ) continue;
ctx.szCol = 0;
rc = sqlite3Fts5Tokenize(pConfig,
FTS5_TOKENIZE_DOCUMENT,
(const char*)sqlite3_value_text(apVal[iCol]),
sqlite3_value_bytes(apVal[iCol]),
(void*)&ctx,
@ -654,6 +644,7 @@ int sqlite3Fts5StorageRebuild(Fts5Storage *p){
ctx.szCol = 0;
if( pConfig->abUnindexed[ctx.iCol]==0 ){
rc = sqlite3Fts5Tokenize(pConfig,
FTS5_TOKENIZE_DOCUMENT,
(const char*)sqlite3_column_text(pScan, ctx.iCol+1),
sqlite3_column_bytes(pScan, ctx.iCol+1),
(void*)&ctx,
@ -771,6 +762,7 @@ int sqlite3Fts5StorageInsert(
ctx.szCol = 0;
if( pConfig->abUnindexed[ctx.iCol]==0 ){
rc = sqlite3Fts5Tokenize(pConfig,
FTS5_TOKENIZE_DOCUMENT,
(const char*)sqlite3_value_text(apVal[ctx.iCol+2]),
sqlite3_value_bytes(apVal[ctx.iCol+2]),
(void*)&ctx,
@ -838,15 +830,18 @@ struct Fts5IntegrityCtx {
*/
static int fts5StorageIntegrityCallback(
void *pContext, /* Pointer to Fts5InsertCtx object */
int tflags,
const char *pToken, /* Buffer containing token */
int nToken, /* Size of token in bytes */
int iStart, /* Start offset of token */
int iEnd /* End offset of token */
){
Fts5IntegrityCtx *pCtx = (Fts5IntegrityCtx*)pContext;
int iPos = pCtx->szCol++;
if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){
pCtx->szCol++;
}
pCtx->cksum ^= sqlite3Fts5IndexCksum(
pCtx->pConfig, pCtx->iRowid, pCtx->iCol, iPos, pToken, nToken
pCtx->pConfig, pCtx->iRowid, pCtx->iCol, pCtx->szCol-1, pToken, nToken
);
return SQLITE_OK;
}
@ -881,19 +876,23 @@ int sqlite3Fts5StorageIntegrity(Fts5Storage *p){
int i;
ctx.iRowid = sqlite3_column_int64(pScan, 0);
ctx.szCol = 0;
rc = sqlite3Fts5StorageDocsize(p, ctx.iRowid, aColSize);
if( pConfig->bColumnsize ){
rc = sqlite3Fts5StorageDocsize(p, ctx.iRowid, aColSize);
}
for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){
if( pConfig->abUnindexed[i] ) continue;
ctx.iCol = i;
ctx.szCol = 0;
rc = sqlite3Fts5Tokenize(
pConfig,
rc = sqlite3Fts5Tokenize(pConfig,
FTS5_TOKENIZE_DOCUMENT,
(const char*)sqlite3_column_text(pScan, i+1),
sqlite3_column_bytes(pScan, i+1),
(void*)&ctx,
fts5StorageIntegrityCallback
);
if( ctx.szCol!=aColSize[i] ) rc = FTS5_CORRUPT;
if( pConfig->bColumnsize && ctx.szCol!=aColSize[i] ){
rc = FTS5_CORRUPT;
}
aTotalSize[i] += ctx.szCol;
}
if( rc!=SQLITE_OK ) break;
@ -918,7 +917,7 @@ int sqlite3Fts5StorageIntegrity(Fts5Storage *p){
rc = fts5StorageCount(p, "content", &nRow);
if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT;
}
if( rc==SQLITE_OK ){
if( rc==SQLITE_OK && pConfig->bColumnsize ){
i64 nRow;
rc = fts5StorageCount(p, "docsize", &nRow);
if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT;
@ -1002,9 +1001,12 @@ static int fts5StorageDecodeSizeArray(
** otherwise.
*/
int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol){
int nCol = p->pConfig->nCol;
sqlite3_stmt *pLookup = 0;
int rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP_DOCSIZE, &pLookup, 0);
int nCol = p->pConfig->nCol; /* Number of user columns in table */
sqlite3_stmt *pLookup = 0; /* Statement to query %_docsize */
int rc; /* Return Code */
assert( p->pConfig->bColumnsize );
rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP_DOCSIZE, &pLookup, 0);
if( rc==SQLITE_OK ){
int bCorrupt = 1;
sqlite3_bind_int64(pLookup, 1, iRowid);

View File

@ -141,6 +141,7 @@ struct F5tAuxData {
static int xTokenizeCb(
void *pCtx,
int tflags,
const char *zToken, int nToken,
int iStart, int iEnd
){
@ -584,6 +585,7 @@ struct F5tTokenizeCtx {
static int xTokenizeCb2(
void *pCtx,
int tflags,
const char *zToken, int nToken,
int iStart, int iEnd
){
@ -666,7 +668,9 @@ static int f5tTokenize(
ctx.bSubst = (objc==5);
ctx.pRet = pRet;
ctx.zInput = zText;
rc = tokenizer.xTokenize(pTok, (void*)&ctx, zText, nText, xTokenizeCb2);
rc = tokenizer.xTokenize(
pTok, (void*)&ctx, FTS5_TOKENIZE_DOCUMENT, zText, nText, xTokenizeCb2
);
tokenizer.xDelete(pTok);
if( rc!=SQLITE_OK ){
Tcl_AppendResult(interp, "error in tokenizer.xTokenize()", 0);
@ -688,11 +692,11 @@ static int f5tTokenize(
typedef struct F5tTokenizerContext F5tTokenizerContext;
typedef struct F5tTokenizerCb F5tTokenizerCb;
typedef struct F5tTokenizerModule F5tTokenizerModule;
typedef struct F5tTokenizerModule F5tTokenizerInstance;
typedef struct F5tTokenizerInstance F5tTokenizerInstance;
struct F5tTokenizerContext {
void *pCtx;
int (*xToken)(void*, const char*, int, int, int);
int (*xToken)(void*, int, const char*, int, int, int);
};
struct F5tTokenizerModule {
@ -701,6 +705,12 @@ struct F5tTokenizerModule {
F5tTokenizerContext *pContext;
};
struct F5tTokenizerInstance {
Tcl_Interp *interp;
Tcl_Obj *pScript;
F5tTokenizerContext *pContext;
};
static int f5tTokenizerCreate(
void *pCtx,
const char **azArg,
@ -748,26 +758,53 @@ static void f5tTokenizerDelete(Fts5Tokenizer *p){
static int f5tTokenizerTokenize(
Fts5Tokenizer *p,
void *pCtx,
int flags,
const char *pText, int nText,
int (*xToken)(void*, const char*, int, int, int)
int (*xToken)(void*, int, const char*, int, int, int)
){
F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)p;
void *pOldCtx;
int (*xOldToken)(void*, const char*, int, int, int);
int (*xOldToken)(void*, int, const char*, int, int, int);
Tcl_Obj *pEval;
int rc;
const char *zFlags;
pOldCtx = pInst->pContext->pCtx;
xOldToken = pInst->pContext->xToken;
pInst->pContext->pCtx = pCtx;
pInst->pContext->xToken = xToken;
assert(
flags==FTS5_TOKENIZE_DOCUMENT
|| flags==FTS5_TOKENIZE_AUX
|| flags==FTS5_TOKENIZE_QUERY
|| flags==(FTS5_TOKENIZE_QUERY | FTS5_TOKENIZE_PREFIX)
);
pEval = Tcl_DuplicateObj(pInst->pScript);
Tcl_IncrRefCount(pEval);
rc = Tcl_ListObjAppendElement(
pInst->interp, pEval, Tcl_NewStringObj(pText, nText)
);
if( rc==TCL_OK ){
rc = Tcl_EvalObjEx(pInst->interp, pEval, TCL_GLOBAL_ONLY);
switch( flags ){
case FTS5_TOKENIZE_DOCUMENT:
zFlags = "document";
break;
case FTS5_TOKENIZE_AUX:
zFlags = "aux";
break;
case FTS5_TOKENIZE_QUERY:
zFlags = "query";
break;
case (FTS5_TOKENIZE_PREFIX | FTS5_TOKENIZE_QUERY):
zFlags = "prefixquery";
break;
default:
assert( 0 );
zFlags = "invalid";
break;
}
Tcl_ListObjAppendElement(pInst->interp, pEval, Tcl_NewStringObj(zFlags, -1));
Tcl_ListObjAppendElement(pInst->interp, pEval, Tcl_NewStringObj(pText,nText));
rc = Tcl_EvalObjEx(pInst->interp, pEval, TCL_GLOBAL_ONLY);
Tcl_DecrRefCount(pEval);
pInst->pContext->pCtx = pOldCtx;
@ -776,7 +813,7 @@ static int f5tTokenizerTokenize(
}
/*
** sqlite3_fts5_token TEXT START END POS
** sqlite3_fts5_token ?-colocated? TEXT START END
*/
static int f5tTokenizerReturn(
void * clientData,
@ -788,14 +825,29 @@ static int f5tTokenizerReturn(
int iStart;
int iEnd;
int nToken;
int tflags = 0;
char *zToken;
int rc;
assert( p );
if( objc!=4 ){
Tcl_WrongNumArgs(interp, 1, objv, "TEXT START END");
if( objc==5 ){
int nArg;
char *zArg = Tcl_GetStringFromObj(objv[1], &nArg);
if( nArg<=10 && nArg>=2 && memcmp("-colocated", zArg, nArg)==0 ){
tflags |= FTS5_TOKEN_COLOCATED;
}else{
goto usage;
}
}else if( objc!=4 ){
goto usage;
}
zToken = Tcl_GetStringFromObj(objv[objc-3], &nToken);
if( Tcl_GetIntFromObj(interp, objv[objc-2], &iStart)
|| Tcl_GetIntFromObj(interp, objv[objc-1], &iEnd)
){
return TCL_ERROR;
}
if( p->xToken==0 ){
Tcl_AppendResult(interp,
"sqlite3_fts5_token may only be used by tokenizer callback", 0
@ -803,16 +855,13 @@ static int f5tTokenizerReturn(
return TCL_ERROR;
}
zToken = Tcl_GetStringFromObj(objv[1], &nToken);
if( Tcl_GetIntFromObj(interp, objv[2], &iStart)
|| Tcl_GetIntFromObj(interp, objv[3], &iEnd)
){
return TCL_ERROR;
}
rc = p->xToken(p->pCtx, zToken, nToken, iStart, iEnd);
rc = p->xToken(p->pCtx, tflags, zToken, nToken, iStart, iEnd);
Tcl_SetResult(interp, (char*)sqlite3ErrName(rc), TCL_VOLATILE);
return TCL_OK;
usage:
Tcl_WrongNumArgs(interp, 1, objv, "?-colocated? TEXT START END");
return TCL_ERROR;
}
static void f5tDelTokenizer(void *pCtx){

View File

@ -352,7 +352,7 @@ static void fts5MatchinfoFunc(
){
const char *zArg;
Fts5MatchinfoCtx *p;
int rc;
int rc = SQLITE_OK;
if( nVal>0 ){
zArg = (const char*)sqlite3_value_text(apVal[0]);
@ -363,11 +363,16 @@ static void fts5MatchinfoFunc(
p = (Fts5MatchinfoCtx*)pApi->xGetAuxdata(pFts, 0);
if( p==0 || sqlite3_stricmp(zArg, p->zArg) ){
p = fts5MatchinfoNew(pApi, pFts, pCtx, zArg);
pApi->xSetAuxdata(pFts, p, sqlite3_free);
if( p==0 ) return;
if( p==0 ){
rc = SQLITE_NOMEM;
}else{
rc = pApi->xSetAuxdata(pFts, p, sqlite3_free);
}
}
rc = fts5MatchinfoIter(pApi, pFts, p, fts5MatchinfoLocalCb);
if( rc==SQLITE_OK ){
rc = fts5MatchinfoIter(pApi, pFts, p, fts5MatchinfoLocalCb);
}
if( rc!=SQLITE_OK ){
sqlite3_result_error_code(pCtx, rc);
}else{

View File

@ -116,8 +116,9 @@ static void asciiFold(char *aOut, const char *aIn, int nByte){
static int fts5AsciiTokenize(
Fts5Tokenizer *pTokenizer,
void *pCtx,
int flags,
const char *pText, int nText,
int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd)
int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd)
){
AsciiTokenizer *p = (AsciiTokenizer*)pTokenizer;
int rc = SQLITE_OK;
@ -158,7 +159,7 @@ static int fts5AsciiTokenize(
asciiFold(pFold, &pText[is], nByte);
/* Invoke the token callback */
rc = xToken(pCtx, pFold, nByte, is, ie);
rc = xToken(pCtx, 0, pFold, nByte, is, ie);
is = ie+1;
}
@ -385,8 +386,9 @@ static int fts5UnicodeIsAlnum(Unicode61Tokenizer *p, int iCode){
static int fts5UnicodeTokenize(
Fts5Tokenizer *pTokenizer,
void *pCtx,
int flags,
const char *pText, int nText,
int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd)
int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd)
){
Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTokenizer;
int rc = SQLITE_OK;
@ -475,7 +477,7 @@ static int fts5UnicodeTokenize(
}
/* Invoke the token callback */
rc = xToken(pCtx, aFold, zOut-aFold, is, ie);
rc = xToken(pCtx, 0, aFold, zOut-aFold, is, ie);
}
tokenize_done:
@ -553,7 +555,7 @@ static int fts5PorterCreate(
typedef struct PorterContext PorterContext;
struct PorterContext {
void *pCtx;
int (*xToken)(void*, const char*, int, int, int);
int (*xToken)(void*, int, const char*, int, int, int);
char *aBuf;
};
@ -1118,6 +1120,7 @@ static void fts5PorterStep1A(char *aBuf, int *pnBuf){
static int fts5PorterCb(
void *pCtx,
int tflags,
const char *pToken,
int nToken,
int iStart,
@ -1175,10 +1178,10 @@ static int fts5PorterCb(
nBuf--;
}
return p->xToken(p->pCtx, aBuf, nBuf, iStart, iEnd);
return p->xToken(p->pCtx, tflags, aBuf, nBuf, iStart, iEnd);
pass_through:
return p->xToken(p->pCtx, pToken, nToken, iStart, iEnd);
return p->xToken(p->pCtx, tflags, pToken, nToken, iStart, iEnd);
}
/*
@ -1187,8 +1190,9 @@ static int fts5PorterCb(
static int fts5PorterTokenize(
Fts5Tokenizer *pTokenizer,
void *pCtx,
int flags,
const char *pText, int nText,
int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd)
int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd)
){
PorterTokenizer *p = (PorterTokenizer*)pTokenizer;
PorterContext sCtx;
@ -1196,7 +1200,7 @@ static int fts5PorterTokenize(
sCtx.pCtx = pCtx;
sCtx.aBuf = p->aBuf;
return p->tokenizer.xTokenize(
p->pTokenizer, (void*)&sCtx, pText, nText, fts5PorterCb
p->pTokenizer, (void*)&sCtx, flags, pText, nText, fts5PorterCb
);
}
@ -1225,7 +1229,7 @@ int sqlite3Fts5TokenizerInit(fts5_api *pApi){
);
}
return SQLITE_OK;
return rc;
}
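
All of the built-in tokenizers above now take the extra flags argument and pass a
tflags value to each xToken() callback. For reference, here is a minimal sketch of a
third-party tokenizer written against the revised interface. It is not part of this
commit: the synonym lookup and sample logic are invented, and the sketch assumes only
the declarations in fts5.h (Fts5Tokenizer, FTS5_TOKENIZE_QUERY, FTS5_TOKEN_COLOCATED).

#include <string.h>
#include "sqlite3.h"
#include "fts5.h"

/* Hypothetical synonym lookup: return a synonym for the token, or NULL. */
static const char *fictionalFindSynonym(const char *pTok, int nTok){
  if( nTok==2 && memcmp(pTok, "st", 2)==0 ) return "street";
  return 0;
}

static int fictionalTokenize(
  Fts5Tokenizer *pUnused,               /* Tokenizer instance (unused here) */
  void *pCtx,                           /* Copy of 2nd argument to xTokenize() */
  int flags,                            /* Mask of FTS5_TOKENIZE_* flags */
  const char *pText, int nText,         /* Text to tokenize */
  int (*xToken)(void*, int, const char*, int, int, int)
){
  int i = 0;
  int rc = SQLITE_OK;
  (void)pUnused;
  while( rc==SQLITE_OK && i<nText ){
    int iStart;
    while( i<nText && pText[i]==' ' ) i++;            /* Skip whitespace */
    iStart = i;
    while( i<nText && pText[i]!=' ' ) i++;            /* Find end of token */
    if( i>iStart ){
      rc = xToken(pCtx, 0, &pText[iStart], i-iStart, iStart, i);
      if( rc==SQLITE_OK && (flags & FTS5_TOKENIZE_QUERY) ){
        const char *zSyn = fictionalFindSynonym(&pText[iStart], i-iStart);
        if( zSyn ){
          /* Same start/end offsets: the synonym is colocated with the
          ** token just reported. */
          rc = xToken(pCtx, FTS5_TOKEN_COLOCATED, zSyn, (int)strlen(zSyn),
                      iStart, i);
        }
      }
    }
  }
  return rc;
}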

View File

@ -295,3 +295,36 @@ proc NOT {a b} {
return $a
}
#-------------------------------------------------------------------------
# This command is similar to [split], except that it also provides the
# start and end offsets of each token. For example:
#
# [fts5_tokenize_split "abc d ef"] -> {abc 0 3 d 4 5 ef 6 8}
#
proc gobble_whitespace {textvar} {
upvar $textvar t
regexp {([ ]*)(.*)} $t -> space t
return [string length $space]
}
proc gobble_text {textvar wordvar} {
upvar $textvar t
upvar $wordvar w
regexp {([^ ]*)(.*)} $t -> w t
return [string length $w]
}
proc fts5_tokenize_split {text} {
set token ""
set ret [list]
set iOff [gobble_whitespace text]
while {[set nToken [gobble_text text word]]} {
lappend ret $word $iOff [expr $iOff+$nToken]
incr iOff $nToken
incr iOff [gobble_whitespace text]
}
set ret
}

View File

@ -51,7 +51,7 @@ do_execsql_test 2.1 {
do_test 2.2 {
execsql { SELECT fts5_decode(id, block) FROM t1_data WHERE id==10 }
} {/{\(structure\) {lvl=0 nMerge=0 nSeg=1 {id=[0123456789]* h=0 leaves=1..1}}}/}
} {/{{structure} {lvl=0 nMerge=0 nSeg=1 {id=[0123456789]* h=0 leaves=1..1}}}/}
foreach w {a b c d e f} {
do_execsql_test 2.3.$w.asc {
@ -343,7 +343,7 @@ do_execsql_test 13.5 {
} {1}
do_execsql_test 13.6 {
SELECT rowid FROM t1 WHERE t1 MATCH '.';
SELECT rowid FROM t1 WHERE t1 MATCH '""';
} {}
#-------------------------------------------------------------------------
@ -506,6 +506,36 @@ do_execsql_test 18.3 {
SELECT t1.rowid, t2.rowid FROM t2, t1 WHERE t2 MATCH t1.a AND t1.rowid = t2.c
} {1 1}
#--------------------------------------------------------------------
# fts5 table in the temp schema.
#
reset_db
do_execsql_test 19.0 {
CREATE VIRTUAL TABLE temp.t1 USING fts5(x);
INSERT INTO t1 VALUES('x y z');
INSERT INTO t1 VALUES('w x 1');
SELECT rowid FROM t1 WHERE t1 MATCH 'x';
} {1 2}
#--------------------------------------------------------------------
# Test that 6 and 7 byte varints can be read.
#
reset_db
do_execsql_test 20.0 {
CREATE VIRTUAL TABLE temp.tmp USING fts5(x);
}
set ::ids [list \
0 [expr 1<<36] [expr 2<<36] [expr 1<<43] [expr 2<<43]
]
do_test 20.1 {
foreach id $::ids {
execsql { INSERT INTO tmp(rowid, x) VALUES($id, 'x y z') }
}
execsql { SELECT rowid FROM tmp WHERE tmp MATCH 'y' }
} $::ids
finish_test

View File

@ -90,13 +90,13 @@ foreach {tn q res} "
do_test 1.6.$tn.1 {
set n [execsql_reads $q]
puts -nonewline "(n=$n nReadX=$nReadX)"
#puts -nonewline "(n=$n nReadX=$nReadX)"
expr {$n < ($nReadX / 8)}
} {1}
do_test 1.6.$tn.2 {
set n [execsql_reads "$q ORDER BY rowid DESC"]
puts -nonewline "(n=$n nReadX=$nReadX)"
#puts -nonewline "(n=$n nReadX=$nReadX)"
expr {$n < ($nReadX / 8)}
} {1}

View File

@ -134,5 +134,18 @@ do_execsql_test 3.2.1 {
1 {-1 0 -1} 2 {-1 0 -1}
}
#-------------------------------------------------------------------------
# Test the integrity-check on a table created with columnsize=0
#
do_execsql_test 4.1.1 {
CREATE VIRTUAL TABLE t5 USING fts5(x, columnsize=0);
INSERT INTO t5 VALUES('1 2 3 4');
INSERT INTO t5 VALUES('2 4 6 8');
}
breakpoint
do_execsql_test 4.1.2 {
INSERT INTO t5(t5) VALUES('integrity-check');
}
finish_test

View File

@ -87,6 +87,12 @@ do_execsql_test 4.0 {
SELECT fts5_expr('a AND """"', 'x', 'tokenize="unicode61 tokenchars ''""''"');
} {{"a" AND """"}}
#-------------------------------------------------------------------------
# Check that a stray "|" character in an expression is reported as a syntax error.
#
do_catchsql_test 5.0 {
SELECT fts5_expr('abc | def');
} {1 {fts5: syntax error near "|"}}

View File

@ -30,18 +30,18 @@ proc do_syntax_test {tn expr res} {
}
foreach {tn expr res} {
1 {abc} {"abc"}
2 {abc .} {"abc"}
3 {.} {}
4 {abc OR .} {"abc"}
5 {abc NOT .} {"abc"}
6 {abc AND .} {"abc"}
7 {. OR abc} {"abc"}
8 {. NOT abc} {"abc"}
9 {. AND abc} {"abc"}
10 {abc + . + def} {"abc" + "def"}
11 {abc . def} {"abc" AND "def"}
12 {r+e OR w} {"r" + "e" OR "w"}
1 {abc} {"abc"}
2 {abc ""} {"abc"}
3 {""} {}
4 {abc OR ""} {"abc"}
5 {abc NOT ""} {"abc"}
6 {abc AND ""} {"abc"}
7 {"" OR abc} {"abc"}
8 {"" NOT abc} {"abc"}
9 {"" AND abc} {"abc"}
10 {abc + "" + def} {"abc" + "def"}
11 {abc "" def} {"abc" AND "def"}
12 {r+e OR w} {"r" + "e" OR "w"}
} {
do_execsql_test 1.$tn {SELECT fts5_expr($expr)} [list $res]
}

View File

@ -22,6 +22,7 @@ ifcapable !fts5 {
return
}
#-------------------------------------------------------------------------
# OOM while rebuilding an FTS5 table.
#
@ -148,5 +149,149 @@ do_faultsim_test 4.1 -faults oom-t* -prep {
faultsim_test_result {0 {}}
}
#-------------------------------------------------------------------------
#
# 5.2.* OOM while running a query that includes synonyms and matchinfo().
#
# 5.3.* OOM while running a query that returns a row containing instances
# of more than 4 synonyms for a single term.
#
proc mit {blob} {
set scan(littleEndian) i*
set scan(bigEndian) I*
binary scan $blob $scan($::tcl_platform(byteOrder)) r
return $r
}
proc tcl_tokenize {tflags text} {
foreach {w iStart iEnd} [fts5_tokenize_split $text] {
sqlite3_fts5_token $w $iStart $iEnd
if {$tflags=="query" && [string length $w]==1} {
for {set i 2} {$i < 7} {incr i} {
sqlite3_fts5_token -colo [string repeat $w $i] $iStart $iEnd
}
}
}
}
proc tcl_create {args} { return "tcl_tokenize" }
reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create
db func mit mit
sqlite3_fts5_register_matchinfo db
do_test 5.0 {
execsql { CREATE VIRTUAL TABLE t1 USING fts5(a, tokenize=tcl) }
execsql { INSERT INTO t1(t1, rank) VALUES('pgsz', 32) }
foreach {rowid text} {
1 {aaaa cc b aaaaa cc aa}
2 {aa aa bb a bbb}
3 {bb aaaaa aaaaa b aaaa aaaaa}
4 {aa a b aaaa aa}
5 {aa b ccc aaaaa cc}
6 {aa aaaaa bbbb cc aaa}
7 {aaaaa aa aa ccccc bb}
8 {ccc bbbbb ccccc bbb c}
9 {cccccc bbbb a aaa cccc c}
20 {ddd f ddd eeeee fff ffff eeee ddd fff eeeee dddddd eeee}
21 {fffff eee dddd fffff dd ee ee eeeee eee eeeeee ee dd e}
22 {fffff d eeee dddd fffff dddddd ffff ddddd eeeee ee eee dddd ddddd}
23 {ddddd fff ddd eeeee ffff eeee ddd ff ff ffffff eeeeee dddd ffffff}
24 {eee dd ee dddd dddd eeeeee e eee fff ffff}
25 {ddddd ffffff dddddd fff ddd ddddd ddd f eeee fff dddd f}
26 {f ffff fff fff eeeeee dddd d dddddd ddddd eee ff eeeee}
27 {eee fff dddddd eeeee eeeee dddd ddddd ffff f eeeee eee dddddd ddddd d}
28 {dd ddddd d ddd d fff d dddd ee dddd ee ddd dddddd dddddd}
29 {eeee dddd ee dddd eeee dddd dd fffff f ddd eeeee ddd ee}
30 {ff ffffff eeeeee eeeee eee ffffff ff ffff f fffff eeeee}
31 {fffff eeeeee dddd eeee eeee eeeeee eee fffff d ddddd ffffff ffff dddddd}
32 {dddddd fffff ee eeeeee eeee ee fff dddd fff eeee ffffff eeeeee ffffff}
33 {ddddd eeee dd ffff dddddd fff eeee ddddd ffff eeee ddd}
34 {ee dddd ddddd dddddd eeee eeeeee f dd ee dddddd ffffff}
35 {ee dddd dd eeeeee ddddd eee d eeeeee dddddd eee dddd fffff}
36 {eee ffffff ffffff e fffff eeeee ff dddddd dddddd fff}
37 {eeeee fffff dddddd dddd ffffff fff f dd ee dd dd eeeee}
38 {eeeeee ee d ff eeeeee eeeeee eee eeeee ee ffffff dddd eeee dddddd ee}
39 {eeeeee ddd fffff e dddd ee eee eee ffffff ee f d dddd}
40 {ffffff dddddd eee ee ffffff eee eeee ddddd ee eeeeee f}
41 {ddd ddd fff fffff ee fffff f fff ddddd fffff}
42 {dddd ee ff d f ffffff fff ffffff ff dd dddddd f eeee}
43 {d dd fff fffff d f fff e dddd ee ee}
44 {ff ffff eee ddd d dd ffff dddd d eeee d eeeeee}
45 {eeee f eeeee ee e ffff f ddd e fff}
46 {ffff d ffff eeee ffff eeeee f ffff ddddd eee}
47 {dd dd dddddd ddddd fffff dddddd ddd ddddd eeeeee ffff eeee eee ee}
48 {ffff ffff e dddd ffffff dd dd dddd f fffff}
49 {ffffff d dddddd ffff eeeee f ffff ffff d dd fffff eeeee}
50 {x e}
} {
execsql { INSERT INTO t1(rowid, a) VALUES($rowid, $text) }
}
} {}
set res [list {*}{
1 {3 24 8 2 12 6}
5 {2 24 8 2 12 6}
6 {3 24 8 1 12 6}
7 {3 24 8 1 12 6}
9 {2 24 8 3 12 6}
}]
do_execsql_test 5.1.1 {
SELECT rowid, mit(matchinfo(t1, 'x')) FROM t1 WHERE t1 MATCH 'a AND c'
} $res
do_execsql_test 5.1.2 {
SELECT count(*) FROM t1 WHERE t1 MATCH 'd e f'
} 29
faultsim_save_and_close
do_faultsim_test 5.2 -faults oom* -prep {
faultsim_restore_and_reopen
sqlite3_fts5_create_tokenizer db tcl tcl_create
sqlite3_fts5_register_matchinfo db
db func mit mit
} -body {
db eval {
SELECT rowid, mit(matchinfo(t1, 'x')) FROM t1 WHERE t1 MATCH 'a AND c'
}
} -test {
faultsim_test_result [list 0 $::res]
}
do_faultsim_test 5.3 -faults oom* -prep {
faultsim_restore_and_reopen
sqlite3_fts5_create_tokenizer db tcl tcl_create
} -body {
db eval {
SELECT count(*) FROM t1 WHERE t1 MATCH 'd AND e AND f'
}
} -test {
faultsim_test_result {0 29}
}
do_faultsim_test 5.4 -faults oom* -prep {
faultsim_restore_and_reopen
sqlite3_fts5_create_tokenizer db tcl tcl_create
} -body {
db eval {
SELECT count(*) FROM t1 WHERE t1 MATCH 'x + e'
}
} -test {
faultsim_test_result {0 1}
}
#-------------------------------------------------------------------------
catch { db close }
breakpoint
do_faultsim_test 6 -faults oom* -prep {
sqlite_orig db test.db
sqlite3_db_config_lookaside db 0 0 0
} -body {
load_static_extension db fts5
} -test {
faultsim_test_result {0 {}} {1 {initialization of fts5 failed: }}
if {$testrc==0} {
db eval { CREATE VIRTUAL TABLE temp.t1 USING fts5(x) }
}
db close
}
finish_test

View File

@ -0,0 +1,45 @@
# 2015 September 3
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
#
# This file is focused on OOM errors.
#
source [file join [file dirname [info script]] fts5_common.tcl]
source $testdir/malloc_common.tcl
set testprefix fts5fault2
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
#-------------------------------------------------------------------------
# Test fault-injection on a query that uses xColumnSize() on a columnsize=0
# table.
#
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize=0);
INSERT INTO t1 VALUES('a b c d e f g');
INSERT INTO t1 VALUES('a b c d');
INSERT INTO t1 VALUES('a b c d e f g h i j');
}
fts5_aux_test_functions db
do_faultsim_test 1 -faults oom* -body {
execsql { SELECT fts5_test_columnsize(t1) FROM t1 WHERE t1 MATCH 'b' }
} -test {
faultsim_test_result {0 {7 4 10}} {1 SQLITE_NOMEM}
}
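
The fault-injection test above exercises xColumnSize() on a columnsize=0 table,
where fts5 has no stored docsize record and must compute the value instead. The
following is a sketch of an auxiliary function that makes such a request; the
function name is made up and the signatures are assumptions based on fts5.h, with
registration as an fts5 auxiliary function omitted.

#include "sqlite3.h"
#include "fts5.h"

/* Hypothetical auxiliary function: return the number of tokens in column 0
** of the current row. */
static void columnSizeDemo(
  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
  Fts5Context *pFts,              /* First argument to pass to pApi routines */
  sqlite3_context *pCtx,          /* Context for returning result or error */
  int nVal,                       /* Number of values in apVal[] array */
  sqlite3_value **apVal           /* Array of trailing arguments */
){
  int nToken = 0;
  int rc = pApi->xColumnSize(pFts, 0, &nToken);
  (void)nVal; (void)apVal;
  if( rc==SQLITE_OK ){
    sqlite3_result_int(pCtx, nToken);
  }else{
    sqlite3_result_error_code(pCtx, rc);
  }
}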
finish_test

View File

@ -355,10 +355,10 @@ do_execsql_test 10.1 {
#---------------------------------------------------------------------------
# Test the 'y' matchinfo flag
#
set sqlite_fts3_enable_parentheses 1
reset_db
sqlite3_fts5_register_matchinfo db
do_execsql_test 11.0 {
CREATE VIRTUAL TABLE tt USING fts3(x, y);
CREATE VIRTUAL TABLE tt USING fts5(x, y);
INSERT INTO tt VALUES('c d a c d d', 'e a g b d a'); -- 1
INSERT INTO tt VALUES('c c g a e b', 'c g d g e c'); -- 2
INSERT INTO tt VALUES('b e f d e g', 'b a c b c g'); -- 3
@ -432,19 +432,18 @@ foreach {tn expr res} {
SELECT rowid, mit(matchinfo(tt, 'b')) FROM tt WHERE tt MATCH $expr
} $r2
}
set sqlite_fts3_enable_parentheses 0
#---------------------------------------------------------------------------
# Test the 'b' matchinfo flag
#
set sqlite_fts3_enable_parentheses 1
reset_db
sqlite3_fts5_register_matchinfo db
db func mit mit
do_test 12.0 {
set cols [list]
for {set i 0} {$i < 50} {incr i} { lappend cols "c$i" }
execsql "CREATE VIRTUAL TABLE tt USING fts3([join $cols ,])"
execsql "CREATE VIRTUAL TABLE tt USING fts5([join $cols ,])"
} {}
do_execsql_test 12.1 {
@ -452,6 +451,5 @@ do_execsql_test 12.1 {
SELECT mit(matchinfo(tt, 'b')) FROM tt WHERE tt MATCH 'abc';
} [list [list [expr 1<<4] [expr 1<<(45-32)]]]
set sqlite_fts3_enable_parentheses 0
finish_test

View File

@ -0,0 +1,460 @@
# 2014 Dec 20
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests focusing on custom tokenizers that support synonyms.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5synonym
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
foreach S {
{zero 0}
{one 1 i}
{two 2 ii}
{three 3 iii}
{four 4 iv}
{five 5 v}
{six 6 vi}
{seven 7 vii}
{eight 8 viii}
{nine 9 ix}
} {
foreach s $S {
set o [list]
foreach x $S {if {$x!=$s} {lappend o $x}}
set ::syn($s) $o
}
}
proc tcl_tokenize {tflags text} {
foreach {w iStart iEnd} [fts5_tokenize_split $text] {
sqlite3_fts5_token $w $iStart $iEnd
}
}
proc tcl_create {args} {
return "tcl_tokenize"
}
sqlite3_fts5_create_tokenizer db tcl tcl_create
#-------------------------------------------------------------------------
# Warm body test for the code in fts5_tcl.c.
#
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl);
INSERT INTO ft VALUES('abc def ghi');
INSERT INTO ft VALUES('jkl mno pqr');
SELECT rowid, x FROM ft WHERE ft MATCH 'def';
SELECT x, rowid FROM ft WHERE ft MATCH 'pqr';
} {1 {abc def ghi} {jkl mno pqr} 2}
#-------------------------------------------------------------------------
# Test a tokenizer that supports synonyms by adding extra entries to the
# FTS index.
#
proc tcl_tokenize {tflags text} {
foreach {w iStart iEnd} [fts5_tokenize_split $text] {
sqlite3_fts5_token $w $iStart $iEnd
if {$tflags=="document" && [info exists ::syn($w)]} {
foreach s $::syn($w) {
sqlite3_fts5_token -colo $s $iStart $iEnd
}
}
}
}
reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create
do_execsql_test 2.0 {
CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl);
INSERT INTO ft VALUES('one two three');
INSERT INTO ft VALUES('four five six');
INSERT INTO ft VALUES('eight nine ten');
} {}
foreach {tn expr res} {
1 "3" 1
2 "eight OR 8 OR 5" {2 3}
3 "10" {}
4 "1*" {1}
5 "1 + 2" {1}
} {
do_execsql_test 2.1.$tn {
SELECT rowid FROM ft WHERE ft MATCH $expr
} $res
}
#-------------------------------------------------------------------------
# Test some broken tokenizers:
#
# 3.1.*: A tokenizer that declares the very first token to be colocated.
#
# 3.2.*: A tokenizer that reports two identical tokens at the same position.
# This is allowed.
#
reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create
proc tcl_tokenize {tflags text} {
set bColo 1
foreach {w iStart iEnd} [fts5_tokenize_split $text] {
if {$bColo} {
sqlite3_fts5_token -colo $w $iStart $iEnd
set bColo 0
} {
sqlite3_fts5_token $w $iStart $iEnd
}
}
}
do_execsql_test 3.1.0 {
CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl);
INSERT INTO ft VALUES('one two three');
CREATE VIRTUAL TABLE vv USING fts5vocab(ft, row);
SELECT * FROM vv;
} {
one 1 1 three 1 1 two 1 1
}
do_execsql_test 3.1.1 {
INSERT INTO ft(ft) VALUES('integrity-check');
} {}
proc tcl_tokenize {tflags text} {
foreach {w iStart iEnd} [fts5_tokenize_split $text] {
sqlite3_fts5_token $w $iStart $iEnd
}
}
do_execsql_test 3.1.2 {
SELECT rowid FROM ft WHERE ft MATCH 'one two three'
} {1}
reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create
proc tcl_tokenize {tflags text} {
foreach {w iStart iEnd} [fts5_tokenize_split $text] {
sqlite3_fts5_token $w $iStart $iEnd
sqlite3_fts5_token -colo $w $iStart $iEnd
}
}
do_execsql_test 3.2.0 {
CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl);
INSERT INTO ft VALUES('one one two three');
CREATE VIRTUAL TABLE vv USING fts5vocab(ft, row);
SELECT * FROM vv;
} {
one 1 4 three 1 2 two 1 2
}
do_execsql_test 3.2.1 {
SELECT rowid FROM ft WHERE ft MATCH 'one';
} {1}
do_execsql_test 3.2.2 {
SELECT rowid FROM ft WHERE ft MATCH 'one two three';
} {1}
do_execsql_test 3.2.3 {
SELECT rowid FROM ft WHERE ft MATCH 'one + one + two + three';
} {1}
do_execsql_test 3.2.4 {
SELECT rowid FROM ft WHERE ft MATCH 'one two two three';
} {1}
do_execsql_test 3.2.5 {
SELECT rowid FROM ft WHERE ft MATCH 'one + two + two + three';
} {}
#-------------------------------------------------------------------------
# Check that expressions with synonyms can be parsed and executed.
#
reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create
proc tcl_tokenize {tflags text} {
foreach {w iStart iEnd} [fts5_tokenize_split $text] {
sqlite3_fts5_token $w $iStart $iEnd
if {$tflags=="query" && [info exists ::syn($w)]} {
foreach s $::syn($w) {
sqlite3_fts5_token -colo $s $iStart $iEnd
}
}
}
}
foreach {tn expr res} {
1 {abc} {"abc"}
2 {one} {"one"|"i"|"1"}
3 {3} {"3"|"iii"|"three"}
4 {3*} {"3"|"iii"|"three" *}
} {
do_execsql_test 4.1.$tn {SELECT fts5_expr($expr, 'tokenize=tcl')} [list $res]
}
do_execsql_test 4.2.1 {
CREATE VIRTUAL TABLE xx USING fts5(x, tokenize=tcl);
INSERT INTO xx VALUES('one two');
INSERT INTO xx VALUES('three four');
}
do_execsql_test 4.2.2 {
SELECT rowid FROM xx WHERE xx MATCH '2'
} {1}
do_execsql_test 4.2.3 {
SELECT rowid FROM xx WHERE xx MATCH '3'
} {2}
do_test 5.0 {
execsql {
CREATE VIRTUAL TABLE t1 USING fts5(a, b, tokenize=tcl)
}
foreach {rowid a b} {
1 {four v 4 i three} {1 3 five five 4 one}
2 {5 1 3 4 i} {2 2 v two 4}
3 {5 i 5 2 four 4 1} {iii ii five two 1}
4 {ii four 4 one 5 three five} {one 5 1 iii 4 3}
5 {three i v i four 4 1} {ii five five five iii}
6 {4 2 ii two 2 iii} {three 1 four 4 iv 1 iv}
7 {ii ii two three 2 5} {iii i ii iii iii one one}
8 {2 ii i two 3 three 2} {two iv v iii 3 five}
9 {i 2 iv 3 five four v} {iii 4 three i three ii 1}
} {
execsql { INSERT INTO t1(rowid, a, b) VALUES($rowid, $a, $b) }
}
} {}
foreach {tn q res} {
1 {one} {
1 {four v 4 [i] three} {[1] 3 five five 4 [one]}
2 {5 [1] 3 4 [i]} {2 2 v two 4}
3 {5 [i] 5 2 four 4 [1]} {iii ii five two [1]}
4 {ii four 4 [one] 5 three five} {[one] 5 [1] iii 4 3}
5 {three [i] v [i] four 4 [1]} {ii five five five iii}
6 {4 2 ii two 2 iii} {three [1] four 4 iv [1] iv}
7 {ii ii two three 2 5} {iii [i] ii iii iii [one] [one]}
8 {2 ii [i] two 3 three 2} {two iv v iii 3 five}
9 {[i] 2 iv 3 five four v} {iii 4 three [i] three ii [1]}
}
2 {five four} {
1 {[four] [v] [4] i three} {1 3 [five] [five] [4] one}
2 {[5] 1 3 [4] i} {2 2 [v] two [4]}
3 {[5] i [5] 2 [four] [4] 1} {iii ii [five] two 1}
4 {ii [four] [4] one [5] three [five]} {one [5] 1 iii [4] 3}
5 {three i [v] i [four] [4] 1} {ii [five] [five] [five] iii}
8 {2 ii i two 3 three 2} {two [iv] [v] iii 3 [five]}
9 {i 2 [iv] 3 [five] [four] [v]} {iii [4] three i three ii 1}
}
3 {one OR two OR iii OR 4 OR v} {
1 {[four] [v] [4] [i] [three]} {[1] [3] [five] [five] [4] [one]}
2 {[5] [1] [3] [4] [i]} {[2] [2] [v] [two] [4]}
3 {[5] [i] [5] [2] [four] [4] [1]} {[iii] [ii] [five] [two] [1]}
4 {[ii] [four] [4] [one] [5] [three] [five]} {[one] [5] [1] [iii] [4] [3]}
5 {[three] [i] [v] [i] [four] [4] [1]} {[ii] [five] [five] [five] [iii]}
6 {[4] [2] [ii] [two] [2] [iii]} {[three] [1] [four] [4] [iv] [1] [iv]}
7 {[ii] [ii] [two] [three] [2] [5]} {[iii] [i] [ii] [iii] [iii] [one] [one]}
8 {[2] [ii] [i] [two] [3] [three] [2]} {[two] [iv] [v] [iii] [3] [five]}
9 {[i] [2] [iv] [3] [five] [four] [v]} {[iii] [4] [three] [i] [three] [ii] [1]}
}
4 {5 + 1} {
2 {[5 1] 3 4 i} {2 2 v two 4}
3 {[5 i] 5 2 four 4 1} {iii ii five two 1}
4 {ii four 4 one 5 three five} {one [5 1] iii 4 3}
5 {three i [v i] four 4 1} {ii five five five iii}
}
5 {one + two + three} {
7 {ii ii two three 2 5} {iii [i ii iii] iii one one}
8 {2 ii [i two 3] three 2} {two iv v iii 3 five}
}
6 {"v v"} {
1 {four v 4 i three} {1 3 [five five] 4 one}
5 {three i v i four 4 1} {ii [five five five] iii}
}
} {
do_execsql_test 5.1.$tn {
SELECT rowid, highlight(t1, 0, '[', ']'), highlight(t1, 1, '[', ']')
FROM t1 WHERE t1 MATCH $q
} $res
}
# Test that the xQueryPhrase() API works with synonyms.
#
proc mit {blob} {
set scan(littleEndian) i*
set scan(bigEndian) I*
binary scan $blob $scan($::tcl_platform(byteOrder)) r
return $r
}
db func mit mit
sqlite3_fts5_register_matchinfo db
foreach {tn q res} {
1 {one} {
1 {1 11 7 2 12 6} 2 {2 11 7 0 12 6}
3 {2 11 7 1 12 6} 4 {1 11 7 2 12 6}
5 {3 11 7 0 12 6} 6 {0 11 7 2 12 6}
7 {0 11 7 3 12 6} 8 {1 11 7 0 12 6}
9 {1 11 7 2 12 6}
}
} {
do_execsql_test 5.2.$tn {
SELECT rowid, mit(matchinfo(t1, 'x')) FROM t1 WHERE t1 MATCH $q
} $res
}
#-------------------------------------------------------------------------
# Test terms with more than 4 synonyms.
#
reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create
proc tcl_tokenize {tflags text} {
foreach {w iStart iEnd} [fts5_tokenize_split $text] {
sqlite3_fts5_token $w $iStart $iEnd
if {$tflags=="query" && [string length $w]==1} {
for {set i 2} {$i<=10} {incr i} {
sqlite3_fts5_token -colo [string repeat $w $i] $iStart $iEnd
}
}
}
}
do_execsql_test 6.0.1 {
CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize=tcl);
INSERT INTO t1 VALUES('yy xx qq');
INSERT INTO t1 VALUES('yy xx xx');
}
do_execsql_test 6.0.2 {
SELECT * FROM t1 WHERE t1 MATCH 'NEAR(y q)';
} {{yy xx qq}}
do_test 6.0.3 {
execsql {
CREATE VIRTUAL TABLE t2 USING fts5(a, b, tokenize=tcl)
}
foreach {rowid a b} {
1 {yyyy vvvvv qq oo yyyyyy vvvv eee} {ffff uu r qq aaaa}
2 {ww oooooo bbbbb ssssss mm} {ffffff yy iiii rr s ccc qqqqq}
3 {zzzz llll gggggg cccc uu} {hhhhhh aaaa ppppp rr ee jjjj}
4 {r f i rrrrrr ww hhh} {aa yyy t x aaaaa ii}
5 {fffff mm vvvv ooo ffffff kkkk tttt} {cccccc bb e zzz d n}
6 {iii dddd hh qqqq ddd ooo} {ttt d c b aaaaaa qqqq}
7 {jjjj rrrr v zzzzz u tt t} {ppppp pp dddd mm hhh uuu}
8 {gggg rrrrrr kkkk vvvv gggg jjjjjj b} {dddddd jj r w cccc wwwwww ss}
9 {kkkkk qqq oooo e tttttt mmm} {e ss qqqqqq hhhh llllll gg}
} {
execsql { INSERT INTO t2(rowid, a, b) VALUES($rowid, $a, $b) }
}
} {}
foreach {tn q res} {
1 {a} {
1 {yyyy vvvvv qq oo yyyyyy vvvv eee} {ffff uu r qq [aaaa]}
3 {zzzz llll gggggg cccc uu} {hhhhhh [aaaa] ppppp rr ee jjjj}
4 {r f i rrrrrr ww hhh} {[aa] yyy t x [aaaaa] ii}
6 {iii dddd hh qqqq ddd ooo} {ttt d c b [aaaaaa] qqqq}
}
2 {a AND q} {
1 {yyyy vvvvv [qq] oo yyyyyy vvvv eee} {ffff uu r [qq] [aaaa]}
6 {iii dddd hh [qqqq] ddd ooo} {ttt d c b [aaaaaa] [qqqq]}
}
3 {o OR (q AND a)} {
1 {yyyy vvvvv [qq] [oo] yyyyyy vvvv eee} {ffff uu r [qq] [aaaa]}
2 {ww [oooooo] bbbbb ssssss mm} {ffffff yy iiii rr s ccc qqqqq}
5 {fffff mm vvvv [ooo] ffffff kkkk tttt} {cccccc bb e zzz d n}
6 {iii dddd hh [qqqq] ddd [ooo]} {ttt d c b [aaaaaa] [qqqq]}
9 {kkkkk qqq [oooo] e tttttt mmm} {e ss qqqqqq hhhh llllll gg}
}
4 {NEAR(q y, 20)} {
1 {[yyyy] vvvvv [qq] oo [yyyyyy] vvvv eee} {ffff uu r qq aaaa}
2 {ww oooooo bbbbb ssssss mm} {ffffff [yy] iiii rr s ccc [qqqqq]}
}
} {
do_execsql_test 6.1.$tn.asc {
SELECT rowid, highlight(t2, 0, '[', ']'), highlight(t2, 1, '[', ']')
FROM t2 WHERE t2 MATCH $q
} $res
set res2 [list]
foreach {rowid a b} $res {
set res2 [concat [list $rowid $a $b] $res2]
}
do_execsql_test 6.1.$tn.desc {
SELECT rowid, highlight(t2, 0, '[', ']'), highlight(t2, 1, '[', ']')
FROM t2 WHERE t2 MATCH $q ORDER BY rowid DESC
} $res2
}
do_execsql_test 6.2.1 {
INSERT INTO t2(rowid, a, b) VALUES(13,
'x xx xxx xxxx xxxxx xxxxxx xxxxxxx', 'y yy yyy yyyy yyyyy yyyyyy yyyyyyy'
);
SELECT rowid, highlight(t2, 0, '<', '>'), highlight(t2, 1, '(', ')')
FROM t2 WHERE t2 MATCH 'x OR y'
} {
1 {<yyyy> vvvvv qq oo <yyyyyy> vvvv eee} {ffff uu r qq aaaa}
2 {ww oooooo bbbbb ssssss mm} {ffffff (yy) iiii rr s ccc qqqqq}
4 {r f i rrrrrr ww hhh} {aa (yyy) t (x) aaaaa ii}
13 {<x> <xx> <xxx> <xxxx> <xxxxx> <xxxxxx> <xxxxxxx>}
{(y) (yy) (yyy) (yyyy) (yyyyy) (yyyyyy) (yyyyyyy)}
}
#-------------------------------------------------------------------------
# Test that the xColumnSize() API is not confused by colocated tokens.
#
reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create
fts5_aux_test_functions db
proc tcl_tokenize {tflags text} {
foreach {w iStart iEnd} [fts5_tokenize_split $text] {
sqlite3_fts5_token $w $iStart $iEnd
if {[string length $w]==1} {
for {set i 2} {$i<=10} {incr i} {
sqlite3_fts5_token -colo [string repeat $w $i] $iStart $iEnd
}
}
}
}
do_execsql_test 7.0.1 {
CREATE VIRTUAL TABLE t1 USING fts5(a, b, columnsize=1, tokenize=tcl);
INSERT INTO t1 VALUES('0 2 3', '4 5 6 7');
INSERT INTO t1 VALUES('8 9', '0 0 0 0 0 0 0 0 0 0');
SELECT fts5_test_columnsize(t1) FROM t1 WHERE t1 MATCH '000 AND 00 AND 0';
} {{3 4} {2 10}}
do_execsql_test 7.0.2 {
INSERT INTO t1(t1) VALUES('integrity-check');
}
do_execsql_test 7.1.1 {
CREATE VIRTUAL TABLE t2 USING fts5(a, b, columnsize=0, tokenize=tcl);
INSERT INTO t2 VALUES('0 2 3', '4 5 6 7');
INSERT INTO t2 VALUES('8 9', '0 0 0 0 0 0 0 0 0 0');
SELECT fts5_test_columnsize(t2) FROM t2 WHERE t2 MATCH '000 AND 00 AND 0';
} {{3 4} {2 10}}
do_execsql_test 7.1.2 {
INSERT INTO t2(t2) VALUES('integrity-check');
}
finish_test

View File

@ -5,14 +5,52 @@
# Process command line arguments.
#
proc usage {} {
puts stderr "usage: $::argv0 database table"
puts stderr "usage: $::argv0 ?OPTIONS? database table"
puts stderr ""
puts stderr " -nterm (count number of terms in each segment)"
puts stderr ""
exit 1
}
if {[llength $argv]!=2} usage
set database [lindex $argv 0]
set tbl [lindex $argv 1]
set O(nterm) 0
if {[llength $argv]<2} usage
foreach a [lrange $argv 0 end-2] {
switch -- $a {
-nterm {
set O(nterm) 1
}
default {
usage
}
}
}
set database [lindex $argv end-1]
set tbl [lindex $argv end]
#-------------------------------------------------------------------------
# Count the number of terms in each segment of fts5 table $tbl. Store the
# counts in the array variable in the parent context named by parameter
# $arrayname, indexed by segment-id. Example:
#
# count_terms fts_tbl A
# foreach {k v} [array get A] { puts "segid=$k nTerm=$v" }
#
proc count_terms {tbl arrayname} {
upvar $arrayname A
array unset A
db eval "SELECT fts5_decode(rowid, block) AS d FROM ${tbl}_data" {
set desc [lindex $d 0]
if {[regexp {^segid=([0-9]*)} $desc -> id]} {
foreach i [lrange $d 1 end] {
if {[string match {term=*} $i]} { incr A($id) }
}
}
}
}
#-------------------------------------------------------------------------
@ -21,11 +59,21 @@ set tbl [lindex $argv 1]
sqlite3 db $database
catch { load_static_extension db fts5 }
if {$O(nterm)} { count_terms $tbl A }
db eval "SELECT fts5_decode(rowid, block) AS d FROM ${tbl}_data WHERE id=10" {
foreach lvl [lrange $d 1 end] {
puts [lrange $lvl 0 2]
foreach seg [lrange $lvl 3 end] {
puts " $seg"
if {$::O(nterm)} {
regexp {^id=([0-9]*)} $seg -> id
set nTerm 0
catch { set nTerm $A($id) }
puts [format " % -28s nTerm=%d" $seg $nTerm]
} else {
puts [format " % -28s" $seg]
}
}
}
}

46
main.mk
View File

@ -47,6 +47,7 @@
TCCX = $(TCC) $(OPTS) -I. -I$(TOP)/src -I$(TOP)
TCCX += -I$(TOP)/ext/rtree -I$(TOP)/ext/icu -I$(TOP)/ext/fts3
TCCX += -I$(TOP)/ext/async -I$(TOP)/ext/userauth
TCCX += -I$(TOP)/ext/fts5
# Object files for the SQLite library.
#
@ -230,6 +231,29 @@ SRC += \
$(TOP)/ext/rbu/sqlite3rbu.h
# FTS5 things
#
FTS5_HDR = \
$(TOP)/ext/fts5/fts5.h \
$(TOP)/ext/fts5/fts5Int.h \
fts5parse.h
FTS5_SRC = \
$(TOP)/ext/fts5/fts5_aux.c \
$(TOP)/ext/fts5/fts5_buffer.c \
$(TOP)/ext/fts5/fts5_main.c \
$(TOP)/ext/fts5/fts5_config.c \
$(TOP)/ext/fts5/fts5_expr.c \
$(TOP)/ext/fts5/fts5_hash.c \
$(TOP)/ext/fts5/fts5_index.c \
fts5parse.c \
$(TOP)/ext/fts5/fts5_storage.c \
$(TOP)/ext/fts5/fts5_tokenize.c \
$(TOP)/ext/fts5/fts5_unicode2.c \
$(TOP)/ext/fts5/fts5_varint.c \
$(TOP)/ext/fts5/fts5_vocab.c \
# Generated source code files
#
SRC += \
@ -636,25 +660,6 @@ fts3_write.o: $(TOP)/ext/fts3/fts3_write.c $(HDR) $(EXTHDR)
rtree.o: $(TOP)/ext/rtree/rtree.c $(HDR) $(EXTHDR)
$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/rtree/rtree.c
# FTS5 things
#
FTS5_SRC = \
$(TOP)/ext/fts5/fts5.h \
$(TOP)/ext/fts5/fts5Int.h \
$(TOP)/ext/fts5/fts5_aux.c \
$(TOP)/ext/fts5/fts5_buffer.c \
$(TOP)/ext/fts5/fts5_main.c \
$(TOP)/ext/fts5/fts5_config.c \
$(TOP)/ext/fts5/fts5_expr.c \
$(TOP)/ext/fts5/fts5_hash.c \
$(TOP)/ext/fts5/fts5_index.c \
fts5parse.c fts5parse.h \
$(TOP)/ext/fts5/fts5_storage.c \
$(TOP)/ext/fts5/fts5_tokenize.c \
$(TOP)/ext/fts5/fts5_unicode2.c \
$(TOP)/ext/fts5/fts5_varint.c \
$(TOP)/ext/fts5/fts5_vocab.c \
fts5parse.c: $(TOP)/ext/fts5/fts5parse.y lemon
cp $(TOP)/ext/fts5/fts5parse.y .
rm -f fts5parse.h
@ -662,11 +667,10 @@ fts5parse.c: $(TOP)/ext/fts5/fts5parse.y lemon
fts5parse.h: fts5parse.c
fts5.c: $(FTS5_SRC)
fts5.c: $(FTS5_SRC) $(FTS5_HDR)
tclsh $(TOP)/ext/fts5/tool/mkfts5c.tcl
cp $(TOP)/ext/fts5/fts5.h .
userauth.o: $(TOP)/ext/userauth/userauth.c $(HDR) $(EXTHDR)
$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/userauth/userauth.c

View File

@ -1,5 +1,5 @@
C Merge\senhancements\sfrom\strunk.
D 2015-09-03T14:18:12.100
C Merge\strunk\senhancements,\sand\sespeically\sthe\sfix\sfor\sallowing\sstrings\s\nas\scolumn\sidentifers\sin\sCREATE\sINDEX\sstatements.
D 2015-09-04T13:02:21.922
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in f85066ce844a28b671aaeeff320921cd0ce36239
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@ -104,34 +104,34 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c
F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7
F ext/fts3/unicode/mkunicode.tcl 95cf7ec186e48d4985e433ff8a1c89090a774252
F ext/fts3/unicode/parseunicode.tcl da577d1384810fb4e2b209bf3313074353193e95
F ext/fts5/extract_api_docs.tcl 06583c935f89075ea0b32f85efa5dd7619fcbd03
F ext/fts5/fts5.h 1950ec0544de667a24c1d8af9b2fde5db7db3bc9
F ext/fts5/fts5Int.h 45f2ceb3c030f70e2cc4c199e9f700c2f2367f77
F ext/fts5/fts5_aux.c 044cb176a815f4388308738437f6e130aa384fb0
F ext/fts5/extract_api_docs.tcl a36e54ec777172ddd3f9a88daf593b00848368e0
F ext/fts5/fts5.h f04659e0df5af83731b102189a32280f74f4a6bc
F ext/fts5/fts5Int.h f65d41f66accad0a289d6bd66b13c07d2932f9be
F ext/fts5/fts5_aux.c 7a307760a9c57c750d043188ec0bad59f5b5ec7e
F ext/fts5/fts5_buffer.c 80f9ba4431848cb857e3d2158f5280093dcd8015
F ext/fts5/fts5_config.c fdfa63ae8e527ecfaa50f94063c610429cc887cf
F ext/fts5/fts5_expr.c d075d36c84975a1cfcf070442d28e28027b61c25
F ext/fts5/fts5_config.c 80b61fd2c6844b64a3e72a64572d50a812da9384
F ext/fts5/fts5_expr.c 1c24e1a2ffb286bfe37e537a43b7fadabfe993d4
F ext/fts5/fts5_hash.c 4bf4b99708848357b8a2b5819e509eb6d3df9246
F ext/fts5/fts5_index.c 076c4995bf06a6d1559a6e31f9a86b90f2105374
F ext/fts5/fts5_main.c fc47ad734dfb55765b7542a345cee981170e7caa
F ext/fts5/fts5_storage.c 22ec9b5d35a39e2b5b65daf4ba7cd47fbb2d0df5
F ext/fts5/fts5_tcl.c 96a3b9e982c4a64a242eefd752fa6669cd405a67
F ext/fts5/fts5_test_mi.c 80a9e86fb4c5b6b58f8fefac05e9b96d1a6574e1
F ext/fts5/fts5_tokenize.c 2836f6728bd74c7efac7487f5d9c27ca3e1b509c
F ext/fts5/fts5_index.c 950e37028cc81ae21534819e79c73aea7efa6c8e
F ext/fts5/fts5_main.c e9d0892424bb7f0a8b58613d4ff75cb650cf286e
F ext/fts5/fts5_storage.c 120f7b143688b5b7710dacbd48cff211609b8059
F ext/fts5/fts5_tcl.c 6da58d6e8f42a93c4486b5ba9b187a7f995dee37
F ext/fts5/fts5_test_mi.c e96be827aa8f571031e65e481251dc1981d608bf
F ext/fts5/fts5_tokenize.c f380f46f341af9c9a9908e1aade685ba1eaa157a
F ext/fts5/fts5_unicode2.c 78273fbd588d1d9bd0a7e4e0ccc9207348bae33c
F ext/fts5/fts5_varint.c 3f86ce09cab152e3d45490d7586b7ed2e40c13f1
F ext/fts5/fts5_vocab.c 4622e0b7d84a488a1585aaa56eb214ee67a988bc
F ext/fts5/fts5parse.y 833db1101b78c0c47686ab1b84918e38c36e9452
F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba
F ext/fts5/test/fts5_common.tcl 3338968de1880ca12b0451ae8f9b8b12d14e0ba7
F ext/fts5/test/fts5aa.test c6e680a0d1b6c2616a382f1006d5d91eca697bd0
F ext/fts5/test/fts5_common.tcl b6e6a40ef5d069c8e86ca4fbad491e1195485dbc
F ext/fts5/test/fts5aa.test f558e1e5ccffa75d69e9a4814245d468ec6b6608
F ext/fts5/test/fts5ab.test 6fe3a56731d15978afbb74ae51b355fc9310f2ad
F ext/fts5/test/fts5ac.test 9737992d08c56bfd4803e933744d2d764e23795c
F ext/fts5/test/fts5ad.test b2edee8b7de0c21d2c88f8a18c195034aad6952d
F ext/fts5/test/fts5ae.test 0a9984fc3479f89f8c63d9848d6ed0c465dfcebe
F ext/fts5/test/fts5af.test c2501ec2b61d6b179c305f5d2b8782ab3d4f832a
F ext/fts5/test/fts5ag.test ec3e119b728196620a31507ef503c455a7a73505
F ext/fts5/test/fts5ah.test b9e78fa986a7bd564ebadfb244de02c84d7ac3ae
F ext/fts5/test/fts5ah.test e592c4978622dbc4de552cd0f9395df60ac5d54c
F ext/fts5/test/fts5ai.test f20e53bbf0c55bc596f1fd47f2740dae028b8f37
F ext/fts5/test/fts5aj.test 05b569f5c16ea3098fb1984eec5cf50dbdaae5d8
F ext/fts5/test/fts5ak.test 7b8c5df96df599293f920b7e5521ebc79f647592
@ -141,7 +141,7 @@ F ext/fts5/test/fts5auto.test caa5bcf917db11944655a2a9bd38c67c520376ca
F ext/fts5/test/fts5aux.test 8c687c948cc98e9a94be014df7d518acc1b3b74f
F ext/fts5/test/fts5auxdata.test 141a7cbffcceb1bd2799b4b29c183ff8780d586e
F ext/fts5/test/fts5bigpl.test 04ee0d7eebbebf17c31f5a0b5c5f9494eac3a0cb
F ext/fts5/test/fts5columnsize.test 97dc6bd66c91009d00407aa078dd5e9e8eb22f99
F ext/fts5/test/fts5columnsize.test a8cfef21ffa1c264b9f670a7d94eeaccb5341c07
F ext/fts5/test/fts5config.test ad2ff42ddc856aed2d05bf89dc1c578c8a39ea3b
F ext/fts5/test/fts5content.test 9a952c95518a14182dc3b59e3c8fa71cda82a4e1
F ext/fts5/test/fts5corrupt.test 928c9c91d40690d301f943a7ed0ffc19e0d0e7b6
@ -149,18 +149,19 @@ F ext/fts5/test/fts5corrupt2.test 1a830ccd6dbe1b601c7e3f5bbc1cf77bd8c8803b
F ext/fts5/test/fts5corrupt3.test 1ccf575f5126e79f9fec7979fd02a1f40a076be3
F ext/fts5/test/fts5dlidx.test 59b80bbe34169a082c575d9c26f0a7019a7b79c1
F ext/fts5/test/fts5doclist.test 8edb5b57e5f144030ed74ec00ef6fa4294fed79b
F ext/fts5/test/fts5ea.test 451bb37310ee6df8ef72e4354fda5621b3b51448
F ext/fts5/test/fts5eb.test 46f49497edc25ef3b2bff9fb6d75b6d201e2b39e
F ext/fts5/test/fts5ea.test b01e3a18cdfabbff8104a96a5242a06a68a998a0
F ext/fts5/test/fts5eb.test 3e5869af2008cbc4ad03a175a0b6f6e58134cd43
F ext/fts5/test/fts5fault1.test 7a562367cb4a735b57b410dbdb62dcc8d971faec
F ext/fts5/test/fts5fault2.test 28c36c843bb39ae855ba79827417ecc37f114341
F ext/fts5/test/fts5fault3.test d6e9577d4312e331a913c72931bf131704efc8f3
F ext/fts5/test/fts5fault4.test 762991d526ee67c2b374351a17248097ea38bee7
F ext/fts5/test/fts5fault5.test 54da9fd4c3434a1d4f6abdcb6469299d91cf5875
F ext/fts5/test/fts5fault6.test 234dc6355f8d3f8b5be2763f30699d770247c215
F ext/fts5/test/fts5fault6.test 97bce1a36b7a64e3203fea504ae8e5cfd5ada423
F ext/fts5/test/fts5fault7.test f4a9b796f8b20c78ec7cf9f4e11144d15d7c3fd4
F ext/fts5/test/fts5full.test 6f6143af0c6700501d9fd597189dfab1555bb741
F ext/fts5/test/fts5hash.test 42eb066f667e9a389a63437cb7038c51974d4fc6
F ext/fts5/test/fts5integrity.test 29f41d2c7126c6122fbb5d54e556506456876145
F ext/fts5/test/fts5matchinfo.test ee6e7b130096c708c12049fa9c1ceb628954c4f9
F ext/fts5/test/fts5matchinfo.test 2163b0013e824bba65499da9e34ea4da41349cc2
F ext/fts5/test/fts5merge.test 8f3cdba2ec9c5e7e568246e81b700ad37f764367
F ext/fts5/test/fts5near.test b214cddb1c1f1bddf45c75af768f20145f7e71cc
F ext/fts5/test/fts5optimize.test 42741e7c085ee0a1276140a752d4407d97c2c9f5
@ -172,6 +173,7 @@ F ext/fts5/test/fts5rank.test 11dcebba31d822f7e99685b4ea2c2ae3ec0b16f1
F ext/fts5/test/fts5rebuild.test 03935f617ace91ed23a6099c7c74d905227ff29b
F ext/fts5/test/fts5restart.test c17728fdea26e7d0f617d22ad5b4b2862b994c17
F ext/fts5/test/fts5rowid.test 6f9833b23b176dc4aa15b7fc02afeb2b220fd460
F ext/fts5/test/fts5synonym.test cf88c0a56d5ea9591e3939ef1f6e294f7f2d0671
F ext/fts5/test/fts5tokenizer.test ea4df698b35cc427ebf2ba22829d0e28386d8c89
F ext/fts5/test/fts5unicode.test fbef8d8a3b4b88470536cc57604a82ca52e51841
F ext/fts5/test/fts5unicode2.test c1dd890ba32b7609adba78e420faa847abe43b59
@ -181,7 +183,7 @@ F ext/fts5/test/fts5version.test 205beb2a67d9496af64df959e6a19238f69b83e8
F ext/fts5/test/fts5vocab.test cdf97b9678484e9bad5062edf9c9106e5c3b0c5c
F ext/fts5/tool/loadfts5.tcl 95edf0b6b92a09f9ed85595038b1108127987556
F ext/fts5/tool/mkfts5c.tcl 5745072c7de346e18c7f491e4c3281fe8a1cfe51
F ext/fts5/tool/showfts5.tcl fb62e8eae6d862afdd22f367e286fb886d5e1ab6
F ext/fts5/tool/showfts5.tcl 9eaf6c3df352f98a2ab5ce1921dd94128ab1381d
F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43
F ext/icu/icu.c b2732aef0b076e4276d9b39b5a33cec7a05e1413
F ext/icu/sqliteicu.h 728867a802baa5a96de7495e9689a8e01715ef37
@ -258,7 +260,7 @@ F ext/userauth/userauth.c 5fa3bdb492f481bbc1709fc83c91ebd13460c69e
F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 x
F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8
F magic.txt 8273bf49ba3b0c8559cb2774495390c31fd61c60
F main.mk 8da13ed011a7ae19450b7554910ff4afa3bd22b7
F main.mk 61821e43596648bfacce2d6283377bee35986131
F mkopcodec.awk c2ff431854d702cdd2d779c9c0d1f58fa16fa4ea
F mkopcodeh.awk 0e7f04a8eb90f92259e47d80110e4e98d7ce337a
F mkso.sh fd21c06b063bb16a5d25deea1752c2da6ac3ed83
@ -282,7 +284,7 @@ F src/btmutex.c 45a968cc85afed9b5e6cf55bf1f42f8d18107f79
F src/btree.c 4084d9eed2817331f6e6a82230ba30e448cad497
F src/btree.h 969adc948e89e449220ff0ff724c94bb2a52e9f1
F src/btreeInt.h 8177c9ab90d772d6d2c6c517e05bed774b7c92c0
F src/build.c 77da53936388346bc5864eab54066c6f3988770a
F src/build.c 5566b3410080a54e5c302c55d3de53fd080cfc7d
F src/callback.c 7b44ce59674338ad48b0e84e7b72f935ea4f68b0
F src/complete.c addcd8160b081131005d5bc2d34adf20c1c5c92f
F src/ctime.c 5a0b735dc95604766f5dac73973658eef782ee8b
@ -324,12 +326,12 @@ F src/os_setup.h c9d4553b5aaa6f73391448b265b89bed0b890faa
F src/os_unix.c 76f493ed71c4154338049dee1bf6e47f69c74a55
F src/os_win.c 40b3af7a47eb1107d0d69e592bec345a3b7b798a
F src/os_win.h eb7a47aa17b26b77eb97e4823f20a00b8bda12ca
F src/pager.c aa916ca28606ccf4b6877dfc2b643ccbca86589f
F src/pager.c 4784012f80b2197c61ff6eaf4f5c7026d93253fd
F src/pager.h 6d435f563b3f7fcae4b84433b76a6ac2730036e2
F src/parse.y f599aa5e871a493330d567ced93de696f61f48f7
F src/pcache.c cde06aa50962595e412d497e22fd2e07878ba1f0
F src/pcache.c 24be750c79272e0ca7b6e007bc94999700f3e5ef
F src/pcache.h 9968603796240cdf83da7e7bef76edf90619cea9
F src/pcache1.c b31af9dbc83b9c68e87d681b8453a9605f28e734
F src/pcache1.c bf2afe64a3dedb8643c8dcbd94a145cc80ab2a67
F src/pragma.c d71b813e67bf03f3116b9dd5164fbfd81ec673a2
F src/pragma.h 631a91c8b0e6ca8f051a1d8a4a0da4150e04620a
F src/prepare.c 82e5db1013846a819f198336fed72c44c974e7b1
@ -773,8 +775,8 @@ F test/incrvacuum2.test 676c41428765d58f1da7dbe659ef27726d3d30ac
F test/incrvacuum3.test 75256fb1377e7c39ef2de62bfc42bbff67be295a
F test/incrvacuum_ioerr.test 6ae2f783424e47a0033304808fe27789cf93e635
F test/index.test fe3c7a1aad82af92623747e9c3f3aa94ccd51238
F test/index2.test ee83c6b5e3173a3d7137140d945d9a5d4fdfb9d6
F test/index3.test b6ec456cf3b81d9a32123fe7e449bde434db338b
F test/index2.test f835d5e13ca163bd78c4459ca15fd2e4ed487407
F test/index3.test fa3e49bbaa4f38091c9c742e36a1abe67c4ef1fc
F test/index4.test ab92e736d5946840236cd61ac3191f91a7856bf6
F test/index5.test 8621491915800ec274609e42e02a97d67e9b13e7
F test/index6.test 7102ec371414c42dfb1d5ca37eb4519aa9edc23a
@ -1287,10 +1289,10 @@ F test/walro.test 34422d1d95aaff0388f0791ec20edb34e2a3ed57
F test/walshared.test 0befc811dcf0b287efae21612304d15576e35417
F test/walslow.test e7be6d9888f83aa5d3d3c7c08aa9b5c28b93609a
F test/walthread.test de8dbaf6d9e41481c460ba31ca61e163d7348f8e
F test/where.test 1ff3d9f8da0a6c0dc5ccfd38d9225b2cdb5b6afb
F test/where.test 66d4c107e82dfe86c01a96277b77e7a8809aff0b
F test/where2.test af78c55589cbc82d793449493adba0dc3d659f23
F test/where3.test 1ad55ba900bd7747f98b6082e65bd3e442c5004e
F test/where4.test 68aa5ad796e33816db2078bc0f6de719c7a0e21f
F test/where4.test 44f506bf1737cf0fa4fc795e340208250f1fcd89
F test/where5.test fdf66f96d29a064b63eb543e28da4dfdccd81ad2
F test/where6.test 5da5a98cec820d488e82708301b96cb8c18a258b
F test/where7.test 5a4b0abc207d71da4deecd734ad8579e8dd40aa8
@ -1381,7 +1383,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
P ff5137a6dd8cb2a9b629b3a244f52665e9c9ebce 847387ec8e6fef283899578fb232b2c23b00ee5b
R e19b7f210220f2833a87a0bc81a47608
P 1ab10cbf27245961b40eda1ce70f35646f0a9966 3d3df79bfaf9dbc7aa711c08a19d2f6dbe744b32
R 46fafd1e16150312779d43bb23b81793
U drh
Z 58f787bccb7c921ddef6f60f047a7162
Z 1a60e3fe37bc43a63ce49e32494691dc

View File

@ -1 +1 @@
1ab10cbf27245961b40eda1ce70f35646f0a9966
5ff855293865c244ac632c630e8e7e8d7c05a5f6

View File

@ -2850,6 +2850,30 @@ Index *sqlite3AllocateIndexObject(
return p;
}
/*
** Backwards Compatibility Hack:
**
** Historical versions of SQLite accepted strings as column names in
** indexes and PRIMARY KEY constraints and in UNIQUE constraints. Example:
**
** CREATE TABLE xyz(a,b,c,d,e,PRIMARY KEY('a'),UNIQUE('b','c' COLLATE trim));
** CREATE INDEX abc ON xyz('c','d' DESC,'e' COLLATE nocase DESC);
**
** This is goofy. But to preserve backwards compatibility we continue to
** accept it. This routine does the necessary conversion. It converts
** the expression given in its argument from a TK_STRING into a TK_ID
** if the expression is just a TK_STRING with an optional COLLATE clause.
** If the expression is anything other than TK_STRING, the expression is
** unchanged.
*/
static void sqlite3StringToId(Expr *p){
if( p->op==TK_STRING ){
p->op = TK_ID;
}else if( p->op==TK_COLLATE && p->pLeft->op==TK_STRING ){
p->pLeft->op = TK_ID;
}
}
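
The effect on a single index term can be pictured as follows (a hypothetical parse
tree, using the TK_* constants from the routine above):

  /*  CREATE INDEX abc ON xyz('c' COLLATE nocase);
  **
  **    before:   TK_COLLATE          after sqlite3StringToId():   TK_COLLATE
  **                  |                                                |
  **              TK_STRING 'c'                                     TK_ID "c"
  */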
/*
** Create a new index for an SQL table. pName1.pName2 is the name of the index
** and pTblList is the name of the table that is to be indexed. Both will
@ -3118,6 +3142,7 @@ Index *sqlite3CreateIndex(
int requestedSortOrder; /* ASC or DESC on the i-th expression */
char *zColl; /* Collation sequence name */
sqlite3StringToId(pListItem->pExpr);
sqlite3ResolveSelfReference(pParse, pTab, NC_IdxExpr, pListItem->pExpr, 0);
if( pParse->nErr ) goto exit_create_index;
pCExpr = sqlite3ExprSkipCollate(pListItem->pExpr);

View File

@ -647,7 +647,7 @@ struct Pager {
u8 doNotSpill; /* Do not spill the cache when non-zero */
u8 subjInMemory; /* True to use in-memory sub-journals */
u8 bUseFetch; /* True to use xFetch() */
u8 hasBeenUsed; /* True if any content previously read */
u8 hasHeldSharedLock; /* True if a shared lock has ever been held */
Pgno dbSize; /* Number of pages in the database */
Pgno dbOrigSize; /* dbSize before the current transaction */
Pgno dbFileSize; /* Number of pages in the database file */
@ -5097,10 +5097,10 @@ int sqlite3PagerSharedLock(Pager *pPager){
);
}
if( !pPager->tempFile && pPager->hasBeenUsed ){
if( !pPager->tempFile && pPager->hasHeldSharedLock ){
/* The shared-lock has just been acquired, so check to
** see if the database has been modified. If the database has changed,
** flush the cache. The pPager->hasBeenUsed flag prevents this from
** flush the cache. The hasHeldSharedLock flag prevents this from
** occurring on the very first access to a file, in order to save a
** single unnecessary sqlite3OsRead() call at the start-up.
**
@ -5170,6 +5170,7 @@ int sqlite3PagerSharedLock(Pager *pPager){
assert( pPager->eState==PAGER_OPEN );
}else{
pPager->eState = PAGER_READER;
pPager->hasHeldSharedLock = 1;
}
return rc;
}
@ -5253,21 +5254,25 @@ int sqlite3PagerAcquire(
** page 1 if there is no write-transaction open or the ACQUIRE_READONLY
** flag was specified by the caller. And so long as the db is not a
** temporary or in-memory database. */
const int bMmapOk = (pgno!=1 && USEFETCH(pPager)
const int bMmapOk = (pgno>1 && USEFETCH(pPager)
&& (pPager->eState==PAGER_READER || (flags & PAGER_GET_READONLY))
#ifdef SQLITE_HAS_CODEC
&& pPager->xCodec==0
#endif
);
/* Optimization note: Adding the "pgno<=1" term before "pgno==0" here
** allows the compiler optimizer to reuse the results of the "pgno>1"
** test in the previous statement, and avoid testing pgno==0 in the
** common case where pgno is large. */
if( pgno<=1 && pgno==0 ){
return SQLITE_CORRUPT_BKPT;
}
assert( pPager->eState>=PAGER_READER );
assert( assert_pager_state(pPager) );
assert( noContent==0 || bMmapOk==0 );
if( pgno==0 ){
return SQLITE_CORRUPT_BKPT;
}
pPager->hasBeenUsed = 1;
assert( pPager->hasHeldSharedLock==1 );
/* If the pager is in the error state, return an error immediately.
** Otherwise, request the page from the PCache layer. */
@ -5422,7 +5427,7 @@ DbPage *sqlite3PagerLookup(Pager *pPager, Pgno pgno){
assert( pgno!=0 );
assert( pPager->pPCache!=0 );
pPage = sqlite3PcacheFetch(pPager->pPCache, pgno, 0);
assert( pPage==0 || pPager->hasBeenUsed );
assert( pPage==0 || pPager->hasHeldSharedLock );
if( pPage==0 ) return 0;
return sqlite3PcacheFetchFinish(pPager->pPCache, pgno, pPage);
}
@ -6389,7 +6394,7 @@ u8 sqlite3PagerIsreadonly(Pager *pPager){
#ifdef SQLITE_DEBUG
/*
** Return the number of references to the pager.
** Return the sum of the reference counts for all pages held by pPager.
*/
int sqlite3PagerRefcount(Pager *pPager){
return sqlite3PcacheRefCount(pPager->pPCache);

View File

@ -19,7 +19,7 @@
struct PCache {
PgHdr *pDirty, *pDirtyTail; /* List of dirty pages in LRU order */
PgHdr *pSynced; /* Last synced page in dirty page list */
int nRef; /* Number of referenced pages */
int nRefSum; /* Sum of ref counts over all pages */
int szCache; /* Configured cache size */
int szPage; /* Size of every page in this cache */
int szExtra; /* Size of extra space for each page */
@ -184,7 +184,7 @@ int sqlite3PcacheOpen(
** are no outstanding page references when this function is called.
*/
int sqlite3PcacheSetPageSize(PCache *pCache, int szPage){
assert( pCache->nRef==0 && pCache->pDirty==0 );
assert( pCache->nRefSum==0 && pCache->pDirty==0 );
if( pCache->szPage ){
sqlite3_pcache *pNew;
pNew = sqlite3GlobalConfig.pcache2.xCreate(
@ -351,9 +351,7 @@ PgHdr *sqlite3PcacheFetchFinish(
if( !pPgHdr->pPage ){
return pcacheFetchFinishWithInit(pCache, pgno, pPage);
}
if( 0==pPgHdr->nRef ){
pCache->nRef++;
}
pCache->nRefSum++;
pPgHdr->nRef++;
return pPgHdr;
}
@ -364,9 +362,8 @@ PgHdr *sqlite3PcacheFetchFinish(
*/
void SQLITE_NOINLINE sqlite3PcacheRelease(PgHdr *p){
assert( p->nRef>0 );
p->nRef--;
if( p->nRef==0 ){
p->pCache->nRef--;
p->pCache->nRefSum--;
if( (--p->nRef)==0 ){
if( p->flags&PGHDR_CLEAN ){
pcacheUnpin(p);
}else if( p->pDirtyPrev!=0 ){
@ -382,6 +379,7 @@ void SQLITE_NOINLINE sqlite3PcacheRelease(PgHdr *p){
void sqlite3PcacheRef(PgHdr *p){
assert(p->nRef>0);
p->nRef++;
p->pCache->nRefSum++;
}
/*
@ -394,7 +392,7 @@ void sqlite3PcacheDrop(PgHdr *p){
if( p->flags&PGHDR_DIRTY ){
pcacheManageDirtyList(p, PCACHE_DIRTYLIST_REMOVE);
}
p->pCache->nRef--;
p->pCache->nRefSum--;
sqlite3GlobalConfig.pcache2.xUnpin(p->pCache->pCache, p->pPage, 1);
}
@ -490,11 +488,11 @@ void sqlite3PcacheTruncate(PCache *pCache, Pgno pgno){
sqlite3PcacheMakeClean(p);
}
}
if( pgno==0 && pCache->nRef ){
if( pgno==0 && pCache->nRefSum ){
sqlite3_pcache_page *pPage1;
pPage1 = sqlite3GlobalConfig.pcache2.xFetch(pCache->pCache,1,0);
if( ALWAYS(pPage1) ){ /* Page 1 is always available in cache, because
** pCache->nRef>0 */
** pCache->nRefSum>0 */
memset(pPage1->pBuf, 0, pCache->szPage);
pgno = 1;
}
@ -600,10 +598,13 @@ PgHdr *sqlite3PcacheDirtyList(PCache *pCache){
}
/*
** Return the total number of referenced pages held by the cache.
** Return the total number of references to all pages held by the cache.
**
** This is not the total number of pages referenced, but the sum of the
** reference count for all pages.
*/
int sqlite3PcacheRefCount(PCache *pCache){
return pCache->nRef;
return pCache->nRefSum;
}
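
A small worked example of the distinction (hypothetical state, not from this change):
if the cache holds two pages with reference counts 3 and 1, sqlite3PcacheRefCount()
now returns 4 (the value of nRefSum), whereas the old nRef field would have reported
2, the number of referenced pages.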
/*

View File

@ -87,6 +87,24 @@ typedef struct PgHdr1 PgHdr1;
typedef struct PgFreeslot PgFreeslot;
typedef struct PGroup PGroup;
/*
** Each cache entry is represented by an instance of the following
** structure. Unless SQLITE_PCACHE_SEPARATE_HEADER is defined, a buffer of
** PgHdr1.pCache->szPage bytes is allocated directly before this structure
** in memory.
*/
struct PgHdr1 {
sqlite3_pcache_page page; /* Base class. Must be first. pBuf & pExtra */
unsigned int iKey; /* Key value (page number) */
u8 isPinned; /* Page in use, not on the LRU list */
u8 isBulkLocal; /* This page from bulk local storage */
u8 isAnchor; /* This is the PGroup.lru element */
PgHdr1 *pNext; /* Next in hash table chain */
PCache1 *pCache; /* Cache that currently owns this page */
PgHdr1 *pLruNext; /* Next in LRU list of unpinned pages */
PgHdr1 *pLruPrev; /* Previous in LRU list of unpinned pages */
};
/* Each page cache (or PCache) belongs to a PGroup. A PGroup is a set
** of one or more PCaches that are able to recycle each other's unpinned
** pages when they are under memory pressure. A PGroup is an instance of
@ -115,7 +133,7 @@ struct PGroup {
unsigned int nMinPage; /* Sum of nMin for purgeable caches */
unsigned int mxPinned; /* nMaxpage + 10 - nMinPage */
unsigned int nCurrentPage; /* Number of purgeable pages allocated */
PgHdr1 *pLruHead, *pLruTail; /* LRU list of unpinned pages */
PgHdr1 lru; /* The beginning and end of the LRU list */
};
/* Each page cache is an instance of the following object. Every
@ -153,23 +171,6 @@ struct PCache1 {
void *pBulk; /* Bulk memory used by pcache-local */
};
/*
** Each cache entry is represented by an instance of the following
** structure. Unless SQLITE_PCACHE_SEPARATE_HEADER is defined, a buffer of
** PgHdr1.pCache->szPage bytes is allocated directly before this structure
** in memory.
*/
struct PgHdr1 {
sqlite3_pcache_page page;
unsigned int iKey; /* Key value (page number) */
u8 isPinned; /* Page in use, not on the LRU list */
u8 isBulkLocal; /* This page from bulk local storage */
PgHdr1 *pNext; /* Next in hash table chain */
PCache1 *pCache; /* Cache that currently owns this page */
PgHdr1 *pLruNext; /* Next in LRU list of unpinned pages */
PgHdr1 *pLruPrev; /* Previous in LRU list of unpinned pages */
};
/*
** Free slots in the allocator used to divide up the global page cache
** buffer provided using the SQLITE_CONFIG_PAGECACHE mechanism.
@ -230,6 +231,7 @@ static SQLITE_WSD struct PCacheGlobal {
/******************************************************************************/
/******** Page Allocation/SQLITE_CONFIG_PCACHE Related Functions **************/
/*
** This function is called during initialization if a static buffer is
** supplied to use for the page-cache by passing the SQLITE_CONFIG_PAGECACHE
@ -289,6 +291,7 @@ static int pcache1InitBulk(PCache1 *pCache){
pX->page.pBuf = zBulk;
pX->page.pExtra = &pX[1];
pX->isBulkLocal = 1;
pX->isAnchor = 0;
pX->pNext = pCache->pFree;
pCache->pFree = pX;
zBulk += pCache->szAlloc;
@ -431,6 +434,7 @@ static PgHdr1 *pcache1AllocPage(PCache1 *pCache, int benignMalloc){
p->page.pBuf = pPg;
p->page.pExtra = &p[1];
p->isBulkLocal = 0;
p->isAnchor = 0;
}
if( pCache->bPurgeable ){
pCache->pGroup->nCurrentPage++;
@ -557,22 +561,16 @@ static PgHdr1 *pcache1PinPage(PgHdr1 *pPage){
assert( pPage!=0 );
assert( pPage->isPinned==0 );
pCache = pPage->pCache;
assert( pPage->pLruNext || pPage==pCache->pGroup->pLruTail );
assert( pPage->pLruPrev || pPage==pCache->pGroup->pLruHead );
assert( pPage->pLruNext );
assert( pPage->pLruPrev );
assert( sqlite3_mutex_held(pCache->pGroup->mutex) );
if( pPage->pLruPrev ){
pPage->pLruPrev->pLruNext = pPage->pLruNext;
}else{
pCache->pGroup->pLruHead = pPage->pLruNext;
}
if( pPage->pLruNext ){
pPage->pLruNext->pLruPrev = pPage->pLruPrev;
}else{
pCache->pGroup->pLruTail = pPage->pLruPrev;
}
pPage->pLruPrev->pLruNext = pPage->pLruNext;
pPage->pLruNext->pLruPrev = pPage->pLruPrev;
pPage->pLruNext = 0;
pPage->pLruPrev = 0;
pPage->isPinned = 1;
assert( pPage->isAnchor==0 );
assert( pCache->pGroup->lru.isAnchor==1 );
pCache->nRecyclable--;
return pPage;
}
@ -605,9 +603,11 @@ static void pcache1RemoveFromHash(PgHdr1 *pPage, int freeFlag){
*/
static void pcache1EnforceMaxPage(PCache1 *pCache){
PGroup *pGroup = pCache->pGroup;
PgHdr1 *p;
assert( sqlite3_mutex_held(pGroup->mutex) );
while( pGroup->nCurrentPage>pGroup->nMaxPage && pGroup->pLruTail ){
PgHdr1 *p = pGroup->pLruTail;
while( pGroup->nCurrentPage>pGroup->nMaxPage
&& (p=pGroup->lru.pLruPrev)->isAnchor==0
){
assert( p->pCache->pGroup==pGroup );
assert( p->isPinned==0 );
pcache1PinPage(p);
@ -741,6 +741,10 @@ static sqlite3_pcache *pcache1Create(int szPage, int szExtra, int bPurgeable){
}else{
pGroup = &pcache1.grp;
}
if( pGroup->lru.isAnchor==0 ){
pGroup->lru.isAnchor = 1;
pGroup->lru.pLruPrev = pGroup->lru.pLruNext = &pGroup->lru;
}
pCache->pGroup = pGroup;
pCache->szPage = szPage;
pCache->szExtra = szExtra;
@ -848,11 +852,11 @@ static SQLITE_NOINLINE PgHdr1 *pcache1FetchStage2(
/* Step 4. Try to recycle a page. */
if( pCache->bPurgeable
&& pGroup->pLruTail
&& !pGroup->lru.pLruPrev->isAnchor
&& ((pCache->nPage+1>=pCache->nMax) || pcache1UnderMemoryPressure(pCache))
){
PCache1 *pOther;
pPage = pGroup->pLruTail;
pPage = pGroup->lru.pLruPrev;
assert( pPage->isPinned==0 );
pcache1RemoveFromHash(pPage, 0);
pcache1PinPage(pPage);
@ -961,7 +965,10 @@ static PgHdr1 *pcache1FetchNoMutex(
pPage = pCache->apHash[iKey % pCache->nHash];
while( pPage && pPage->iKey!=iKey ){ pPage = pPage->pNext; }
/* Step 2: Abort if no existing page is found and createFlag is 0 */
/* Step 2: If the page was found in the hash table, then return it.
** If the page was not in the hash table and createFlag is 0, abort.
** Otherwise (page not in hash and createFlag!=0) continue with
** subsequent steps to try to create the page. */
if( pPage ){
if( !pPage->isPinned ){
return pcache1PinPage(pPage);
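
The three-way outcome spelled out in the expanded comment above (hit, miss with createFlag==0, miss with createFlag!=0) is the usual lookup-or-create pattern for a chained hash table keyed by page number. A compact sketch of that pattern, using illustrative Demo* names rather than the real pcache1 types:

/* Lookup-or-create against a chained hash table keyed by page number.
** Illustrative sketch only; DemoEntry/DemoCache are not the real pcache1 types. */
#include <stdlib.h>

typedef struct DemoEntry DemoEntry;
struct DemoEntry {
  unsigned int iKey;        /* key (page number) */
  DemoEntry *pNext;         /* next entry in the same hash bucket */
};

typedef struct DemoCache DemoCache;
struct DemoCache {
  unsigned int nHash;       /* number of hash buckets */
  DemoEntry **apHash;       /* array of bucket list heads */
};

DemoEntry *demoFetch(DemoCache *pCache, unsigned int iKey, int createFlag){
  DemoEntry *p = pCache->apHash[iKey % pCache->nHash];
  while( p && p->iKey!=iKey ) p = p->pNext;   /* search the hash chain */
  if( p ) return p;                           /* found: return the entry */
  if( !createFlag ) return 0;                 /* not found, creation not allowed */
  p = malloc(sizeof(*p));                     /* not found: create a new entry */
  if( p ){
    p->iKey = iKey;
    p->pNext = pCache->apHash[iKey % pCache->nHash];
    pCache->apHash[iKey % pCache->nHash] = p;
  }
  return p;
}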
@ -1038,21 +1045,16 @@ static void pcache1Unpin(
** part of the PGroup LRU list.
*/
assert( pPage->pLruPrev==0 && pPage->pLruNext==0 );
assert( pGroup->pLruHead!=pPage && pGroup->pLruTail!=pPage );
assert( pPage->isPinned==1 );
if( reuseUnlikely || pGroup->nCurrentPage>pGroup->nMaxPage ){
pcache1RemoveFromHash(pPage, 1);
}else{
/* Add the page to the PGroup LRU list. */
if( pGroup->pLruHead ){
pGroup->pLruHead->pLruPrev = pPage;
pPage->pLruNext = pGroup->pLruHead;
pGroup->pLruHead = pPage;
}else{
pGroup->pLruTail = pPage;
pGroup->pLruHead = pPage;
}
PgHdr1 **ppFirst = &pGroup->lru.pLruNext;
pPage->pLruPrev = &pGroup->lru;
(pPage->pLruNext = *ppFirst)->pLruPrev = pPage;
*ppFirst = pPage;
pCache->nRecyclable++;
pPage->isPinned = 0;
}
@ -1190,7 +1192,9 @@ int sqlite3PcacheReleaseMemory(int nReq){
if( sqlite3GlobalConfig.nPage==0 ){
PgHdr1 *p;
pcache1EnterMutex(&pcache1.grp);
while( (nReq<0 || nFree<nReq) && ((p=pcache1.grp.pLruTail)!=0) ){
while( (nReq<0 || nFree<nReq)
&& (p=pcache1.grp.lru.pLruPrev)->isAnchor==0
){
nFree += pcache1MemSize(p->page.pBuf);
#ifdef SQLITE_PCACHE_SEPARATE_HEADER
nFree += sqlite3MemSize(p);
@ -1218,7 +1222,7 @@ void sqlite3PcacheStats(
){
PgHdr1 *p;
int nRecyclable = 0;
for(p=pcache1.grp.pLruHead; p; p=p->pLruNext){
for(p=pcache1.grp.lru.pLruNext; !p->isAnchor; p=p->pLruNext){
assert( p->isPinned==0 );
nRecyclable++;
}

View File

@ -1,4 +1,4 @@
# 2005 January 11
# 2005-01-11
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
@ -11,7 +11,6 @@
# This file implements regression tests for SQLite library. The
# focus of this file is testing the CREATE INDEX statement.
#
# $Id: index2.test,v 1.3 2006/03/03 19:12:30 drh Exp $
set testdir [file dirname $argv0]
source $testdir/tester.tcl

View File

@ -1,4 +1,4 @@
# 2005 February 14
# 2005-02-14
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
@ -11,7 +11,6 @@
# This file implements regression tests for SQLite library. The
# focus of this file is testing the CREATE INDEX statement.
#
# $Id: index3.test,v 1.3 2008/03/19 13:03:34 drh Exp $
set testdir [file dirname $argv0]
@ -40,17 +39,42 @@ do_test index3-1.3 {
} {0 {}}
integrity_check index3-1.4
# Backwards compatibility test:
#
# Verify that CREATE INDEX statements that use strings instead of
# identifiers for the column names continue to work correctly.
# This is undocumented behavior retained for backwards compatibility.
#
do_execsql_test index3-2.1 {
DROP TABLE t1;
CREATE TABLE t1(a, b, c, d, e,
PRIMARY KEY('a'), UNIQUE('b' COLLATE nocase DESC));
CREATE INDEX t1c ON t1('c');
CREATE INDEX t1d ON t1('d' COLLATE binary ASC);
WITH RECURSIVE c(x) AS (VALUES(1) UNION SELECT x+1 FROM c WHERE x<30)
INSERT INTO t1(a,b,c,d,e)
SELECT x, printf('ab%03xxy',x), x, x, x FROM c;
} {}
do_execsql_test index3-2.2 {
SELECT a FROM t1 WHERE b='ab005xy' COLLATE nocase;
} {5}
do_execsql_test index3-2.2eqp {
EXPLAIN QUERY PLAN
SELECT a FROM t1 WHERE b='ab005xy' COLLATE nocase;
} {/USING INDEX/}
# This test corrupts the database file so it must be the last test
# in the series.
#
do_test index3-99.1 {
execsql {
PRAGMA writable_schema=on;
UPDATE sqlite_master SET sql='nonsense';
UPDATE sqlite_master SET sql='nonsense' WHERE name='t1d'
}
db close
catch { sqlite3 db test.db }
catchsql { DROP INDEX i1 }
} {1 {malformed database schema (t1)}}
catchsql { DROP INDEX t1c }
} {1 {malformed database schema (t1d)}}
finish_test

View File

@ -42,8 +42,8 @@ do_test where-1.0 {
}
execsql {
CREATE INDEX i1w ON t1(w);
CREATE INDEX i1xy ON t1(x,y);
CREATE INDEX i1w ON t1("w"); -- Verify quoted identifier names
CREATE INDEX i1xy ON t1(`x`,'y' ASC); -- Old MySQL compatibility
CREATE INDEX i2p ON t2(p);
CREATE INDEX i2r ON t2(r);
CREATE INDEX i2qs ON t2(q, s);

View File

@ -136,7 +136,7 @@ do_test where4-3.1 {
INSERT INTO t2 VALUES(1);
INSERT INTO t2 VALUES(2);
INSERT INTO t2 VALUES(3);
CREATE TABLE t3(x,y,UNIQUE(x,y));
CREATE TABLE t3(x,y,UNIQUE("x",'y' ASC)); -- Goofy syntax allowed
INSERT INTO t3 VALUES(1,11);
INSERT INTO t3 VALUES(2,NULL);
@ -200,7 +200,8 @@ do_test where4-4.4 {
ifcapable subquery {
do_test where4-5.1 {
execsql {
CREATE TABLE t4(x,y,z,PRIMARY KEY(x,y));
-- Allow the 'x' syntax for backwards compatibility
CREATE TABLE t4(x,y,z,PRIMARY KEY('x' ASC, "y" ASC));
}
execsql {
SELECT *
@ -304,4 +305,3 @@ do_execsql_test 8.2 { SELECT * FROM u9 WHERE a IS $null } {{} 1 {} 2}
finish_test