2009-11-13 13:36:20 +03:00
|
|
|
/*
|
|
|
|
** 2009 Nov 12
|
|
|
|
**
|
|
|
|
** The author disclaims copyright to this source code. In place of
|
|
|
|
** a legal notice, here is a blessing:
|
|
|
|
**
|
|
|
|
** May you do good and not evil.
|
|
|
|
** May you find forgiveness for yourself and forgive others.
|
|
|
|
** May you share freely, never taking more than you give.
|
|
|
|
**
|
|
|
|
******************************************************************************
|
|
|
|
**
|
|
|
|
*/
|
|
|
|
#ifndef _FTSINT_H
|
|
|
|
#define _FTSINT_H
|
|
|
|
|
2009-11-14 14:41:00 +03:00
|
|
|
#if !defined(NDEBUG) && !defined(SQLITE_DEBUG)
|
|
|
|
# define NDEBUG 1
|
|
|
|
#endif
|
|
|
|
|
2016-02-12 01:41:04 +03:00
|
|
|
/* FTS3/FTS4 require virtual tables */
|
|
|
|
#ifdef SQLITE_OMIT_VIRTUALTABLE
|
|
|
|
# undef SQLITE_ENABLE_FTS3
|
|
|
|
# undef SQLITE_ENABLE_FTS4
|
|
|
|
#endif
|
|
|
|
|
2011-06-16 04:54:45 +04:00
|
|
|
/*
|
|
|
|
** FTS4 is really an extension for FTS3. It is enabled using the
|
|
|
|
** SQLITE_ENABLE_FTS3 macro. But to avoid confusion we also allow
|
|
|
|
** the SQLITE_ENABLE_FTS4 macro to serve as an alias for SQLITE_ENABLE_FTS3.
|
|
|
|
*/
|
|
|
|
#if defined(SQLITE_ENABLE_FTS4) && !defined(SQLITE_ENABLE_FTS3)
|
|
|
|
# define SQLITE_ENABLE_FTS3
|
|
|
|
#endif
|
|
|
|
|
2011-06-28 11:15:43 +04:00
|
|
|
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
|
|
|
|
|
|
|
|
/* If not building as part of the core, include sqlite3ext.h. */
|
|
|
|
#ifndef SQLITE_CORE
|
|
|
|
# include "sqlite3ext.h"
|
2013-07-05 03:53:56 +04:00
|
|
|
SQLITE_EXTENSION_INIT3
|
2011-06-28 11:15:43 +04:00
|
|
|
#endif
|
|
|
|
|
2009-12-01 20:08:09 +03:00
|
|
|
#include "sqlite3.h"
|
2009-11-13 13:36:20 +03:00
|
|
|
#include "fts3_tokenizer.h"
|
|
|
|
#include "fts3_hash.h"
|
|
|
|
|
2013-06-11 18:22:11 +04:00
|
|
|
/*
|
|
|
|
** This constant determines the maximum depth of an FTS expression tree
|
|
|
|
** that the library will create and use. FTS uses recursion to perform
|
|
|
|
** various operations on the query tree, so the disadvantage of a large
|
|
|
|
** limit is that it may allow very large queries to use large amounts
|
|
|
|
** of stack space (perhaps causing a stack overflow).
|
|
|
|
*/
|
|
|
|
#ifndef SQLITE_FTS3_MAX_EXPR_DEPTH
|
|
|
|
# define SQLITE_FTS3_MAX_EXPR_DEPTH 12
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
2009-11-13 13:36:20 +03:00
|
|
|
/*
|
|
|
|
** This constant controls how often segments are merged. Once there are
|
|
|
|
** FTS3_MERGE_COUNT segments of level N, they are merged into a single
|
|
|
|
** segment of level N+1.
|
|
|
|
*/
|
|
|
|
#define FTS3_MERGE_COUNT 16
|
|
|
|
|
|
|
|
/*
|
|
|
|
** This is the maximum amount of data (in bytes) to store in the
|
|
|
|
** Fts3Table.pendingTerms hash table. Normally, the hash table is
|
|
|
|
** populated as documents are inserted/updated/deleted in a transaction
|
|
|
|
** and used to create a new segment when the transaction is committed.
|
|
|
|
** However if this limit is reached midway through a transaction, a new
|
|
|
|
** segment is created and the hash table cleared immediately.
|
|
|
|
*/
|
|
|
|
#define FTS3_MAX_PENDING_DATA (1*1024*1024)
|
|
|
|
|
|
|
|
/*
** Macro to return the number of elements in an array. SQLite has a
** similar macro called ArraySize(). Use a different name to avoid
** a collision when building an amalgamation with built-in FTS3.
**
** X must be an actual array object (not a pointer). The argument is
** only used inside sizeof, so it is never evaluated. The result is
** cast to int.
*/
#define SizeofArray(X) ((int)(sizeof(X)/sizeof((X)[0])))
|
|
|
|
|
2011-06-02 23:57:24 +04:00
|
|
|
|
|
|
|
/*
** Minimum and maximum of two values. Each macro is defined here only if
** the including code has not already supplied one. The selected argument
** is evaluated twice, so arguments must be free of side effects.
*/
#ifndef MIN
# define MIN(x,y) ((x)<(y)?(x):(y))
#endif
#ifndef MAX
# define MAX(x,y) ((x)>(y)?(x):(y))
#endif
|
2011-06-02 23:57:24 +04:00
|
|
|
|
2009-11-13 13:36:20 +03:00
|
|
|
/*
|
|
|
|
** Maximum length of a varint encoded integer. The varint format is different
|
|
|
|
** from that used by SQLite, so the maximum length is 10, not 9.
|
|
|
|
*/
|
|
|
|
#define FTS3_VARINT_MAX 10
|
|
|
|
|
2011-05-24 22:49:45 +04:00
|
|
|
/*
|
2011-05-25 22:34:53 +04:00
|
|
|
** FTS4 virtual tables may maintain multiple indexes - one index of all terms
|
|
|
|
** in the document set and zero or more prefix indexes. All indexes are stored
|
|
|
|
** as one or more b+-trees in the %_segments and %_segdir tables.
|
|
|
|
**
|
|
|
|
** It is possible to determine which index a b+-tree belongs to based on the
|
|
|
|
** value stored in the "%_segdir.level" column. Given this value L, the index
|
|
|
|
** that the b+-tree belongs to is (L>>10). In other words, all b+-trees with
|
|
|
|
** level values between 0 and 1023 (inclusive) belong to index 0, all levels
|
|
|
|
** between 1024 and 2047 to index 1, and so on.
|
2011-05-24 22:49:45 +04:00
|
|
|
**
|
2011-05-25 22:34:53 +04:00
|
|
|
** It is considered impossible for an index to use more than 1024 levels. In
|
|
|
|
** theory though this may happen, but only after at least
|
|
|
|
** (FTS3_MERGE_COUNT^1024) separate flushes of the pending-terms tables.
|
2011-05-24 22:49:45 +04:00
|
|
|
*/
|
2011-05-25 22:34:53 +04:00
|
|
|
#define FTS3_SEGDIR_MAXLEVEL 1024
|
|
|
|
#define FTS3_SEGDIR_MAXLEVEL_STR "1024"
|
2011-05-24 22:49:45 +04:00
|
|
|
|
2010-03-23 21:24:06 +03:00
|
|
|
/*
** The testcase() macro is only used by the amalgamation. If undefined,
** make it a no-op. In that case the argument is discarded entirely and
** is therefore never evaluated.
*/
#ifndef testcase
# define testcase(X)
#endif
|
|
|
|
|
|
|
|
/*
|
|
|
|
** Terminator values for position-lists and column-lists.
|
|
|
|
*/
|
|
|
|
#define POS_COLUMN (1) /* Column-list terminator */
|
|
|
|
#define POS_END (0) /* Position-list terminator */
|
|
|
|
|
2009-12-03 09:26:46 +03:00
|
|
|
/*
|
|
|
|
** This section provides definitions to allow the
|
|
|
|
** FTS3 extension to be compiled outside of the
|
|
|
|
** amalgamation.
|
|
|
|
*/
|
|
|
|
#ifndef SQLITE_AMALGAMATION
|
2009-11-30 22:48:16 +03:00
|
|
|
/*
** Macros indicating that conditional expressions are always true or
** false.
**
**   SQLITE_COVERAGE_TEST:  ALWAYS() and NEVER() collapse to the constants
**                          1 and 0; the argument is not evaluated.
**   SQLITE_DEBUG:          The truth value of the argument is passed through
**                          sqlite3Fts3Always() / sqlite3Fts3Never(), which
**                          are declared here and defined elsewhere.
**   otherwise:             Both macros simply yield their argument.
*/
#ifdef SQLITE_COVERAGE_TEST
# define ALWAYS(x) (1)
# define NEVER(x) (0)
#elif defined(SQLITE_DEBUG)
# define ALWAYS(x) sqlite3Fts3Always((x)!=0)
# define NEVER(x) sqlite3Fts3Never((x)!=0)
int sqlite3Fts3Always(int b);
int sqlite3Fts3Never(int b);
#else
# define ALWAYS(x) (x)
# define NEVER(x) (x)
#endif
|
|
|
|
|
2009-12-03 09:26:46 +03:00
|
|
|
/*
|
|
|
|
** Internal types used by SQLite.
|
|
|
|
*/
|
2009-12-01 20:05:50 +03:00
|
|
|
typedef unsigned char u8; /* 1-byte (or larger) unsigned integer */
|
|
|
|
typedef short int i16; /* 2-byte (or larger) signed integer */
|
2010-01-02 22:02:02 +03:00
|
|
|
typedef unsigned int u32; /* 4-byte unsigned integer */
|
|
|
|
typedef sqlite3_uint64 u64; /* 8-byte unsigned integer */
|
2012-03-26 14:36:55 +04:00
|
|
|
typedef sqlite3_int64 i64; /* 8-byte signed integer */
|
2011-05-24 19:36:01 +04:00
|
|
|
|
2009-12-03 09:26:46 +03:00
|
|
|
/*
|
|
|
|
** Macro used to suppress compiler warnings for unused parameters.
|
|
|
|
*/
|
|
|
|
#define UNUSED_PARAMETER(x) (void)(x)
|
2011-05-24 19:36:01 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
** Activate assert() only if SQLITE_DEBUG is defined.
|
|
|
|
*/
|
|
|
|
#if !defined(NDEBUG) && !defined(SQLITE_DEBUG)
|
|
|
|
# define NDEBUG 1
|
2009-11-30 22:48:16 +03:00
|
|
|
#endif
|
|
|
|
|
2011-05-24 19:36:01 +04:00
|
|
|
/*
** The TESTONLY macro is used to enclose variable declarations or
** other bits of code that are needed to support the arguments
** within testcase() and assert() macros. The enclosed text is kept
** verbatim when SQLITE_DEBUG or SQLITE_COVERAGE_TEST is defined and
** expands to nothing otherwise.
*/
#if defined(SQLITE_DEBUG) || defined(SQLITE_COVERAGE_TEST)
# define TESTONLY(X) X
#else
# define TESTONLY(X)
#endif
|
|
|
|
|
|
|
|
#endif /* SQLITE_AMALGAMATION */
|
|
|
|
|
2011-10-13 21:16:45 +04:00
|
|
|
/*
** FTS_CORRUPT_VTAB expands to a call to sqlite3Fts3Corrupt() in
** SQLITE_DEBUG builds and to SQLITE_CORRUPT_VTAB otherwise.
** NOTE(review): presumably the function form exists so that debug builds
** have a single place to set a breakpoint -- confirm against fts3.c.
*/
#ifdef SQLITE_DEBUG
int sqlite3Fts3Corrupt(void);
# define FTS_CORRUPT_VTAB sqlite3Fts3Corrupt()
#else
# define FTS_CORRUPT_VTAB SQLITE_CORRUPT_VTAB
#endif
|
|
|
|
|
2009-11-13 13:36:20 +03:00
|
|
|
typedef struct Fts3Table Fts3Table;
|
|
|
|
typedef struct Fts3Cursor Fts3Cursor;
|
|
|
|
typedef struct Fts3Expr Fts3Expr;
|
|
|
|
typedef struct Fts3Phrase Fts3Phrase;
|
2010-10-19 18:07:59 +04:00
|
|
|
typedef struct Fts3PhraseToken Fts3PhraseToken;
|
|
|
|
|
2011-06-02 23:57:24 +04:00
|
|
|
typedef struct Fts3Doclist Fts3Doclist;
|
2009-11-17 15:52:10 +03:00
|
|
|
typedef struct Fts3SegFilter Fts3SegFilter;
|
2010-10-19 18:07:59 +04:00
|
|
|
typedef struct Fts3DeferredToken Fts3DeferredToken;
|
|
|
|
typedef struct Fts3SegReader Fts3SegReader;
|
2011-06-02 23:57:24 +04:00
|
|
|
typedef struct Fts3MultiSegReader Fts3MultiSegReader;
|
2009-11-13 13:36:20 +03:00
|
|
|
|
2015-05-05 22:37:07 +03:00
|
|
|
typedef struct MatchinfoBuffer MatchinfoBuffer;
|
|
|
|
|
2009-11-13 13:36:20 +03:00
|
|
|
/*
|
|
|
|
** A connection to a fulltext index is an instance of the following
|
|
|
|
** structure. The xCreate and xConnect methods create an instance
|
|
|
|
** of this structure and xDestroy and xDisconnect free that instance.
|
|
|
|
** All other methods receive a pointer to the structure as one of their
|
|
|
|
** arguments.
|
|
|
|
*/
|
|
|
|
struct Fts3Table {
|
2009-11-20 05:24:15 +03:00
|
|
|
sqlite3_vtab base; /* Base class used by SQLite core */
|
|
|
|
sqlite3 *db; /* The database connection */
|
|
|
|
const char *zDb; /* logical database name */
|
|
|
|
const char *zName; /* virtual table name */
|
2009-12-01 16:57:48 +03:00
|
|
|
int nColumn; /* number of named columns in virtual table */
|
2009-11-20 05:24:15 +03:00
|
|
|
char **azColumn; /* column names. malloced */
|
2013-06-21 21:30:47 +04:00
|
|
|
u8 *abNotindexed; /* True for 'notindexed' columns */
|
2009-11-20 05:24:15 +03:00
|
|
|
sqlite3_tokenizer *pTokenizer; /* tokenizer for inserts and queries */
|
2011-10-04 15:22:59 +04:00
|
|
|
char *zContentTbl; /* content=xxx option, or NULL */
|
2012-03-01 23:44:20 +04:00
|
|
|
char *zLanguageid; /* languageid=xxx option, or NULL */
|
2014-05-14 19:58:47 +04:00
|
|
|
int nAutoincrmerge; /* Value configured by 'automerge' */
|
2012-03-24 06:20:43 +04:00
|
|
|
u32 nLeafAdd; /* Number of leaf blocks added this trans */
|
2009-11-13 13:36:20 +03:00
|
|
|
|
|
|
|
/* Precompiled statements used by the implementation. Each of these
|
|
|
|
** statements is run and reset within a single virtual table API call.
|
|
|
|
*/
|
2014-05-13 00:04:48 +04:00
|
|
|
sqlite3_stmt *aStmt[40];
|
2009-11-13 13:36:20 +03:00
|
|
|
|
2011-02-01 19:34:32 +03:00
|
|
|
char *zReadExprlist;
|
|
|
|
char *zWriteExprlist;
|
|
|
|
|
2009-12-03 20:36:22 +03:00
|
|
|
int nNodeSize; /* Soft limit for node size */
|
2012-03-27 19:00:06 +04:00
|
|
|
u8 bFts4; /* True for FTS4, false for FTS3 */
|
2014-03-12 23:20:36 +04:00
|
|
|
u8 bHasStat; /* True if %_stat table exists (2==unknown) */
|
2010-02-03 22:55:13 +03:00
|
|
|
u8 bHasDocsize; /* True if %_docsize table exists */
|
2011-06-05 00:04:35 +04:00
|
|
|
u8 bDescIdx; /* True if doclists are in reverse order */
|
2012-03-23 22:26:11 +04:00
|
|
|
u8 bIgnoreSavepoint; /* True to ignore xSavepoint invocations */
|
2010-10-22 20:44:39 +04:00
|
|
|
int nPgsz; /* Page size for host database */
|
|
|
|
char *zSegmentsTbl; /* Name of %_segments table */
|
2010-10-20 22:56:04 +04:00
|
|
|
sqlite3_blob *pSegments; /* Blob handle open on %_segments table */
|
|
|
|
|
2012-03-17 20:56:57 +04:00
|
|
|
/*
|
2012-03-01 23:44:20 +04:00
|
|
|
** The following array of hash tables is used to buffer pending index
|
2012-03-17 20:56:57 +04:00
|
|
|
** updates during transactions. All pending updates buffered at any one
|
|
|
|
** time must share a common language-id (see the FTS4 langid= feature).
|
|
|
|
** The current language id is stored in variable iPrevLangid.
|
2011-05-25 22:34:53 +04:00
|
|
|
**
|
|
|
|
** A single FTS4 table may have multiple full-text indexes. For each index
|
|
|
|
** there is an entry in the aIndex[] array. Index 0 is an index of all the
|
|
|
|
** terms that appear in the document set. Each subsequent index in aIndex[]
|
|
|
|
** is an index of prefixes of a specific length.
|
2012-03-17 20:56:57 +04:00
|
|
|
**
|
|
|
|
** Variable nPendingData contains an estimate the memory consumed by the
|
|
|
|
** pending data structures, including hash table overhead, but not including
|
|
|
|
** malloc overhead. When nPendingData exceeds nMaxPendingData, all hash
|
|
|
|
** tables are flushed to disk. Variable iPrevDocid is the docid of the most
|
|
|
|
** recently inserted record.
|
2009-11-13 13:36:20 +03:00
|
|
|
*/
|
2011-05-25 22:34:53 +04:00
|
|
|
int nIndex; /* Size of aIndex[] */
|
|
|
|
struct Fts3Index {
|
|
|
|
int nPrefix; /* Prefix length (0 for main terms index) */
|
|
|
|
Fts3Hash hPending; /* Pending terms table for this index */
|
|
|
|
} *aIndex;
|
|
|
|
int nMaxPendingData; /* Max pending data before flush to disk */
|
|
|
|
int nPendingData; /* Current bytes of pending data */
|
|
|
|
sqlite_int64 iPrevDocid; /* Docid of most recently inserted document */
|
2012-03-01 23:44:20 +04:00
|
|
|
int iPrevLangid; /* Langid of recently inserted document */
|
2015-09-28 18:23:29 +03:00
|
|
|
int bPrevDelete; /* True if last operation was a delete */
|
2011-05-24 19:36:01 +04:00
|
|
|
|
2011-10-04 20:37:35 +04:00
|
|
|
#if defined(SQLITE_DEBUG) || defined(SQLITE_COVERAGE_TEST)
|
2011-05-24 19:36:01 +04:00
|
|
|
/* State variables used for validating that the transaction control
|
|
|
|
** methods of the virtual table are called at appropriate times. These
|
2012-03-01 23:44:20 +04:00
|
|
|
** values do not contribute to FTS functionality; they are used for
|
|
|
|
** verifying the operation of the SQLite core.
|
2011-05-24 19:36:01 +04:00
|
|
|
*/
|
|
|
|
int inTransaction; /* True after xBegin but before xCommit/xRollback */
|
|
|
|
int mxSavepoint; /* Largest valid xSavepoint integer */
|
|
|
|
#endif
|
2013-10-02 12:04:27 +04:00
|
|
|
|
|
|
|
#ifdef SQLITE_TEST
|
|
|
|
/* True to disable the incremental doclist optimization. This is controled
|
|
|
|
** by special insert command 'test-no-incr-doclist'. */
|
|
|
|
int bNoIncrDoclist;
|
|
|
|
#endif
|
2009-11-13 13:36:20 +03:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
** When the core wants to read from the virtual table, it creates a
|
|
|
|
** virtual table cursor (an instance of the following structure) using
|
|
|
|
** the xOpen method. Cursors are destroyed using the xClose method.
|
|
|
|
*/
|
|
|
|
struct Fts3Cursor {
|
|
|
|
sqlite3_vtab_cursor base; /* Base class used by SQLite core */
|
2009-12-01 16:57:48 +03:00
|
|
|
i16 eSearch; /* Search strategy (see below) */
|
|
|
|
u8 isEof; /* True if at End Of Results */
|
|
|
|
u8 isRequireSeek; /* True if must seek pStmt to %_content row */
|
2009-11-13 13:36:20 +03:00
|
|
|
sqlite3_stmt *pStmt; /* Prepared statement in use by the cursor */
|
|
|
|
Fts3Expr *pExpr; /* Parsed MATCH query string */
|
2012-03-01 23:44:20 +04:00
|
|
|
int iLangid; /* Language being queried for */
|
2010-11-23 22:16:47 +03:00
|
|
|
int nPhrase; /* Number of matchable phrases in query */
|
2010-10-19 18:07:59 +04:00
|
|
|
Fts3DeferredToken *pDeferred; /* Deferred search tokens, if any */
|
2009-11-13 13:36:20 +03:00
|
|
|
sqlite3_int64 iPrevId; /* Previous id read from aDoclist */
|
|
|
|
char *pNextId; /* Pointer into the body of aDoclist */
|
|
|
|
char *aDoclist; /* List of docids for full-text queries */
|
|
|
|
int nDoclist; /* Size of buffer at aDoclist */
|
2011-06-05 00:04:35 +04:00
|
|
|
u8 bDesc; /* True to sort in descending order */
|
2010-10-23 23:07:30 +04:00
|
|
|
int eEvalmode; /* An FTS3_EVAL_XX constant */
|
2010-10-19 18:07:59 +04:00
|
|
|
int nRowAvg; /* Average size of database rows, in pages */
|
2011-06-20 23:00:30 +04:00
|
|
|
sqlite3_int64 nDoc; /* Documents in table */
|
2013-09-30 15:42:19 +04:00
|
|
|
i64 iMinDocid; /* Minimum docid to return */
|
|
|
|
i64 iMaxDocid; /* Maximum docid to return */
|
2010-11-23 22:16:47 +03:00
|
|
|
int isMatchinfoNeeded; /* True when aMatchinfo[] needs filling in */
|
2015-05-05 22:37:07 +03:00
|
|
|
MatchinfoBuffer *pMIBuffer; /* Buffer for matchinfo data */
|
2009-11-13 13:36:20 +03:00
|
|
|
};
|
|
|
|
|
2010-10-23 23:07:30 +04:00
|
|
|
#define FTS3_EVAL_FILTER 0
|
|
|
|
#define FTS3_EVAL_NEXT 1
|
|
|
|
#define FTS3_EVAL_MATCHINFO 2
|
|
|
|
|
2009-12-01 16:57:48 +03:00
|
|
|
/*
|
|
|
|
** The Fts3Cursor.eSearch member is always set to one of the following.
|
|
|
|
** Actually, Fts3Cursor.eSearch can be greater than or equal to
|
|
|
|
** FTS3_FULLTEXT_SEARCH. If so, then Fts3Cursor.eSearch - 2 is the index
|
|
|
|
** of the column to be searched. For example, in
|
|
|
|
**
|
|
|
|
** CREATE VIRTUAL TABLE ex1 USING fts3(a,b,c,d);
|
|
|
|
** SELECT docid FROM ex1 WHERE b MATCH 'one two three';
|
|
|
|
**
|
|
|
|
** Because the LHS of the MATCH operator is 2nd column "b",
|
|
|
|
** Fts3Cursor.eSearch will be set to FTS3_FULLTEXT_SEARCH+1. (+0 for a,
|
|
|
|
** +1 for b, +2 for c, +3 for d.) If the LHS of MATCH were "ex1"
|
|
|
|
** indicating that all columns should be searched,
|
|
|
|
** then eSearch would be set to FTS3_FULLTEXT_SEARCH+4.
|
|
|
|
*/
|
|
|
|
#define FTS3_FULLSCAN_SEARCH 0 /* Linear scan of %_content table */
|
|
|
|
#define FTS3_DOCID_SEARCH 1 /* Lookup by rowid on %_content table */
|
|
|
|
#define FTS3_FULLTEXT_SEARCH 2 /* Full-text index search */
|
|
|
|
|
2013-09-30 15:42:19 +04:00
|
|
|
/*
|
|
|
|
** The lower 16-bits of the sqlite3_index_info.idxNum value set by
|
|
|
|
** the xBestIndex() method contains the Fts3Cursor.eSearch value described
|
|
|
|
** above. The upper 16-bits contain a combination of the following
|
|
|
|
** bits, used to describe extra constraints on full-text searches.
|
|
|
|
*/
|
|
|
|
#define FTS3_HAVE_LANGID 0x00010000 /* languageid=? */
|
|
|
|
#define FTS3_HAVE_DOCID_GE 0x00020000 /* docid>=? */
|
|
|
|
#define FTS3_HAVE_DOCID_LE 0x00040000 /* docid<=? */
|
2011-06-02 23:57:24 +04:00
|
|
|
|
|
|
|
struct Fts3Doclist {
|
|
|
|
char *aAll; /* Array containing doclist (or NULL) */
|
|
|
|
int nAll; /* Size of a[] in bytes */
|
|
|
|
char *pNextDocid; /* Pointer to next docid */
|
2011-06-03 22:00:19 +04:00
|
|
|
|
|
|
|
sqlite3_int64 iDocid; /* Current docid (if pList!=0) */
|
|
|
|
int bFreeList; /* True if pList should be sqlite3_free()d */
|
2011-06-02 23:57:24 +04:00
|
|
|
char *pList; /* Pointer to position list following iDocid */
|
|
|
|
int nList; /* Length of position list */
|
2011-07-07 11:37:53 +04:00
|
|
|
};
|
2011-06-02 23:57:24 +04:00
|
|
|
|
2009-11-13 13:36:20 +03:00
|
|
|
/*
|
|
|
|
** A "phrase" is a sequence of one or more tokens that must match in
|
|
|
|
** sequence. A single token is the base case and the most common case.
|
2010-10-19 18:07:59 +04:00
|
|
|
** For a sequence of tokens contained in double-quotes (i.e. "one two three")
|
|
|
|
** nToken will be the number of tokens in the string.
|
2009-11-13 13:36:20 +03:00
|
|
|
*/
|
2010-10-19 18:07:59 +04:00
|
|
|
struct Fts3PhraseToken {
|
|
|
|
char *z; /* Text of the token */
|
|
|
|
int n; /* Number of bytes in buffer z */
|
|
|
|
int isPrefix; /* True if token ends with a "*" character */
|
2011-10-18 23:39:41 +04:00
|
|
|
int bFirst; /* True if token must appear at position 0 */
|
2011-05-28 19:57:40 +04:00
|
|
|
|
|
|
|
/* Variables above this point are populated when the expression is
|
|
|
|
** parsed (by code in fts3_expr.c). Below this point the variables are
|
|
|
|
** used when evaluating the expression. */
|
2010-10-22 20:44:39 +04:00
|
|
|
Fts3DeferredToken *pDeferred; /* Deferred token object for this token */
|
2011-06-02 23:57:24 +04:00
|
|
|
Fts3MultiSegReader *pSegcsr; /* Segment-reader for this token */
|
2010-10-19 18:07:59 +04:00
|
|
|
};
|
|
|
|
|
2009-11-13 13:36:20 +03:00
|
|
|
struct Fts3Phrase {
|
2011-06-02 23:57:24 +04:00
|
|
|
/* Cache of doclist for this phrase. */
|
|
|
|
Fts3Doclist doclist;
|
|
|
|
int bIncr; /* True if doclist is loaded incrementally */
|
2011-06-17 21:37:31 +04:00
|
|
|
int iDoclistToken;
|
2011-05-28 19:57:40 +04:00
|
|
|
|
2015-01-27 21:43:02 +03:00
|
|
|
/* Used by sqlite3Fts3EvalPhrasePoslist() if this is a descendent of an
|
|
|
|
** OR condition. */
|
|
|
|
char *pOrPoslist;
|
|
|
|
i64 iOrDocid;
|
|
|
|
|
2011-06-02 23:57:24 +04:00
|
|
|
/* Variables below this point are populated by fts3_expr.c when parsing
|
|
|
|
** a MATCH expression. Everything above is part of the evaluation phase.
|
|
|
|
*/
|
|
|
|
int nToken; /* Number of tokens in the phrase */
|
|
|
|
int iColumn; /* Index of column this phrase must match */
|
2010-10-19 18:07:59 +04:00
|
|
|
Fts3PhraseToken aToken[1]; /* One entry for each token in the phrase */
|
2009-11-13 13:36:20 +03:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
** A tree of these objects forms the RHS of a MATCH operator.
|
2009-12-22 21:56:19 +03:00
|
|
|
**
|
2011-05-28 19:57:40 +04:00
|
|
|
** If Fts3Expr.eType is FTSQUERY_PHRASE and isLoaded is true, then aDoclist
|
|
|
|
** points to a malloced buffer, size nDoclist bytes, containing the results
|
|
|
|
** of this phrase query in FTS3 doclist format. As usual, the initial
|
|
|
|
** "Length" field found in doclists stored on disk is omitted from this
|
|
|
|
** buffer.
|
2009-12-22 21:56:19 +03:00
|
|
|
**
|
2011-06-08 22:39:07 +04:00
|
|
|
** Variable aMI is used only for FTSQUERY_NEAR nodes to store the global
|
|
|
|
** matchinfo data. If it is not NULL, it points to an array of size nCol*3,
|
|
|
|
** where nCol is the number of columns in the queried FTS table. The array
|
|
|
|
** is populated as follows:
|
|
|
|
**
|
|
|
|
** aMI[iCol*3 + 0] = Undefined
|
|
|
|
** aMI[iCol*3 + 1] = Number of occurrences
|
|
|
|
** aMI[iCol*3 + 2] = Number of rows containing at least one instance
|
|
|
|
**
|
|
|
|
** The aMI array is allocated using sqlite3_malloc(). It should be freed
|
|
|
|
** when the expression node is.
|
2009-11-13 13:36:20 +03:00
|
|
|
*/
|
|
|
|
struct Fts3Expr {
|
|
|
|
int eType; /* One of the FTSQUERY_XXX values defined below */
|
|
|
|
int nNear; /* Valid if eType==FTSQUERY_NEAR */
|
|
|
|
Fts3Expr *pParent; /* pParent->pLeft==this or pParent->pRight==this */
|
|
|
|
Fts3Expr *pLeft; /* Left operand */
|
|
|
|
Fts3Expr *pRight; /* Right operand */
|
|
|
|
Fts3Phrase *pPhrase; /* Valid if eType==FTSQUERY_PHRASE */
|
2011-06-02 23:57:24 +04:00
|
|
|
|
|
|
|
/* The following are used by the fts3_eval.c module. */
|
|
|
|
sqlite3_int64 iDocid; /* Current docid */
|
|
|
|
u8 bEof; /* True this expression is at EOF already */
|
|
|
|
u8 bStart; /* True if iDocid is valid */
|
|
|
|
u8 bDeferred; /* True if this expression is entirely deferred */
|
2011-06-08 22:39:07 +04:00
|
|
|
|
2015-05-06 20:41:19 +03:00
|
|
|
/* The following are used by the fts3_snippet.c module. */
|
|
|
|
int iPhrase; /* Index of this phrase in matchinfo() results */
|
|
|
|
u32 *aMI; /* See above */
|
2009-11-13 13:36:20 +03:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
** Candidate values for Fts3Expr.eType. Note that the order of the first
|
|
|
|
** four values is in order of precedence when parsing expressions. For
|
|
|
|
** example, the following:
|
|
|
|
**
|
|
|
|
** "a OR b AND c NOT d NEAR e"
|
|
|
|
**
|
|
|
|
** is equivalent to:
|
|
|
|
**
|
|
|
|
** "a OR (b AND (c NOT (d NEAR e)))"
|
|
|
|
*/
|
|
|
|
#define FTSQUERY_NEAR 1
|
|
|
|
#define FTSQUERY_NOT 2
|
|
|
|
#define FTSQUERY_AND 3
|
|
|
|
#define FTSQUERY_OR 4
|
|
|
|
#define FTSQUERY_PHRASE 5
|
|
|
|
|
|
|
|
|
|
|
|
/* fts3_write.c */
|
|
|
|
int sqlite3Fts3UpdateMethod(sqlite3_vtab*,int,sqlite3_value**,sqlite3_int64*);
|
|
|
|
int sqlite3Fts3PendingTermsFlush(Fts3Table *);
|
|
|
|
void sqlite3Fts3PendingTermsClear(Fts3Table *);
|
|
|
|
int sqlite3Fts3Optimize(Fts3Table *);
|
2012-01-25 20:29:45 +04:00
|
|
|
int sqlite3Fts3SegReaderNew(int, int, sqlite3_int64,
|
2009-11-18 18:35:58 +03:00
|
|
|
sqlite3_int64, sqlite3_int64, const char *, int, Fts3SegReader**);
|
2011-05-24 22:49:45 +04:00
|
|
|
int sqlite3Fts3SegReaderPending(
|
2011-05-25 22:34:53 +04:00
|
|
|
Fts3Table*,int,const char*,int,int,Fts3SegReader**);
|
2010-11-26 19:49:59 +03:00
|
|
|
void sqlite3Fts3SegReaderFree(Fts3SegReader *);
|
2012-03-01 23:44:20 +04:00
|
|
|
int sqlite3Fts3AllSegdirs(Fts3Table*, int, int, int, sqlite3_stmt **);
|
2011-06-03 22:00:19 +04:00
|
|
|
int sqlite3Fts3ReadBlock(Fts3Table*, sqlite3_int64, char **, int*, int*);
|
2009-11-13 13:36:20 +03:00
|
|
|
|
2010-11-23 22:16:47 +03:00
|
|
|
int sqlite3Fts3SelectDoctotal(Fts3Table *, sqlite3_stmt **);
|
|
|
|
int sqlite3Fts3SelectDocsize(Fts3Table *, sqlite3_int64, sqlite3_stmt **);
|
|
|
|
|
2012-08-20 21:24:48 +04:00
|
|
|
#ifndef SQLITE_DISABLE_FTS4_DEFERRED
|
2010-10-19 18:07:59 +04:00
|
|
|
void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *);
|
|
|
|
int sqlite3Fts3DeferToken(Fts3Cursor *, Fts3PhraseToken *, int);
|
|
|
|
int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *);
|
|
|
|
void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *);
|
2012-08-20 21:24:48 +04:00
|
|
|
int sqlite3Fts3DeferredTokenList(Fts3DeferredToken *, char **, int *);
|
|
|
|
#else
|
|
|
|
# define sqlite3Fts3FreeDeferredTokens(x)
|
|
|
|
# define sqlite3Fts3DeferToken(x,y,z) SQLITE_OK
|
|
|
|
# define sqlite3Fts3CacheDeferredDoclists(x) SQLITE_OK
|
|
|
|
# define sqlite3Fts3FreeDeferredDoclists(x)
|
|
|
|
# define sqlite3Fts3DeferredTokenList(x,y,z) SQLITE_OK
|
|
|
|
#endif
|
|
|
|
|
2010-10-20 22:56:04 +04:00
|
|
|
void sqlite3Fts3SegmentsClose(Fts3Table *);
|
2012-03-24 18:45:19 +04:00
|
|
|
int sqlite3Fts3MaxLevel(Fts3Table *, int *);
|
2010-10-20 22:56:04 +04:00
|
|
|
|
2011-05-24 22:49:45 +04:00
|
|
|
/* Special values interpreted by sqlite3Fts3SegReaderCursor() */
|
|
|
|
#define FTS3_SEGCURSOR_PENDING -1
|
2011-05-25 22:34:53 +04:00
|
|
|
#define FTS3_SEGCURSOR_ALL -2
|
2011-02-01 19:34:32 +03:00
|
|
|
|
2011-06-02 23:57:24 +04:00
|
|
|
int sqlite3Fts3SegReaderStart(Fts3Table*, Fts3MultiSegReader*, Fts3SegFilter*);
|
|
|
|
int sqlite3Fts3SegReaderStep(Fts3Table *, Fts3MultiSegReader *);
|
|
|
|
void sqlite3Fts3SegReaderFinish(Fts3MultiSegReader *);
|
|
|
|
|
2012-03-01 23:44:20 +04:00
|
|
|
int sqlite3Fts3SegReaderCursor(Fts3Table *,
|
|
|
|
int, int, int, const char *, int, int, int, Fts3MultiSegReader *);
|
2011-02-01 19:34:32 +03:00
|
|
|
|
2009-11-13 13:36:20 +03:00
|
|
|
/* Flags allowed in Fts3SegFilter.flags (see sqlite3Fts3SegReaderStart()) */
|
|
|
|
#define FTS3_SEGMENT_REQUIRE_POS 0x00000001
|
|
|
|
#define FTS3_SEGMENT_IGNORE_EMPTY 0x00000002
|
|
|
|
#define FTS3_SEGMENT_COLUMN_FILTER 0x00000004
|
2009-11-17 15:52:10 +03:00
|
|
|
#define FTS3_SEGMENT_PREFIX 0x00000008
|
2011-02-02 20:30:43 +03:00
|
|
|
#define FTS3_SEGMENT_SCAN 0x00000010
|
2011-10-19 15:57:13 +04:00
|
|
|
#define FTS3_SEGMENT_FIRST 0x00000020
|
2009-11-17 15:52:10 +03:00
|
|
|
|
2009-11-18 18:35:58 +03:00
|
|
|
/*
** Filter object passed as the 3rd argument to sqlite3Fts3SegReaderStart().
** (The comment here used to name "SegmentReaderIterate()", a function that
** is not declared in this header.)
*/
struct Fts3SegFilter {
  const char *zTerm;   /* NOTE(review): presumably the term or prefix to
                       ** match -- confirm against fts3_write.c */
  int nTerm;           /* NOTE(review): presumably size of zTerm in bytes */
  int iCol;            /* NOTE(review): presumably the column used with
                       ** FTS3_SEGMENT_COLUMN_FILTER -- confirm */
  int flags;           /* NOTE(review): presumably a mask of
                       ** FTS3_SEGMENT_XXX values -- confirm */
};
|
2009-11-13 13:36:20 +03:00
|
|
|
|
2011-06-02 23:57:24 +04:00
|
|
|
struct Fts3MultiSegReader {
|
2011-02-01 19:34:32 +03:00
|
|
|
/* Used internally by sqlite3Fts3SegReaderXXX() calls */
|
|
|
|
Fts3SegReader **apSegment; /* Array of Fts3SegReader objects */
|
|
|
|
int nSegment; /* Size of apSegment array */
|
|
|
|
int nAdvance; /* How many seg-readers to advance */
|
|
|
|
Fts3SegFilter *pFilter; /* Pointer to filter object */
|
|
|
|
char *aBuffer; /* Buffer to merge doclists in */
|
|
|
|
int nBuffer; /* Allocated size of aBuffer[] in bytes */
|
|
|
|
|
2011-06-02 23:57:24 +04:00
|
|
|
int iColFilter; /* If >=0, filter for this column */
|
2011-06-16 20:06:05 +04:00
|
|
|
int bRestart;
|
2011-06-02 23:57:24 +04:00
|
|
|
|
|
|
|
/* Used by fts3.c only. */
|
|
|
|
int nCost; /* Cost of running iterator */
|
|
|
|
int bLookup; /* True if a lookup of a single entry. */
|
2011-02-01 19:34:32 +03:00
|
|
|
|
|
|
|
/* Output values. Valid only after Fts3SegReaderStep() returns SQLITE_ROW. */
|
|
|
|
char *zTerm; /* Pointer to term buffer */
|
|
|
|
int nTerm; /* Size of zTerm in bytes */
|
|
|
|
char *aDoclist; /* Pointer to doclist buffer */
|
|
|
|
int nDoclist; /* Size of aDoclist[] in bytes */
|
|
|
|
};
|
|
|
|
|
2012-03-24 06:20:43 +04:00
|
|
|
int sqlite3Fts3Incrmerge(Fts3Table*,int,int);
|
|
|
|
|
2013-11-12 21:46:44 +04:00
|
|
|
/*
** Inline fast path in front of sqlite3Fts3GetVarint32(). If the first byte
** at p has its high bit clear, that byte is stored in *piVal and the macro
** evaluates to 1 without a function call. Otherwise the work is delegated
** to sqlite3Fts3GetVarint32(p, piVal) and the macro evaluates to whatever
** that function returns.
**
** p is evaluated twice, so it must not have side effects. piVal is
** parenthesized before being dereferenced so that pointer expressions
** such as (a + 1) work as expected.
*/
#define fts3GetVarint32(p, piVal) (                                         \
  (*(u8*)(p)&0x80) ? sqlite3Fts3GetVarint32(p, piVal)                       \
                   : (*(piVal)=*(u8*)(p), 1)                                \
)
|
|
|
|
|
2009-11-13 13:36:20 +03:00
|
|
|
/* fts3.c */
|
2015-05-01 17:07:30 +03:00
|
|
|
void sqlite3Fts3ErrMsg(char**,const char*,...);
|
2009-11-13 13:36:20 +03:00
|
|
|
int sqlite3Fts3PutVarint(char *, sqlite3_int64);
|
|
|
|
int sqlite3Fts3GetVarint(const char *, sqlite_int64 *);
|
|
|
|
int sqlite3Fts3GetVarint32(const char *, int *);
|
|
|
|
int sqlite3Fts3VarintLen(sqlite3_uint64);
|
|
|
|
void sqlite3Fts3Dequote(char *);
|
2011-06-05 00:04:35 +04:00
|
|
|
void sqlite3Fts3DoclistPrev(int,char*,int,char**,sqlite3_int64*,int*,u8*);
|
2011-06-08 22:39:07 +04:00
|
|
|
int sqlite3Fts3EvalPhraseStats(Fts3Cursor *, Fts3Expr *, u32 *);
|
2011-10-19 15:57:13 +04:00
|
|
|
int sqlite3Fts3FirstFilter(sqlite3_int64, char *, int, char *);
|
2012-03-24 21:29:05 +04:00
|
|
|
void sqlite3Fts3CreateStatTable(int*, Fts3Table*);
|
2015-05-25 13:57:13 +03:00
|
|
|
int sqlite3Fts3EvalTestDeferred(Fts3Cursor *pCsr, int *pRc);
|
2010-01-02 22:02:02 +03:00
|
|
|
|
2009-11-13 13:36:20 +03:00
|
|
|
/* fts3_tokenizer.c */
|
|
|
|
const char *sqlite3Fts3NextToken(const char *, int *);
|
|
|
|
int sqlite3Fts3InitHashTable(sqlite3 *, Fts3Hash *, const char *);
|
2010-11-02 20:41:52 +03:00
|
|
|
int sqlite3Fts3InitTokenizer(Fts3Hash *pHash, const char *,
|
|
|
|
sqlite3_tokenizer **, char **
|
2009-11-13 13:36:20 +03:00
|
|
|
);
|
2010-11-02 20:41:52 +03:00
|
|
|
int sqlite3Fts3IsIdChar(char);
|
2009-11-13 13:36:20 +03:00
|
|
|
|
|
|
|
/* fts3_snippet.c */
|
|
|
|
void sqlite3Fts3Offsets(sqlite3_context*, Fts3Cursor*);
|
2010-01-06 20:19:21 +03:00
|
|
|
void sqlite3Fts3Snippet(sqlite3_context *, Fts3Cursor *, const char *,
|
2010-01-02 22:02:02 +03:00
|
|
|
const char *, const char *, int, int
|
|
|
|
);
|
2010-11-23 22:16:47 +03:00
|
|
|
void sqlite3Fts3Matchinfo(sqlite3_context *, Fts3Cursor *, const char *);
|
2015-05-05 22:37:07 +03:00
|
|
|
void sqlite3Fts3MIBufferFree(MatchinfoBuffer *p);
|
2009-11-13 13:36:20 +03:00
|
|
|
|
|
|
|
/* fts3_expr.c */
|
2012-03-03 22:46:41 +04:00
|
|
|
int sqlite3Fts3ExprParse(sqlite3_tokenizer *, int,
|
2013-04-29 22:07:37 +04:00
|
|
|
char **, int, int, int, const char *, int, Fts3Expr **, char **
|
2009-11-13 13:36:20 +03:00
|
|
|
);
|
|
|
|
void sqlite3Fts3ExprFree(Fts3Expr *);
|
|
|
|
#ifdef SQLITE_TEST
|
2009-12-08 22:05:53 +03:00
|
|
|
int sqlite3Fts3ExprInitTestInterface(sqlite3 *db);
|
2011-05-17 18:41:36 +04:00
|
|
|
int sqlite3Fts3InitTerm(sqlite3 *db);
|
2009-11-13 13:36:20 +03:00
|
|
|
#endif
|
|
|
|
|
2012-03-03 22:46:41 +04:00
|
|
|
int sqlite3Fts3OpenTokenizer(sqlite3_tokenizer *, int, const char *, int,
|
|
|
|
sqlite3_tokenizer_cursor **
|
|
|
|
);
|
|
|
|
|
2011-02-01 19:34:32 +03:00
|
|
|
/* fts3_aux.c */
|
|
|
|
int sqlite3Fts3InitAux(sqlite3 *db);
|
|
|
|
|
2011-06-02 23:57:24 +04:00
|
|
|
void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *);
|
|
|
|
|
|
|
|
int sqlite3Fts3MsrIncrStart(
|
|
|
|
Fts3Table*, Fts3MultiSegReader*, int, const char*, int);
|
|
|
|
int sqlite3Fts3MsrIncrNext(
|
|
|
|
Fts3Table *, Fts3MultiSegReader *, sqlite3_int64 *, char **, int *);
|
2012-05-10 21:43:14 +04:00
|
|
|
int sqlite3Fts3EvalPhrasePoslist(Fts3Cursor *, Fts3Expr *, int iCol, char **);
|
2011-06-02 23:57:24 +04:00
|
|
|
int sqlite3Fts3MsrOvfl(Fts3Cursor *, Fts3MultiSegReader *, int *);
|
2011-06-17 20:04:39 +04:00
|
|
|
int sqlite3Fts3MsrIncrRestart(Fts3MultiSegReader *pCsr);
|
2011-06-02 23:57:24 +04:00
|
|
|
|
2013-04-22 21:07:56 +04:00
|
|
|
/* fts3_tokenize_vtab.c */
|
2013-05-07 16:16:48 +04:00
|
|
|
int sqlite3Fts3InitTok(sqlite3*, Fts3Hash *);
|
2013-04-22 21:07:56 +04:00
|
|
|
|
2012-05-25 21:50:19 +04:00
|
|
|
/* fts3_unicode2.c (functions generated by parsing unicode text files) */
|
2014-07-03 16:18:22 +04:00
|
|
|
#ifndef SQLITE_DISABLE_FTS3_UNICODE
|
2012-06-06 23:30:38 +04:00
|
|
|
int sqlite3FtsUnicodeFold(int, int);
|
2012-05-25 21:50:19 +04:00
|
|
|
int sqlite3FtsUnicodeIsalnum(int);
|
2012-06-06 23:30:38 +04:00
|
|
|
int sqlite3FtsUnicodeIsdiacritic(int);
|
|
|
|
#endif
|
2012-05-25 21:50:19 +04:00
|
|
|
|
2011-06-28 11:15:43 +04:00
|
|
|
#endif /* !SQLITE_CORE || SQLITE_ENABLE_FTS3 */
|
2009-11-13 13:36:20 +03:00
|
|
|
#endif /* _FTSINT_H */
|