Start reworking fts3 code to match the rest of SQLite (code conventions, malloc-failure handling etc.).
FossilOrigin-Name: 30a92f1132801c7582007ee625c577ea2ac31cdf
This commit is contained in:
parent
c54055bd25
commit
09977bb9f0
7695
ext/fts3/fts3.c
7695
ext/fts3/fts3.c
File diff suppressed because it is too large
Load Diff
220
ext/fts3/fts3Int.h
Normal file
220
ext/fts3/fts3Int.h
Normal file
@ -0,0 +1,220 @@
|
||||
/*
|
||||
** 2009 Nov 12
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
******************************************************************************
|
||||
**
|
||||
*/
|
||||
|
||||
#ifndef _FTSINT_H
|
||||
#define _FTSINT_H
|
||||
|
||||
#include <sqlite3.h>
|
||||
#include "fts3_tokenizer.h"
|
||||
#include "fts3_hash.h"
|
||||
|
||||
/*
|
||||
** This constant controls how often segments are merged. Once there are
|
||||
** FTS3_MERGE_COUNT segments of level N, they are merged into a single
|
||||
** segment of level N+1.
|
||||
*/
|
||||
#define FTS3_MERGE_COUNT 16
|
||||
|
||||
/*
|
||||
** This is the maximum amount of data (in bytes) to store in the
|
||||
** Fts3Table.pendingTerms hash table. Normally, the hash table is
|
||||
** populated as documents are inserted/updated/deleted in a transaction
|
||||
** and used to create a new segment when the transaction is committed.
|
||||
** However if this limit is reached midway through a transaction, a new
|
||||
** segment is created and the hash table cleared immediately.
|
||||
*/
|
||||
#define FTS3_MAX_PENDING_DATA (1*1024*1024)
|
||||
|
||||
/*
|
||||
** Macro to return the number of elements in an array. SQLite has a
|
||||
** similar macro called ArraySize(). Use a different name to avoid
|
||||
** a collision when building an amalgamation with built-in FTS3.
|
||||
*/
|
||||
/* X must be a true array (not a pointer). The argument is parenthesised
** before it is indexed so that any array-valued expression may be passed. */
#define SizeofArray(X) ((int)(sizeof(X)/sizeof((X)[0])))
|
||||
|
||||
/*
|
||||
** Maximum length of a varint encoded integer. The varint format is different
|
||||
** from that used by SQLite, so the maximum length is 10, not 9.
|
||||
*/
|
||||
#define FTS3_VARINT_MAX 10
|
||||
|
||||
typedef struct Fts3Table Fts3Table;
|
||||
typedef struct Fts3Cursor Fts3Cursor;
|
||||
typedef struct Fts3Expr Fts3Expr;
|
||||
typedef struct Fts3Phrase Fts3Phrase;
|
||||
typedef struct Fts3SegReader Fts3SegReader;
|
||||
|
||||
/*
|
||||
** A connection to a fulltext index is an instance of the following
|
||||
** structure. The xCreate and xConnect methods create an instance
|
||||
** of this structure and xDestroy and xDisconnect free that instance.
|
||||
** All other methods receive a pointer to the structure as one of their
|
||||
** arguments.
|
||||
*/
|
||||
struct Fts3Table {
|
||||
sqlite3_vtab base; /* Base class used by SQLite core */
|
||||
sqlite3 *db; /* The database connection */
|
||||
const char *zDb; /* logical database name */
|
||||
const char *zName; /* virtual table name */
|
||||
int nColumn; /* number of columns in virtual table */
|
||||
char **azColumn; /* column names. malloced */
|
||||
sqlite3_tokenizer *pTokenizer; /* tokenizer for inserts and queries */
|
||||
|
||||
/* Precompiled statements used by the implementation. Each of these
|
||||
** statements is run and reset within a single virtual table API call.
|
||||
*/
|
||||
sqlite3_stmt *aStmt[18];
|
||||
|
||||
/* Pointer to string containing the SQL:
|
||||
**
|
||||
** "SELECT block FROM %_segments WHERE blockid BETWEEN ? AND ?
|
||||
** ORDER BY blockid"
|
||||
*/
|
||||
char *zSelectLeaves;
|
||||
|
||||
/* The following hash table is used to buffer pending index updates during
|
||||
** transactions. Variable nPendingData estimates the memory size of the
|
||||
** pending data, including hash table overhead, but not malloc overhead.
|
||||
** When nPendingData exceeds FTS3_MAX_PENDING_DATA, the buffer is flushed
|
||||
** automatically. Variable iPrevDocid is the docid of the most recently
|
||||
** inserted record.
|
||||
*/
|
||||
int nPendingData;
|
||||
sqlite_int64 iPrevDocid;
|
||||
Fts3Hash pendingTerms;
|
||||
};
|
||||
|
||||
/*
|
||||
** When the core wants to read from the virtual table, it creates a
|
||||
** virtual table cursor (an instance of the following structure) using
|
||||
** the xOpen method. Cursors are destroyed using the xClose method.
|
||||
*/
|
||||
struct Fts3Cursor {
|
||||
sqlite3_vtab_cursor base; /* Base class used by SQLite core */
|
||||
int eType; /* Search strategy (see below) */
|
||||
sqlite3_stmt *pStmt; /* Prepared statement in use by the cursor */
|
||||
int isEof; /* True if at End Of Results */
|
||||
Fts3Expr *pExpr; /* Parsed MATCH query string */
|
||||
sqlite3_int64 iPrevId; /* Previous id read from aDoclist */
|
||||
char *pNextId; /* Pointer into the body of aDoclist */
|
||||
char *aDoclist; /* List of docids for full-text queries */
|
||||
int nDoclist; /* Size of buffer at aDoclist */
|
||||
};
|
||||
|
||||
/*
|
||||
** A "phrase" is a sequence of one or more tokens that must match in
|
||||
** sequence. A single token is the base case and the most common case.
|
||||
** For a sequence of tokens contained in "...", nToken will be the number
|
||||
** of tokens in the string.
|
||||
*/
|
||||
struct Fts3Phrase {
  int nToken;                /* Number of tokens in the phrase */
  int iColumn;               /* Index of column this phrase must match */
  int isNot;                 /* Phrase prefixed by unary not (-) operator */
  struct PhraseToken {
    char *z;                 /* Text of the token */
    int n;                   /* Number of bytes in buffer pointed to by z */
    int isPrefix;            /* True if token ends with a "*" character */
  } aToken[1];               /* One entry for each token in the phrase */
};
|
||||
|
||||
/*
|
||||
** A tree of these objects forms the RHS of a MATCH operator.
|
||||
*/
|
||||
struct Fts3Expr {
|
||||
int eType; /* One of the FTSQUERY_XXX values defined below */
|
||||
int nNear; /* Valid if eType==FTSQUERY_NEAR */
|
||||
Fts3Expr *pParent; /* pParent->pLeft==this or pParent->pRight==this */
|
||||
Fts3Expr *pLeft; /* Left operand */
|
||||
Fts3Expr *pRight; /* Right operand */
|
||||
Fts3Phrase *pPhrase; /* Valid if eType==FTSQUERY_PHRASE */
|
||||
};
|
||||
|
||||
/*
|
||||
** Candidate values for Fts3Expr.eType. Note that the order of the first
|
||||
** four values is in order of precedence when parsing expressions. For
|
||||
** example, the following:
|
||||
**
|
||||
** "a OR b AND c NOT d NEAR e"
|
||||
**
|
||||
** is equivalent to:
|
||||
**
|
||||
** "a OR (b AND (c NOT (d NEAR e)))"
|
||||
*/
|
||||
#define FTSQUERY_NEAR 1
|
||||
#define FTSQUERY_NOT 2
|
||||
#define FTSQUERY_AND 3
|
||||
#define FTSQUERY_OR 4
|
||||
#define FTSQUERY_PHRASE 5
|
||||
|
||||
|
||||
/* fts3_init.c */
|
||||
int sqlite3Fts3DeleteVtab(int, sqlite3_vtab *);
|
||||
int sqlite3Fts3InitVtab(int, sqlite3*, void*, int, const char*const*,
|
||||
sqlite3_vtab **, char **);
|
||||
|
||||
/* fts3_write.c */
|
||||
int sqlite3Fts3UpdateMethod(sqlite3_vtab*,int,sqlite3_value**,sqlite3_int64*);
|
||||
int sqlite3Fts3PendingTermsFlush(Fts3Table *);
|
||||
void sqlite3Fts3PendingTermsClear(Fts3Table *);
|
||||
int sqlite3Fts3Optimize(Fts3Table *);
|
||||
|
||||
/* Flags allowed as part of the 4th argument to SegmentReaderIterate() */
|
||||
#define FTS3_SEGMENT_REQUIRE_POS 0x00000001
|
||||
#define FTS3_SEGMENT_IGNORE_EMPTY 0x00000002
|
||||
#define FTS3_SEGMENT_COLUMN_FILTER 0x00000004
|
||||
|
||||
int sqlite3Fts3SegReaderNew(Fts3Table *,int, sqlite3_int64,
|
||||
sqlite3_int64, sqlite3_int64, const char *, int, Fts3SegReader**);
|
||||
void sqlite3Fts3SegReaderFree(Fts3SegReader *);
|
||||
int sqlite3Fts3SegReaderIterate(
|
||||
Fts3Table *, Fts3SegReader **, int, int, int,
|
||||
int (*)(Fts3Table *, void *, char *, int, char *, int), void *
|
||||
);
|
||||
|
||||
/* fts3.c */
|
||||
int sqlite3Fts3PutVarint(char *, sqlite3_int64);
|
||||
int sqlite3Fts3GetVarint(const char *, sqlite_int64 *);
|
||||
int sqlite3Fts3GetVarint32(const char *, int *);
|
||||
int sqlite3Fts3VarintLen(sqlite3_uint64);
|
||||
void sqlite3Fts3Dequote(char *);
|
||||
|
||||
/* Valid values for the second argument to sqlite3Fts3SqlStmt() */
|
||||
#define FTS3_SQL_GET_ALL_SEGDIRS 11
|
||||
#define FTS3_SQL_GET_BLOCK 17
|
||||
int sqlite3Fts3SqlStmt(Fts3Table *, int, sqlite3_stmt **);
|
||||
|
||||
/* fts3_tokenizer.c */
|
||||
const char *sqlite3Fts3NextToken(const char *, int *);
|
||||
int sqlite3Fts3InitHashTable(sqlite3 *, Fts3Hash *, const char *);
|
||||
int sqlite3Fts3InitTokenizer(Fts3Hash *pHash,
|
||||
const char *, sqlite3_tokenizer **, const char **, char **
|
||||
);
|
||||
|
||||
/* fts3_snippet.c */
|
||||
void sqlite3Fts3Offsets(sqlite3_context*, Fts3Cursor*);
|
||||
void sqlite3Fts3Snippet(sqlite3_context*, Fts3Cursor*,
|
||||
const char *, const char *, const char *
|
||||
);
|
||||
|
||||
/* fts3_expr.c */
|
||||
int sqlite3Fts3ExprParse(sqlite3_tokenizer *,
|
||||
char **, int, int, const char *, int, Fts3Expr **
|
||||
);
|
||||
void sqlite3Fts3ExprFree(Fts3Expr *);
|
||||
#ifdef SQLITE_TEST
|
||||
void sqlite3Fts3ExprInitTestInterface(sqlite3 *db);
|
||||
#endif
|
||||
|
||||
#endif /* _FTSINT_H */
|
@ -13,8 +13,7 @@
|
||||
** This module contains code that implements a parser for fts3 query strings
|
||||
** (the right-hand argument to the MATCH operator). Because the supported
|
||||
** syntax is relatively simple, the whole tokenizer/parser system is
|
||||
** hand-coded. The public interface to this module is declared in source
|
||||
** code file "fts3_expr.h".
|
||||
** hand-coded.
|
||||
*/
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
|
||||
|
||||
@ -40,7 +39,29 @@
|
||||
** to zero causes the module to use the old syntax. If it is set to
|
||||
** non-zero the new syntax is activated. This is so both syntaxes can
|
||||
** be tested using a single build of testfixture.
|
||||
**
|
||||
** The following describes the syntax supported by the fts3 MATCH
|
||||
** operator in a similar format to that used by the lemon parser
|
||||
** generator. This module does not actually use lemon; it uses a
|
||||
** custom parser.
|
||||
**
|
||||
** query ::= andexpr (OR andexpr)*.
|
||||
**
|
||||
** andexpr ::= notexpr (AND? notexpr)*.
|
||||
**
|
||||
** notexpr ::= nearexpr (NOT nearexpr|-TOKEN)*.
|
||||
** notexpr ::= LP query RP.
|
||||
**
|
||||
** nearexpr ::= phrase (NEAR distance_opt nearexpr)*.
|
||||
**
|
||||
** distance_opt ::= .
|
||||
** distance_opt ::= / INTEGER.
|
||||
**
|
||||
** phrase ::= TOKEN.
|
||||
** phrase ::= COLUMN:TOKEN.
|
||||
** phrase ::= "TOKEN TOKEN TOKEN...".
|
||||
*/
|
||||
|
||||
#ifdef SQLITE_TEST
|
||||
int sqlite3_fts3_enable_parentheses = 0;
|
||||
#else
|
||||
@ -56,8 +77,7 @@ int sqlite3_fts3_enable_parentheses = 0;
|
||||
*/
|
||||
#define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10
|
||||
|
||||
#include "fts3_expr.h"
|
||||
#include "sqlite3.h"
|
||||
#include "fts3Int.h"
|
||||
#include <ctype.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
@ -354,6 +374,9 @@ static int getNextNode(
|
||||
|| cNext=='"' || cNext=='(' || cNext==')' || cNext==0
|
||||
){
|
||||
pRet = (Fts3Expr *)sqlite3_malloc(sizeof(Fts3Expr));
|
||||
if( !pRet ){
|
||||
return SQLITE_NOMEM;
|
||||
}
|
||||
memset(pRet, 0, sizeof(Fts3Expr));
|
||||
pRet->eType = pKey->eType;
|
||||
pRet->nNear = nNear;
|
||||
|
@ -1,96 +0,0 @@
|
||||
/*
|
||||
** 2008 Nov 28
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
******************************************************************************
|
||||
**
|
||||
*/
|
||||
|
||||
#include "fts3_tokenizer.h"
|
||||
#include "sqlite3.h"
|
||||
|
||||
/*
|
||||
** The following describes the syntax supported by the fts3 MATCH
|
||||
** operator in a similar format to that used by the lemon parser
|
||||
** generator. This module does not use actually lemon, it uses a
|
||||
** custom parser.
|
||||
**
|
||||
** query ::= andexpr (OR andexpr)*.
|
||||
**
|
||||
** andexpr ::= notexpr (AND? notexpr)*.
|
||||
**
|
||||
** notexpr ::= nearexpr (NOT nearexpr|-TOKEN)*.
|
||||
** notexpr ::= LP query RP.
|
||||
**
|
||||
** nearexpr ::= phrase (NEAR distance_opt nearexpr)*.
|
||||
**
|
||||
** distance_opt ::= .
|
||||
** distance_opt ::= / INTEGER.
|
||||
**
|
||||
** phrase ::= TOKEN.
|
||||
** phrase ::= COLUMN:TOKEN.
|
||||
** phrase ::= "TOKEN TOKEN TOKEN...".
|
||||
*/
|
||||
|
||||
typedef struct Fts3Expr Fts3Expr;
|
||||
typedef struct Fts3Phrase Fts3Phrase;
|
||||
|
||||
/*
|
||||
** A "phrase" is a sequence of one or more tokens that must match in
|
||||
** sequence. A single token is the base case and the most common case.
|
||||
** For a sequence of tokens contained in "...", nToken will be the number
|
||||
** of tokens in the string.
|
||||
*/
|
||||
struct Fts3Phrase {
|
||||
int nToken; /* Number of tokens in the phrase */
|
||||
int iColumn; /* Index of column this phrase must match */
|
||||
int isNot; /* Phrase prefixed by unary not (-) operator */
|
||||
struct PhraseToken {
|
||||
char *z; /* Text of the token */
|
||||
int n; /* Number of bytes in buffer pointed to by z */
|
||||
int isPrefix; /* True if token ends in with a "*" character */
|
||||
} aToken[1]; /* One entry for each token in the phrase */
|
||||
};
|
||||
|
||||
/*
|
||||
** A tree of these objects forms the RHS of a MATCH operator.
|
||||
*/
|
||||
struct Fts3Expr {
|
||||
int eType; /* One of the FTSQUERY_XXX values defined below */
|
||||
int nNear; /* Valid if eType==FTSQUERY_NEAR */
|
||||
Fts3Expr *pParent; /* pParent->pLeft==this or pParent->pRight==this */
|
||||
Fts3Expr *pLeft; /* Left operand */
|
||||
Fts3Expr *pRight; /* Right operand */
|
||||
Fts3Phrase *pPhrase; /* Valid if eType==FTSQUERY_PHRASE */
|
||||
};
|
||||
|
||||
int sqlite3Fts3ExprParse(sqlite3_tokenizer *, char **, int, int,
|
||||
const char *, int, Fts3Expr **);
|
||||
void sqlite3Fts3ExprFree(Fts3Expr *);
|
||||
|
||||
/*
|
||||
** Candidate values for Fts3Query.eType. Note that the order of the first
|
||||
** four values is in order of precedence when parsing expressions. For
|
||||
** example, the following:
|
||||
**
|
||||
** "a OR b AND c NOT d NEAR e"
|
||||
**
|
||||
** is equivalent to:
|
||||
**
|
||||
** "a OR (b AND (c NOT (d NEAR e)))"
|
||||
*/
|
||||
#define FTSQUERY_NEAR 1
|
||||
#define FTSQUERY_NOT 2
|
||||
#define FTSQUERY_AND 3
|
||||
#define FTSQUERY_OR 4
|
||||
#define FTSQUERY_PHRASE 5
|
||||
|
||||
#ifdef SQLITE_TEST
|
||||
void sqlite3Fts3ExprInitTestInterface(sqlite3 *db);
|
||||
#endif
|
@ -56,7 +56,7 @@ static void fts3HashFree(void *p){
|
||||
** true if the hash table should make its own private copy of keys and
|
||||
** false if it should just use the supplied pointer.
|
||||
*/
|
||||
void sqlite3Fts3HashInit(fts3Hash *pNew, int keyClass, int copyKey){
|
||||
void sqlite3Fts3HashInit(Fts3Hash *pNew, int keyClass, int copyKey){
|
||||
assert( pNew!=0 );
|
||||
assert( keyClass>=FTS3_HASH_STRING && keyClass<=FTS3_HASH_BINARY );
|
||||
pNew->keyClass = keyClass;
|
||||
@ -71,8 +71,8 @@ void sqlite3Fts3HashInit(fts3Hash *pNew, int keyClass, int copyKey){
|
||||
** Call this routine to delete a hash table or to reset a hash table
|
||||
** to the empty state.
|
||||
*/
|
||||
void sqlite3Fts3HashClear(fts3Hash *pH){
|
||||
fts3HashElem *elem; /* For looping over all elements of the table */
|
||||
void sqlite3Fts3HashClear(Fts3Hash *pH){
|
||||
Fts3HashElem *elem; /* For looping over all elements of the table */
|
||||
|
||||
assert( pH!=0 );
|
||||
elem = pH->first;
|
||||
@ -81,7 +81,7 @@ void sqlite3Fts3HashClear(fts3Hash *pH){
|
||||
pH->ht = 0;
|
||||
pH->htsize = 0;
|
||||
while( elem ){
|
||||
fts3HashElem *next_elem = elem->next;
|
||||
Fts3HashElem *next_elem = elem->next;
|
||||
if( pH->copyKey && elem->pKey ){
|
||||
fts3HashFree(elem->pKey);
|
||||
}
|
||||
@ -164,11 +164,11 @@ static int (*ftsCompareFunction(int keyClass))(const void*,int,const void*,int){
|
||||
/* Link an element into the hash table
|
||||
*/
|
||||
static void fts3HashInsertElement(
|
||||
fts3Hash *pH, /* The complete hash table */
|
||||
Fts3Hash *pH, /* The complete hash table */
|
||||
struct _fts3ht *pEntry, /* The entry into which pNew is inserted */
|
||||
fts3HashElem *pNew /* The element to be inserted */
|
||||
Fts3HashElem *pNew /* The element to be inserted */
|
||||
){
|
||||
fts3HashElem *pHead; /* First element already in pEntry */
|
||||
Fts3HashElem *pHead; /* First element already in pEntry */
|
||||
pHead = pEntry->chain;
|
||||
if( pHead ){
|
||||
pNew->next = pHead;
|
||||
@ -191,9 +191,9 @@ static void fts3HashInsertElement(
|
||||
** "new_size" must be a power of 2. The hash table might fail
|
||||
** to resize if sqliteMalloc() fails.
|
||||
*/
|
||||
static void fts3Rehash(fts3Hash *pH, int new_size){
|
||||
static void fts3Rehash(Fts3Hash *pH, int new_size){
|
||||
struct _fts3ht *new_ht; /* The new hash table */
|
||||
fts3HashElem *elem, *next_elem; /* For looping over existing elements */
|
||||
Fts3HashElem *elem, *next_elem; /* For looping over existing elements */
|
||||
int (*xHash)(const void*,int); /* The hash function */
|
||||
|
||||
assert( (new_size & (new_size-1))==0 );
|
||||
@ -214,13 +214,13 @@ static void fts3Rehash(fts3Hash *pH, int new_size){
|
||||
** hash table that matches the given key. The hash for this key has
|
||||
** already been computed and is passed as the 4th parameter.
|
||||
*/
|
||||
static fts3HashElem *fts3FindElementByHash(
|
||||
const fts3Hash *pH, /* The pH to be searched */
|
||||
static Fts3HashElem *fts3FindElementByHash(
|
||||
const Fts3Hash *pH, /* The pH to be searched */
|
||||
const void *pKey, /* The key we are searching for */
|
||||
int nKey,
|
||||
int h /* The hash for this key. */
|
||||
){
|
||||
fts3HashElem *elem; /* Used to loop thru the element list */
|
||||
Fts3HashElem *elem; /* Used to loop thru the element list */
|
||||
int count; /* Number of elements left to test */
|
||||
int (*xCompare)(const void*,int,const void*,int); /* comparison function */
|
||||
|
||||
@ -243,8 +243,8 @@ static fts3HashElem *fts3FindElementByHash(
|
||||
** element and a hash on the element's key.
|
||||
*/
|
||||
static void fts3RemoveElementByHash(
|
||||
fts3Hash *pH, /* The pH containing "elem" */
|
||||
fts3HashElem* elem, /* The element to be removed from the pH */
|
||||
Fts3Hash *pH, /* The pH containing "elem" */
|
||||
Fts3HashElem* elem, /* The element to be removed from the pH */
|
||||
int h /* Hash value for the element */
|
||||
){
|
||||
struct _fts3ht *pEntry;
|
||||
@ -280,9 +280,9 @@ static void fts3RemoveElementByHash(
|
||||
** that matches pKey,nKey. Return the data for this element if it is
|
||||
** found, or NULL if there is no match.
|
||||
*/
|
||||
void *sqlite3Fts3HashFind(const fts3Hash *pH, const void *pKey, int nKey){
|
||||
void *sqlite3Fts3HashFind(const Fts3Hash *pH, const void *pKey, int nKey){
|
||||
int h; /* A hash on key */
|
||||
fts3HashElem *elem; /* The element that matches key */
|
||||
Fts3HashElem *elem; /* The element that matches key */
|
||||
int (*xHash)(const void*,int); /* The hash function */
|
||||
|
||||
if( pH==0 || pH->ht==0 ) return 0;
|
||||
@ -310,15 +310,15 @@ void *sqlite3Fts3HashFind(const fts3Hash *pH, const void *pKey, int nKey){
|
||||
** element corresponding to "key" is removed from the hash table.
|
||||
*/
|
||||
void *sqlite3Fts3HashInsert(
|
||||
fts3Hash *pH, /* The hash table to insert into */
|
||||
Fts3Hash *pH, /* The hash table to insert into */
|
||||
const void *pKey, /* The key */
|
||||
int nKey, /* Number of bytes in the key */
|
||||
void *data /* The data */
|
||||
){
|
||||
int hraw; /* Raw hash value of the key */
|
||||
int h; /* the hash of the key modulo hash table size */
|
||||
fts3HashElem *elem; /* Used to loop thru the element list */
|
||||
fts3HashElem *new_elem; /* New element added to the pH */
|
||||
Fts3HashElem *elem; /* Used to loop thru the element list */
|
||||
Fts3HashElem *new_elem; /* New element added to the pH */
|
||||
int (*xHash)(const void*,int); /* The hash function */
|
||||
|
||||
assert( pH!=0 );
|
||||
@ -345,7 +345,7 @@ void *sqlite3Fts3HashInsert(
|
||||
return data;
|
||||
}
|
||||
}
|
||||
new_elem = (fts3HashElem*)fts3HashMalloc( sizeof(fts3HashElem) );
|
||||
new_elem = (Fts3HashElem*)fts3HashMalloc( sizeof(Fts3HashElem) );
|
||||
if( new_elem==0 ) return data;
|
||||
if( pH->copyKey && pKey!=0 ){
|
||||
new_elem->pKey = fts3HashMalloc( nKey );
|
||||
|
@ -18,8 +18,8 @@
|
||||
#define _FTS3_HASH_H_
|
||||
|
||||
/* Forward declarations of structures. */
|
||||
typedef struct fts3Hash fts3Hash;
|
||||
typedef struct fts3HashElem fts3HashElem;
|
||||
typedef struct Fts3Hash Fts3Hash;
|
||||
typedef struct Fts3HashElem Fts3HashElem;
|
||||
|
||||
/* A complete hash table is an instance of the following structure.
|
||||
** The internals of this structure are intended to be opaque -- client
|
||||
@ -29,15 +29,15 @@ typedef struct fts3HashElem fts3HashElem;
|
||||
** accessing this structure are really macros, so we can't really make
|
||||
** this structure opaque.
|
||||
*/
|
||||
struct fts3Hash {
|
||||
struct Fts3Hash {
|
||||
char keyClass; /* HASH_INT, _POINTER, _STRING, _BINARY */
|
||||
char copyKey; /* True if copy of key made on insert */
|
||||
int count; /* Number of entries in this table */
|
||||
fts3HashElem *first; /* The first element of the array */
|
||||
Fts3HashElem *first; /* The first element of the array */
|
||||
int htsize; /* Number of buckets in the hash table */
|
||||
struct _fts3ht { /* the hash table */
|
||||
int count; /* Number of entries with this hash */
|
||||
fts3HashElem *chain; /* Pointer to first entry with this hash */
|
||||
Fts3HashElem *chain; /* Pointer to first entry with this hash */
|
||||
} *ht;
|
||||
};
|
||||
|
||||
@ -47,8 +47,8 @@ struct fts3Hash {
|
||||
** Again, this structure is intended to be opaque, but it can't really
|
||||
** be opaque because it is used by macros.
|
||||
*/
|
||||
struct fts3HashElem {
|
||||
fts3HashElem *next, *prev; /* Next and previous elements in the table */
|
||||
struct Fts3HashElem {
|
||||
Fts3HashElem *next, *prev; /* Next and previous elements in the table */
|
||||
void *data; /* Data associated with this element */
|
||||
void *pKey; int nKey; /* Key associated with this element */
|
||||
};
|
||||
@ -71,10 +71,10 @@ struct fts3HashElem {
|
||||
/*
|
||||
** Access routines. To delete, insert a NULL pointer.
|
||||
*/
|
||||
void sqlite3Fts3HashInit(fts3Hash*, int keytype, int copyKey);
|
||||
void *sqlite3Fts3HashInsert(fts3Hash*, const void *pKey, int nKey, void *pData);
|
||||
void *sqlite3Fts3HashFind(const fts3Hash*, const void *pKey, int nKey);
|
||||
void sqlite3Fts3HashClear(fts3Hash*);
|
||||
void sqlite3Fts3HashInit(Fts3Hash*, int keytype, int copyKey);
|
||||
void *sqlite3Fts3HashInsert(Fts3Hash*, const void *pKey, int nKey, void *pData);
|
||||
void *sqlite3Fts3HashFind(const Fts3Hash*, const void *pKey, int nKey);
|
||||
void sqlite3Fts3HashClear(Fts3Hash*);
|
||||
|
||||
/*
|
||||
** Shorthand for the functions above
|
||||
@ -88,8 +88,8 @@ void sqlite3Fts3HashClear(fts3Hash*);
|
||||
** Macros for looping over all elements of a hash table. The idiom is
|
||||
** like this:
|
||||
**
|
||||
** fts3Hash h;
|
||||
** fts3HashElem *p;
|
||||
** Fts3Hash h;
|
||||
** Fts3HashElem *p;
|
||||
** ...
|
||||
** for(p=fts3HashFirst(&h); p; p=fts3HashNext(p)){
|
||||
** SomeStructure *pData = fts3HashData(p);
|
||||
|
750
ext/fts3/fts3_snippet.c
Normal file
750
ext/fts3/fts3_snippet.c
Normal file
@ -0,0 +1,750 @@
|
||||
/*
|
||||
** 2009 Oct 23
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "fts3Int.h"
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <ctype.h>
|
||||
|
||||
typedef struct Snippet Snippet;

/*
** A Snippet object accumulates the matching-word offset information
** (aMatch[]) together with the text renderings built from it: the
** offsets string (zOffset) and the snippet text (zSnippet).
*/
struct Snippet {
  int nMatch;                /* Total number of matches */
  int nAlloc;                /* Space allocated for aMatch[] */
  struct snippetMatch {      /* One entry for each matching term */
    char snStatus;           /* Status flag for use while constructing snippets */
    short int iCol;          /* The column that contains the match */
    short int iTerm;         /* The index in Query.pTerms[] of the matching term */
    int iToken;              /* The index of the matching document token */
    short int nByte;         /* Number of bytes in the term */
    int iStart;              /* The offset to the first character of the term */
  } *aMatch;                 /* Points to space obtained from malloc */
  char *zOffset;             /* Text rendering of aMatch[] */
  int nOffset;               /* strlen(zOffset) */
  char *zSnippet;            /* Snippet text */
  int nSnippet;              /* strlen(zSnippet) */
};
|
||||
|
||||
|
||||
/* It is not safe to call isspace(), tolower(), or isalnum() on
|
||||
** hi-bit-set characters. This is the same solution used in the
|
||||
** tokenizer.
|
||||
*/
|
||||
/* TODO(shess) The snippet-generation code should be using the
|
||||
** tokenizer-generated tokens rather than doing its own local
|
||||
** tokenization.
|
||||
*/
|
||||
/* TODO(shess) Is __isascii() a portable version of (c&0x80)==0? */
|
||||
/* Return the isspace() result for a 7-bit character; any byte with the
** high bit set is reported as "not whitespace" without calling isspace(). */
static int safe_isspace(char c){
  if( c&0x80 ){
    return 0;
  }
  return isspace(c);
}
|
||||
/* Return the isalnum() result for a 7-bit character; any byte with the
** high bit set is reported as "not alphanumeric" without calling isalnum(). */
static int safe_isalnum(char c){
  if( c&0x80 ){
    return 0;
  }
  return isalnum(c);
}
|
||||
|
||||
/*******************************************************************/
|
||||
/* DataBuffer is used to collect data into a buffer in piecemeal
|
||||
** fashion. It implements the usual distinction between amount of
|
||||
** data currently stored (nData) and buffer capacity (nCapacity).
|
||||
**
|
||||
** dataBufferInit - create a buffer with given initial capacity.
|
||||
** dataBufferReset - forget buffer's data, retaining capacity.
|
||||
** dataBufferSwap - swap contents of two buffers.
|
||||
** dataBufferExpand - expand capacity without adding data.
|
||||
** dataBufferAppend - append data.
|
||||
** dataBufferAppend2 - append two pieces of data at once.
|
||||
** dataBufferReplace - replace buffer's data.
|
||||
*/
|
||||
typedef struct DataBuffer {
  char *pData;               /* Pointer to malloc'ed buffer. */
  int nCapacity;             /* Size of pData buffer. */
  int nData;                 /* End of data loaded into pData. */
} DataBuffer;
|
||||
|
||||
/* Initialise pBuffer as an empty buffer of nCapacity bytes. No allocation
** is made when nCapacity is zero.
**
** NOTE(review): the sqlite3_malloc() result is stored unchecked and this
** function returns void, so a failed allocation leaves pData==NULL with a
** non-zero nCapacity.
*/
static void dataBufferInit(DataBuffer *pBuffer, int nCapacity){
  assert( nCapacity>=0 );
  if( nCapacity==0 ){
    pBuffer->pData = NULL;
  }else{
    pBuffer->pData = sqlite3_malloc(nCapacity);
  }
  pBuffer->nCapacity = nCapacity;
  pBuffer->nData = 0;
}
|
||||
/* Discard the buffered data. The allocation and nCapacity are untouched. */
static void dataBufferReset(DataBuffer *pBuffer){
  pBuffer->nData = 0;
}
|
||||
/* Ensure that at least nAddCapacity bytes can be stored beyond the nData
** bytes already held by pBuffer, growing the allocation if required.
**
** sqlite3_realloc() does not release the prior allocation when it fails,
** so the old buffer is freed explicitly in that case rather than being
** leaked when pData is overwritten with NULL.
**
** NOTE(review): this function returns void, so callers still cannot
** detect the failure; after a failed resize pData is NULL while nCapacity
** holds the requested size (the same state the previous code left).
*/
static void dataBufferExpand(DataBuffer *pBuffer, int nAddCapacity){
  assert( nAddCapacity>0 );
  /* TODO(shess) Consider expanding more aggressively.  Note that the
  ** underlying malloc implementation may take care of such things for
  ** us already.
  */
  if( pBuffer->nData+nAddCapacity>pBuffer->nCapacity ){
    int nNew = pBuffer->nData+nAddCapacity;
    char *pNew = sqlite3_realloc(pBuffer->pData, nNew);
    if( pNew==0 ){
      sqlite3_free(pBuffer->pData);
    }
    pBuffer->pData = pNew;
    pBuffer->nCapacity = nNew;
  }
}
|
||||
/* Append the nSource bytes at pSource to the end of pBuffer's data,
** expanding the buffer first. nSource must be positive. */
static void dataBufferAppend(
  DataBuffer *pBuffer,
  const char *pSource,
  int nSource
){
  char *pDest;
  assert( nSource>0 && pSource!=NULL );
  dataBufferExpand(pBuffer, nSource);
  pDest = pBuffer->pData + pBuffer->nData;
  memcpy(pDest, pSource, nSource);
  pBuffer->nData = pBuffer->nData + nSource;
}
|
||||
/* Append two pieces of data, pSource1 followed by pSource2, to pBuffer
** with a single expansion. Both lengths must be positive. */
static void dataBufferAppend2(
  DataBuffer *pBuffer,
  const char *pSource1, int nSource1,
  const char *pSource2, int nSource2
){
  char *pDest;
  assert( nSource1>0 && pSource1!=NULL );
  assert( nSource2>0 && pSource2!=NULL );
  dataBufferExpand(pBuffer, nSource1+nSource2);
  pDest = pBuffer->pData + pBuffer->nData;
  memcpy(pDest, pSource1, nSource1);
  memcpy(pDest+nSource1, pSource2, nSource2);
  pBuffer->nData += nSource1+nSource2;
}
|
||||
/* Replace the contents of pBuffer with the nSource bytes at pSource:
** the buffer is reset and the new data is then appended. */
static void dataBufferReplace(
  DataBuffer *pBuffer,
  const char *pSource,
  int nSource
){
  dataBufferReset(pBuffer);
  dataBufferAppend(pBuffer, pSource, nSource);
}
|
||||
|
||||
|
||||
/* StringBuffer is a null-terminated version of DataBuffer. */
|
||||
typedef struct StringBuffer {
|
||||
DataBuffer b; /* Includes null terminator. */
|
||||
} StringBuffer;
|
||||
|
||||
/* Initialise sb to the empty string: a 100-byte buffer is requested and
** a lone nul terminator is stored in it, so that b.nData==1. */
static void initStringBuffer(StringBuffer *sb){
  DataBuffer *pBuf = &sb->b;
  dataBufferInit(pBuf, 100);
  dataBufferReplace(pBuf, "", 1);
}
|
||||
/* Return the length of the string held in sb, i.e. the number of bytes
** stored less one for the nul terminator. */
static int stringBufferLength(StringBuffer *sb){
  int nStored = sb->b.nData;
  return nStored-1;
}
|
||||
/* Return a pointer to the underlying buffer of sb. */
static char *stringBufferData(StringBuffer *sb){
  char *zData = sb->b.pData;
  return zData;
}
|
||||
|
||||
/* Append the first nFrom bytes of zFrom to sb and re-terminate the
** string. A non-positive nFrom leaves sb unchanged. */
static void nappend(StringBuffer *sb, const char *zFrom, int nFrom){
  assert( sb->b.nData>0 );
  if( nFrom<=0 ){
    return;
  }
  /* Step back over the existing terminator so that the new text
  ** overwrites it; a fresh terminator is appended after the text. */
  sb->b.nData--;
  dataBufferAppend2(&sb->b, zFrom, nFrom, "", 1);
}
|
||||
/* Append the nul-terminated string zFrom to sb. */
static void append(StringBuffer *sb, const char *zFrom){
  int nFrom = strlen(zFrom);
  nappend(sb, zFrom, nFrom);
}
|
||||
|
||||
/* Return 1 if the string in p is non-empty and its final character is
** whitespace according to safe_isspace(); return 0 otherwise. */
static int endsInWhiteSpace(StringBuffer *p){
  if( stringBufferLength(p)<=0 ){
    return 0;
  }
  return safe_isspace(stringBufferData(p)[stringBufferLength(p)-1])!=0;
}
|
||||
|
||||
/* If the StringBuffer ends in something other than white space, add a
|
||||
** single space character to the end.
|
||||
*/
|
||||
static void appendWhiteSpace(StringBuffer *p){
  /* Nothing is added to an empty string or to one that already ends in
  ** whitespace. */
  if( stringBufferLength(p)!=0 && !endsInWhiteSpace(p) ){
    append(p, " ");
  }
}
|
||||
|
||||
/* Remove white space from the end of the StringBuffer */
|
||||
static void trimWhiteSpace(StringBuffer *p){
  while( endsInWhiteSpace(p) ){
    /* Shrink the stored byte count by one, then write the terminator
    ** over what is now the last stored byte (the trailing space). */
    p->b.nData--;
    p->b.pData[p->b.nData-1] = '\0';
  }
}
|
||||
|
||||
|
||||
/*
|
||||
** Release all memory associated with the Snippet structure passed as
|
||||
** an argument.
|
||||
*/
|
||||
static void fts3SnippetFree(Snippet *p){
  /* Release the three buffers owned by the structure first ... */
  sqlite3_free(p->zSnippet);
  sqlite3_free(p->zOffset);
  sqlite3_free(p->aMatch);
  /* ... and then the structure itself. */
  sqlite3_free(p);
}
|
||||
|
||||
/*
|
||||
** Append a single entry to the p->aMatch[] log.
|
||||
*/
|
||||
static void snippetAppendMatch(
|
||||
Snippet *p, /* Append the entry to this snippet */
|
||||
int iCol, int iTerm, /* The column and query term */
|
||||
int iToken, /* Matching token in document */
|
||||
int iStart, int nByte /* Offset and size of the match */
|
||||
){
|
||||
int i;
|
||||
struct snippetMatch *pMatch;
|
||||
if( p->nMatch+1>=p->nAlloc ){
|
||||
p->nAlloc = p->nAlloc*2 + 10;
|
||||
p->aMatch = sqlite3_realloc(p->aMatch, p->nAlloc*sizeof(p->aMatch[0]) );
|
||||
if( p->aMatch==0 ){
|
||||
p->nMatch = 0;
|
||||
p->nAlloc = 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
i = p->nMatch++;
|
||||
pMatch = &p->aMatch[i];
|
||||
pMatch->iCol = iCol;
|
||||
pMatch->iTerm = iTerm;
|
||||
pMatch->iToken = iToken;
|
||||
pMatch->iStart = iStart;
|
||||
pMatch->nByte = nByte;
|
||||
}
|
||||
|
||||
/*
|
||||
** Sizing information for the circular buffer used in snippetOffsetsOfColumn()
|
||||
*/
|
||||
#define FTS3_ROTOR_SZ (32)
|
||||
#define FTS3_ROTOR_MASK (FTS3_ROTOR_SZ-1)
|
||||
|
||||
/*
|
||||
** Function to iterate through the tokens of a compiled expression.
|
||||
**
|
||||
** Except, skip all tokens on the right-hand side of a NOT operator.
|
||||
** This function is used to find tokens as part of snippet and offset
|
||||
** generation and we do nt want snippets and offsets to report matches
|
||||
** for tokens on the RHS of a NOT.
|
||||
*/
|
||||
static int fts3NextExprToken(Fts3Expr **ppExpr, int *piToken){
|
||||
Fts3Expr *p = *ppExpr;
|
||||
int iToken = *piToken;
|
||||
if( iToken<0 ){
|
||||
/* In this case the expression p is the root of an expression tree.
|
||||
** Move to the first token in the expression tree.
|
||||
*/
|
||||
while( p->pLeft ){
|
||||
p = p->pLeft;
|
||||
}
|
||||
iToken = 0;
|
||||
}else{
|
||||
assert(p && p->eType==FTSQUERY_PHRASE );
|
||||
if( iToken<(p->pPhrase->nToken-1) ){
|
||||
iToken++;
|
||||
}else{
|
||||
iToken = 0;
|
||||
while( p->pParent && p->pParent->pLeft!=p ){
|
||||
assert( p->pParent->pRight==p );
|
||||
p = p->pParent;
|
||||
}
|
||||
p = p->pParent;
|
||||
if( p ){
|
||||
assert( p->pRight!=0 );
|
||||
p = p->pRight;
|
||||
while( p->pLeft ){
|
||||
p = p->pLeft;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
*ppExpr = p;
|
||||
*piToken = iToken;
|
||||
return p?1:0;
|
||||
}
|
||||
|
||||
/*
|
||||
** Return TRUE if the expression node pExpr is located beneath the
|
||||
** RHS of a NOT operator.
|
||||
*/
|
||||
static int fts3ExprBeneathNot(Fts3Expr *p){
|
||||
Fts3Expr *pParent;
|
||||
while( p ){
|
||||
pParent = p->pParent;
|
||||
if( pParent && pParent->eType==FTSQUERY_NOT && pParent->pRight==p ){
|
||||
return 1;
|
||||
}
|
||||
p = pParent;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
** Add entries to pSnippet->aMatch[] for every match that occurs against
|
||||
** document zDoc[0..nDoc-1] which is stored in column iColumn.
|
||||
*/
|
||||
static void snippetOffsetsOfColumn(
|
||||
Fts3Cursor *pCur, /* The fulltest search cursor */
|
||||
Snippet *pSnippet, /* The Snippet object to be filled in */
|
||||
int iColumn, /* Index of fulltext table column */
|
||||
const char *zDoc, /* Text of the fulltext table column */
|
||||
int nDoc /* Length of zDoc in bytes */
|
||||
){
|
||||
const sqlite3_tokenizer_module *pTModule; /* The tokenizer module */
|
||||
sqlite3_tokenizer *pTokenizer; /* The specific tokenizer */
|
||||
sqlite3_tokenizer_cursor *pTCursor; /* Tokenizer cursor */
|
||||
Fts3Table *pVtab; /* The full text index */
|
||||
int nColumn; /* Number of columns in the index */
|
||||
int i, j; /* Loop counters */
|
||||
int rc; /* Return code */
|
||||
unsigned int match, prevMatch; /* Phrase search bitmasks */
|
||||
const char *zToken; /* Next token from the tokenizer */
|
||||
int nToken; /* Size of zToken */
|
||||
int iBegin, iEnd, iPos; /* Offsets of beginning and end */
|
||||
|
||||
/* The following variables keep a circular buffer of the last
|
||||
** few tokens */
|
||||
unsigned int iRotor = 0; /* Index of current token */
|
||||
int iRotorBegin[FTS3_ROTOR_SZ]; /* Beginning offset of token */
|
||||
int iRotorLen[FTS3_ROTOR_SZ]; /* Length of token */
|
||||
|
||||
pVtab = (Fts3Table *)pCur->base.pVtab;
|
||||
nColumn = pVtab->nColumn;
|
||||
pTokenizer = pVtab->pTokenizer;
|
||||
pTModule = pTokenizer->pModule;
|
||||
rc = pTModule->xOpen(pTokenizer, zDoc, nDoc, &pTCursor);
|
||||
if( rc ) return;
|
||||
pTCursor->pTokenizer = pTokenizer;
|
||||
|
||||
prevMatch = 0;
|
||||
while( !pTModule->xNext(pTCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos) ){
|
||||
Fts3Expr *pIter = pCur->pExpr;
|
||||
int iIter = -1;
|
||||
iRotorBegin[iRotor&FTS3_ROTOR_MASK] = iBegin;
|
||||
iRotorLen[iRotor&FTS3_ROTOR_MASK] = iEnd-iBegin;
|
||||
match = 0;
|
||||
for(i=0; i<(FTS3_ROTOR_SZ-1) && fts3NextExprToken(&pIter, &iIter); i++){
|
||||
int nPhrase; /* Number of tokens in current phrase */
|
||||
struct PhraseToken *pToken; /* Current token */
|
||||
int iCol; /* Column index */
|
||||
|
||||
if( fts3ExprBeneathNot(pIter) ) continue;
|
||||
nPhrase = pIter->pPhrase->nToken;
|
||||
pToken = &pIter->pPhrase->aToken[iIter];
|
||||
iCol = pIter->pPhrase->iColumn;
|
||||
if( iCol>=0 && iCol<nColumn && iCol!=iColumn ) continue;
|
||||
if( pToken->n>nToken ) continue;
|
||||
if( !pToken->isPrefix && pToken->n<nToken ) continue;
|
||||
assert( pToken->n<=nToken );
|
||||
if( memcmp(pToken->z, zToken, pToken->n) ) continue;
|
||||
if( iIter>0 && (prevMatch & (1<<i))==0 ) continue;
|
||||
match |= 1<<i;
|
||||
if( i==(FTS3_ROTOR_SZ-2) || nPhrase==iIter+1 ){
|
||||
for(j=nPhrase-1; j>=0; j--){
|
||||
int k = (iRotor-j) & FTS3_ROTOR_MASK;
|
||||
snippetAppendMatch(pSnippet, iColumn, i-j, iPos-j,
|
||||
iRotorBegin[k], iRotorLen[k]);
|
||||
}
|
||||
}
|
||||
}
|
||||
prevMatch = match<<1;
|
||||
iRotor++;
|
||||
}
|
||||
pTModule->xClose(pTCursor);
|
||||
}
|
||||
|
||||
/*
|
||||
** Remove entries from the pSnippet structure to account for the NEAR
|
||||
** operator. When this is called, pSnippet contains the list of token
|
||||
** offsets produced by treating all NEAR operators as AND operators.
|
||||
** This function removes any entries that should not be present after
|
||||
** accounting for the NEAR restriction. For example, if the queried
|
||||
** document is:
|
||||
**
|
||||
** "A B C D E A"
|
||||
**
|
||||
** and the query is:
|
||||
**
|
||||
** A NEAR/0 E
|
||||
**
|
||||
** then when this function is called the Snippet contains token offsets
|
||||
** 0, 4 and 5. This function removes the "0" entry (because the first A
|
||||
** is not near enough to an E).
|
||||
**
|
||||
** When this function is called, the value pointed to by parameter piLeft is
|
||||
** the integer id of the left-most token in the expression tree headed by
|
||||
** pExpr. This function increments *piLeft by the total number of tokens
|
||||
** in the expression tree headed by pExpr.
|
||||
**
|
||||
** Return 1 if any trimming occurs. Return 0 if no trimming is required.
|
||||
*/
|
||||
static int trimSnippetOffsets(
|
||||
Fts3Expr *pExpr, /* The search expression */
|
||||
Snippet *pSnippet, /* The set of snippet offsets to be trimmed */
|
||||
int *piLeft /* Index of left-most token in pExpr */
|
||||
){
|
||||
if( pExpr ){
|
||||
if( trimSnippetOffsets(pExpr->pLeft, pSnippet, piLeft) ){
|
||||
return 1;
|
||||
}
|
||||
|
||||
switch( pExpr->eType ){
|
||||
case FTSQUERY_PHRASE:
|
||||
*piLeft += pExpr->pPhrase->nToken;
|
||||
break;
|
||||
case FTSQUERY_NEAR: {
|
||||
/* The right-hand-side of a NEAR operator is always a phrase. The
|
||||
** left-hand-side is either a phrase or an expression tree that is
|
||||
** itself headed by a NEAR operator. The following initializations
|
||||
** set local variable iLeft to the token number of the left-most
|
||||
** token in the right-hand phrase, and iRight to the right most
|
||||
** token in the same phrase. For example, if we had:
|
||||
**
|
||||
** <col> MATCH '"abc def" NEAR/2 "ghi jkl"'
|
||||
**
|
||||
** then iLeft will be set to 2 (token number of ghi) and nToken will
|
||||
** be set to 4.
|
||||
*/
|
||||
Fts3Expr *pLeft = pExpr->pLeft;
|
||||
Fts3Expr *pRight = pExpr->pRight;
|
||||
int iLeft = *piLeft;
|
||||
int nNear = pExpr->nNear;
|
||||
int nToken = pRight->pPhrase->nToken;
|
||||
int jj, ii;
|
||||
if( pLeft->eType==FTSQUERY_NEAR ){
|
||||
pLeft = pLeft->pRight;
|
||||
}
|
||||
assert( pRight->eType==FTSQUERY_PHRASE );
|
||||
assert( pLeft->eType==FTSQUERY_PHRASE );
|
||||
nToken += pLeft->pPhrase->nToken;
|
||||
|
||||
for(ii=0; ii<pSnippet->nMatch; ii++){
|
||||
struct snippetMatch *p = &pSnippet->aMatch[ii];
|
||||
if( p->iTerm==iLeft ){
|
||||
int isOk = 0;
|
||||
/* Snippet ii is an occurence of query term iLeft in the document.
|
||||
** It occurs at position (p->iToken) of the document. We now
|
||||
** search for an instance of token (iLeft-1) somewhere in the
|
||||
** range (p->iToken - nNear)...(p->iToken + nNear + nToken) within
|
||||
** the set of snippetMatch structures. If one is found, proceed.
|
||||
** If one cannot be found, then remove snippets ii..(ii+N-1)
|
||||
** from the matching snippets, where N is the number of tokens
|
||||
** in phrase pRight->pPhrase.
|
||||
*/
|
||||
for(jj=0; isOk==0 && jj<pSnippet->nMatch; jj++){
|
||||
struct snippetMatch *p2 = &pSnippet->aMatch[jj];
|
||||
if( p2->iTerm==(iLeft-1) ){
|
||||
if( p2->iToken>=(p->iToken-nNear-1)
|
||||
&& p2->iToken<(p->iToken+nNear+nToken)
|
||||
){
|
||||
isOk = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
if( !isOk ){
|
||||
int kk;
|
||||
for(kk=0; kk<pRight->pPhrase->nToken; kk++){
|
||||
pSnippet->aMatch[kk+ii].iTerm = -2;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
if( p->iTerm==(iLeft-1) ){
|
||||
int isOk = 0;
|
||||
for(jj=0; isOk==0 && jj<pSnippet->nMatch; jj++){
|
||||
struct snippetMatch *p2 = &pSnippet->aMatch[jj];
|
||||
if( p2->iTerm==iLeft ){
|
||||
if( p2->iToken<=(p->iToken+nNear+1)
|
||||
&& p2->iToken>(p->iToken-nNear-nToken)
|
||||
){
|
||||
isOk = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
if( !isOk ){
|
||||
int kk;
|
||||
for(kk=0; kk<pLeft->pPhrase->nToken; kk++){
|
||||
pSnippet->aMatch[ii-kk].iTerm = -2;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if( trimSnippetOffsets(pExpr->pRight, pSnippet, piLeft) ){
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
** Compute all offsets for the current row of the query.
|
||||
** If the offsets have already been computed, this routine is a no-op.
|
||||
*/
|
||||
static int snippetAllOffsets(Fts3Cursor *pCsr, Snippet **ppSnippet){
|
||||
Fts3Table *p = (Fts3Table *)pCsr->base.pVtab;
|
||||
int nColumn;
|
||||
int iColumn, i;
|
||||
int iFirst, iLast;
|
||||
int iTerm = 0;
|
||||
Snippet *pSnippet;
|
||||
|
||||
if( pCsr->pExpr==0 ){
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
pSnippet = (Snippet *)sqlite3_malloc(sizeof(Snippet));
|
||||
*ppSnippet = pSnippet;
|
||||
if( !pSnippet ){
|
||||
return SQLITE_NOMEM;
|
||||
}
|
||||
memset(pSnippet, 0, sizeof(Snippet));
|
||||
|
||||
nColumn = p->nColumn;
|
||||
iColumn = (pCsr->eType - 2);
|
||||
if( iColumn<0 || iColumn>=nColumn ){
|
||||
/* Look for matches over all columns of the full-text index */
|
||||
iFirst = 0;
|
||||
iLast = nColumn-1;
|
||||
}else{
|
||||
/* Look for matches in the iColumn-th column of the index only */
|
||||
iFirst = iColumn;
|
||||
iLast = iColumn;
|
||||
}
|
||||
for(i=iFirst; i<=iLast; i++){
|
||||
const char *zDoc;
|
||||
int nDoc;
|
||||
zDoc = (const char*)sqlite3_column_text(pCsr->pStmt, i+1);
|
||||
nDoc = sqlite3_column_bytes(pCsr->pStmt, i+1);
|
||||
snippetOffsetsOfColumn(pCsr, pSnippet, i, zDoc, nDoc);
|
||||
}
|
||||
|
||||
while( trimSnippetOffsets(pCsr->pExpr, pSnippet, &iTerm) ){
|
||||
iTerm = 0;
|
||||
}
|
||||
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Convert the information in the aMatch[] array of the snippet
|
||||
** into the string zOffset[0..nOffset-1]. This string is used as
|
||||
** the return of the SQL offsets() function.
|
||||
*/
|
||||
static void snippetOffsetText(Snippet *p){
|
||||
int i;
|
||||
int cnt = 0;
|
||||
StringBuffer sb;
|
||||
char zBuf[200];
|
||||
if( p->zOffset ) return;
|
||||
initStringBuffer(&sb);
|
||||
for(i=0; i<p->nMatch; i++){
|
||||
struct snippetMatch *pMatch = &p->aMatch[i];
|
||||
if( pMatch->iTerm>=0 ){
|
||||
/* If snippetMatch.iTerm is less than 0, then the match was
|
||||
** discarded as part of processing the NEAR operator (see the
|
||||
** trimSnippetOffsetsForNear() function for details). Ignore
|
||||
** it in this case
|
||||
*/
|
||||
zBuf[0] = ' ';
|
||||
sqlite3_snprintf(sizeof(zBuf)-1, &zBuf[cnt>0], "%d %d %d %d",
|
||||
pMatch->iCol, pMatch->iTerm, pMatch->iStart, pMatch->nByte);
|
||||
append(&sb, zBuf);
|
||||
cnt++;
|
||||
}
|
||||
}
|
||||
p->zOffset = stringBufferData(&sb);
|
||||
p->nOffset = stringBufferLength(&sb);
|
||||
}
|
||||
|
||||
/*
|
||||
** zDoc[0..nDoc-1] is phrase of text. aMatch[0..nMatch-1] are a set
|
||||
** of matching words some of which might be in zDoc. zDoc is column
|
||||
** number iCol.
|
||||
**
|
||||
** iBreak is suggested spot in zDoc where we could begin or end an
|
||||
** excerpt. Return a value similar to iBreak but possibly adjusted
|
||||
** to be a little left or right so that the break point is better.
|
||||
*/
|
||||
static int wordBoundary(
|
||||
int iBreak, /* The suggested break point */
|
||||
const char *zDoc, /* Document text */
|
||||
int nDoc, /* Number of bytes in zDoc[] */
|
||||
struct snippetMatch *aMatch, /* Matching words */
|
||||
int nMatch, /* Number of entries in aMatch[] */
|
||||
int iCol /* The column number for zDoc[] */
|
||||
){
|
||||
int i;
|
||||
if( iBreak<=10 ){
|
||||
return 0;
|
||||
}
|
||||
if( iBreak>=nDoc-10 ){
|
||||
return nDoc;
|
||||
}
|
||||
for(i=0; i<nMatch && aMatch[i].iCol<iCol; i++){}
|
||||
while( i<nMatch && aMatch[i].iStart+aMatch[i].nByte<iBreak ){ i++; }
|
||||
if( i<nMatch ){
|
||||
if( aMatch[i].iStart<iBreak+10 ){
|
||||
return aMatch[i].iStart;
|
||||
}
|
||||
if( i>0 && aMatch[i-1].iStart+aMatch[i-1].nByte>=iBreak ){
|
||||
return aMatch[i-1].iStart;
|
||||
}
|
||||
}
|
||||
for(i=1; i<=10; i++){
|
||||
if( safe_isspace(zDoc[iBreak-i]) ){
|
||||
return iBreak - i + 1;
|
||||
}
|
||||
if( safe_isspace(zDoc[iBreak+i]) ){
|
||||
return iBreak + i + 1;
|
||||
}
|
||||
}
|
||||
return iBreak;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
** Allowed values for Snippet.aMatch[].snStatus
|
||||
*/
|
||||
#define SNIPPET_IGNORE 0 /* It is ok to omit this match from the snippet */
|
||||
#define SNIPPET_DESIRED 1 /* We want to include this match in the snippet */
|
||||
|
||||
/*
|
||||
** Generate the text of a snippet.
|
||||
*/
|
||||
static void snippetText(
|
||||
Fts3Cursor *pCursor, /* The cursor we need the snippet for */
|
||||
Snippet *pSnippet,
|
||||
const char *zStartMark, /* Markup to appear before each match */
|
||||
const char *zEndMark, /* Markup to appear after each match */
|
||||
const char *zEllipsis /* Ellipsis mark */
|
||||
){
|
||||
int i, j;
|
||||
struct snippetMatch *aMatch;
|
||||
int nMatch;
|
||||
int nDesired;
|
||||
StringBuffer sb;
|
||||
int tailCol;
|
||||
int tailOffset;
|
||||
int iCol;
|
||||
int nDoc;
|
||||
const char *zDoc;
|
||||
int iStart, iEnd;
|
||||
int tailEllipsis = 0;
|
||||
int iMatch;
|
||||
|
||||
|
||||
sqlite3_free(pSnippet->zSnippet);
|
||||
pSnippet->zSnippet = 0;
|
||||
aMatch = pSnippet->aMatch;
|
||||
nMatch = pSnippet->nMatch;
|
||||
initStringBuffer(&sb);
|
||||
|
||||
for(i=0; i<nMatch; i++){
|
||||
aMatch[i].snStatus = SNIPPET_IGNORE;
|
||||
}
|
||||
nDesired = 0;
|
||||
for(i=0; i<FTS3_ROTOR_SZ; i++){
|
||||
for(j=0; j<nMatch; j++){
|
||||
if( aMatch[j].iTerm==i ){
|
||||
aMatch[j].snStatus = SNIPPET_DESIRED;
|
||||
nDesired++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
iMatch = 0;
|
||||
tailCol = -1;
|
||||
tailOffset = 0;
|
||||
for(i=0; i<nMatch && nDesired>0; i++){
|
||||
if( aMatch[i].snStatus!=SNIPPET_DESIRED ) continue;
|
||||
nDesired--;
|
||||
iCol = aMatch[i].iCol;
|
||||
zDoc = (const char*)sqlite3_column_text(pCursor->pStmt, iCol+1);
|
||||
nDoc = sqlite3_column_bytes(pCursor->pStmt, iCol+1);
|
||||
iStart = aMatch[i].iStart - 40;
|
||||
iStart = wordBoundary(iStart, zDoc, nDoc, aMatch, nMatch, iCol);
|
||||
if( iStart<=10 ){
|
||||
iStart = 0;
|
||||
}
|
||||
if( iCol==tailCol && iStart<=tailOffset+20 ){
|
||||
iStart = tailOffset;
|
||||
}
|
||||
if( (iCol!=tailCol && tailCol>=0) || iStart!=tailOffset ){
|
||||
trimWhiteSpace(&sb);
|
||||
appendWhiteSpace(&sb);
|
||||
append(&sb, zEllipsis);
|
||||
appendWhiteSpace(&sb);
|
||||
}
|
||||
iEnd = aMatch[i].iStart + aMatch[i].nByte + 40;
|
||||
iEnd = wordBoundary(iEnd, zDoc, nDoc, aMatch, nMatch, iCol);
|
||||
if( iEnd>=nDoc-10 ){
|
||||
iEnd = nDoc;
|
||||
tailEllipsis = 0;
|
||||
}else{
|
||||
tailEllipsis = 1;
|
||||
}
|
||||
while( iMatch<nMatch && aMatch[iMatch].iCol<iCol ){ iMatch++; }
|
||||
while( iStart<iEnd ){
|
||||
while( iMatch<nMatch && aMatch[iMatch].iStart<iStart
|
||||
&& aMatch[iMatch].iCol<=iCol ){
|
||||
iMatch++;
|
||||
}
|
||||
if( iMatch<nMatch && aMatch[iMatch].iStart<iEnd
|
||||
&& aMatch[iMatch].iCol==iCol ){
|
||||
nappend(&sb, &zDoc[iStart], aMatch[iMatch].iStart - iStart);
|
||||
iStart = aMatch[iMatch].iStart;
|
||||
append(&sb, zStartMark);
|
||||
nappend(&sb, &zDoc[iStart], aMatch[iMatch].nByte);
|
||||
append(&sb, zEndMark);
|
||||
iStart += aMatch[iMatch].nByte;
|
||||
for(j=iMatch+1; j<nMatch; j++){
|
||||
if( aMatch[j].iTerm==aMatch[iMatch].iTerm
|
||||
&& aMatch[j].snStatus==SNIPPET_DESIRED ){
|
||||
nDesired--;
|
||||
aMatch[j].snStatus = SNIPPET_IGNORE;
|
||||
}
|
||||
}
|
||||
}else{
|
||||
nappend(&sb, &zDoc[iStart], iEnd - iStart);
|
||||
iStart = iEnd;
|
||||
}
|
||||
}
|
||||
tailCol = iCol;
|
||||
tailOffset = iEnd;
|
||||
}
|
||||
trimWhiteSpace(&sb);
|
||||
if( tailEllipsis ){
|
||||
appendWhiteSpace(&sb);
|
||||
append(&sb, zEllipsis);
|
||||
}
|
||||
pSnippet->zSnippet = stringBufferData(&sb);
|
||||
pSnippet->nSnippet = stringBufferLength(&sb);
|
||||
}
|
||||
|
||||
void sqlite3Fts3Offsets(
|
||||
sqlite3_context *pCtx, /* SQLite function call context */
|
||||
Fts3Cursor *pCsr /* Cursor object */
|
||||
){
|
||||
Snippet *p; /* Snippet structure */
|
||||
int rc = snippetAllOffsets(pCsr, &p);
|
||||
snippetOffsetText(p);
|
||||
sqlite3_result_text(pCtx, p->zOffset, p->nOffset, SQLITE_TRANSIENT);
|
||||
fts3SnippetFree(p);
|
||||
}
|
||||
|
||||
void sqlite3Fts3Snippet(
|
||||
sqlite3_context *pCtx, /* SQLite function call context */
|
||||
Fts3Cursor *pCsr, /* Cursor object */
|
||||
const char *zStart, /* Snippet start text - "<b>" */
|
||||
const char *zEnd, /* Snippet end text - "</b>" */
|
||||
const char *zEllipsis /* Snippet ellipsis text - "<b>...</b>" */
|
||||
){
|
||||
Snippet *p; /* Snippet structure */
|
||||
int rc = snippetAllOffsets(pCsr, &p);
|
||||
snippetText(pCsr, p, zStart, zEnd, zEllipsis);
|
||||
sqlite3_result_text(pCtx, p->zSnippet, p->nSnippet, SQLITE_TRANSIENT);
|
||||
fts3SnippetFree(p);
|
||||
}
|
||||
|
@ -30,9 +30,14 @@
|
||||
SQLITE_EXTENSION_INIT1
|
||||
#endif
|
||||
|
||||
#include "fts3_hash.h"
|
||||
#include "fts3_tokenizer.h"
|
||||
#include "fts3Int.h"
|
||||
#include <assert.h>
|
||||
#include <ctype.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
** Wrapper around isspace() that is safe for any char value. Characters
** with the high bit set are never white space; isspace() is consulted
** only for values in the range 0x00..0x7f.
*/
static int safe_isspace(char c){
  if( c & 0x80 ){
    return 0;
  }
  return isspace(c);
}
|
||||
|
||||
/*
|
||||
** Implementation of the SQL scalar function for accessing the underlying
|
||||
@ -59,14 +64,14 @@ static void scalarFunc(
|
||||
int argc,
|
||||
sqlite3_value **argv
|
||||
){
|
||||
fts3Hash *pHash;
|
||||
Fts3Hash *pHash;
|
||||
void *pPtr = 0;
|
||||
const unsigned char *zName;
|
||||
int nName;
|
||||
|
||||
assert( argc==1 || argc==2 );
|
||||
|
||||
pHash = (fts3Hash *)sqlite3_user_data(context);
|
||||
pHash = (Fts3Hash *)sqlite3_user_data(context);
|
||||
|
||||
zName = sqlite3_value_text(argv[0]);
|
||||
nName = sqlite3_value_bytes(argv[0])+1;
|
||||
@ -97,6 +102,128 @@ static void scalarFunc(
|
||||
sqlite3_result_blob(context, (void *)&pPtr, sizeof(pPtr), SQLITE_TRANSIENT);
|
||||
}
|
||||
|
||||
/*
** Return true if character c may be part of an unquoted identifier
** token: '$', '_', an ASCII digit, an ASCII letter, or any byte with
** the high bit set.
*/
static int fts3IsIdChar(char c){
  static const char isFtsIdChar[] = {
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 0x */
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 1x */
      0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 2x */
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 3x */
      0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 4x */
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,  /* 5x */
      0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 6x */
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,  /* 7x */
  };
  /* The table is only indexed when the high bit of c is clear. */
  return (c&0x80 || isFtsIdChar[(int)(c)]);
}

/*
** Locate the next token in nul-terminated string zStr. A token is one
** of:
**
**   *  a string enclosed in '...', "..." or `...`, inside which the
**      quote character may be escaped by doubling it,
**   *  a string enclosed in [...], or
**   *  a run of identifier characters (see fts3IsIdChar()).
**
** Any other characters before the token are skipped. A quoted or
** bracketed token that is not terminated extends to the end of zStr.
**
** Return a pointer to the first byte of the token and set *pn to its
** size in bytes, delimiters included. If zStr contains no further
** tokens, return NULL and leave *pn unchanged.
*/
const char *sqlite3Fts3NextToken(const char *zStr, int *pn){
  const char *z1;                 /* Start of the token */
  const char *z2 = 0;             /* One past the end of the token */

  /* Find the start of the next token. */
  z1 = zStr;
  while( z2==0 ){
    char c = *z1;
    switch( c ){
      case '\0': return 0;        /* No more tokens here */
      case '\'':
      case '"':
      case '`': {
        z2 = &z1[1];
        while( *z2 ){
          if( *z2!=c ){
            z2++;                 /* Ordinary character */
          }else if( z2[1]==c ){
            z2 += 2;              /* Doubled quote: skip both characters */
          }else{
            z2++;                 /* Closing quote: part of the token */
            break;
          }
        }
        break;
      }
      case '[':
        z2 = &z1[1];
        while( *z2 && z2[0]!=']' ) z2++;
        if( *z2 ) z2++;
        break;

      default:
        if( fts3IsIdChar(c) ){
          z2 = &z1[1];
          while( fts3IsIdChar(*z2) ) z2++;
        }else{
          z1++;
        }
    }
  }

  *pn = (int)(z2-z1);
  return z1;
}
|
||||
|
||||
int sqlite3Fts3InitTokenizer(
|
||||
Fts3Hash *pHash, /* Tokenizer hash table */
|
||||
const char *zArg, /* Possible tokenizer specification */
|
||||
sqlite3_tokenizer **ppTok, /* OUT: Tokenizer (if applicable) */
|
||||
const char **pzTokenizer, /* OUT: Set to zArg if is tokenizer */
|
||||
char **pzErr /* OUT: Set to malloced error message */
|
||||
){
|
||||
int rc;
|
||||
char *z = (char *)zArg;
|
||||
int n;
|
||||
char *zCopy;
|
||||
char *zEnd; /* Pointer to nul-term of zCopy */
|
||||
sqlite3_tokenizer_module *m;
|
||||
|
||||
if( !z ){
|
||||
zCopy = sqlite3_mprintf("simple");
|
||||
}else{
|
||||
while( safe_isspace(*z) ) z++;
|
||||
if( sqlite3_strnicmp(z, "tokenize", 8) || fts3IsIdChar(z[8])){
|
||||
return SQLITE_OK;
|
||||
}
|
||||
zCopy = sqlite3_mprintf("%s", &z[8]);
|
||||
*pzTokenizer = zArg;
|
||||
}
|
||||
if( !zCopy ){
|
||||
return SQLITE_NOMEM;
|
||||
}
|
||||
|
||||
zEnd = &zCopy[strlen(zCopy)];
|
||||
|
||||
z = (char *)sqlite3Fts3NextToken(zCopy, &n);
|
||||
z[n] = '\0';
|
||||
sqlite3Fts3Dequote(z);
|
||||
|
||||
m = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, z, strlen(z)+1);
|
||||
if( !m ){
|
||||
*pzErr = sqlite3_mprintf("unknown tokenizer: %s", z);
|
||||
rc = SQLITE_ERROR;
|
||||
}else{
|
||||
char const **aArg = 0;
|
||||
int iArg = 0;
|
||||
z = &z[n+1];
|
||||
while( z<zEnd && (z = (char *)sqlite3Fts3NextToken(z, &n)) ){
|
||||
int nNew = sizeof(char *)*(iArg+1);
|
||||
char const **aNew = (const char **)sqlite3_realloc(aArg, nNew);
|
||||
if( !aNew ){
|
||||
sqlite3_free(zCopy);
|
||||
sqlite3_free(aArg);
|
||||
return SQLITE_NOMEM;
|
||||
}
|
||||
aArg = aNew;
|
||||
aArg[iArg++] = z;
|
||||
z[n] = '\0';
|
||||
sqlite3Fts3Dequote(z);
|
||||
z = &z[n+1];
|
||||
}
|
||||
rc = m->xCreate(iArg, aArg, ppTok);
|
||||
assert( rc!=SQLITE_OK || *ppTok );
|
||||
if( rc!=SQLITE_OK ){
|
||||
*pzErr = sqlite3_mprintf("unknown tokenizer: %s", z);
|
||||
}else{
|
||||
(*ppTok)->pModule = m;
|
||||
}
|
||||
sqlite3_free(aArg);
|
||||
}
|
||||
|
||||
sqlite3_free(zCopy);
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
#ifdef SQLITE_TEST
|
||||
|
||||
#include <tcl.h>
|
||||
@ -133,7 +260,7 @@ static void testFunc(
|
||||
int argc,
|
||||
sqlite3_value **argv
|
||||
){
|
||||
fts3Hash *pHash;
|
||||
Fts3Hash *pHash;
|
||||
sqlite3_tokenizer_module *p;
|
||||
sqlite3_tokenizer *pTokenizer = 0;
|
||||
sqlite3_tokenizer_cursor *pCsr = 0;
|
||||
@ -166,7 +293,7 @@ static void testFunc(
|
||||
zArg = (const char *)sqlite3_value_text(argv[1]);
|
||||
}
|
||||
|
||||
pHash = (fts3Hash *)sqlite3_user_data(context);
|
||||
pHash = (Fts3Hash *)sqlite3_user_data(context);
|
||||
p = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nName+1);
|
||||
|
||||
if( !p ){
|
||||
@ -335,7 +462,7 @@ static void intTestFunc(
|
||||
*/
|
||||
int sqlite3Fts3InitHashTable(
|
||||
sqlite3 *db,
|
||||
fts3Hash *pHash,
|
||||
Fts3Hash *pHash,
|
||||
const char *zName
|
||||
){
|
||||
int rc = SQLITE_OK;
|
||||
|
1802
ext/fts3/fts3_write.c
Normal file
1802
ext/fts3/fts3_write.c
Normal file
File diff suppressed because it is too large
Load Diff
14
main.mk
14
main.mk
@ -54,7 +54,7 @@ LIBOBJ+= alter.o analyze.o attach.o auth.o \
|
||||
backup.o bitvec.o btmutex.o btree.o build.o \
|
||||
callback.o complete.o date.o delete.o expr.o fault.o fkey.o \
|
||||
fts3.o fts3_expr.o fts3_hash.o fts3_icu.o fts3_porter.o \
|
||||
fts3_tokenizer.o fts3_tokenizer1.o \
|
||||
fts3_snippet.o fts3_tokenizer.o fts3_tokenizer1.o fts3_write.o \
|
||||
func.o global.o hash.o \
|
||||
icu.o insert.o journal.o legacy.o loadext.o \
|
||||
main.o malloc.o mem0.o mem1.o mem2.o mem3.o mem5.o \
|
||||
@ -183,14 +183,14 @@ SRC += \
|
||||
$(TOP)/ext/fts3/fts3.c \
|
||||
$(TOP)/ext/fts3/fts3.h \
|
||||
$(TOP)/ext/fts3/fts3_expr.c \
|
||||
$(TOP)/ext/fts3/fts3_expr.h \
|
||||
$(TOP)/ext/fts3/fts3_hash.c \
|
||||
$(TOP)/ext/fts3/fts3_hash.h \
|
||||
$(TOP)/ext/fts3/fts3_icu.c \
|
||||
$(TOP)/ext/fts3/fts3_porter.c \
|
||||
$(TOP)/ext/fts3/fts3_tokenizer.h \
|
||||
$(TOP)/ext/fts3/fts3_tokenizer.c \
|
||||
$(TOP)/ext/fts3/fts3_tokenizer1.c
|
||||
$(TOP)/ext/fts3/fts3_tokenizer1.c \
|
||||
$(TOP)/ext/fts3/fts3_write.c
|
||||
SRC += \
|
||||
$(TOP)/ext/icu/sqliteicu.h \
|
||||
$(TOP)/ext/icu/icu.c
|
||||
@ -296,7 +296,7 @@ EXTHDR += \
|
||||
$(TOP)/ext/fts2/fts2_tokenizer.h
|
||||
EXTHDR += \
|
||||
$(TOP)/ext/fts3/fts3.h \
|
||||
$(TOP)/ext/fts3/fts3_expr.h \
|
||||
$(TOP)/ext/fts3/fts3Int.h \
|
||||
$(TOP)/ext/fts3/fts3_hash.h \
|
||||
$(TOP)/ext/fts3/fts3_tokenizer.h
|
||||
EXTHDR += \
|
||||
@ -435,6 +435,9 @@ fts3_hash.o: $(TOP)/ext/fts3/fts3_hash.c $(HDR) $(EXTHDR)
|
||||
fts3_icu.o: $(TOP)/ext/fts3/fts3_icu.c $(HDR) $(EXTHDR)
|
||||
$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_icu.c
|
||||
|
||||
fts3_snippet.o: $(TOP)/ext/fts3/fts3_snippet.c $(HDR) $(EXTHDR)
|
||||
$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_snippet.c
|
||||
|
||||
fts3_porter.o: $(TOP)/ext/fts3/fts3_porter.c $(HDR) $(EXTHDR)
|
||||
$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_porter.c
|
||||
|
||||
@ -444,6 +447,9 @@ fts3_tokenizer.o: $(TOP)/ext/fts3/fts3_tokenizer.c $(HDR) $(EXTHDR)
|
||||
fts3_tokenizer1.o: $(TOP)/ext/fts3/fts3_tokenizer1.c $(HDR) $(EXTHDR)
|
||||
$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_tokenizer1.c
|
||||
|
||||
fts3_write.o: $(TOP)/ext/fts3/fts3_write.c $(HDR) $(EXTHDR)
|
||||
$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_write.c
|
||||
|
||||
rtree.o: $(TOP)/ext/rtree/rtree.c $(HDR) $(EXTHDR)
|
||||
$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/rtree/rtree.c
|
||||
|
||||
|
61
manifest
61
manifest
@ -1,8 +1,5 @@
|
||||
-----BEGIN PGP SIGNED MESSAGE-----
|
||||
Hash: SHA1
|
||||
|
||||
C Force\s8-byte\smemory\salignment\son\smemory\sallocated\sfor\sVDBE\scursors.
|
||||
D 2009-11-13T17:05:54
|
||||
C Start\sreworking\sfts3\scode\sto\smatch\sthe\srest\sof\sSQLite\s(code\sconventions,\smalloc-failure\shandling\setc.).
|
||||
D 2009-11-13T10:36:21
|
||||
F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0
|
||||
F Makefile.in 53f3dfa49f28ab5b80cb083fb7c9051e596bcfa1
|
||||
F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654
|
||||
@ -59,17 +56,19 @@ F ext/fts2/mkfts2amal.tcl 974d5d438cb3f7c4a652639262f82418c1e4cff0
|
||||
F ext/fts3/README.syntax a19711dc5458c20734b8e485e75fb1981ec2427a
|
||||
F ext/fts3/README.tokenizers 998756696647400de63d5ba60e9655036cb966e9
|
||||
F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d
|
||||
F ext/fts3/fts3.c 35bfa67d9cd659b799b8498895fe60b1e8bd3500
|
||||
F ext/fts3/fts3.c 835061e6c5324f80f13396418f9294b4691ac813
|
||||
F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe
|
||||
F ext/fts3/fts3_expr.c 0bfdae44d0d8ea2cb3ccad32bb6d6843d78d1a2d
|
||||
F ext/fts3/fts3_expr.h b5412dcf565c6d90d6a8c22090ceb9ed8c745634
|
||||
F ext/fts3/fts3_hash.c e15e84d18f8df149ab290029872d4559c4c7c15a
|
||||
F ext/fts3/fts3_hash.h 004b759e1602ff16dfa02fea3ca1c77336ad6798
|
||||
F ext/fts3/fts3Int.h a6aa1a4ad280adf6487fbccacbbe986a2fabcb82
|
||||
F ext/fts3/fts3_expr.c bdf11f3602f62f36f0e42823680bf22033dae0de
|
||||
F ext/fts3/fts3_hash.c 1af1833a4d581ee8d668bb71f5a500f7a0104982
|
||||
F ext/fts3/fts3_hash.h 39524725425078bf9e814e9569c74a8e5a21b9fb
|
||||
F ext/fts3/fts3_icu.c ac494aed69835008185299315403044664bda295
|
||||
F ext/fts3/fts3_porter.c 3063da945fb0a935781c135f7575f39166173eca
|
||||
F ext/fts3/fts3_tokenizer.c fcc8fdb5c161df7d61c77285ec2991da131f0181
|
||||
F ext/fts3/fts3_snippet.c 8ea9619247ac61c79aca650fc3307b8b4097b5f3
|
||||
F ext/fts3/fts3_tokenizer.c 185a212670a9bbdeb5cad6942305e681bce5c87b
|
||||
F ext/fts3/fts3_tokenizer.h 7ff73caa3327589bf6550f60d93ebdd1f6a0fb5c
|
||||
F ext/fts3/fts3_tokenizer1.c 0a5bcc579f35de5d24a9345d7908dc25ae403ee7
|
||||
F ext/fts3/fts3_write.c 4285a2804ef308ed2eef946dae20d9d0361554d0
|
||||
F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100
|
||||
F ext/icu/README.txt 3b130aa66e7a681136f6add198b076a2f90d1e33
|
||||
F ext/icu/icu.c 12e763d288d23b5a49de37caa30737b971a2f1e2
|
||||
@ -89,7 +88,7 @@ F ext/rtree/tkt3363.test 2bf324f7908084a5f463de3109db9c6e607feb1b
|
||||
F ext/rtree/viewrtree.tcl eea6224b3553599ae665b239bd827e182b466024
|
||||
F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895
|
||||
F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8
|
||||
F main.mk 0320def78eed84285c273f3d84dd20d8f26a0139
|
||||
F main.mk 5b9fc534b96fe16b6bb57883bb0e4e28cc902df6
|
||||
F mkdll.sh 7d09b23c05d56532e9d44a50868eb4b12ff4f74a
|
||||
F mkextu.sh 416f9b7089d80e5590a29692c9d9280a10dbad9f
|
||||
F mkextw.sh 4123480947681d9b434a5e7b1ee08135abe409ac
|
||||
@ -188,7 +187,7 @@ F src/test_btree.c 47cd771250f09cdc6e12dda5bc71bc0b3abc96e2
|
||||
F src/test_config.c 220a67047af393756f55760fdf442d935d0d88f3
|
||||
F src/test_devsym.c de3c9af2bb9a8b1e44525c449e4ec3f88e3d4110
|
||||
F src/test_func.c 1c94388a23d4a9e7cd62ec79d612d1bae2451fa2
|
||||
F src/test_hexio.c 1c0f4238c6fb36c890ce7c07d9c8e1cecedad9ad
|
||||
F src/test_hexio.c 160dba2ad21d164cade8fd6a12db0926ca574ac8
|
||||
F src/test_init.c 5d624ffd0409d424cf9adbfe1f056b200270077c
|
||||
F src/test_intarray.c 25b3df15cca9ddb83927e002f4885d98a63bba0d
|
||||
F src/test_intarray.h 489edb9068bb926583445cb02589344961054207
|
||||
@ -375,29 +374,30 @@ F test/fts2p.test 4b48c35c91e6a7dbf5ac8d1e5691823cc999aafb
|
||||
F test/fts2q.test b2fbbe038b7a31a52a6079b215e71226d8c6a682
|
||||
F test/fts2r.test b154c30b63061d8725e320fba1a39e2201cadd5e
|
||||
F test/fts2token.test d8070b241a15ff13592a9ae4a8b7c171af6f445a
|
||||
F test/fts3.test efb41507c90f47e8af2a9101d7460cddeb84656b
|
||||
F test/fts3aa.test 432d1d5c41939bb5405d4d6c80a9ec759b363393
|
||||
F test/fts3ab.test 7f6cf260ae80dda064023df8e8e503e9a412b91f
|
||||
F test/fts3.test f4f380d3717493605270dfa3b0fa893ea0afb18d
|
||||
F test/fts3aa.test 5327d4c1d9b6c61021696746cc9a6cdc5bf159c0
|
||||
F test/fts3ab.test 09aeaa162aee6513d9ff336b6932211008b9d1f9
|
||||
F test/fts3ac.test 356280144a2c92aa7b11474afadfe62a437fcd69
|
||||
F test/fts3ad.test 32a114c6f214081f244f642bde9fd5517938788e
|
||||
F test/fts3ae.test 31d8137fc7c14b5b991e3c4fa041ad2ac1255c7b
|
||||
F test/fts3ae.test ce32a13b34b0260928e4213b4481acf801533bda
|
||||
F test/fts3af.test d394978c534eabf22dd0837e718b913fd66b499c
|
||||
F test/fts3ag.test 1c316bedb40a7c962e38998df854ea3ae26a3daa
|
||||
F test/fts3ag.test 38d9c7dd4b607929498e8e0b32299af5665da1ab
|
||||
F test/fts3ah.test ba181d6a3dee0c929f0d69df67cac9c47cda6bff
|
||||
F test/fts3ai.test d29cee6ed653e30de478066881cec8aa766531b2
|
||||
F test/fts3aj.test 584facbc9ac4381a7ec624bfde677340ffc2a5a4
|
||||
F test/fts3ak.test bd14deafe9d1586e8e9bf032411026ac4f8c925d
|
||||
F test/fts3al.test 6d19619402d2133773262652fc3f185cdf6be667
|
||||
F test/fts3am.test 218aa6ba0dfc50c7c16b2022aac5c6be593d08d8
|
||||
F test/fts3an.test 4b4fdab5abe2f308bdc47f6e822df2bcae30361c
|
||||
F test/fts3an.test 931fa21bd80641ca594bfa32e105250a8a07918b
|
||||
F test/fts3ao.test 0aa29dd4fc1c8d46b1f7cfe5926f7ac97551bea9
|
||||
F test/fts3atoken.test 25c2070e1e8755d414bf9c8200427b277a9f99fa
|
||||
F test/fts3b.test b3a25180a633873d37d86e1ccd00ed690d37237a
|
||||
F test/fts3c.test 4c7ef29b37aca3e8ebb6a39b57910caa6506034e
|
||||
F test/fts3d.test d92a47fe8ed59c9e53d2d8e6d2685bb380aadadc
|
||||
F test/fts3c.test fc723a9cf10b397fdfc2b32e73c53c8b1ec02958
|
||||
F test/fts3d.test 95fb3c862cbc4297c93fceb9a635543744e9ef52
|
||||
F test/fts3e.test 1f6c6ac9cc8b772ca256e6b22aaeed50c9350851
|
||||
F test/fts3expr.test 05dab77387801e4900009917bb18f556037d82da
|
||||
F test/fts3expr2.test 18da930352e5693eaa163a3eacf96233b7290d1a
|
||||
F test/fts3malloc.test 92dbea5665b6f333dd32886366481aa95ffaeb50
|
||||
F test/fts3near.test dc196dd17b4606f440c580d45b3d23aa975fd077
|
||||
F test/func.test af106ed834001738246d276659406823e35cde7b
|
||||
F test/func2.test 772d66227e4e6684b86053302e2d74a2500e1e0f
|
||||
@ -771,14 +771,11 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff
|
||||
F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224
|
||||
F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
|
||||
F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
|
||||
P f0599d28fabe9e67a7150a91c266cb7655a2002e
|
||||
R b787bcf50272d59401284264a4dcbb30
|
||||
U drh
|
||||
Z 255150de2174a2682813e546e8d990c7
|
||||
-----BEGIN PGP SIGNATURE-----
|
||||
Version: GnuPG v1.4.6 (GNU/Linux)
|
||||
|
||||
iD8DBQFK/ZH1oxKgR168RlERAuF9AJ9hHIIG7PFtXPqnbvCge9luG/0VIACfc7kV
|
||||
v+sk467/hW51kXF6lY7carY=
|
||||
=mwCb
|
||||
-----END PGP SIGNATURE-----
|
||||
P bdc45ba77fb77771c8ff46b8d6c2dd29e6d3b019
|
||||
R a0400ee87fd3b17fac8e469e29fd58ca
|
||||
T *bgcolor * #f3f4f6
|
||||
T *branch * fts3-refactor
|
||||
T *sym-fts3-refactor *
|
||||
T -sym-trunk *
|
||||
U dan
|
||||
Z d56027263e4b0769a9172f5a73a4a788
|
||||
|
@ -1 +1 @@
|
||||
bdc45ba77fb77771c8ff46b8d6c2dd29e6d3b019
|
||||
30a92f1132801c7582007ee625c577ea2ac31cdf
|
@ -316,6 +316,35 @@ static int utf8_to_utf8(
|
||||
return TCL_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** USAGE: read_varint BLOB VARNAME
|
||||
**
|
||||
** Read a varint from the start of BLOB. Set variable VARNAME to contain
|
||||
** the interpreted value. Return the number of bytes of BLOB consumed.
|
||||
*/
|
||||
static int read_varint(
|
||||
void * clientData,
|
||||
Tcl_Interp *interp,
|
||||
int objc,
|
||||
Tcl_Obj *CONST objv[]
|
||||
){
|
||||
int nBlob;
|
||||
unsigned char *zBlob;
|
||||
sqlite3_int64 iVal;
|
||||
int nVal;
|
||||
|
||||
if( objc!=3 ){
|
||||
Tcl_WrongNumArgs(interp, 1, objv, "BLOB VARNAME");
|
||||
return TCL_ERROR;
|
||||
}
|
||||
zBlob = Tcl_GetByteArrayFromObj(objv[1], &nBlob);
|
||||
|
||||
nVal = sqlite3GetVarint(zBlob, (sqlite3_uint64 *)(&iVal));
|
||||
Tcl_ObjSetVar2(interp, objv[2], 0, Tcl_NewWideIntObj(iVal), 0);
|
||||
Tcl_SetObjResult(interp, Tcl_NewIntObj(nVal));
|
||||
return TCL_OK;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Register commands with the TCL interpreter.
|
||||
@ -331,6 +360,7 @@ int Sqlitetest_hexio_Init(Tcl_Interp *interp){
|
||||
{ "hexio_render_int16", hexio_render_int16 },
|
||||
{ "hexio_render_int32", hexio_render_int32 },
|
||||
{ "utf8_to_utf8", utf8_to_utf8 },
|
||||
{ "read_varint", read_varint },
|
||||
};
|
||||
int i;
|
||||
for(i=0; i<sizeof(aObjCmd)/sizeof(aObjCmd[0]); i++){
|
||||
|
@ -42,6 +42,7 @@ set ISQUICK 1
|
||||
|
||||
set EXCLUDE {
|
||||
fts3.test
|
||||
fts3malloc.test
|
||||
}
|
||||
|
||||
# Files to include in the test. If this list is empty then everything
|
||||
|
@ -146,6 +146,7 @@ do_test fts3aa-3.3 {
|
||||
execsql {SELECT rowid FROM t1 WHERE content MATCH '-two one'}
|
||||
} {1 5 9 13 17 21 25 29}
|
||||
|
||||
breakpoint
|
||||
do_test fts3aa-4.1 {
|
||||
execsql {SELECT rowid FROM t1 WHERE content MATCH 'one OR two'}
|
||||
} {1 2 3 5 6 7 9 10 11 13 14 15 17 18 19 21 22 23 25 26 27 29 30 31}
|
||||
@ -195,6 +196,7 @@ do_test fts3aa-6.2 {
|
||||
do_test fts3aa-6.3 {
|
||||
execsql {SELECT content FROM t1 WHERE rowid = -1}
|
||||
} {{three four}}
|
||||
breakpoint
|
||||
do_test fts3aa-6.4 {
|
||||
execsql {SELECT rowid FROM t1 WHERE t1 MATCH 'four'}
|
||||
} {-1 0 8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31}
|
||||
|
@ -115,6 +115,7 @@ for {set i 1} {$i<=15} {incr i} {
|
||||
db eval "INSERT INTO t4(norm,plusone,invert) VALUES([join $vset ,]);"
|
||||
}
|
||||
|
||||
breakpoint
|
||||
do_test fts3ab-4.1 {
|
||||
execsql {SELECT rowid FROM t4 WHERE t4 MATCH 'norm:one'}
|
||||
} {1 3 5 7 9 11 13 15}
|
||||
|
@ -58,7 +58,7 @@ db eval {
|
||||
DELETE FROM t1 WHERE rowid = 22;
|
||||
}
|
||||
|
||||
do_test fts3af-1.1 {
|
||||
do_test fts3ae-1.1 {
|
||||
execsql {SELECT COUNT(*) FROM t1}
|
||||
} {14}
|
||||
|
||||
|
@ -78,6 +78,7 @@ do_test fts3ag-1.10 {
|
||||
# Test that docListOrMerge() correctly handles reaching the end of one
|
||||
# doclist before it reaches the end of the other.
|
||||
do_test fts3ag-1.11 {
|
||||
breakpoint
|
||||
execsql {SELECT rowid FROM t1 WHERE t1 MATCH 'this OR also'}
|
||||
} {1 2}
|
||||
do_test fts3ag-1.12 {
|
||||
|
@ -169,7 +169,7 @@ db eval {
|
||||
INSERT INTO t3(rowid, c) VALUES(1, $text);
|
||||
INSERT INTO t3(rowid, c) VALUES(2, 'Another lovely row');
|
||||
}
|
||||
for {set i 0} {$i<100} {incr i} {
|
||||
for {set i 0} {$i<68} {incr i} {
|
||||
db eval {INSERT INTO t3(rowid, c) VALUES(3+$i, $bigtext)}
|
||||
lappend ret 192
|
||||
}
|
||||
|
116
test/fts3c.test
116
test/fts3c.test
@ -12,11 +12,10 @@
|
||||
# and then uses them to do some basic tests that FTS3 is internally
|
||||
# working as expected.
|
||||
#
|
||||
# $Id: fts3c.test,v 1.1 2008/07/03 19:53:22 shess Exp $
|
||||
#
|
||||
|
||||
set testdir [file dirname $argv0]
|
||||
source $testdir/tester.tcl
|
||||
source $testdir/fts3_common.tcl
|
||||
|
||||
# If SQLITE_ENABLE_FTS3 is not defined, omit this file.
|
||||
ifcapable !fts3 {
|
||||
@ -24,126 +23,25 @@ ifcapable !fts3 {
|
||||
return
|
||||
}
|
||||
|
||||
#*************************************************************************
|
||||
# Probe to see if support for these functions is compiled in.
|
||||
# TODO(shess): Change main.mk to do the right thing and remove this test.
|
||||
db eval {
|
||||
DROP TABLE IF EXISTS t1;
|
||||
CREATE VIRTUAL TABLE t1 USING fts3(c);
|
||||
INSERT INTO t1 (docid, c) VALUES (1, 'x');
|
||||
}
|
||||
|
||||
set s {SELECT dump_terms(t1, 1) FROM t1 LIMIT 1}
|
||||
set r {1 {unable to use function dump_terms in the requested context}}
|
||||
if {[catchsql $s]==$r} {
|
||||
finish_test
|
||||
return
|
||||
}
|
||||
|
||||
#*************************************************************************
|
||||
# Test that the new functions give appropriate errors.
|
||||
do_test fts3c-0.0 {
|
||||
catchsql {
|
||||
SELECT dump_terms(t1, 1) FROM t1 LIMIT 1;
|
||||
}
|
||||
} {1 {dump_terms: incorrect arguments}}
|
||||
|
||||
do_test fts3c-0.1 {
|
||||
catchsql {
|
||||
SELECT dump_terms(t1, 0, 0, 0) FROM t1 LIMIT 1;
|
||||
}
|
||||
} {1 {dump_terms: incorrect arguments}}
|
||||
|
||||
do_test fts3c-0.2 {
|
||||
catchsql {
|
||||
SELECT dump_terms(1, t1) FROM t1 LIMIT 1;
|
||||
}
|
||||
} {1 {unable to use function dump_terms in the requested context}}
|
||||
|
||||
do_test fts3c-0.3 {
|
||||
catchsql {
|
||||
SELECT dump_terms(t1, 16, 16) FROM t1 LIMIT 1;
|
||||
}
|
||||
} {1 {dump_terms: segment not found}}
|
||||
|
||||
do_test fts3c-0.4 {
|
||||
catchsql {
|
||||
SELECT dump_doclist(t1) FROM t1 LIMIT 1;
|
||||
}
|
||||
} {1 {dump_doclist: incorrect arguments}}
|
||||
|
||||
do_test fts3c-0.5 {
|
||||
catchsql {
|
||||
SELECT dump_doclist(t1, NULL) FROM t1 LIMIT 1;
|
||||
}
|
||||
} {1 {dump_doclist: empty second argument}}
|
||||
|
||||
do_test fts3c-0.6 {
|
||||
catchsql {
|
||||
SELECT dump_doclist(t1, '') FROM t1 LIMIT 1;
|
||||
}
|
||||
} {1 {dump_doclist: empty second argument}}
|
||||
|
||||
do_test fts3c-0.7 {
|
||||
catchsql {
|
||||
SELECT dump_doclist(t1, 'a', 0) FROM t1 LIMIT 1;
|
||||
}
|
||||
} {1 {dump_doclist: incorrect arguments}}
|
||||
|
||||
do_test fts3c-0.8 {
|
||||
catchsql {
|
||||
SELECT dump_doclist(t1, 'a', 0, 0, 0) FROM t1 LIMIT 1;
|
||||
}
|
||||
} {1 {dump_doclist: incorrect arguments}}
|
||||
|
||||
do_test fts3c-0.9 {
|
||||
catchsql {
|
||||
SELECT dump_doclist(t1, 'a', 16, 16) FROM t1 LIMIT 1;
|
||||
}
|
||||
} {1 {dump_doclist: segment not found}}
|
||||
|
||||
#*************************************************************************
|
||||
# Utility function to check for the expected terms in the segment
|
||||
# level/index. _all version does same but for entire index.
|
||||
proc check_terms {test level index terms} {
|
||||
# TODO(shess): Figure out why uplevel in do_test can't catch
|
||||
# $level and $index directly.
|
||||
set ::level $level
|
||||
set ::index $index
|
||||
do_test $test.terms {
|
||||
execsql {
|
||||
SELECT dump_terms(t1, $::level, $::index) FROM t1 LIMIT 1;
|
||||
}
|
||||
} [list $terms]
|
||||
set where "level = $level AND idx = $index"
|
||||
do_test $test.terms [list fts3_terms t1 $where] $terms
|
||||
}
|
||||
proc check_terms_all {test terms} {
|
||||
do_test $test.terms {
|
||||
execsql {
|
||||
SELECT dump_terms(t1) FROM t1 LIMIT 1;
|
||||
}
|
||||
} [list $terms]
|
||||
do_test $test.terms [list fts3_terms t1 1] $terms
|
||||
}
|
||||
|
||||
# Utility function to check for the expected doclist for the term in
|
||||
# segment level/index. _all version does same for entire index.
|
||||
proc check_doclist {test level index term doclist} {
|
||||
# TODO(shess): Again, why can't the non-:: versions work?
|
||||
set ::term $term
|
||||
set ::level $level
|
||||
set ::index $index
|
||||
do_test $test {
|
||||
execsql {
|
||||
SELECT dump_doclist(t1, $::term, $::level, $::index) FROM t1 LIMIT 1;
|
||||
}
|
||||
} [list $doclist]
|
||||
set where "level = $level AND idx = $index"
|
||||
do_test $test [list fts3_doclist t1 $term $where] $doclist
|
||||
}
|
||||
proc check_doclist_all {test term doclist} {
|
||||
set ::term $term
|
||||
do_test $test {
|
||||
execsql {
|
||||
SELECT dump_doclist(t1, $::term) FROM t1 LIMIT 1;
|
||||
}
|
||||
} [list $doclist]
|
||||
do_test $test [list fts3_doclist t1 $term 1] $doclist
|
||||
}
|
||||
|
||||
#*************************************************************************
|
||||
|
@ -11,11 +11,10 @@
|
||||
# This file implements regression tests for SQLite library. The focus
|
||||
# of this script is testing the FTS3 module's optimize() function.
|
||||
#
|
||||
# $Id: fts3d.test,v 1.2 2008/07/15 21:32:07 shess Exp $
|
||||
#
|
||||
|
||||
set testdir [file dirname $argv0]
|
||||
source $testdir/tester.tcl
|
||||
source $testdir/fts3_common.tcl
|
||||
|
||||
# If SQLITE_ENABLE_FTS3 is not defined, omit this file.
|
||||
ifcapable !fts3 {
|
||||
@ -23,64 +22,25 @@ ifcapable !fts3 {
|
||||
return
|
||||
}
|
||||
|
||||
#*************************************************************************
|
||||
# Probe to see if support for the FTS3 dump_* functions is compiled in.
|
||||
# TODO(shess): Change main.mk to do the right thing and remove this test.
|
||||
db eval {
|
||||
DROP TABLE IF EXISTS t1;
|
||||
CREATE VIRTUAL TABLE t1 USING fts3(c);
|
||||
INSERT INTO t1 (docid, c) VALUES (1, 'x');
|
||||
}
|
||||
|
||||
set s {SELECT dump_terms(t1, 1) FROM t1 LIMIT 1}
|
||||
set r {1 {unable to use function dump_terms in the requested context}}
|
||||
if {[catchsql $s]==$r} {
|
||||
finish_test
|
||||
return
|
||||
}
|
||||
|
||||
#*************************************************************************
|
||||
# Utility function to check for the expected terms in the segment
|
||||
# level/index. _all version does same but for entire index.
|
||||
proc check_terms {test level index terms} {
|
||||
# TODO(shess): Figure out why uplevel in do_test can't catch
|
||||
# $level and $index directly.
|
||||
set ::level $level
|
||||
set ::index $index
|
||||
do_test $test.terms {
|
||||
execsql {
|
||||
SELECT dump_terms(t1, $::level, $::index) FROM t1 LIMIT 1;
|
||||
}
|
||||
} [list $terms]
|
||||
set where "level = $level AND idx = $index"
|
||||
do_test $test.terms [list fts3_terms t1 $where] $terms
|
||||
}
|
||||
proc check_terms_all {test terms} {
|
||||
do_test $test.terms {
|
||||
execsql {
|
||||
SELECT dump_terms(t1) FROM t1 LIMIT 1;
|
||||
}
|
||||
} [list $terms]
|
||||
do_test $test.terms [list fts3_terms t1 1] $terms
|
||||
}
|
||||
|
||||
# Utility function to check for the expected doclist for the term in
|
||||
# segment level/index. _all version does same for entire index.
|
||||
proc check_doclist {test level index term doclist} {
|
||||
# TODO(shess): Again, why can't the non-:: versions work?
|
||||
set ::term $term
|
||||
set ::level $level
|
||||
set ::index $index
|
||||
do_test $test {
|
||||
execsql {
|
||||
SELECT dump_doclist(t1, $::term, $::level, $::index) FROM t1 LIMIT 1;
|
||||
}
|
||||
} [list $doclist]
|
||||
set where "level = $level AND idx = $index"
|
||||
do_test $test.doclist [list fts3_doclist t1 $term $where] $doclist
|
||||
}
|
||||
proc check_doclist_all {test term doclist} {
|
||||
set ::term $term
|
||||
do_test $test {
|
||||
execsql {
|
||||
SELECT dump_doclist(t1, $::term) FROM t1 LIMIT 1;
|
||||
}
|
||||
} [list $doclist]
|
||||
do_test $test.doclist [list fts3_doclist t1 $term 1] $doclist
|
||||
}
|
||||
|
||||
#*************************************************************************
|
||||
@ -293,6 +253,7 @@ check_doclist fts3d-4.4.10 1 0 was {[2 0[1]]}
|
||||
|
||||
# Optimize should leave the result in the level of the highest-level
|
||||
# prior segment.
|
||||
breakpoint
|
||||
do_test fts3d-4.5 {
|
||||
execsql {
|
||||
SELECT OPTIMIZE(t1) FROM t1 LIMIT 1;
|
||||
|
284
test/fts3malloc.test
Normal file
284
test/fts3malloc.test
Normal file
@ -0,0 +1,284 @@
|
||||
# 2009 October 22
|
||||
#
|
||||
# The author disclaims copyright to this source code. In place of
|
||||
# a legal notice, here is a blessing:
|
||||
#
|
||||
# May you do good and not evil.
|
||||
# May you find forgiveness for yourself and forgive others.
|
||||
# May you share freely, never taking more than you give.
|
||||
#
|
||||
#***********************************************************************
|
||||
#
|
||||
# This file contains tests to verify that malloc() errors that occur
|
||||
# within the FTS3 module code are handled correctly.
|
||||
#
|
||||
|
||||
set testdir [file dirname $argv0]
|
||||
source $testdir/tester.tcl
|
||||
ifcapable !fts3 { finish_test ; return }
|
||||
source $testdir/malloc_common.tcl
|
||||
|
||||
do_malloc_test fts3_malloc-1.1 -sqlbody {
|
||||
CREATE VIRTUAL TABLE ft USING fts3(a, b, c);
|
||||
}
|
||||
|
||||
do_malloc_test fts3_malloc-1.2 -sqlprep {
|
||||
CREATE VIRTUAL TABLE ft USING fts3(a, b, c);
|
||||
} -sqlbody {
|
||||
DROP TABLE ft;
|
||||
}
|
||||
|
||||
do_malloc_test fts3_malloc-1.3 -sqlprep {
|
||||
CREATE VIRTUAL TABLE ft USING fts3(content);
|
||||
} -sqlbody {
|
||||
INSERT INTO ft VALUES('one two three four');
|
||||
}
|
||||
|
||||
do_malloc_test fts3_malloc-1.4 -tclprep {
|
||||
db eval {CREATE VIRTUAL TABLE ft USING fts3(a, b)}
|
||||
for {set i 0} {$i<16} {incr i} {
|
||||
db eval { INSERT INTO ft VALUES('one two', 'three four') }
|
||||
}
|
||||
} -sqlbody {
|
||||
INSERT INTO ft VALUES('one two', 'three four');
|
||||
}
|
||||
|
||||
proc do_write_test {sql} {
|
||||
uplevel [list db eval $sql]
|
||||
}
|
||||
|
||||
proc do_read_test {name sql result} {
|
||||
|
||||
if {![info exists ::DO_MALLOC_TEST]} {
|
||||
set ::DO_MALLOC_TEST 1
|
||||
}
|
||||
|
||||
set answers [list [list 0 $result]]
|
||||
if {$::DO_MALLOC_TEST } {
|
||||
set answers [list {1 {out of memory}} [list 0 $result]]
|
||||
set modes [list 100000 transient 1 persistent]
|
||||
} else {
|
||||
set modes [list 0 nofail]
|
||||
}
|
||||
set str [join $answers " OR "]
|
||||
|
||||
foreach {nRepeat zName} $modes {
|
||||
for {set iFail 1} 1 {incr iFail} {
|
||||
if {$::DO_MALLOC_TEST} {sqlite3_memdebug_fail $iFail -repeat $nRepeat}
|
||||
|
||||
set res [catchsql $sql]
|
||||
if {[lsearch $answers $res]>=0} {
|
||||
set res $str
|
||||
}
|
||||
do_test $name.$zName.$iFail [list set {} $res] $str
|
||||
set nFail [sqlite3_memdebug_fail -1 -benigncnt nBenign]
|
||||
if {$nFail==0} break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
proc normal_list {l} {
|
||||
set ret [list]
|
||||
foreach elem $l {lappend ret $elem}
|
||||
set ret
|
||||
}
|
||||
|
||||
db close
|
||||
file delete -force test.db test.db-journal
|
||||
sqlite3 db test.db
|
||||
sqlite3_db_config_lookaside db 0 0 0
|
||||
set sqlite_fts3_enable_parentheses 1
|
||||
|
||||
|
||||
do_test fts3_malloc-2.0 {
|
||||
execsql { CREATE VIRTUAL TABLE ft USING fts3(a, b) }
|
||||
for {set ii 1} {$ii < 32} {incr ii} {
|
||||
set a [list]
|
||||
set b [list]
|
||||
if {$ii & 0x01} {lappend a one ; lappend b neung}
|
||||
if {$ii & 0x02} {lappend a two ; lappend b song }
|
||||
if {$ii & 0x04} {lappend a three ; lappend b sahm }
|
||||
if {$ii & 0x08} {lappend a four ; lappend b see }
|
||||
if {$ii & 0x10} {lappend a five ; lappend b hah }
|
||||
execsql { INSERT INTO ft VALUES($a, $b) }
|
||||
}
|
||||
} {}
|
||||
|
||||
foreach {tn sql result} {
|
||||
1 "SELECT count(*) FROM sqlite_master" {5}
|
||||
2 "SELECT * FROM ft WHERE docid = 1" {one neung}
|
||||
3 "SELECT * FROM ft WHERE docid = 2" {two song}
|
||||
4 "SELECT * FROM ft WHERE docid = 3" {{one two} {neung song}}
|
||||
|
||||
5 "SELECT a FROM ft" {
|
||||
{one} {two} {one two}
|
||||
{three} {one three} {two three}
|
||||
{one two three} {four} {one four}
|
||||
{two four} {one two four} {three four}
|
||||
{one three four} {two three four} {one two three four}
|
||||
{five} {one five} {two five}
|
||||
{one two five} {three five} {one three five}
|
||||
{two three five} {one two three five} {four five}
|
||||
{one four five} {two four five} {one two four five}
|
||||
{three four five} {one three four five} {two three four five}
|
||||
{one two three four five}
|
||||
}
|
||||
|
||||
6 "SELECT a FROM ft WHERE a MATCH 'one'" {
|
||||
{one} {one two} {one three} {one two three}
|
||||
{one four} {one two four} {one three four} {one two three four}
|
||||
{one five} {one two five} {one three five} {one two three five}
|
||||
{one four five} {one two four five}
|
||||
{one three four five} {one two three four five}
|
||||
}
|
||||
|
||||
7 "SELECT a FROM ft WHERE a MATCH 'o*'" {
|
||||
{one} {one two} {one three} {one two three}
|
||||
{one four} {one two four} {one three four} {one two three four}
|
||||
{one five} {one two five} {one three five} {one two three five}
|
||||
{one four five} {one two four five}
|
||||
{one three four five} {one two three four five}
|
||||
}
|
||||
|
||||
8 "SELECT a FROM ft WHERE a MATCH 'o* t*'" {
|
||||
{one two} {one three} {one two three}
|
||||
{one two four} {one three four} {one two three four}
|
||||
{one two five} {one three five} {one two three five}
|
||||
{one two four five} {one three four five} {one two three four five}
|
||||
}
|
||||
|
||||
9 "SELECT a FROM ft WHERE a MATCH '\"o* t*\"'" {
|
||||
{one two} {one three} {one two three}
|
||||
{one two four} {one three four} {one two three four}
|
||||
{one two five} {one three five} {one two three five}
|
||||
{one two four five} {one three four five} {one two three four five}
|
||||
}
|
||||
|
||||
10 {SELECT a FROM ft WHERE a MATCH '"o* f*"'} {
|
||||
{one four} {one five} {one four five}
|
||||
}
|
||||
|
||||
11 {SELECT a FROM ft WHERE a MATCH '"one two three"'} {
|
||||
{one two three}
|
||||
{one two three four}
|
||||
{one two three five}
|
||||
{one two three four five}
|
||||
}
|
||||
|
||||
12 {SELECT a FROM ft WHERE a MATCH '"two three four"'} {
|
||||
{two three four}
|
||||
{one two three four}
|
||||
{two three four five}
|
||||
{one two three four five}
|
||||
}
|
||||
|
||||
12 {SELECT a FROM ft WHERE a MATCH '"two three" five'} {
|
||||
{two three five} {one two three five}
|
||||
{two three four five} {one two three four five}
|
||||
}
|
||||
|
||||
13 {SELECT a FROM ft WHERE ft MATCH '"song sahm" hah'} {
|
||||
{two three five} {one two three five}
|
||||
{two three four five} {one two three four five}
|
||||
}
|
||||
|
||||
14 {SELECT a FROM ft WHERE b MATCH 'neung'} {
|
||||
{one} {one two}
|
||||
{one three} {one two three}
|
||||
{one four} {one two four}
|
||||
{one three four} {one two three four}
|
||||
{one five} {one two five}
|
||||
{one three five} {one two three five}
|
||||
{one four five} {one two four five}
|
||||
{one three four five} {one two three four five}
|
||||
}
|
||||
|
||||
15 {SELECT a FROM ft WHERE b MATCH '"neung song sahm"'} {
|
||||
{one two three} {one two three four}
|
||||
{one two three five} {one two three four five}
|
||||
}
|
||||
|
||||
16 {SELECT a FROM ft WHERE b MATCH 'hah "song sahm"'} {
|
||||
{two three five} {one two three five}
|
||||
{two three four five} {one two three four five}
|
||||
}
|
||||
|
||||
17 {SELECT a FROM ft WHERE b MATCH 'song OR sahm'} {
|
||||
{two} {one two} {three}
|
||||
{one three} {two three} {one two three}
|
||||
{two four} {one two four} {three four}
|
||||
{one three four} {two three four} {one two three four}
|
||||
{two five} {one two five} {three five}
|
||||
{one three five} {two three five} {one two three five}
|
||||
{two four five} {one two four five} {three four five}
|
||||
{one three four five} {two three four five} {one two three four five}
|
||||
}
|
||||
|
||||
18 {SELECT a FROM ft WHERE a MATCH 'three NOT two'} {
|
||||
{three} {one three} {three four}
|
||||
{one three four} {three five} {one three five}
|
||||
{three four five} {one three four five}
|
||||
}
|
||||
|
||||
19 {SELECT a FROM ft WHERE b MATCH 'sahm NOT song'} {
|
||||
{three} {one three} {three four}
|
||||
{one three four} {three five} {one three five}
|
||||
{three four five} {one three four five}
|
||||
}
|
||||
|
||||
20 {SELECT a FROM ft WHERE ft MATCH 'sahm NOT song'} {
|
||||
{three} {one three} {three four}
|
||||
{one three four} {three five} {one three five}
|
||||
{three four five} {one three four five}
|
||||
}
|
||||
|
||||
21 {SELECT a FROM ft WHERE b MATCH 'neung NEAR song NEAR sahm'} {
|
||||
{one two three} {one two three four}
|
||||
{one two three five} {one two three four five}
|
||||
}
|
||||
|
||||
} {
|
||||
set result [normal_list $result]
|
||||
do_read_test fts3_malloc-2.$tn $sql $result
|
||||
}
|
||||
|
||||
do_test fts3_malloc-3.0 {
|
||||
execsql BEGIN
|
||||
for {set ii 32} {$ii < 1024} {incr ii} {
|
||||
set a [list]
|
||||
set b [list]
|
||||
if {$ii & 0x0001} {lappend a one ; lappend b neung }
|
||||
if {$ii & 0x0002} {lappend a two ; lappend b song }
|
||||
if {$ii & 0x0004} {lappend a three ; lappend b sahm }
|
||||
if {$ii & 0x0008} {lappend a four ; lappend b see }
|
||||
if {$ii & 0x0010} {lappend a five ; lappend b hah }
|
||||
if {$ii & 0x0020} {lappend a six ; lappend b hok }
|
||||
if {$ii & 0x0040} {lappend a seven ; lappend b jet }
|
||||
if {$ii & 0x0080} {lappend a eight ; lappend b bairt }
|
||||
if {$ii & 0x0100} {lappend a nine ; lappend b gow }
|
||||
if {$ii & 0x0200} {lappend a ten ; lappend b sip }
|
||||
execsql { INSERT INTO ft VALUES($a, $b) }
|
||||
}
|
||||
execsql COMMIT
|
||||
} {}
|
||||
foreach {tn sql result} {
|
||||
1 "SELECT count(*) FROM ft" {1023}
|
||||
2 "SELECT a FROM ft WHERE a MATCH 'one two three four five six seven eight'" {
|
||||
{one two three four five six seven eight}
|
||||
{one two three four five six seven eight nine}
|
||||
{one two three four five six seven eight ten}
|
||||
{one two three four five six seven eight nine ten}
|
||||
}
|
||||
|
||||
3 {SELECT count(*), sum(docid) FROM ft WHERE a MATCH 'o*'} {512 262144}
|
||||
4 {SELECT count(*), sum(docid) FROM ft WHERE a MATCH '"two three four"'} {
|
||||
128 66368
|
||||
}
|
||||
} {
|
||||
#set ::DO_MALLOC_TEST 0
|
||||
set result [normal_list $result]
|
||||
do_read_test fts3_malloc-3.$tn $sql $result
|
||||
}
|
||||
|
||||
finish_test
|
||||
|
Loading…
Reference in New Issue
Block a user