mirror of https://github.com/sqlite/sqlite
Performance optimizations on the CSV virtual table. Disallow WITHOUT ROWID
virtual tables that have an xUpdate method, for now. FossilOrigin-Name: 3134b3266c36c9d018e8d365ef46ef638c0792f4
This commit is contained in:
parent
35db31b24b
commit
ac9c3d2c18
210
ext/misc/csv.c
210
ext/misc/csv.c
|
@ -27,6 +27,17 @@
|
|||
** filename = "../http.log",
|
||||
** schema = "CREATE TABLE x(date,ipaddr,url,referrer,userAgent)"
|
||||
** );
|
||||
**
|
||||
** Instead of specifying a file, the text of the CSV can be loaded using
|
||||
** the data= parameter.
|
||||
**
|
||||
** If the columns=N parameter is supplied, then the CSV file is assumed to have
|
||||
** N columns. If the columns parameter is omitted, the CSV file is opened
|
||||
** as soon as the virtual table is constructed and the first row of the CSV
|
||||
** is read in order to count the columns.
|
||||
**
|
||||
** Some extra debugging features (used for testing virtual tables) are available
|
||||
** if this module is compiled with -DSQLITE_TEST.
|
||||
*/
|
||||
#include <sqlite3ext.h>
|
||||
SQLITE_EXTENSION_INIT1
|
||||
|
@ -202,42 +213,43 @@ static char *csv_read_one_field(CsvReader *p){
|
|||
if( c=='"' ){
|
||||
int pc, ppc;
|
||||
int startLine = p->nLine;
|
||||
int cQuote = c;
|
||||
pc = ppc = 0;
|
||||
while( 1 ){
|
||||
c = csv_getc(p);
|
||||
if( c=='\n' ) p->nLine++;
|
||||
if( c==cQuote ){
|
||||
if( pc==cQuote ){
|
||||
pc = 0;
|
||||
continue;
|
||||
if( c<='"' || pc=='"' ){
|
||||
if( c=='\n' ) p->nLine++;
|
||||
if( c=='"' ){
|
||||
if( pc=='"' ){
|
||||
pc = 0;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if( (c==',' && pc=='"')
|
||||
|| (c=='\n' && pc=='"')
|
||||
|| (c=='\n' && pc=='\r' && ppc=='"')
|
||||
|| (c==EOF && pc=='"')
|
||||
){
|
||||
do{ p->n--; }while( p->z[p->n]!='"' );
|
||||
p->cTerm = c;
|
||||
break;
|
||||
}
|
||||
if( pc=='"' && c!='\r' ){
|
||||
csv_errmsg(p, "line %d: unescaped %c character", p->nLine, '"');
|
||||
break;
|
||||
}
|
||||
if( c==EOF ){
|
||||
csv_errmsg(p, "line %d: unterminated %c-quoted field\n",
|
||||
startLine, '"');
|
||||
p->cTerm = c;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if( (c==',' && pc==cQuote)
|
||||
|| (c=='\n' && pc==cQuote)
|
||||
|| (c=='\n' && pc=='\r' && ppc==cQuote)
|
||||
|| (c==EOF && pc==cQuote)
|
||||
){
|
||||
do{ p->n--; }while( p->z[p->n]!=cQuote );
|
||||
p->cTerm = c;
|
||||
break;
|
||||
}
|
||||
if( pc==cQuote && c!='\r' ){
|
||||
csv_errmsg(p, "line %d: unescaped %c character", p->nLine, cQuote);
|
||||
break;
|
||||
}
|
||||
if( c==EOF ){
|
||||
csv_errmsg(p, "line %d: unterminated %c-quoted field\n",
|
||||
startLine, cQuote);
|
||||
p->cTerm = c;
|
||||
break;
|
||||
}
|
||||
if( csv_append(p, (char)c) ) return 0;
|
||||
ppc = pc;
|
||||
pc = c;
|
||||
}
|
||||
}else{
|
||||
while( c!=EOF && c!=',' && c!='\n' ){
|
||||
while( c>',' || (c!=EOF && c!=',' && c!='\n') ){
|
||||
if( csv_append(p, (char)c) ) return 0;
|
||||
c = csv_getc(p);
|
||||
}
|
||||
|
@ -287,6 +299,7 @@ typedef struct CsvCursor {
|
|||
sqlite3_vtab_cursor base; /* Base class. Must be first */
|
||||
CsvReader rdr; /* The CsvReader object */
|
||||
char **azVal; /* Value of the current row */
|
||||
int *aLen; /* Length of each entry */
|
||||
sqlite3_int64 iRowid; /* The current rowid. Negative for EOF */
|
||||
} CsvCursor;
|
||||
|
||||
|
@ -410,6 +423,9 @@ static int csv_boolean(const char *z){
|
|||
** header=YES|NO First row of CSV defines the names of
|
||||
** columns if "yes". Default "no".
|
||||
** columns=N Assume the CSV file contains N columns.
|
||||
**
|
||||
** Only available if compiled with SQLITE_TEST:
|
||||
**
|
||||
** testflags=N Bitmask of test flags. Optional
|
||||
**
|
||||
** If schema= is omitted, then the columns are named "c0", "c1", "c2",
|
||||
|
@ -428,7 +444,9 @@ static int csvtabConnect(
|
|||
int bHeader = -1; /* header= flags. -1 means not seen yet */
|
||||
int rc = SQLITE_OK; /* Result code from this routine */
|
||||
int i, j; /* Loop counters */
|
||||
#ifdef SQLITE_TEST
|
||||
int tstFlags = 0; /* Value for testflags=N parameter */
|
||||
#endif
|
||||
int nCol = -99; /* Value of the columns= parameter */
|
||||
CsvReader sRdr; /* A CSV file reader used to store an error
|
||||
** message and/or to count the number of columns */
|
||||
|
@ -469,9 +487,11 @@ static int csvtabConnect(
|
|||
goto csvtab_connect_error;
|
||||
}
|
||||
}else
|
||||
#ifdef SQLITE_TEST
|
||||
if( (zValue = csv_parameter("testflags",9,z))!=0 ){
|
||||
tstFlags = (unsigned int)atoi(zValue);
|
||||
}else
|
||||
#endif
|
||||
if( (zValue = csv_parameter("columns",7,z))!=0 ){
|
||||
if( nCol>0 ){
|
||||
csv_errmsg(&sRdr, "more than one 'columns' parameter");
|
||||
|
@ -510,7 +530,9 @@ static int csvtabConnect(
|
|||
}
|
||||
pNew->zFilename = CSV_FILENAME; CSV_FILENAME = 0;
|
||||
pNew->zData = CSV_DATA; CSV_DATA = 0;
|
||||
#ifdef SQLITE_TEST
|
||||
pNew->tstFlags = tstFlags;
|
||||
#endif
|
||||
pNew->iStart = bHeader==1 ? ftell(sRdr.in) : 0;
|
||||
csv_reader_reset(&sRdr);
|
||||
if( CSV_SCHEMA==0 ){
|
||||
|
@ -557,6 +579,7 @@ static void csvtabCursorRowReset(CsvCursor *pCur){
|
|||
for(i=0; i<pTab->nCol; i++){
|
||||
sqlite3_free(pCur->azVal[i]);
|
||||
pCur->azVal[i] = 0;
|
||||
pCur->aLen[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -591,10 +614,13 @@ static int csvtabClose(sqlite3_vtab_cursor *cur){
|
|||
static int csvtabOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){
|
||||
CsvTable *pTab = (CsvTable*)p;
|
||||
CsvCursor *pCur;
|
||||
pCur = sqlite3_malloc( sizeof(*pCur) * sizeof(char*)*pTab->nCol );
|
||||
size_t nByte;
|
||||
nByte = sizeof(*pCur) + (sizeof(char*)+sizeof(int))*pTab->nCol;
|
||||
pCur = sqlite3_malloc( nByte );
|
||||
if( pCur==0 ) return SQLITE_NOMEM;
|
||||
memset(pCur, 0, sizeof(*pCur) + sizeof(char*)*pTab->nCol );
|
||||
memset(pCur, 0, nByte);
|
||||
pCur->azVal = (char**)&pCur[1];
|
||||
pCur->aLen = (int*)&pCur->azVal[pTab->nCol];
|
||||
*ppCursor = &pCur->base;
|
||||
if( csv_reader_open(&pCur->rdr, pTab->zFilename, pTab->zData) ){
|
||||
csv_xfer_error(pTab, &pCur->rdr);
|
||||
|
@ -613,23 +639,33 @@ static int csvtabNext(sqlite3_vtab_cursor *cur){
|
|||
CsvTable *pTab = (CsvTable*)cur->pVtab;
|
||||
int i = 0;
|
||||
char *z;
|
||||
csvtabCursorRowReset(pCur);
|
||||
do{
|
||||
z = csv_read_one_field(&pCur->rdr);
|
||||
if( z==0 ){
|
||||
csv_xfer_error(pTab, &pCur->rdr);
|
||||
break;
|
||||
}
|
||||
z = sqlite3_mprintf("%s", z);
|
||||
if( z==0 ){
|
||||
csv_errmsg(&pCur->rdr, "out of memory");
|
||||
csv_xfer_error(pTab, &pCur->rdr);
|
||||
break;
|
||||
}
|
||||
if( i<pTab->nCol ){
|
||||
pCur->azVal[i++] = z;
|
||||
if( pCur->aLen[i] < pCur->rdr.n+1 ){
|
||||
char *zNew = sqlite3_realloc(pCur->azVal[i], pCur->rdr.n+1);
|
||||
if( zNew==0 ){
|
||||
csv_errmsg(&pCur->rdr, "out of memory");
|
||||
csv_xfer_error(pTab, &pCur->rdr);
|
||||
break;
|
||||
}
|
||||
pCur->azVal[i] = zNew;
|
||||
pCur->aLen[i] = pCur->rdr.n+1;
|
||||
}
|
||||
memcpy(pCur->azVal[i], z, pCur->rdr.n+1);
|
||||
i++;
|
||||
}
|
||||
}while( z!=0 && pCur->rdr.cTerm==',' );
|
||||
}while( pCur->rdr.cTerm==',' );
|
||||
while( i<pTab->nCol ){
|
||||
sqlite3_free(pCur->azVal[i]);
|
||||
pCur->azVal[i] = 0;
|
||||
pCur->aLen[i] = 0;
|
||||
i++;
|
||||
}
|
||||
if( z==0 || pCur->rdr.cTerm==EOF ){
|
||||
pCur->iRowid = -1;
|
||||
}else{
|
||||
|
@ -707,37 +743,37 @@ static int csvtabBestIndex(
|
|||
sqlite3_vtab *tab,
|
||||
sqlite3_index_info *pIdxInfo
|
||||
){
|
||||
CsvTable *pTab = (CsvTable*)tab;
|
||||
int i;
|
||||
int nConst = 0;
|
||||
pIdxInfo->estimatedCost = 1000000;
|
||||
if( (pTab->tstFlags & CSVTEST_FIDX)==0 ){
|
||||
return SQLITE_OK;
|
||||
}
|
||||
/* The usual (and sensible) case is to take the "return SQLITE_OK" above.
|
||||
** The code below only runs when testflags=1. The code below
|
||||
** generates an artificial and unrealistic plan which is useful
|
||||
** for testing virtual table logic but is not helpful to real applications.
|
||||
**
|
||||
** Any ==, LIKE, or GLOB constraint is marked as usable by the virtual
|
||||
** table (even though it is not) and the cost of running the virtual table
|
||||
** is reduced from 1 million to just 10. The constraints are *not* marked
|
||||
** as omittable, however, so the query planner should still generate a
|
||||
** plan that gives a correct answer, even if the plan is not optimal.
|
||||
*/
|
||||
for(i=0; i<pIdxInfo->nConstraint; i++){
|
||||
unsigned char op;
|
||||
if( pIdxInfo->aConstraint[i].usable==0 ) continue;
|
||||
op = pIdxInfo->aConstraint[i].op;
|
||||
if( op==SQLITE_INDEX_CONSTRAINT_EQ
|
||||
|| op==SQLITE_INDEX_CONSTRAINT_LIKE
|
||||
|| op==SQLITE_INDEX_CONSTRAINT_GLOB
|
||||
){
|
||||
pIdxInfo->estimatedCost = 10;
|
||||
pIdxInfo->aConstraintUsage[nConst].argvIndex = nConst+1;
|
||||
nConst++;
|
||||
#ifdef SQLITE_TEST
|
||||
if( (((CsvTable*)tab)->tstFlags & CSVTEST_FIDX)!=0 ){
|
||||
/* The usual (and sensible) case is to always do a full table scan.
|
||||
** The code in this branch only runs when testflags=1. This code
|
||||
** generates an artificial and unrealistic plan which is useful
|
||||
** for testing virtual table logic but is not helpful to real applications.
|
||||
**
|
||||
** Any ==, LIKE, or GLOB constraint is marked as usable by the virtual
|
||||
** table (even though it is not) and the cost of running the virtual table
|
||||
** is reduced from 1 million to just 10. The constraints are *not* marked
|
||||
** as omittable, however, so the query planner should still generate a
|
||||
** plan that gives a correct answer, even if the plan is not optimal.
|
||||
*/
|
||||
int i;
|
||||
int nConst = 0;
|
||||
for(i=0; i<pIdxInfo->nConstraint; i++){
|
||||
unsigned char op;
|
||||
if( pIdxInfo->aConstraint[i].usable==0 ) continue;
|
||||
op = pIdxInfo->aConstraint[i].op;
|
||||
if( op==SQLITE_INDEX_CONSTRAINT_EQ
|
||||
|| op==SQLITE_INDEX_CONSTRAINT_LIKE
|
||||
|| op==SQLITE_INDEX_CONSTRAINT_GLOB
|
||||
){
|
||||
pIdxInfo->estimatedCost = 10;
|
||||
pIdxInfo->aConstraintUsage[nConst].argvIndex = nConst+1;
|
||||
nConst++;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
|
@ -765,6 +801,41 @@ static sqlite3_module CsvModule = {
|
|||
0, /* xRename */
|
||||
};
|
||||
|
||||
#ifdef SQLITE_TEST
|
||||
/*
|
||||
** For virtual table testing, make a version of the CSV virtual table
|
||||
** available that has an xUpdate function. But the xUpdate always returns
|
||||
** SQLITE_READONLY since the CSV file is not really writable.
|
||||
*/
|
||||
static int csvtabUpdate(sqlite3_vtab *p,int n,sqlite3_value**v,sqlite3_int64*x){
|
||||
return SQLITE_READONLY;
|
||||
}
|
||||
static sqlite3_module CsvModuleFauxWrite = {
|
||||
0, /* iVersion */
|
||||
csvtabCreate, /* xCreate */
|
||||
csvtabConnect, /* xConnect */
|
||||
csvtabBestIndex, /* xBestIndex */
|
||||
csvtabDisconnect, /* xDisconnect */
|
||||
csvtabDisconnect, /* xDestroy */
|
||||
csvtabOpen, /* xOpen - open a cursor */
|
||||
csvtabClose, /* xClose - close a cursor */
|
||||
csvtabFilter, /* xFilter - configure scan constraints */
|
||||
csvtabNext, /* xNext - advance a cursor */
|
||||
csvtabEof, /* xEof - check for end of scan */
|
||||
csvtabColumn, /* xColumn - read data */
|
||||
csvtabRowid, /* xRowid - read data */
|
||||
csvtabUpdate, /* xUpdate */
|
||||
0, /* xBegin */
|
||||
0, /* xSync */
|
||||
0, /* xCommit */
|
||||
0, /* xRollback */
|
||||
0, /* xFindMethod */
|
||||
0, /* xRename */
|
||||
};
|
||||
#endif /* SQLITE_TEST */
|
||||
|
||||
|
||||
|
||||
#ifdef _WIN32
|
||||
__declspec(dllexport)
|
||||
#endif
|
||||
|
@ -778,6 +849,13 @@ int sqlite3_csv_init(
|
|||
char **pzErrMsg,
|
||||
const sqlite3_api_routines *pApi
|
||||
){
|
||||
int rc;
|
||||
SQLITE_EXTENSION_INIT2(pApi);
|
||||
return sqlite3_create_module(db, "csv", &CsvModule, 0);
|
||||
rc = sqlite3_create_module(db, "csv", &CsvModule, 0);
|
||||
#ifdef SQLITE_TEST
|
||||
if( rc==SQLITE_OK ){
|
||||
rc = sqlite3_create_module(db, "csv_wr", &CsvModuleFauxWrite, 0);
|
||||
}
|
||||
#endif
|
||||
return rc;
|
||||
}
|
||||
|
|
16
manifest
16
manifest
|
@ -1,5 +1,5 @@
|
|||
C Add\sthe\sCSV\sextension\sto\sthe\stest\sfixture.\s\sFix\sa\smemory\sleak\sin\sthe\sCSV\nextension.\s\sAdd\stest\scases\sfor\sthe\sCSV\sextension,\sincluding\sone\sthat\suses\na\sWITHOUT\sROWID\svirtual\stable\sparticipating\sin\sthe\sOR\soptimization.
|
||||
D 2016-06-02T23:13:21.510
|
||||
C Performance\soptimizations\son\sthe\sCSV\svirtual\stable.\s\sDisallow\sWITHOUT\sROWID\nvirtual\stables\sthat\shave\san\sxUpdate\smethod,\sfor\snow.
|
||||
D 2016-06-03T01:01:57.592
|
||||
F Makefile.in 7321ef0b584224781ec7731408857fa8962c32cc
|
||||
F Makefile.linux-gcc 7bc79876b875010e8c8f9502eb935ca92aa3c434
|
||||
F Makefile.msc 831503fc4e988f571590af1405645fff121b5f1e
|
||||
|
@ -206,7 +206,7 @@ F ext/icu/sqliteicu.h 728867a802baa5a96de7495e9689a8e01715ef37
|
|||
F ext/misc/amatch.c 211108e201105e4bb0c076527b8cfd34330fc234
|
||||
F ext/misc/closure.c 0d2a038df8fbae7f19de42e7c7d71f2e4dc88704
|
||||
F ext/misc/compress.c 122faa92d25033d6c3f07c39231de074ab3d2e83
|
||||
F ext/misc/csv.c 58ad4e9eb25310a2712e0cb78592dc15924f0379
|
||||
F ext/misc/csv.c f01126ba170fd4ef7c752b156568a80c912d4441
|
||||
F ext/misc/eval.c f971962e92ebb8b0a4e6b62949463ee454d88fa2
|
||||
F ext/misc/fileio.c d4171c815d6543a9edef8308aab2951413cd8d0f
|
||||
F ext/misc/fuzzer.c 7c64b8197bb77b7d64eff7cac7848870235d4c25
|
||||
|
@ -456,7 +456,7 @@ F src/vdbeblob.c c9f2f494b911c6fa34efd9803f0a10807da80f77
|
|||
F src/vdbemem.c 5cfef60e60e19cab6275d1b975bf4c791d575beb
|
||||
F src/vdbesort.c 91fda3909326860382b0ca8aa251e609c6a9d62c
|
||||
F src/vdbetrace.c f75c5455d8cf389ef86a8bfdfd3177e0e3692484
|
||||
F src/vtab.c a80b4e40ed8687daa3303e970d938b1f613a3eeb
|
||||
F src/vtab.c a9e8175477539660814a4704e8eaba6df3678651
|
||||
F src/vxworks.h d2988f4e5a61a4dfe82c6524dd3d6e4f2ce3cdb9
|
||||
F src/wal.c 02eeecc265f6ffd0597378f5d8ae9070b62a406a
|
||||
F src/wal.h 2f7c831cf3b071fa548bf2d5cac640846a7ff19c
|
||||
|
@ -612,7 +612,7 @@ F test/crashM.test d95f59046fa749b0d0822edf18a717788c8f318d
|
|||
F test/crashtest1.c 09c1c7d728ccf4feb9e481671e29dda5669bbcc2
|
||||
F test/createtab.test b5de160630b209c4b8925bdcbbaf48cc90b67fe8
|
||||
F test/cse.test 277350a26264495e86b1785f34d2d0c8600e021c
|
||||
F test/csv01.test 44a9786f6cb4dcf40f3d5d59844dbb88d2265e5d
|
||||
F test/csv01.test 69aecc17f11f597390b47700aec8d748b8364140
|
||||
F test/ctime.test 7bd009071e242aac4f18521581536b652b789a47
|
||||
F test/cursorhint.test 7bc346788390475e77a345da2b92270d04d35856
|
||||
F test/date.test 984ac1e3e5e031386866f034006148d3972b4a65
|
||||
|
@ -1498,7 +1498,7 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93
|
|||
F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
|
||||
F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
|
||||
F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
|
||||
P 769191042aa14e6eccdfe2391fc1011171d5c9ad
|
||||
R 7cb009be35227902926fe0d852282f91
|
||||
P 95f483e86e30ae68108904400e18ed41d389446b
|
||||
R 110a9ed97c9cb7052ae396116c46b04d
|
||||
U drh
|
||||
Z 7f2a8b3dbd34984ea991a68c56b4587b
|
||||
Z 863bf523824cd922d795a07ca8451a6d
|
||||
|
|
|
@ -1 +1 @@
|
|||
95f483e86e30ae68108904400e18ed41d389446b
|
||||
3134b3266c36c9d018e8d365ef46ef638c0792f4
|
|
@ -762,6 +762,9 @@ int sqlite3_declare_vtab(sqlite3 *db, const char *zCreateTable){
|
|||
pNew->nCol = 0;
|
||||
pNew->aCol = 0;
|
||||
assert( pTab->pIndex==0 );
|
||||
if( !HasRowid(pNew) && pCtx->pVTable->pMod->pModule->xUpdate!=0 ){
|
||||
rc = SQLITE_ERROR;
|
||||
}
|
||||
pIdx = pNew->pIndex;
|
||||
if( pIdx ){
|
||||
assert( pIdx->pNext==0 );
|
||||
|
|
|
@ -88,4 +88,20 @@ do_execsql_test 3.1 {
|
|||
SELECT a FROM t3 WHERE +b=6 OR c=7 OR d=12 ORDER BY +a;
|
||||
} {5 9}
|
||||
|
||||
do_catchsql_test 4.0 {
|
||||
DROP TABLE t3;
|
||||
CREATE VIRTUAL TABLE temp.t4 USING csv_wr(
|
||||
data=
|
||||
'1,2,3,4
|
||||
5,6,7,8
|
||||
9,10,11,12
|
||||
13,14,15,16
|
||||
',
|
||||
columns=4,
|
||||
schema=
|
||||
'CREATE TABLE t3(a PRIMARY KEY,b TEXT,c TEXT,d TEXT) WITHOUT ROWID',
|
||||
testflags=1
|
||||
);
|
||||
} {1 {vtable constructor failed: t4}}
|
||||
|
||||
finish_test
|
||||
|
|
Loading…
Reference in New Issue