Performance optimizations on the CSV virtual table. Disallow WITHOUT ROWID

virtual tables that have an xUpdate method, for now.

FossilOrigin-Name: 3134b3266c36c9d018e8d365ef46ef638c0792f4
This commit is contained in:
drh 2016-06-03 01:01:57 +00:00
parent 35db31b24b
commit ac9c3d2c18
5 changed files with 172 additions and 75 deletions

View File

@ -27,6 +27,17 @@
** filename = "../http.log",
** schema = "CREATE TABLE x(date,ipaddr,url,referrer,userAgent)"
** );
**
** Instead of specifying a file, the text of the CSV can be loaded using
** the data= parameter.
**
** If the columns=N parameter is supplied, then the CSV file is assumed to have
** N columns. If the columns parameter is omitted, the CSV file is opened
** as soon as the virtual table is constructed and the first row of the CSV
** is read in order to count the columns.
**
** Some extra debugging features (used for testing virtual tables) are available
** if this module is compiled with -DSQLITE_TEST.
*/
#include <sqlite3ext.h>
SQLITE_EXTENSION_INIT1
@ -202,42 +213,43 @@ static char *csv_read_one_field(CsvReader *p){
if( c=='"' ){
int pc, ppc;
int startLine = p->nLine;
int cQuote = c;
pc = ppc = 0;
while( 1 ){
c = csv_getc(p);
if( c=='\n' ) p->nLine++;
if( c==cQuote ){
if( pc==cQuote ){
pc = 0;
continue;
if( c<='"' || pc=='"' ){
if( c=='\n' ) p->nLine++;
if( c=='"' ){
if( pc=='"' ){
pc = 0;
continue;
}
}
if( (c==',' && pc=='"')
|| (c=='\n' && pc=='"')
|| (c=='\n' && pc=='\r' && ppc=='"')
|| (c==EOF && pc=='"')
){
do{ p->n--; }while( p->z[p->n]!='"' );
p->cTerm = c;
break;
}
if( pc=='"' && c!='\r' ){
csv_errmsg(p, "line %d: unescaped %c character", p->nLine, '"');
break;
}
if( c==EOF ){
csv_errmsg(p, "line %d: unterminated %c-quoted field\n",
startLine, '"');
p->cTerm = c;
break;
}
}
if( (c==',' && pc==cQuote)
|| (c=='\n' && pc==cQuote)
|| (c=='\n' && pc=='\r' && ppc==cQuote)
|| (c==EOF && pc==cQuote)
){
do{ p->n--; }while( p->z[p->n]!=cQuote );
p->cTerm = c;
break;
}
if( pc==cQuote && c!='\r' ){
csv_errmsg(p, "line %d: unescaped %c character", p->nLine, cQuote);
break;
}
if( c==EOF ){
csv_errmsg(p, "line %d: unterminated %c-quoted field\n",
startLine, cQuote);
p->cTerm = c;
break;
}
if( csv_append(p, (char)c) ) return 0;
ppc = pc;
pc = c;
}
}else{
while( c!=EOF && c!=',' && c!='\n' ){
while( c>',' || (c!=EOF && c!=',' && c!='\n') ){
if( csv_append(p, (char)c) ) return 0;
c = csv_getc(p);
}
@ -287,6 +299,7 @@ typedef struct CsvCursor {
sqlite3_vtab_cursor base; /* Base class. Must be first */
CsvReader rdr; /* The CsvReader object */
char **azVal; /* Value of the current row */
int *aLen; /* Length of each entry */
sqlite3_int64 iRowid; /* The current rowid. Negative for EOF */
} CsvCursor;
@ -410,6 +423,9 @@ static int csv_boolean(const char *z){
** header=YES|NO First row of CSV defines the names of
** columns if "yes". Default "no".
** columns=N Assume the CSV file contains N columns.
**
** Only available if compiled with SQLITE_TEST:
**
** testflags=N Bitmask of test flags. Optional
**
** If schema= is omitted, then the columns are named "c0", "c1", "c2",
@ -428,7 +444,9 @@ static int csvtabConnect(
int bHeader = -1; /* header= flags. -1 means not seen yet */
int rc = SQLITE_OK; /* Result code from this routine */
int i, j; /* Loop counters */
#ifdef SQLITE_TEST
int tstFlags = 0; /* Value for testflags=N parameter */
#endif
int nCol = -99; /* Value of the columns= parameter */
CsvReader sRdr; /* A CSV file reader used to store an error
** message and/or to count the number of columns */
@ -469,9 +487,11 @@ static int csvtabConnect(
goto csvtab_connect_error;
}
}else
#ifdef SQLITE_TEST
if( (zValue = csv_parameter("testflags",9,z))!=0 ){
tstFlags = (unsigned int)atoi(zValue);
}else
#endif
if( (zValue = csv_parameter("columns",7,z))!=0 ){
if( nCol>0 ){
csv_errmsg(&sRdr, "more than one 'columns' parameter");
@ -510,7 +530,9 @@ static int csvtabConnect(
}
pNew->zFilename = CSV_FILENAME; CSV_FILENAME = 0;
pNew->zData = CSV_DATA; CSV_DATA = 0;
#ifdef SQLITE_TEST
pNew->tstFlags = tstFlags;
#endif
pNew->iStart = bHeader==1 ? ftell(sRdr.in) : 0;
csv_reader_reset(&sRdr);
if( CSV_SCHEMA==0 ){
@ -557,6 +579,7 @@ static void csvtabCursorRowReset(CsvCursor *pCur){
for(i=0; i<pTab->nCol; i++){
sqlite3_free(pCur->azVal[i]);
pCur->azVal[i] = 0;
pCur->aLen[i] = 0;
}
}
@ -591,10 +614,13 @@ static int csvtabClose(sqlite3_vtab_cursor *cur){
static int csvtabOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){
CsvTable *pTab = (CsvTable*)p;
CsvCursor *pCur;
pCur = sqlite3_malloc( sizeof(*pCur) * sizeof(char*)*pTab->nCol );
size_t nByte;
nByte = sizeof(*pCur) + (sizeof(char*)+sizeof(int))*pTab->nCol;
pCur = sqlite3_malloc( nByte );
if( pCur==0 ) return SQLITE_NOMEM;
memset(pCur, 0, sizeof(*pCur) + sizeof(char*)*pTab->nCol );
memset(pCur, 0, nByte);
pCur->azVal = (char**)&pCur[1];
pCur->aLen = (int*)&pCur->azVal[pTab->nCol];
*ppCursor = &pCur->base;
if( csv_reader_open(&pCur->rdr, pTab->zFilename, pTab->zData) ){
csv_xfer_error(pTab, &pCur->rdr);
@ -613,23 +639,33 @@ static int csvtabNext(sqlite3_vtab_cursor *cur){
CsvTable *pTab = (CsvTable*)cur->pVtab;
int i = 0;
char *z;
csvtabCursorRowReset(pCur);
do{
z = csv_read_one_field(&pCur->rdr);
if( z==0 ){
csv_xfer_error(pTab, &pCur->rdr);
break;
}
z = sqlite3_mprintf("%s", z);
if( z==0 ){
csv_errmsg(&pCur->rdr, "out of memory");
csv_xfer_error(pTab, &pCur->rdr);
break;
}
if( i<pTab->nCol ){
pCur->azVal[i++] = z;
if( pCur->aLen[i] < pCur->rdr.n+1 ){
char *zNew = sqlite3_realloc(pCur->azVal[i], pCur->rdr.n+1);
if( zNew==0 ){
csv_errmsg(&pCur->rdr, "out of memory");
csv_xfer_error(pTab, &pCur->rdr);
break;
}
pCur->azVal[i] = zNew;
pCur->aLen[i] = pCur->rdr.n+1;
}
memcpy(pCur->azVal[i], z, pCur->rdr.n+1);
i++;
}
}while( z!=0 && pCur->rdr.cTerm==',' );
}while( pCur->rdr.cTerm==',' );
while( i<pTab->nCol ){
sqlite3_free(pCur->azVal[i]);
pCur->azVal[i] = 0;
pCur->aLen[i] = 0;
i++;
}
if( z==0 || pCur->rdr.cTerm==EOF ){
pCur->iRowid = -1;
}else{
@ -707,37 +743,37 @@ static int csvtabBestIndex(
sqlite3_vtab *tab,
sqlite3_index_info *pIdxInfo
){
CsvTable *pTab = (CsvTable*)tab;
int i;
int nConst = 0;
pIdxInfo->estimatedCost = 1000000;
if( (pTab->tstFlags & CSVTEST_FIDX)==0 ){
return SQLITE_OK;
}
/* The usual (and sensible) case is to take the "return SQLITE_OK" above.
** The code below only runs when testflags=1. The code below
** generates an artificial and unrealistic plan which is useful
** for testing virtual table logic but is not helpful to real applications.
**
** Any ==, LIKE, or GLOB constraint is marked as usable by the virtual
** table (even though it is not) and the cost of running the virtual table
** is reduced from 1 million to just 10. The constraints are *not* marked
** as omittable, however, so the query planner should still generate a
** plan that gives a correct answer, even if the plan is not optimal.
*/
for(i=0; i<pIdxInfo->nConstraint; i++){
unsigned char op;
if( pIdxInfo->aConstraint[i].usable==0 ) continue;
op = pIdxInfo->aConstraint[i].op;
if( op==SQLITE_INDEX_CONSTRAINT_EQ
|| op==SQLITE_INDEX_CONSTRAINT_LIKE
|| op==SQLITE_INDEX_CONSTRAINT_GLOB
){
pIdxInfo->estimatedCost = 10;
pIdxInfo->aConstraintUsage[nConst].argvIndex = nConst+1;
nConst++;
#ifdef SQLITE_TEST
if( (((CsvTable*)tab)->tstFlags & CSVTEST_FIDX)!=0 ){
/* The usual (and sensible) case is to always do a full table scan.
** The code in this branch only runs when testflags=1. This code
** generates an artificial and unrealistic plan which is useful
** for testing virtual table logic but is not helpful to real applications.
**
** Any ==, LIKE, or GLOB constraint is marked as usable by the virtual
** table (even though it is not) and the cost of running the virtual table
** is reduced from 1 million to just 10. The constraints are *not* marked
** as omittable, however, so the query planner should still generate a
** plan that gives a correct answer, even if the plan is not optimal.
*/
int i;
int nConst = 0;
for(i=0; i<pIdxInfo->nConstraint; i++){
unsigned char op;
if( pIdxInfo->aConstraint[i].usable==0 ) continue;
op = pIdxInfo->aConstraint[i].op;
if( op==SQLITE_INDEX_CONSTRAINT_EQ
|| op==SQLITE_INDEX_CONSTRAINT_LIKE
|| op==SQLITE_INDEX_CONSTRAINT_GLOB
){
pIdxInfo->estimatedCost = 10;
pIdxInfo->aConstraintUsage[nConst].argvIndex = nConst+1;
nConst++;
}
}
}
#endif
return SQLITE_OK;
}
@ -765,6 +801,41 @@ static sqlite3_module CsvModule = {
0, /* xRename */
};
#ifdef SQLITE_TEST
/*
** Stub xUpdate method, used only for virtual table testing.  It exists so
** that a variant of the CSV virtual table can be made available with an
** xUpdate method present.  The CSV file is not really writable, so this
** routine ignores all of its arguments and always returns SQLITE_READONLY.
*/
static int csvtabUpdate(
  sqlite3_vtab *p,
  int n,
  sqlite3_value **v,
  sqlite3_int64 *x
){
  return SQLITE_READONLY;
}
/*
** Method table for the test-only variant of the CSV virtual table.  It
** wires in the csvtab* callbacks and, in the xUpdate slot, csvtabUpdate.
** The transaction hooks (xBegin, xSync, xCommit, xRollback), xFindMethod
** and xRename are all left as 0.
*/
static sqlite3_module CsvModuleFauxWrite = {
0, /* iVersion */
csvtabCreate, /* xCreate */
csvtabConnect, /* xConnect */
csvtabBestIndex, /* xBestIndex */
csvtabDisconnect, /* xDisconnect */
csvtabDisconnect, /* xDestroy - same routine as xDisconnect */
csvtabOpen, /* xOpen - open a cursor */
csvtabClose, /* xClose - close a cursor */
csvtabFilter, /* xFilter - configure scan constraints */
csvtabNext, /* xNext - advance a cursor */
csvtabEof, /* xEof - check for end of scan */
csvtabColumn, /* xColumn - read data */
csvtabRowid, /* xRowid - read data */
csvtabUpdate, /* xUpdate - stub that always returns SQLITE_READONLY */
0, /* xBegin */
0, /* xSync */
0, /* xCommit */
0, /* xRollback */
0, /* xFindMethod */
0, /* xRename */
};
#endif /* SQLITE_TEST */
#ifdef _WIN32
__declspec(dllexport)
#endif
@ -778,6 +849,13 @@ int sqlite3_csv_init(
char **pzErrMsg,
const sqlite3_api_routines *pApi
){
int rc;
SQLITE_EXTENSION_INIT2(pApi);
return sqlite3_create_module(db, "csv", &CsvModule, 0);
rc = sqlite3_create_module(db, "csv", &CsvModule, 0);
#ifdef SQLITE_TEST
if( rc==SQLITE_OK ){
rc = sqlite3_create_module(db, "csv_wr", &CsvModuleFauxWrite, 0);
}
#endif
return rc;
}

View File

@ -1,5 +1,5 @@
C Add\sthe\sCSV\sextension\sto\sthe\stest\sfixture.\s\sFix\sa\smemory\sleak\sin\sthe\sCSV\nextension.\s\sAdd\stest\scases\sfor\sthe\sCSV\sextension,\sincluding\sone\sthat\suses\na\sWITHOUT\sROWID\svirtual\stable\sparticipating\sin\sthe\sOR\soptimization.
D 2016-06-02T23:13:21.510
C Performance\soptimizations\son\sthe\sCSV\svirtual\stable.\s\sDisallow\sWITHOUT\sROWID\nvirtual\stables\sthat\shave\san\sxUpdate\smethod,\sfor\snow.
D 2016-06-03T01:01:57.592
F Makefile.in 7321ef0b584224781ec7731408857fa8962c32cc
F Makefile.linux-gcc 7bc79876b875010e8c8f9502eb935ca92aa3c434
F Makefile.msc 831503fc4e988f571590af1405645fff121b5f1e
@ -206,7 +206,7 @@ F ext/icu/sqliteicu.h 728867a802baa5a96de7495e9689a8e01715ef37
F ext/misc/amatch.c 211108e201105e4bb0c076527b8cfd34330fc234
F ext/misc/closure.c 0d2a038df8fbae7f19de42e7c7d71f2e4dc88704
F ext/misc/compress.c 122faa92d25033d6c3f07c39231de074ab3d2e83
F ext/misc/csv.c 58ad4e9eb25310a2712e0cb78592dc15924f0379
F ext/misc/csv.c f01126ba170fd4ef7c752b156568a80c912d4441
F ext/misc/eval.c f971962e92ebb8b0a4e6b62949463ee454d88fa2
F ext/misc/fileio.c d4171c815d6543a9edef8308aab2951413cd8d0f
F ext/misc/fuzzer.c 7c64b8197bb77b7d64eff7cac7848870235d4c25
@ -456,7 +456,7 @@ F src/vdbeblob.c c9f2f494b911c6fa34efd9803f0a10807da80f77
F src/vdbemem.c 5cfef60e60e19cab6275d1b975bf4c791d575beb
F src/vdbesort.c 91fda3909326860382b0ca8aa251e609c6a9d62c
F src/vdbetrace.c f75c5455d8cf389ef86a8bfdfd3177e0e3692484
F src/vtab.c a80b4e40ed8687daa3303e970d938b1f613a3eeb
F src/vtab.c a9e8175477539660814a4704e8eaba6df3678651
F src/vxworks.h d2988f4e5a61a4dfe82c6524dd3d6e4f2ce3cdb9
F src/wal.c 02eeecc265f6ffd0597378f5d8ae9070b62a406a
F src/wal.h 2f7c831cf3b071fa548bf2d5cac640846a7ff19c
@ -612,7 +612,7 @@ F test/crashM.test d95f59046fa749b0d0822edf18a717788c8f318d
F test/crashtest1.c 09c1c7d728ccf4feb9e481671e29dda5669bbcc2
F test/createtab.test b5de160630b209c4b8925bdcbbaf48cc90b67fe8
F test/cse.test 277350a26264495e86b1785f34d2d0c8600e021c
F test/csv01.test 44a9786f6cb4dcf40f3d5d59844dbb88d2265e5d
F test/csv01.test 69aecc17f11f597390b47700aec8d748b8364140
F test/ctime.test 7bd009071e242aac4f18521581536b652b789a47
F test/cursorhint.test 7bc346788390475e77a345da2b92270d04d35856
F test/date.test 984ac1e3e5e031386866f034006148d3972b4a65
@ -1498,7 +1498,7 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93
F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
P 769191042aa14e6eccdfe2391fc1011171d5c9ad
R 7cb009be35227902926fe0d852282f91
P 95f483e86e30ae68108904400e18ed41d389446b
R 110a9ed97c9cb7052ae396116c46b04d
U drh
Z 7f2a8b3dbd34984ea991a68c56b4587b
Z 863bf523824cd922d795a07ca8451a6d

View File

@ -1 +1 @@
95f483e86e30ae68108904400e18ed41d389446b
3134b3266c36c9d018e8d365ef46ef638c0792f4

View File

@ -762,6 +762,9 @@ int sqlite3_declare_vtab(sqlite3 *db, const char *zCreateTable){
pNew->nCol = 0;
pNew->aCol = 0;
assert( pTab->pIndex==0 );
if( !HasRowid(pNew) && pCtx->pVTable->pMod->pModule->xUpdate!=0 ){
rc = SQLITE_ERROR;
}
pIdx = pNew->pIndex;
if( pIdx ){
assert( pIdx->pNext==0 );

View File

@ -88,4 +88,20 @@ do_execsql_test 3.1 {
SELECT a FROM t3 WHERE +b=6 OR c=7 OR d=12 ORDER BY +a;
} {5 9}
# Test 4.0: create a virtual table with the csv_wr module (the CSV variant
# that has an xUpdate method) using a WITHOUT ROWID schema.  The expected
# result is an error: "vtable constructor failed: t4".
do_catchsql_test 4.0 {
DROP TABLE t3;
CREATE VIRTUAL TABLE temp.t4 USING csv_wr(
data=
'1,2,3,4
5,6,7,8
9,10,11,12
13,14,15,16
',
columns=4,
schema=
'CREATE TABLE t3(a PRIMARY KEY,b TEXT,c TEXT,d TEXT) WITHOUT ROWID',
testflags=1
);
} {1 {vtable constructor failed: t4}}
finish_test