Add the data= parameter to the CSV virtual table extension.
FossilOrigin-Name: 769191042aa14e6eccdfe2391fc1011171d5c9ad
This commit is contained in:
parent
273bfe9f20
commit
adcba64df4
214
ext/misc/csv.c
214
ext/misc/csv.c
@ -53,6 +53,9 @@ SQLITE_EXTENSION_INIT1
|
||||
/* Max size of the error message in a CsvReader */
|
||||
#define CSV_MXERR 200
|
||||
|
||||
/* Size of the CsvReader input buffer */
|
||||
#define CSV_INBUFSZ 1024
|
||||
|
||||
/* A context object used when reading a CSV file. */
|
||||
typedef struct CsvReader CsvReader;
|
||||
struct CsvReader {
|
||||
@ -61,18 +64,31 @@ struct CsvReader {
|
||||
int n; /* Number of bytes in z */
|
||||
int nAlloc; /* Space allocated for z[] */
|
||||
int nLine; /* Current line number */
|
||||
int cTerm; /* Character that terminated the most recent field */
|
||||
char cTerm; /* Character that terminated the most recent field */
|
||||
size_t iIn; /* Next unread character in the input buffer */
|
||||
size_t nIn; /* Number of characters in the input buffer */
|
||||
char *zIn; /* The input buffer */
|
||||
char zErr[CSV_MXERR]; /* Error message */
|
||||
};
|
||||
|
||||
/* Initialize a CsvReader object */
|
||||
static void csv_reader_init(CsvReader *p){
  /* Put every field of the reader into its "closed / empty" state.
  **
  ** The reset is done field by field rather than with a memset() of the
  ** whole object so that the CSV_MXERR-byte zErr[] buffer does not have
  ** to be cleared each time; writing a NUL into zErr[0] is enough to make
  ** the error message empty.
  **
  ** iIn and cTerm are reset as well so that a reader that is re-opened
  ** after csv_reader_reset() never starts from a stale read offset or a
  ** stale field terminator.
  */
  p->in = 0;
  p->z = 0;
  p->n = 0;
  p->nAlloc = 0;
  p->nLine = 0;
  p->cTerm = 0;
  p->iIn = 0;
  p->nIn = 0;
  p->zIn = 0;
  p->zErr[0] = 0;
}
|
||||
|
||||
/* Close and reset a CsvReader object */
|
||||
static void csv_reader_reset(CsvReader *p){
  /* Release everything the reader owns and return it to the state
  ** produced by csv_reader_init().
  **
  ** The input stream must be closed exactly once.  zIn is only freed
  ** when a file is open: in that case it is the buffer obtained from
  ** sqlite3_malloc() in csv_reader_open().  When there is no file, zIn
  ** points at text supplied by the caller (the data= case) and must not
  ** be freed here.
  */
  if( p->in ){
    fclose(p->in);
    sqlite3_free(p->zIn);
  }
  sqlite3_free(p->z);   /* Accumulated field text */
  csv_reader_init(p);
}
|
||||
@ -88,15 +104,56 @@ static void csv_errmsg(CsvReader *p, const char *zFormat, ...){
|
||||
/* Open the input (a file, or an in-memory string) associated with a CsvReader
|
||||
** Return the number of errors.
|
||||
*/
|
||||
static int csv_reader_open(CsvReader *p, const char *zFilename){
|
||||
p->in = fopen(zFilename, "rb");
|
||||
if( p->in==0 ){
|
||||
csv_errmsg(p, "cannot open '%s' for reading", zFilename);
|
||||
return 1;
|
||||
static int csv_reader_open(
|
||||
CsvReader *p, /* The reader to open */
|
||||
const char *zFilename, /* Read from this filename */
|
||||
const char *zData /* ... or use this data */
|
||||
){
|
||||
if( zFilename ){
|
||||
p->zIn = sqlite3_malloc( CSV_INBUFSZ );
|
||||
if( p->zIn==0 ){
|
||||
csv_errmsg(p, "out of memory");
|
||||
return 1;
|
||||
}
|
||||
p->in = fopen(zFilename, "rb");
|
||||
if( p->in==0 ){
|
||||
csv_reader_reset(p);
|
||||
csv_errmsg(p, "cannot open '%s' for reading", zFilename);
|
||||
return 1;
|
||||
}
|
||||
}else{
|
||||
assert( p->in==0 );
|
||||
p->zIn = (char*)zData;
|
||||
p->nIn = strlen(zData);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* The input buffer has been exhausted.  Refill the input buffer, then
|
||||
** return the next character
|
||||
*/
|
||||
static CSV_NOINLINE int csv_getc_refill(CsvReader *p){
  size_t got;   /* Number of bytes delivered by fread() */

  assert( p->iIn>=p->nIn );  /* Only called on an empty input buffer */
  assert( p->in!=0 );        /* Only called if reading from a file */

  got = fread(p->zIn, 1, CSV_INBUFSZ, p->in);
  if( got==0 ) return EOF;
  p->nIn = got;
  p->iIn = 1;                /* Byte 0 is consumed by the return below */

  /* Return the byte as an unsigned value, the way fgetc() does.  If the
  ** plain (possibly signed) char were returned, an input byte of 0xFF
  ** would come back as -1 and be mistaken for EOF. */
  return (unsigned char)p->zIn[0];
}
|
||||
|
||||
/* Return the next character of input. Return EOF at end of input. */
|
||||
static int csv_getc(CsvReader *p){
  if( p->iIn >= p->nIn ){
    /* Buffer is used up.  A file-backed reader can fetch more input;
    ** an in-memory reader (p->in==0) has reached the end of its text. */
    if( p->in!=0 ) return csv_getc_refill(p);
    return EOF;
  }
  /* Convert through unsigned char, as fgetc() does, so that bytes with
  ** the high bit set are never negative and 0xFF cannot be confused
  ** with EOF on platforms where char is signed. */
  return (unsigned char)p->zIn[p->iIn++];
}
|
||||
|
||||
/* Increase the size of p->z and append character c to the end.
|
||||
** Return 0 on success and non-zero if there is an OOM error */
|
||||
static CSV_NOINLINE int csv_resize_and_append(CsvReader *p, char c){
|
||||
@ -137,7 +194,7 @@ static int csv_append(CsvReader *p, char c){
|
||||
static char *csv_read_one_field(CsvReader *p){
|
||||
int c;
|
||||
p->n = 0;
|
||||
c = fgetc(p->in);
|
||||
c = csv_getc(p);
|
||||
if( c==EOF ){
|
||||
p->cTerm = EOF;
|
||||
return "";
|
||||
@ -148,7 +205,7 @@ static char *csv_read_one_field(CsvReader *p){
|
||||
int cQuote = c;
|
||||
pc = ppc = 0;
|
||||
while( 1 ){
|
||||
c = fgetc(p->in);
|
||||
c = csv_getc(p);
|
||||
if( c=='\n' ) p->nLine++;
|
||||
if( c==cQuote ){
|
||||
if( pc==cQuote ){
|
||||
@ -182,7 +239,7 @@ static char *csv_read_one_field(CsvReader *p){
|
||||
}else{
|
||||
while( c!=EOF && c!=',' && c!='\n' ){
|
||||
if( csv_append(p, (char)c) ) return 0;
|
||||
c = fgetc(p->in);
|
||||
c = csv_getc(p);
|
||||
}
|
||||
if( c=='\n' ){
|
||||
p->nLine++;
|
||||
@ -216,6 +273,7 @@ static int csvtabRowid(sqlite3_vtab_cursor*,sqlite3_int64*);
|
||||
typedef struct CsvTable {
|
||||
sqlite3_vtab base; /* Base class. Must be first */
|
||||
char *zFilename; /* Name of the CSV file */
|
||||
char *zData; /* Raw CSV data in lieu of zFilename */
|
||||
long iStart; /* Offset to start of data in zFilename */
|
||||
int nCol; /* Number of columns in the CSV file */
|
||||
unsigned int tstFlags; /* Bit values used for testing */
|
||||
@ -290,6 +348,37 @@ static const char *csv_parameter(const char *zTag, int nTag, const char *z){
|
||||
return csv_skip_whitespace(z+1);
|
||||
}
|
||||
|
||||
/* Decode a parameter that requires a dequoted string.
|
||||
**
|
||||
** Return 1 if the parameter is seen, or 0 if not. 1 is returned
|
||||
** even if there is an error. If an error occurs, then an error message
|
||||
** is left in p->zErr. If there are no errors, p->zErr[0]==0.
|
||||
*/
|
||||
static int csv_string_parameter(
|
||||
CsvReader *p, /* Leave the error message here, if there is one */
|
||||
const char *zParam, /* Parameter we are checking for */
|
||||
const char *zArg, /* Raw text of the virtual table argment */
|
||||
char **pzVal /* Write the dequoted string value here */
|
||||
){
|
||||
const char *zValue;
|
||||
zValue = csv_parameter(zParam,strlen(zParam),zArg);
|
||||
if( zValue==0 ) return 0;
|
||||
p->zErr[0] = 0;
|
||||
if( *pzVal ){
|
||||
csv_errmsg(p, "more than one '%s' parameter", zParam);
|
||||
return 1;
|
||||
}
|
||||
*pzVal = sqlite3_mprintf("%s", zValue);
|
||||
if( *pzVal==0 ){
|
||||
csv_errmsg(p, "out of memory");
|
||||
return 1;
|
||||
}
|
||||
csv_trim_whitespace(*pzVal);
|
||||
csv_dequote(*pzVal);
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/* Return 0 if the argument is false and 1 if it is true. Return -1 if
|
||||
** we cannot really tell.
|
||||
*/
|
||||
@ -314,11 +403,12 @@ static int csv_boolean(const char *z){
|
||||
|
||||
/*
|
||||
** Parameters:
|
||||
** filename=FILENAME Required
|
||||
** filename=FILENAME Name of file containing CSV content
|
||||
** data=TEXT Direct CSV content.
|
||||
** schema=SCHEMA Alternative CSV schema.
|
||||
** header=YES|NO First row of CSV defines the names of
|
||||
** columns if "yes". Default "no".
|
||||
** columns=N Assum the CSV file contains N columns.
|
||||
** columns=N Assume the CSV file contains N columns.
|
||||
** testflags=N Bitmask of test flags. Optional
|
||||
**
|
||||
** If schema= is omitted, then the columns are named "c0", "c1", "c2",
|
||||
@ -336,37 +426,31 @@ static int csvtabConnect(
|
||||
CsvTable *pNew = 0; /* The CsvTable object to construct */
|
||||
int bHeader = -1; /* header= flags. -1 means not seen yet */
|
||||
int rc = SQLITE_OK; /* Result code from this routine */
|
||||
int i; /* Loop counter */
|
||||
char *zFilename = 0; /* Value of the filename= parameter */
|
||||
char *zSchema = 0; /* Value of the schema= parameter */
|
||||
int tstFlags = 0; /* Value of the testflags= parameter */
|
||||
int i, j; /* Loop counters */
|
||||
int tstFlags = 0; /* Value for testflags=N parameter */
|
||||
int nCol = -99; /* Value of the columns= parameter */
|
||||
CsvReader sRdr; /* A CSV file reader used to store an error
|
||||
** message and/or to count the number of columns */
|
||||
static const char *azParam[] = {
|
||||
"filename", "data", "schema",
|
||||
};
|
||||
char *azPValue[3]; /* Parameter values */
|
||||
# define CSV_FILENAME (azPValue[0])
|
||||
# define CSV_DATA (azPValue[1])
|
||||
# define CSV_SCHEMA (azPValue[2])
|
||||
|
||||
|
||||
assert( sizeof(azPValue)==sizeof(azParam) );
|
||||
memset(&sRdr, 0, sizeof(sRdr));
|
||||
memset(azPValue, 0, sizeof(azPValue));
|
||||
for(i=3; i<argc; i++){
|
||||
const char *z = argv[i];
|
||||
const char *zValue;
|
||||
if( (zValue = csv_parameter("filename",8,z))!=0 ){
|
||||
if( zFilename ){
|
||||
csv_errmsg(&sRdr, "more than one 'filename' parameter");
|
||||
goto csvtab_connect_error;
|
||||
}
|
||||
zFilename = sqlite3_mprintf("%s", zValue);
|
||||
if( zFilename==0 ) goto csvtab_connect_oom;
|
||||
csv_trim_whitespace(zFilename);
|
||||
csv_dequote(zFilename);
|
||||
}else
|
||||
if( (zValue = csv_parameter("schema",6,z))!=0 ){
|
||||
if( zSchema ){
|
||||
csv_errmsg(&sRdr, "more than one 'schema' parameter");
|
||||
goto csvtab_connect_error;
|
||||
}
|
||||
zSchema = sqlite3_mprintf("%s", zValue);
|
||||
if( zSchema==0 ) goto csvtab_connect_oom;
|
||||
csv_trim_whitespace(zSchema);
|
||||
csv_dequote(zSchema);
|
||||
for(j=0; j<sizeof(azParam)/sizeof(azParam[0]); j++){
|
||||
if( csv_string_parameter(&sRdr, azParam[j], z, &azPValue[j]) ) break;
|
||||
}
|
||||
if( j<sizeof(azParam)/sizeof(azParam[0]) ){
|
||||
if( sRdr.zErr[0] ) goto csvtab_connect_error;
|
||||
}else
|
||||
if( (zValue = csv_parameter("header",6,z))!=0 ){
|
||||
int x;
|
||||
@ -403,11 +487,11 @@ static int csvtabConnect(
|
||||
goto csvtab_connect_error;
|
||||
}
|
||||
}
|
||||
if( zFilename==0 ){
|
||||
csv_errmsg(&sRdr, "missing 'filename' parameter");
|
||||
if( (CSV_FILENAME==0)==(CSV_DATA==0) ){
|
||||
csv_errmsg(&sRdr, "must either filename= or data= but not both");
|
||||
goto csvtab_connect_error;
|
||||
}
|
||||
if( nCol<=0 && csv_reader_open(&sRdr, zFilename) ){
|
||||
if( nCol<=0 && csv_reader_open(&sRdr, CSV_FILENAME, CSV_DATA) ){
|
||||
goto csvtab_connect_error;
|
||||
}
|
||||
pNew = sqlite3_malloc( sizeof(*pNew) );
|
||||
@ -423,24 +507,26 @@ static int csvtabConnect(
|
||||
pNew->nCol++;
|
||||
}while( sRdr.cTerm==',' );
|
||||
}
|
||||
pNew->zFilename = zFilename;
|
||||
pNew->zFilename = CSV_FILENAME; CSV_FILENAME = 0;
|
||||
pNew->zData = CSV_DATA; CSV_DATA = 0;
|
||||
pNew->tstFlags = tstFlags;
|
||||
zFilename = 0;
|
||||
pNew->iStart = bHeader==1 ? ftell(sRdr.in) : 0;
|
||||
csv_reader_reset(&sRdr);
|
||||
if( zSchema==0 ){
|
||||
if( CSV_SCHEMA==0 ){
|
||||
char *zSep = "";
|
||||
zSchema = sqlite3_mprintf("CREATE TABLE x(");
|
||||
if( zSchema==0 ) goto csvtab_connect_oom;
|
||||
CSV_SCHEMA = sqlite3_mprintf("CREATE TABLE x(");
|
||||
if( CSV_SCHEMA==0 ) goto csvtab_connect_oom;
|
||||
for(i=0; i<pNew->nCol; i++){
|
||||
zSchema = sqlite3_mprintf("%z%sc%d TEXT",zSchema, zSep, i);
|
||||
CSV_SCHEMA = sqlite3_mprintf("%z%sc%d TEXT",CSV_SCHEMA, zSep, i);
|
||||
zSep = ",";
|
||||
}
|
||||
zSchema = sqlite3_mprintf("%z);", zSchema);
|
||||
CSV_SCHEMA = sqlite3_mprintf("%z);", CSV_SCHEMA);
|
||||
}
|
||||
rc = sqlite3_declare_vtab(db, zSchema);
|
||||
rc = sqlite3_declare_vtab(db, CSV_SCHEMA);
|
||||
if( rc ) goto csvtab_connect_error;
|
||||
sqlite3_free(zSchema);
|
||||
for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){
|
||||
sqlite3_free(azPValue[i]);
|
||||
}
|
||||
return SQLITE_OK;
|
||||
|
||||
csvtab_connect_oom:
|
||||
@ -449,8 +535,9 @@ csvtab_connect_oom:
|
||||
|
||||
csvtab_connect_error:
|
||||
if( pNew ) csvtabDisconnect(&pNew->base);
|
||||
sqlite3_free(zFilename);
|
||||
sqlite3_free(zSchema);
|
||||
for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){
|
||||
sqlite3_free(azPValue[i]);
|
||||
}
|
||||
if( sRdr.zErr[0] ){
|
||||
sqlite3_free(*pzErr);
|
||||
*pzErr = sqlite3_mprintf("%s", sRdr.zErr);
|
||||
@ -508,7 +595,7 @@ static int csvtabOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){
|
||||
memset(pCur, 0, sizeof(*pCur) + sizeof(char*)*pTab->nCol );
|
||||
pCur->azVal = (char**)&pCur[1];
|
||||
*ppCursor = &pCur->base;
|
||||
if( csv_reader_open(&pCur->rdr, pTab->zFilename) ){
|
||||
if( csv_reader_open(&pCur->rdr, pTab->zFilename, pTab->zData) ){
|
||||
csv_xfer_error(pTab, &pCur->rdr);
|
||||
return SQLITE_ERROR;
|
||||
}
|
||||
@ -597,12 +684,20 @@ static int csvtabFilter(
|
||||
CsvCursor *pCur = (CsvCursor*)pVtabCursor;
|
||||
CsvTable *pTab = (CsvTable*)pVtabCursor->pVtab;
|
||||
pCur->iRowid = 0;
|
||||
fseek(pCur->rdr.in, pTab->iStart, SEEK_SET);
|
||||
if( pCur->rdr.in==0 ){
|
||||
assert( pCur->rdr.zIn==pTab->zData );
|
||||
assert( pTab->iStart<=pCur->rdr.nIn );
|
||||
pCur->rdr.iIn = pTab->iStart;
|
||||
}else{
|
||||
fseek(pCur->rdr.in, pTab->iStart, SEEK_SET);
|
||||
pCur->rdr.iIn = 0;
|
||||
pCur->rdr.nIn = 0;
|
||||
}
|
||||
return csvtabNext(pVtabCursor);
|
||||
}
|
||||
|
||||
/*
|
||||
** Only a forwards full table scan is supported. xBestIndex is mostly
|
||||
** Only a forward full table scan is supported. xBestIndex is mostly
|
||||
** a no-op. If CSVTEST_FIDX is set, then the presence of equality
|
||||
** constraints lowers the estimated cost, which is fiction, but is useful
|
||||
** for testing certain kinds of virtual table behavior.
|
||||
@ -618,10 +713,17 @@ static int csvtabBestIndex(
|
||||
if( (pTab->tstFlags & CSVTEST_FIDX)==0 ){
|
||||
return SQLITE_OK;
|
||||
}
|
||||
/* The usual (an sensible) case is to take the "return SQLITE_OK" above.
|
||||
** The code below only runs when testflags=1. The following code
|
||||
/* The usual (and sensible) case is to take the "return SQLITE_OK" above.
|
||||
** The code below only runs when testflags=1. The code below
|
||||
** generates an artificial and unrealistic plan which is useful
|
||||
** for testing virtual table logic but is useless for real applications. */
|
||||
** for testing virtual table logic but is not helpful to real applications.
|
||||
**
|
||||
** Any ==, LIKE, or GLOB constraint is marked as usable by the virtual
|
||||
** table (even though it is not) and the cost of running the virtual table
|
||||
** is reduced from 1 million to just 10. The constraints are *not* marked
|
||||
** as omittable, however, so the query planner should still generate a
|
||||
** plan that gives a correct answer, even if the plan is not optimal.
|
||||
*/
|
||||
for(i=0; i<pIdxInfo->nConstraint; i++){
|
||||
unsigned char op;
|
||||
if( pIdxInfo->aConstraint[i].usable==0 ) continue;
|
||||
|
12
manifest
12
manifest
@ -1,5 +1,5 @@
|
||||
C Fix\scorner\scases\sin\sthe\sWITHOUT\sROWID\svirtual\stable\slogic.
|
||||
D 2016-06-02T16:22:53.508
|
||||
C Add\sthe\sdata=\sparameter\sto\sthe\sCSV\svirtual\stable\sextension.
|
||||
D 2016-06-02T17:44:24.492
|
||||
F Makefile.in f59e0763ff448719fc1bd25513882b0567286317
|
||||
F Makefile.linux-gcc 7bc79876b875010e8c8f9502eb935ca92aa3c434
|
||||
F Makefile.msc 306d73e854b1a92ea06e5d1e637faa5c44de53c7
|
||||
@ -206,7 +206,7 @@ F ext/icu/sqliteicu.h 728867a802baa5a96de7495e9689a8e01715ef37
|
||||
F ext/misc/amatch.c 211108e201105e4bb0c076527b8cfd34330fc234
|
||||
F ext/misc/closure.c 0d2a038df8fbae7f19de42e7c7d71f2e4dc88704
|
||||
F ext/misc/compress.c 122faa92d25033d6c3f07c39231de074ab3d2e83
|
||||
F ext/misc/csv.c 39060a352f43e43df575484cc0f23ab61091dac3
|
||||
F ext/misc/csv.c d4f6f0776bcaaed7929d6ef1e1261287b856e0da
|
||||
F ext/misc/eval.c f971962e92ebb8b0a4e6b62949463ee454d88fa2
|
||||
F ext/misc/fileio.c d4171c815d6543a9edef8308aab2951413cd8d0f
|
||||
F ext/misc/fuzzer.c 7c64b8197bb77b7d64eff7cac7848870235d4c25
|
||||
@ -1497,7 +1497,7 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93
|
||||
F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
|
||||
F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
|
||||
F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
|
||||
P 49638f180e26477974cacc69b79e0be0a5e18b29
|
||||
R cf24c2970498f36ff11e1ac85a33493a
|
||||
P a393bbb972660c1ffcdda923d0f3564ecfcd2f0a
|
||||
R 47b04ec69529b5020af652ca946943fd
|
||||
U drh
|
||||
Z 5a84619c2e2d24987b3ea5aba48efb6c
|
||||
Z ea4616a356c99262b96b6fda702a53cc
|
||||
|
@ -1 +1 @@
|
||||
a393bbb972660c1ffcdda923d0f3564ecfcd2f0a
|
||||
769191042aa14e6eccdfe2391fc1011171d5c9ad
|
Loading…
Reference in New Issue
Block a user