Add tests to restore coverage of fts5_tokenizer.c.

FossilOrigin-Name: 8f9257361b05e368bf433e56d0698923b0f97d12e7c0ad7760aaab6746c0e467
This commit is contained in:
dan 2024-08-17 17:22:49 +00:00
parent 342984075b
commit b651084713
8 changed files with 146 additions and 45 deletions

View File

@ -628,6 +628,9 @@ proc print_categories {lMap} {
$caseP
$caseS
$caseZ
default:
return 1;
}
return 0;
}

View File

@ -79,7 +79,7 @@ static int fts5AsciiCreate(
int i;
memset(p, 0, sizeof(AsciiTokenizer));
memcpy(p->aTokenChar, aAsciiTokenChar, sizeof(aAsciiTokenChar));
for(i=0; rc==SQLITE_OK && i<nArg-1; i+=2){
for(i=0; rc==SQLITE_OK && i<nArg; i+=2){
const char *zArg = azArg[i+1];
if( 0==sqlite3_stricmp(azArg[i], "tokenchars") ){
fts5AsciiAddExceptions(p, zArg, 1);
@ -90,7 +90,6 @@ static int fts5AsciiCreate(
rc = SQLITE_ERROR;
}
}
if( rc==SQLITE_OK && i<nArg ) rc = SQLITE_ERROR;
if( rc!=SQLITE_OK ){
fts5AsciiDelete((Fts5Tokenizer*)p);
p = 0;
@ -382,7 +381,7 @@ static int fts5UnicodeCreate(
}
/* Search for a "categories" argument */
for(i=0; rc==SQLITE_OK && i<nArg-1; i+=2){
for(i=0; rc==SQLITE_OK && i<nArg; i+=2){
if( 0==sqlite3_stricmp(azArg[i], "categories") ){
zCat = azArg[i+1];
}
@ -391,7 +390,7 @@ static int fts5UnicodeCreate(
rc = unicodeSetCategories(p, zCat);
}
for(i=0; rc==SQLITE_OK && i<nArg-1; i+=2){
for(i=0; rc==SQLITE_OK && i<nArg; i+=2){
const char *zArg = azArg[i+1];
if( 0==sqlite3_stricmp(azArg[i], "remove_diacritics") ){
if( (zArg[0]!='0' && zArg[0]!='1' && zArg[0]!='2') || zArg[1] ){
@ -416,8 +415,6 @@ static int fts5UnicodeCreate(
rc = SQLITE_ERROR;
}
}
if( i<nArg && rc==SQLITE_OK ) rc = SQLITE_ERROR;
}else{
rc = SQLITE_NOMEM;
}
@ -1294,41 +1291,46 @@ static int fts5TriCreate(
Fts5Tokenizer **ppOut
){
int rc = SQLITE_OK;
TrigramTokenizer *pNew = (TrigramTokenizer*)sqlite3_malloc(sizeof(*pNew));
TrigramTokenizer *pNew = 0;
UNUSED_PARAM(pUnused);
if( pNew==0 ){
rc = SQLITE_NOMEM;
if( nArg%2 ){
rc = SQLITE_ERROR;
}else{
int i;
pNew->bFold = 1;
pNew->iFoldParam = 0;
for(i=0; rc==SQLITE_OK && i<nArg-1; i+=2){
const char *zArg = azArg[i+1];
if( 0==sqlite3_stricmp(azArg[i], "case_sensitive") ){
if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1] ){
rc = SQLITE_ERROR;
pNew = (TrigramTokenizer*)sqlite3_malloc(sizeof(*pNew));
if( pNew==0 ){
rc = SQLITE_NOMEM;
}else{
pNew->bFold = 1;
pNew->iFoldParam = 0;
for(i=0; rc==SQLITE_OK && i<nArg; i+=2){
const char *zArg = azArg[i+1];
if( 0==sqlite3_stricmp(azArg[i], "case_sensitive") ){
if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1] ){
rc = SQLITE_ERROR;
}else{
pNew->bFold = (zArg[0]=='0');
}
}else if( 0==sqlite3_stricmp(azArg[i], "remove_diacritics") ){
if( (zArg[0]!='0' && zArg[0]!='1' && zArg[0]!='2') || zArg[1] ){
rc = SQLITE_ERROR;
}else{
pNew->iFoldParam = (zArg[0]!='0') ? 2 : 0;
}
}else{
pNew->bFold = (zArg[0]=='0');
}
}else if( 0==sqlite3_stricmp(azArg[i], "remove_diacritics") ){
if( (zArg[0]!='0' && zArg[0]!='1' && zArg[0]!='2') || zArg[1] ){
rc = SQLITE_ERROR;
}else{
pNew->iFoldParam = (zArg[0]!='0') ? 2 : 0;
}
}else{
}
if( pNew->iFoldParam!=0 && pNew->bFold==0 ){
rc = SQLITE_ERROR;
}
}
if( i<nArg && rc==SQLITE_OK ) rc = SQLITE_ERROR;
if( pNew->iFoldParam!=0 && pNew->bFold==0 ){
rc = SQLITE_ERROR;
}
if( rc!=SQLITE_OK ){
fts5TriDelete((Fts5Tokenizer*)pNew);
pNew = 0;
if( rc!=SQLITE_OK ){
fts5TriDelete((Fts5Tokenizer*)pNew);
pNew = 0;
}
}
}
*ppOut = (Fts5Tokenizer*)pNew;

View File

@ -364,6 +364,9 @@ int sqlite3Fts5UnicodeCatParse(const char *zCat, u8 *aArray){
default: return 1; }
break;
default:
return 1;
}
return 0;
}

View File

@ -55,5 +55,22 @@ do_execsql_test 1.5 {
SELECT * FROM t4t
} {สนามกีฬา 1 1}
#-------------------------------------------------------------------------
reset_db
do_execsql_test 2.0 "
CREATE VIRTUAL TABLE x1 USING fts5(c,
tokenize=\"unicode61 categories ' \t'\");
"
do_catchsql_test 2.1 "
CREATE VIRTUAL TABLE x2 USING fts5(c,
tokenize=\"unicode61 categories 'N*\t\tMYZ'\");
" {1 {error in tokenizer constructor}}
do_catchsql_test 2.2 "
CREATE VIRTUAL TABLE x2 USING fts5(c,
tokenize=\"unicode61 categories 'N*\t\tXYZ'\");
" {1 {error in tokenizer constructor}}
finish_test

View File

@ -286,5 +286,61 @@ do_execsql_test 9.3 {
SELECT rowid FROM t1 WHERE a12 LIKE NULL
} {}
#-------------------------------------------------------------------------
reset_db
do_execsql_test 10.0 {
CREATE VIRTUAL TABLE t1 USING fts5(a, tokenize=trigram);
}
do_test 10.1 {
foreach {val} {
"abc \UFFjkl\UFF"
"abc \UFFFjkl\UFFF"
"abc \UFFFFjkl\UFFFF"
"abc \UFFFFFjkl\UFFFFF"
"\UFFjkl\UFF abc"
"\UFFFjkl\UFFF abc"
"\UFFFFjkl\UFFFF abc"
"\UFFFFFjkl\UFFFFF abc"
"\U10001jkl\U10001 abc"
} {
execsql { INSERT INTO t1 VALUES( $val ) }
}
} {}
do_test 10.2 {
foreach {val} {
X'E18000626320646566'
X'61EDA0806320646566'
X'61EDA0806320646566'
X'61EFBFBE6320646566'
X'76686920E18000626320646566'
X'7668692061EDA0806320646566'
X'7668692061EDA0806320646566'
X'7668692061EFBFBE6320646566'
} {
execsql " INSERT INTO t1 VALUES( $val ) "
}
} {}
do_test 10.3 {
set a [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0x62}]
set b [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0x62}]
set c [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}]
set d [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}]
execsql {
INSERT INTO t1 VALUES($a);
INSERT INTO t1 VALUES($b);
INSERT INTO t1 VALUES($c);
INSERT INTO t1 VALUES($d);
INSERT INTO t1 VALUES('abcd' || $a);
INSERT INTO t1 VALUES('abcd' || $b);
INSERT INTO t1 VALUES('abcd' || $c);
INSERT INTO t1 VALUES('abcd' || $d);
}
} {}
finish_test

View File

@ -470,4 +470,24 @@ do_execsql_test 8.2.3 {
SELECT rowid FROM t4 WHERE t4 MATCH 'a' ORDER BY rowid ASC;
} {2 4}
#-------------------------------------------------------------------------
foreach {tn val bErr} {
1 0 0
2 1 0
3 2 0
4 3 1
5 11 1
} {
reset_db
set aRes(0) {0 {}}
set aRes(1) {1 {error in tokenizer constructor}}
set res $aRes($bErr)
do_catchsql_test 9.1.$tn "
CREATE VIRTUAL TABLE bl USING fts5(
s, tokenize='trigram remove_diacritics $val'
);
" $res
}
finish_test

View File

@ -1,5 +1,5 @@
C Add\stests\sto\srestore\scoverage\sof\sfts5_config.c.
D 2024-08-16T19:05:47.160
C Add\stests\sto\srestore\scoverage\sof\sfts5_tokenizer.c.
D 2024-08-17T17:22:49.665
F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724
@ -89,7 +89,7 @@ F ext/fts3/tool/fts3cov.sh c331d006359456cf6f8f953e37f2b9c7d568f3863f00bb5f7eb87
F ext/fts3/tool/fts3view.c 413c346399159df81f86c4928b7c4a455caab73bfbc8cd68f950f632e5751674
F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c
F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7
F ext/fts3/unicode/mkunicode.tcl d5aebf022fa4577ee8cdf27468f0d847879993959101f6dbd6348ef0cfc324a7
F ext/fts3/unicode/mkunicode.tcl 63db9624ccf70d4887836c320eda93ab552f21008f3be7ede551eac3ead62baa
F ext/fts3/unicode/parseunicode.tcl a981bd6466d12dd17967515801c3ff23f74a281be1a03cf1e6f52a6959fc77eb
F ext/fts5/extract_api_docs.tcl bc3a0ca78be7d3df08e7602c00ca48021ebae40682d75eb001bfdf6e54ffb44e
F ext/fts5/fts5.h 4c6998c6186268b4dbe9baef2c0d2ab974bd90996d61d4dbe801367249be6de4
@ -105,8 +105,8 @@ F ext/fts5/fts5_storage.c acece8abf1b4b2012bdad93aa2d2cf6fc0d1da919763479c7055f6
F ext/fts5/fts5_tcl.c 1dcf08028141c40a32634bdcf2d5601622ce4edc48f82ac4ce0cbe0a92a6961d
F ext/fts5/fts5_test_mi.c 08c11ec968148d4cb4119d96d819f8c1f329812c568bac3684f5464be177d3ee
F ext/fts5/fts5_test_tok.c 3cb0a9b508b30d17ef025ccddd26ae3dc8ddffbe76c057616e59a9aa85d36f3b
F ext/fts5/fts5_tokenize.c 96efa85a21a85276680ce3fb19dc5dd8d6b6541b2c37f953ee55bc15092262e1
F ext/fts5/fts5_unicode2.c eca63dbc797f8ff0572e97caf4631389c0ab900d6364861b915bdd4735973f00
F ext/fts5/fts5_tokenize.c ae9c4fa93174ef06ffc138bd4280a1c37f7e13624d3d2706aad4b80573f23c41
F ext/fts5/fts5_unicode2.c 6f9b0fb79a8facaed76628ffd4eb9c16d7f2b84b52872784f617cf3422a9b043
F ext/fts5/fts5_varint.c e64d2113f6e1bfee0032972cffc1207b77af63319746951bf1d09885d1dadf80
F ext/fts5/fts5_vocab.c e4830b00809e5da53bc10f93adc59e321407b0f801c7f4167c0e47f5552267e0
F ext/fts5/fts5parse.y eb526940f892ade5693f22ffd6c4f2702543a9059942772526eac1fde256bb05
@ -133,7 +133,7 @@ F ext/fts5/test/fts5bigid.test 2860854c2561a57594192b00c33a29f91cb85e25f3d6c03b5
F ext/fts5/test/fts5bigpl.test 8f09858aab866c33593560e6480b2b6975ae7ff29ca32ad7b77e2da61402f8ef
F ext/fts5/test/fts5bigtok.test 541119e616c637caea925a8c028c37c2c29e94383e00aa2f9198d530724b6e36
F ext/fts5/test/fts5blob.test caa33369e93e99ff494cd1103506ae34c5afbc0bcc369ed5e58e135144e33689
F ext/fts5/test/fts5cat.test daba0b80659460b0cb60bd1f40b402478a761fe7ea414c3c94c2be25568cc33a
F ext/fts5/test/fts5cat.test bf67dd335f964482ee658287521b81e2b88697b45eb7f73933e15f198ed447cb
F ext/fts5/test/fts5circref.test f880dfd0d99f6fb73b88ccacb0927d18e833672fd906cc47d6b4e529419eaa62
F ext/fts5/test/fts5colset.test 544f4998cdbfe06a3123887fc0221612e8aa8192cdaff152872f1aadb10e6897
F ext/fts5/test/fts5columnsize.test 0af91d63985afdf663455d4b572b935238380140d74079eac362760866d3297b
@ -235,12 +235,12 @@ F ext/fts5/test/fts5tok2.test dcacb32d4a2a3f0dd3215d4a3987f78ae4be21a2
F ext/fts5/test/fts5tokenizer.test 7937cec672b148223fff8746d21d3e7ed0965fd7caf35ccdc888a005bb452f98
F ext/fts5/test/fts5tokenizer2.test ddb8b10fbe4b84b2a75812671f127774c1d2e3e2bf82d2e0e4f0bb1cd8a2b2d6
F ext/fts5/test/fts5tokenizer3.test eea778f7bb7024c3e904e28915f9d53286141671b138722148be22a9c758bdc3
F ext/fts5/test/fts5trigram.test f99fc0235e967826e556de9aee959d3cb8eae2e9abff37b5fe2732fd7052dac2
F ext/fts5/test/fts5trigram.test fcb6b93510b917add63aee8f363c466ac406528822951e76e0980ce6f5170978
F ext/fts5/test/fts5trigram2.test 6fde9de7f63a6b4aa18dc731be56dbd6be4e755c9b13dcd55479e200d1df0e61
F ext/fts5/test/fts5ubsan.test 9a2dcf399dc8d0e0de661f0d93884d1d27e5b7f0693cfceb97dd24d818df5dd2
F ext/fts5/test/fts5umlaut.test a42fe2fe6387c40c49ab27ccbd070e1ae38e07f38d05926482cc0bccac9ad602
F ext/fts5/test/fts5unicode.test 41898f7e476e6515cd4b737c02a442cda5a580a74509788aa9072a2074948e0e
F ext/fts5/test/fts5unicode2.test a5c38179b311a188b24376772309389b073c996f52b79bb9ca760a19e62043ea
F ext/fts5/test/fts5unicode2.test 3bbd30152f9f760bf13886e5b1e5ec23ff62f56758ddda5d9c775a6082fb4c7c
F ext/fts5/test/fts5unicode3.test f4891a3dac3b49c3d7c0fdb29566e9eb0ecff35263370c89f9661b1952b20818
F ext/fts5/test/fts5unicode4.test 728c8f0caafb05567f524ad313d9f8b780fa45987b8a8df04eff87923c74b4d0
F ext/fts5/test/fts5unindexed.test 168838d2c385e131120bbf5b516d2432a5fabc4caa2259c932e1d49ae209a4ae
@ -2208,8 +2208,8 @@ F vsixtest/vsixtest.tcl 6195aba1f12a5e10efc2b8c0009532167be5e301abe5b31385638080
F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
P 38841881a08f4d75c2dfcbc1b2b45e311d7dda1287780f8ba1c8388f7adc1629
R d19b3ba3504e0807fa9fc6488171b084
P 9d971b31df7ad4d68eb348f95d8f996071cf87d41c47033bde3fcc4dba732e06
R 00d1f889322bc8b1a7560fd99767ae0b
U dan
Z 92294a85664ab83f8a52ccaa818e0f8f
Z 653dde5241b23832e0da59a6216ceb6f
# Remove this line to create a well-formed Fossil manifest.

View File

@ -1 +1 @@
9d971b31df7ad4d68eb348f95d8f996071cf87d41c47033bde3fcc4dba732e06
8f9257361b05e368bf433e56d0698923b0f97d12e7c0ad7760aaab6746c0e467