Add the "%token_class" directive to the LEMON parser generator. This opens up

the possibility of simplifying the parser.  Also remove all calls to 
sprintf(), strcpy(), and strcat() from LEMON to avoid compiler warnings
on OpenBSD.  (Aside:  It is this change to avoid harmless compiler warnings
that was the cause of the reason spat of bugs.)

FossilOrigin-Name: 8eb48c04bd0a14031488b3160fde67307eb8b35d
This commit is contained in:
drh 2014-01-11 03:27:37 +00:00
commit ecaa9d399c
3 changed files with 195 additions and 42 deletions

View File

@ -1,5 +1,5 @@
C Remove\sunused\sstructure\sdefinition\sfrom\sparse.y.
D 2014-01-10T20:51:16.498
C Add\sthe\s"%token_class"\sdirective\sto\sthe\sLEMON\sparser\sgenerator.\s\sThis\sopens\sup\nthe\spossibility\sof\ssimplifying\sthe\sparser.\s\sAlso\sremove\sall\scalls\sto\s\nsprintf(),\sstrcpy(),\sand\sstrcat()\sfrom\sLEMON\sto\savoid\scompiler\swarnings\non\sOpenBSD.\s\s(Aside:\s\sIt\sis\sthis\schange\sto\savoid\sharmless\scompiler\swarnings\nthat\swas\sthe\scause\sof\sthe\sreason\sspat\sof\sbugs.)
D 2014-01-11T03:27:37.624
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in 2ef13430cd359f7b361bb863504e227b25cc7f81
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@ -1109,7 +1109,7 @@ F tool/fragck.tcl 5265a95126abcf6ab357f7efa544787e5963f439
F tool/genfkey.README cf68fddd4643bbe3ff8e31b8b6d8b0a1b85e20f4
F tool/genfkey.test 4196a8928b78f51d54ef58e99e99401ab2f0a7e5
F tool/getlock.c f4c39b651370156cae979501a7b156bdba50e7ce
F tool/lemon.c 796930d5fc2036c7636f3f1ee12f9ae03719a2eb
F tool/lemon.c 624b24c5dc048e09979f88a03e148bc728c70b73
F tool/lempar.c 01ca97f87610d1dac6d8cd96ab109ab1130e76dc
F tool/logest.c 7ad625cac3d54012b27d468b7af6612f78b9ba75
F tool/mkautoconfamal.sh f8d8dbf7d62f409ebed5134998bf5b51d7266383
@ -1148,7 +1148,7 @@ F tool/vdbe-compress.tcl 0cf56e9263a152b84da86e75a5c0cdcdb7a47891
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh d1a6de74685f360ab718efda6265994b99bbea01
F tool/win/sqlite.vsix 030f3eeaf2cb811a3692ab9c14d021a75ce41fff
P c9ea7d199f06a7801ab639e7ac98ebeb98706f24
R b8682b1b95c61647eceb7cac2c7aa836
P 7f1e7ae313c7625ef2623d78883dce776eecca30 da7890ca6b1d8e511377a469047120220e8c3b2d
R 4ba60b3fb47af12fdb784f3275ccdc2b
U drh
Z 2b3078d680e621e51a9cb7dd08f4a256
Z 11bef21bd681893c7b5fd9fec7621236

View File

@ -1 +1 @@
7f1e7ae313c7625ef2623d78883dce776eecca30
8eb48c04bd0a14031488b3160fde67307eb8b35d

View File

@ -50,6 +50,107 @@ static char *msort(char*,char**,int(*)(const char*,const char*));
*/
#define lemonStrlen(X) ((int)strlen(X))
/*
** Compilers are starting to complain about the use of sprintf() and strcpy(),
** saying they are unsafe. So we define our own versions of those routines too.
**
** There are three routines here: lemon_sprintf(), lemon_vsprintf(), and
** lemon_addtext(). The first two are replacements for sprintf() and vsprintf().
** The third is a helper routine for vsnprintf() that adds texts to the end of a
** buffer, making sure the buffer is always zero-terminated.
**
** The string formatter is a minimal subset of stdlib sprintf() supporting only
** a few simply conversions:
**
** %d
** %s
** %.*s
**
*/
static void lemon_addtext(
char *zBuf, /* The buffer to which text is added */
int *pnUsed, /* Slots of the buffer used so far */
const char *zIn, /* Text to add */
int nIn, /* Bytes of text to add. -1 to use strlen() */
int iWidth /* Field width. Negative to left justify */
){
if( nIn<0 ) for(nIn=0; zIn[nIn]; nIn++){}
while( iWidth>nIn ){ zBuf[(*pnUsed)++] = ' '; iWidth--; }
if( nIn==0 ) return;
memcpy(&zBuf[*pnUsed], zIn, nIn);
*pnUsed += nIn;
while( (-iWidth)>nIn ){ zBuf[(*pnUsed)++] = ' '; iWidth++; }
zBuf[*pnUsed] = 0;
}
static int lemon_vsprintf(char *str, const char *zFormat, va_list ap){
int i, j, k, c, size;
int nUsed = 0;
const char *z;
char zTemp[50];
str[0] = 0;
for(i=j=0; (c = zFormat[i])!=0; i++){
if( c=='%' ){
int iWidth = 0;
lemon_addtext(str, &nUsed, &zFormat[j], i-j, 0);
c = zFormat[++i];
if( isdigit(c) || (c=='-' && isdigit(zFormat[i+1])) ){
if( c=='-' ) i++;
while( isdigit(zFormat[i]) ) iWidth = iWidth*10 + zFormat[i++] - '0';
if( c=='-' ) iWidth = -iWidth;
c = zFormat[i];
}
if( c=='d' ){
int v = va_arg(ap, int);
if( v<0 ){
lemon_addtext(str, &nUsed, "-", 1, iWidth);
v = -v;
}else if( v==0 ){
lemon_addtext(str, &nUsed, "0", 1, iWidth);
}
k = 0;
while( v>0 ){
k++;
zTemp[sizeof(zTemp)-k] = (v%10) + '0';
v /= 10;
}
lemon_addtext(str, &nUsed, &zTemp[sizeof(zTemp)-k], k, iWidth);
}else if( c=='s' ){
z = va_arg(ap, const char*);
lemon_addtext(str, &nUsed, z, -1, iWidth);
}else if( c=='.' && memcmp(&zFormat[i], ".*s", 3)==0 ){
i += 2;
k = va_arg(ap, int);
z = va_arg(ap, const char*);
lemon_addtext(str, &nUsed, z, k, iWidth);
}else if( c=='%' ){
lemon_addtext(str, &nUsed, "%", 1, 0);
}else{
fprintf(stderr, "illegal format\n");
exit(1);
}
j = i+1;
}
}
lemon_addtext(str, &nUsed, &zFormat[j], i-j, 0);
return nUsed;
}
static int lemon_sprintf(char *str, const char *format, ...){
va_list ap;
int rc;
va_start(ap, format);
rc = lemon_vsprintf(str, format, ap);
va_end(ap);
return rc;
}
static void lemon_strcpy(char *dest, const char *src){
while( (*(dest++) = *(src++))!=0 ){}
}
static void lemon_strcat(char *dest, const char *src){
while( *dest ) dest++;
lemon_strcpy(dest, src);
}
/* a few forward declarations... */
struct rule;
struct lemon;
@ -1367,7 +1468,7 @@ static void handle_D_option(char *z){
fprintf(stderr,"out of memory\n");
exit(1);
}
strcpy(*paz, z);
lemon_strcpy(*paz, z);
for(z=*paz; *z && *z!='='; z++){}
*z = 0;
}
@ -1378,7 +1479,7 @@ static void handle_T_option(char *z){
if( user_templatename==0 ){
memory_error();
}
strcpy(user_templatename, z);
lemon_strcpy(user_templatename, z);
}
/* The main program. Parse the command line and do it... */
@ -1447,12 +1548,15 @@ int main(int argc, char **argv)
}
/* Count and index the symbols of the grammar */
lem.nsymbol = Symbol_count();
Symbol_new("{default}");
lem.nsymbol = Symbol_count();
lem.symbols = Symbol_arrayof();
for(i=0; i<=lem.nsymbol; i++) lem.symbols[i]->index = i;
qsort(lem.symbols,lem.nsymbol+1,sizeof(struct symbol*), Symbolcmpp);
for(i=0; i<=lem.nsymbol; i++) lem.symbols[i]->index = i;
for(i=0; i<lem.nsymbol; i++) lem.symbols[i]->index = i;
qsort(lem.symbols,lem.nsymbol,sizeof(struct symbol*), Symbolcmpp);
for(i=0; i<lem.nsymbol; i++) lem.symbols[i]->index = i;
while( lem.symbols[i-1]->type==MULTITERMINAL ){ i--; }
assert( strcmp(lem.symbols[i-1]->name,"{default}")==0 );
lem.nsymbol = i - 1;
for(i=1; isupper(lem.symbols[i]->name[0]); i++);
lem.nterminal = i;
@ -1940,7 +2044,9 @@ enum e_state {
WAITING_FOR_DESTRUCTOR_SYMBOL,
WAITING_FOR_DATATYPE_SYMBOL,
WAITING_FOR_FALLBACK_ID,
WAITING_FOR_WILDCARD_ID
WAITING_FOR_WILDCARD_ID,
WAITING_FOR_CLASS_ID,
WAITING_FOR_CLASS_TOKEN
};
struct pstate {
char *filename; /* Name of the input file */
@ -1950,6 +2056,7 @@ struct pstate {
struct lemon *gp; /* Global state vector */
enum e_state state; /* The state of the parser */
struct symbol *fallback; /* The fallback token */
struct symbol *tkclass; /* Token class symbol */
struct symbol *lhs; /* Left-hand side of current rule */
const char *lhsalias; /* Alias for the LHS */
int nrhs; /* Number of right-hand side symbols seen */
@ -2254,6 +2361,8 @@ to follow the previous rule.");
psp->state = WAITING_FOR_FALLBACK_ID;
}else if( strcmp(x,"wildcard")==0 ){
psp->state = WAITING_FOR_WILDCARD_ID;
}else if( strcmp(x,"token_class")==0 ){
psp->state = WAITING_FOR_CLASS_ID;
}else{
ErrorMsg(psp->filename,psp->tokenlineno,
"Unknown declaration keyword: \"%%%s\".",x);
@ -2347,7 +2456,7 @@ to follow the previous rule.");
for(z=psp->filename, nBack=0; *z; z++){
if( *z=='\\' ) nBack++;
}
sprintf(zLine, "#line %d ", psp->tokenlineno);
lemon_sprintf(zLine, "#line %d ", psp->tokenlineno);
nLine = lemonStrlen(zLine);
n += nLine + lemonStrlen(psp->filename) + nBack;
}
@ -2422,6 +2531,40 @@ to follow the previous rule.");
}
}
break;
case WAITING_FOR_CLASS_ID:
if( !islower(x[0]) ){
ErrorMsg(psp->filename, psp->tokenlineno,
"%%token_class must be followed by an identifier: ", x);
psp->errorcnt++;
psp->state = RESYNC_AFTER_DECL_ERROR;
}else if( Symbol_find(x) ){
ErrorMsg(psp->filename, psp->tokenlineno,
"Symbol \"%s\" already used", x);
psp->errorcnt++;
psp->state = RESYNC_AFTER_DECL_ERROR;
}else{
psp->tkclass = Symbol_new(x);
psp->tkclass->type = MULTITERMINAL;
psp->state = WAITING_FOR_CLASS_TOKEN;
}
break;
case WAITING_FOR_CLASS_TOKEN:
if( x[0]=='.' ){
psp->state = WAITING_FOR_DECL_OR_RULE;
}else if( isupper(x[0]) || ((x[0]=='|' || x[0]=='/') && isupper(x[1])) ){
struct symbol *msp = psp->tkclass;
msp->nsubsym++;
msp->subsym = (struct symbol **) realloc(msp->subsym,
sizeof(struct symbol*)*msp->nsubsym);
if( !isupper(x[0]) ) x++;
msp->subsym[msp->nsubsym-1] = Symbol_new(x);
}else{
ErrorMsg(psp->filename, psp->tokenlineno,
"%%token_class argument \"%s\" should be a token", x);
psp->errorcnt++;
psp->state = RESYNC_AFTER_DECL_ERROR;
}
break;
case RESYNC_AFTER_RULE_ERROR:
/* if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE;
** break; */
@ -2716,10 +2859,10 @@ PRIVATE char *file_makename(struct lemon *lemp, const char *suffix)
fprintf(stderr,"Can't allocate space for a filename.\n");
exit(1);
}
strcpy(name,lemp->filename);
lemon_strcpy(name,lemp->filename);
cp = strrchr(name,'.');
if( cp ) *cp = 0;
strcat(name,suffix);
lemon_strcat(name,suffix);
return name;
}
@ -2776,11 +2919,13 @@ void Reprint(struct lemon *lemp)
printf(" ::=");
for(i=0; i<rp->nrhs; i++){
sp = rp->rhs[i];
printf(" %s", sp->name);
if( sp->type==MULTITERMINAL ){
printf(" %s", sp->subsym[0]->name);
for(j=1; j<sp->nsubsym; j++){
printf("|%s", sp->subsym[j]->name);
}
}else{
printf(" %s", sp->name);
}
/* if( rp->rhsalias[i] ) printf("(%s)",rp->rhsalias[i]); */
}
@ -2802,11 +2947,13 @@ void ConfigPrint(FILE *fp, struct config *cfp)
if( i==cfp->dot ) fprintf(fp," *");
if( i==rp->nrhs ) break;
sp = rp->rhs[i];
fprintf(fp," %s", sp->name);
if( sp->type==MULTITERMINAL ){
fprintf(fp," %s", sp->subsym[0]->name);
for(j=1; j<sp->nsubsym; j++){
fprintf(fp,"|%s",sp->subsym[j]->name);
}
}else{
fprintf(fp," %s", sp->name);
}
}
}
@ -2916,7 +3063,7 @@ void ReportOutput(struct lemon *lemp)
while( cfp ){
char buf[20];
if( cfp->dot==cfp->rp->nrhs ){
sprintf(buf,"(%d)",cfp->rp->index);
lemon_sprintf(buf,"(%d)",cfp->rp->index);
fprintf(fp," %5s ",buf);
}else{
fprintf(fp," ");
@ -2981,7 +3128,7 @@ PRIVATE char *pathsearch(char *argv0, char *name, int modemask)
c = *cp;
*cp = 0;
path = (char *)malloc( lemonStrlen(argv0) + lemonStrlen(name) + 2 );
if( path ) sprintf(path,"%s/%s",argv0,name);
if( path ) lemon_sprintf(path,"%s/%s",argv0,name);
*cp = c;
}else{
pathlist = getenv("PATH");
@ -2990,13 +3137,13 @@ PRIVATE char *pathsearch(char *argv0, char *name, int modemask)
path = (char *)malloc( lemonStrlen(pathlist)+lemonStrlen(name)+2 );
if( (pathbuf != 0) && (path!=0) ){
pathbufptr = pathbuf;
strcpy(pathbuf, pathlist);
lemon_strcpy(pathbuf, pathlist);
while( *pathbuf ){
cp = strchr(pathbuf,':');
if( cp==0 ) cp = &pathbuf[lemonStrlen(pathbuf)];
c = *cp;
*cp = 0;
sprintf(path,"%s/%s",pathbuf,name);
lemon_sprintf(path,"%s/%s",pathbuf,name);
*cp = c;
if( c==0 ) pathbuf[0] = 0;
else pathbuf = &cp[1];
@ -3087,9 +3234,9 @@ PRIVATE FILE *tplt_open(struct lemon *lemp)
cp = strrchr(lemp->filename,'.');
if( cp ){
sprintf(buf,"%.*s.lt",(int)(cp-lemp->filename),lemp->filename);
lemon_sprintf(buf,"%.*s.lt",(int)(cp-lemp->filename),lemp->filename);
}else{
sprintf(buf,"%s.lt",lemp->filename);
lemon_sprintf(buf,"%s.lt",lemp->filename);
}
if( access(buf,004)==0 ){
tpltname = buf;
@ -3240,9 +3387,9 @@ PRIVATE char *append_str(const char *zText, int n, int p1, int p2){
while( n-- > 0 ){
c = *(zText++);
if( c=='%' && n>0 && zText[0]=='d' ){
sprintf(zInt, "%d", p1);
lemon_sprintf(zInt, "%d", p1);
p1 = p2;
strcpy(&z[used], zInt);
lemon_strcpy(&z[used], zInt);
used += lemonStrlen(&z[used]);
zText++;
n--;
@ -3467,7 +3614,7 @@ void print_stack_union(
fprintf(stderr,"Out of memory.\n");
exit(1);
}
strcpy(types[hash],stddt);
lemon_strcpy(types[hash],stddt);
}
}
@ -3553,9 +3700,11 @@ static void writeRuleText(FILE *out, struct rule *rp){
fprintf(out,"%s ::=", rp->lhs->name);
for(j=0; j<rp->nrhs; j++){
struct symbol *sp = rp->rhs[j];
fprintf(out," %s", sp->name);
if( sp->type==MULTITERMINAL ){
if( sp->type!=MULTITERMINAL ){
fprintf(out," %s", sp->name);
}else{
int k;
fprintf(out," %s", sp->subsym[0]->name);
for(k=1; k<sp->nsubsym; k++){
fprintf(out,"|%s",sp->subsym[k]->name);
}
@ -3856,7 +4005,7 @@ void ReportTable(
/* Generate a table containing the symbolic name of every symbol
*/
for(i=0; i<lemp->nsymbol; i++){
sprintf(line,"\"%s\",",lemp->symbols[i]->name);
lemon_sprintf(line,"\"%s\",",lemp->symbols[i]->name);
fprintf(out," %-15s",line);
if( (i&3)==3 ){ fprintf(out,"\n"); lineno++; }
}
@ -4023,7 +4172,8 @@ void ReportHeader(struct lemon *lemp)
if( in ){
int nextChar;
for(i=1; i<lemp->nterminal && fgets(line,LINESIZE,in); i++){
sprintf(pattern,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i);
lemon_sprintf(pattern,"#define %s%-30s %3d\n",
prefix,lemp->symbols[i]->name,i);
if( strcmp(line,pattern) ) break;
}
nextChar = fgetc(in);
@ -4036,7 +4186,7 @@ void ReportHeader(struct lemon *lemp)
out = file_open(lemp,".h","wb");
if( out ){
for(i=1; i<lemp->nterminal; i++){
fprintf(out,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i);
fprintf(out,"#define %s%-30s %3d\n",prefix,lemp->symbols[i]->name,i);
}
fclose(out);
}
@ -4253,7 +4403,7 @@ const char *Strsafe(const char *y)
if( y==0 ) return 0;
z = Strsafe_find(y);
if( z==0 && (cpy=(char *)malloc( lemonStrlen(y)+1 ))!=0 ){
strcpy(cpy,y);
lemon_strcpy(cpy,y);
z = cpy;
Strsafe_insert(z);
}
@ -4406,11 +4556,15 @@ struct symbol *Symbol_new(const char *x)
return sp;
}
/* Compare two symbols for working purposes
/* Compare two symbols for sorting purposes. Return negative,
** zero, or positive if a is less then, equal to, or greater
** than b.
**
** Symbols that begin with upper case letters (terminals or tokens)
** must sort before symbols that begin with lower case letters
** (non-terminals). Other than that, the order does not matter.
** (non-terminals). And MULTITERMINAL symbols (created using the
** %token_class directive) must sort at the very end. Other than
** that, the order does not matter.
**
** We find experimentally that leaving the symbols in their original
** order (the order they appeared in the grammar file) gives the
@ -4418,12 +4572,11 @@ struct symbol *Symbol_new(const char *x)
*/
int Symbolcmpp(const void *_a, const void *_b)
{
const struct symbol **a = (const struct symbol **) _a;
const struct symbol **b = (const struct symbol **) _b;
int i1 = (**a).index + 10000000*((**a).name[0]>'Z');
int i2 = (**b).index + 10000000*((**b).name[0]>'Z');
assert( i1!=i2 || strcmp((**a).name,(**b).name)==0 );
return i1-i2;
const struct symbol *a = *(const struct symbol **) _a;
const struct symbol *b = *(const struct symbol **) _b;
int i1 = a->type==MULTITERMINAL ? 3 : a->name[0]>'Z' ? 2 : 1;
int i2 = b->type==MULTITERMINAL ? 3 : b->name[0]>'Z' ? 2 : 1;
return i1==i2 ? a->index - b->index : i1 - i2;
}
/* There is one instance of the following structure for each