
- At the last minute and for no particularly good reason, I changed the WITHOUT token to be marked especially for lookahead, from the one in WITHOUT TIME to the one in WITHOUT UNIQUE. Study of upcoming patches (where a new WITHOUT ARRAY WRAPPER clause is added) showed me that the former was better, so put it back the way the original patch had it. - update exprTypmod() for JsonConstructorExpr to return the typmod of the RETURNING clause, as a comment there suggested. Perhaps it's possible for this to make a difference with datetime types, but I didn't try to build a test case. - The nodeFuncs.c support code for new nodes was calling walker() directly instead of the WALK() macro as introduced by commit 1c27d16e6e5c. Modernize that. Also add exprLocation() support for a couple of nodes that missed it. Lastly, reorder the code more sensibly. The WITHOUT_LA -> WITHOUT change means that stored rules containing either WITHOUT TIME ZONE or WITHOUT UNIQUE KEYS would change representation. Therefore, bump catversion. Discussion: https://postgr.es/m/20230329181708.e64g2tpy7jyufqkr@alvherre.pgsql
254 lines
5.9 KiB
C
254 lines
5.9 KiB
C
/*-------------------------------------------------------------------------
|
|
*
|
|
* parser.c
|
|
* Main entry point/driver for PostgreSQL grammar
|
|
*
|
|
* This should match src/backend/parser/parser.c, except that we do not
|
|
* need to bother with re-entrant interfaces.
|
|
*
|
|
* Note: ECPG doesn't report error location like the backend does.
|
|
* This file will need work if we ever want it to.
|
|
*
|
|
*
|
|
* Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
*
|
|
* IDENTIFICATION
|
|
* src/interfaces/ecpg/preproc/parser.c
|
|
*
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
|
|
#include "postgres_fe.h"
|
|
|
|
#include "preproc_extern.h"
|
|
#include "preproc.h"
|
|
|
|
|
|
static bool have_lookahead; /* is lookahead info valid? */
|
|
static int lookahead_token; /* one-token lookahead */
|
|
static YYSTYPE lookahead_yylval; /* yylval for lookahead token */
|
|
static YYLTYPE lookahead_yylloc; /* yylloc for lookahead token */
|
|
static char *lookahead_yytext; /* start current token */
|
|
|
|
static bool check_uescapechar(unsigned char escape);
|
|
static bool ecpg_isspace(char ch);
|
|
|
|
|
|
/*
|
|
* Intermediate filter between parser and base lexer (base_yylex in scan.l).
|
|
*
|
|
* This filter is needed because in some cases the standard SQL grammar
|
|
* requires more than one token lookahead. We reduce these cases to one-token
|
|
* lookahead by replacing tokens here, in order to keep the grammar LALR(1).
|
|
*
|
|
* Using a filter is simpler than trying to recognize multiword tokens
|
|
* directly in scan.l, because we'd have to allow for comments between the
|
|
* words. Furthermore it's not clear how to do that without re-introducing
|
|
* scanner backtrack, which would cost more performance than this filter
|
|
* layer does.
|
|
*
|
|
* We also use this filter to convert UIDENT and USCONST sequences into
|
|
* plain IDENT and SCONST tokens. While that could be handled by additional
|
|
* productions in the main grammar, it's more efficient to do it like this.
|
|
*/
|
|
int
|
|
filtered_base_yylex(void)
|
|
{
|
|
int cur_token;
|
|
int next_token;
|
|
YYSTYPE cur_yylval;
|
|
YYLTYPE cur_yylloc;
|
|
char *cur_yytext;
|
|
|
|
/* Get next token --- we might already have it */
|
|
if (have_lookahead)
|
|
{
|
|
cur_token = lookahead_token;
|
|
base_yylval = lookahead_yylval;
|
|
base_yylloc = lookahead_yylloc;
|
|
base_yytext = lookahead_yytext;
|
|
have_lookahead = false;
|
|
}
|
|
else
|
|
cur_token = base_yylex();
|
|
|
|
/*
|
|
* If this token isn't one that requires lookahead, just return it.
|
|
*/
|
|
switch (cur_token)
|
|
{
|
|
case FORMAT:
|
|
case NOT:
|
|
case NULLS_P:
|
|
case WITH:
|
|
case WITHOUT:
|
|
case UIDENT:
|
|
case USCONST:
|
|
break;
|
|
default:
|
|
return cur_token;
|
|
}
|
|
|
|
/* Save and restore lexer output variables around the call */
|
|
cur_yylval = base_yylval;
|
|
cur_yylloc = base_yylloc;
|
|
cur_yytext = base_yytext;
|
|
|
|
/* Get next token, saving outputs into lookahead variables */
|
|
next_token = base_yylex();
|
|
|
|
lookahead_token = next_token;
|
|
lookahead_yylval = base_yylval;
|
|
lookahead_yylloc = base_yylloc;
|
|
lookahead_yytext = base_yytext;
|
|
|
|
base_yylval = cur_yylval;
|
|
base_yylloc = cur_yylloc;
|
|
base_yytext = cur_yytext;
|
|
|
|
have_lookahead = true;
|
|
|
|
/* Replace cur_token if needed, based on lookahead */
|
|
switch (cur_token)
|
|
{
|
|
case FORMAT:
|
|
/* Replace FORMAT by FORMAT_LA if it's followed by JSON */
|
|
switch (next_token)
|
|
{
|
|
case JSON:
|
|
cur_token = FORMAT_LA;
|
|
break;
|
|
}
|
|
break;
|
|
|
|
case NOT:
|
|
/* Replace NOT by NOT_LA if it's followed by BETWEEN, IN, etc */
|
|
switch (next_token)
|
|
{
|
|
case BETWEEN:
|
|
case IN_P:
|
|
case LIKE:
|
|
case ILIKE:
|
|
case SIMILAR:
|
|
cur_token = NOT_LA;
|
|
break;
|
|
}
|
|
break;
|
|
|
|
case NULLS_P:
|
|
/* Replace NULLS_P by NULLS_LA if it's followed by FIRST or LAST */
|
|
switch (next_token)
|
|
{
|
|
case FIRST_P:
|
|
case LAST_P:
|
|
cur_token = NULLS_LA;
|
|
break;
|
|
}
|
|
break;
|
|
|
|
case WITH:
|
|
/* Replace WITH by WITH_LA if it's followed by TIME or ORDINALITY */
|
|
switch (next_token)
|
|
{
|
|
case TIME:
|
|
case ORDINALITY:
|
|
cur_token = WITH_LA;
|
|
break;
|
|
}
|
|
break;
|
|
|
|
case WITHOUT:
|
|
/* Replace WITHOUT by WITHOUT_LA if it's followed by TIME */
|
|
switch (next_token)
|
|
{
|
|
case TIME:
|
|
cur_token = WITHOUT_LA;
|
|
break;
|
|
}
|
|
break;
|
|
case UIDENT:
|
|
case USCONST:
|
|
/* Look ahead for UESCAPE */
|
|
if (next_token == UESCAPE)
|
|
{
|
|
/* Yup, so get third token, which had better be SCONST */
|
|
const char *escstr;
|
|
|
|
/*
|
|
* Again save and restore lexer output variables around the
|
|
* call
|
|
*/
|
|
cur_yylval = base_yylval;
|
|
cur_yylloc = base_yylloc;
|
|
cur_yytext = base_yytext;
|
|
|
|
/* Get third token */
|
|
next_token = base_yylex();
|
|
|
|
if (next_token != SCONST)
|
|
mmerror(PARSE_ERROR, ET_ERROR, "UESCAPE must be followed by a simple string literal");
|
|
|
|
/*
|
|
* Save and check escape string, which the scanner returns
|
|
* with quotes
|
|
*/
|
|
escstr = base_yylval.str;
|
|
if (strlen(escstr) != 3 || !check_uescapechar(escstr[1]))
|
|
mmerror(PARSE_ERROR, ET_ERROR, "invalid Unicode escape character");
|
|
|
|
base_yylval = cur_yylval;
|
|
base_yylloc = cur_yylloc;
|
|
base_yytext = cur_yytext;
|
|
|
|
/* Combine 3 tokens into 1 */
|
|
base_yylval.str = psprintf("%s UESCAPE %s", base_yylval.str, escstr);
|
|
|
|
/* Clear have_lookahead, thereby consuming all three tokens */
|
|
have_lookahead = false;
|
|
}
|
|
|
|
if (cur_token == UIDENT)
|
|
cur_token = IDENT;
|
|
else if (cur_token == USCONST)
|
|
cur_token = SCONST;
|
|
break;
|
|
}
|
|
|
|
return cur_token;
|
|
}
|
|
|
|
/*
|
|
* check_uescapechar() and ecpg_isspace() should match their equivalents
|
|
* in pgc.l.
|
|
*/
|
|
|
|
/* is 'escape' acceptable as Unicode escape character (UESCAPE syntax) ? */
|
|
static bool
|
|
check_uescapechar(unsigned char escape)
|
|
{
|
|
if (isxdigit(escape)
|
|
|| escape == '+'
|
|
|| escape == '\''
|
|
|| escape == '"'
|
|
|| ecpg_isspace(escape))
|
|
return false;
|
|
else
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* ecpg_isspace() --- return true if flex scanner considers char whitespace
|
|
*/
|
|
static bool
|
|
ecpg_isspace(char ch)
|
|
{
|
|
if (ch == ' ' ||
|
|
ch == '\t' ||
|
|
ch == '\n' ||
|
|
ch == '\r' ||
|
|
ch == '\f')
|
|
return true;
|
|
return false;
|
|
}
|