Improve lexer's error reporting. You get the whole token mentioned now

in parse error messages, not just the part scanned by the last flex rule. For example, select "foo" "bar"; used to draw ERROR: parser: parse error at or near """, which was rather unhelpful. Now it gives ERROR: parser: parse error at or near ""bar"". Also, error messages concerning bitstring literals and suchlike will quote the source text at you, not the processed internal form of the literal.
2002-05-01 17:12:08 +00:00 · 2002-05-01 17:12:08 +00:00 · 61446e0927
commit 61446e0927
parent 241978b91b
3 changed files with 73 additions and 48 deletions
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@ -9,7 +9,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.92 2002/04/20 21:56:14 petere Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.93 2002/05/01 17:12:07 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -56,6 +56,17 @@ static void addlit(char *ytext, int yleng);
 static void addlitchar(unsigned char ychar);
 static char *litbufdup(void);
 /*
 * When we parse a token that requires multiple lexer rules to process,
 * we set token_start to point at the true start of the token, for use
 * by yyerror().  yytext will point at just the text consumed by the last
 * rule, so it's not very helpful (eg, it might contain just the last
 * quote mark of a quoted identifier).  But to avoid cluttering every rule
 * with setting token_start, we allow token_start = NULL to denote that
 * it's okay to use yytext.
 */
 static char	   *token_start;
 /* Handles to the buffer that the lexer uses internally */
 static YY_BUFFER_STATE scanbufhandle;
 static char *scanbuf;
@ -208,7 +219,7 @@ non_newline		[^\n\r]
 comment			("--"{non_newline}*)
-whitespace		({space}|{comment})
+whitespace		({space}+|{comment})
 /*
 * SQL92 requires at least one newline in the whitespace separating
@ -235,9 +246,16 @@ other			.
 */
 %%
 %{
 					/* code to execute during start of each call of yylex() */
 					token_start = NULL;
 %}
 {whitespace}	{ /* ignore */ }
 {xcstart}		{
 					token_start = yytext;
 					xcdepth = 0;
 					BEGIN(xc);
 					/* Put back any characters past slash-star; see above */
@ -252,7 +270,11 @@ other			.
 <xc>{xcstop}	{
 					if (xcdepth <= 0)
 					{
 						BEGIN(INITIAL);
 						/* reset token_start for next token */
 						token_start = NULL;
 					}
 					else
 						xcdepth--;
 				}
@ -261,9 +283,10 @@ other			.
 <xc>{op_chars}	{ /* ignore */ }
-<xc><<EOF>>		{ elog(ERROR, "Unterminated /* comment"); }
+<xc><<EOF>>		{ yyerror("unterminated /* comment"); }
 {xbitstart}		{
 					token_start = yytext;
 					BEGIN(xbit);
 					startlit();
 					addlitchar('b');
@ -271,8 +294,7 @@ other			.
 <xbit>{xbitstop}	{
 					BEGIN(INITIAL);
 					if (literalbuf[strspn(literalbuf + 1, "01") + 1] != '\0')
-						elog(ERROR, "invalid bit string input: '%s'",
+						yyerror("invalid bit string input");
 							 literalbuf);
 					yylval.str = litbufdup();
 					return BITCONST;
 				}
@ -284,9 +306,10 @@ other			.
 <xbit>{xbitcat}		{
 					/* ignore */
 				}
-<xbit><<EOF>>		{ elog(ERROR, "unterminated bit string literal"); }
+<xbit><<EOF>>		{ yyerror("unterminated bit string literal"); }
 {xhstart}		{
 					token_start = yytext;
 					BEGIN(xh);
 					startlit();
 				}
@ -303,14 +326,14 @@ other			.
 						|| val != (long) ((int32) val)
 #endif
 						)
-						elog(ERROR, "Bad hexadecimal integer input '%s'",
+						yyerror("bad hexadecimal integer input");
 							 literalbuf);
 					yylval.ival = val;
 					return ICONST;
 				}
-<xh><<EOF>>		{ elog(ERROR, "Unterminated hexadecimal integer"); }
+<xh><<EOF>>		{ yyerror("unterminated hexadecimal integer"); }
 {xqstart}		{
 					token_start = yytext;
 					BEGIN(xq);
 					startlit();
 				}
@ -335,30 +358,31 @@ other			.
 <xq>{xqcat}		{
 					/* ignore */
 				}
-<xq><<EOF>>		{ elog(ERROR, "Unterminated quoted string"); }
+<xq><<EOF>>		{ yyerror("unterminated quoted string"); }
 {xdstart}		{
 					token_start = yytext;
 					BEGIN(xd);
 					startlit();
 				}
 <xd>{xdstop}	{
 					BEGIN(INITIAL);
-					if (strlen(literalbuf) == 0)
+					if (literallen == 0)
-						elog(ERROR, "zero-length delimited identifier");
+						yyerror("zero-length delimited identifier");
-					if (strlen(literalbuf) >= NAMEDATALEN)
+					if (literallen >= NAMEDATALEN)
 					{
 #ifdef MULTIBYTE
 						int len;
-						len = pg_mbcliplen(literalbuf,strlen(literalbuf),NAMEDATALEN-1);
+#ifdef MULTIBYTE
-						elog(WARNING, "identifier \"%s\" will be truncated to \"%.*s\"",
+						len = pg_mbcliplen(literalbuf, literallen,
 										   NAMEDATALEN-1);
 #else
 						len = NAMEDATALEN-1;
 #endif
 						elog(NOTICE, "identifier \"%s\" will be truncated to \"%.*s\"",
 							 literalbuf, len, literalbuf);
 						literalbuf[len] = '\0';
-#else
+						literallen = len;
 						elog(WARNING, "identifier \"%s\" will be truncated to \"%.*s\"",
 							 literalbuf, NAMEDATALEN-1, literalbuf);
 						literalbuf[NAMEDATALEN-1] = '\0';
 #endif
 					}
 					yylval.str = litbufdup();
 					return IDENT;
@ -369,7 +393,7 @@ other			.
 <xd>{xdinside}	{
 					addlit(yytext, yyleng);
 				}
-<xd><<EOF>>		{ elog(ERROR, "Unterminated quoted identifier"); }
+<xd><<EOF>>		{ yyerror("unterminated quoted identifier"); }
 {typecast}		{ return TYPECAST; }
@ -383,8 +407,8 @@ other			.
 					 * character will match a prior rule, not this one.
 					 */
 					int		nchars = yyleng;
-					char   *slashstar = strstr((char*)yytext, "/*");
+					char   *slashstar = strstr(yytext, "/*");
-					char   *dashdash = strstr((char*)yytext, "--");
+					char   *dashdash = strstr(yytext, "--");
 					if (slashstar && dashdash)
 					{
@ -395,7 +419,7 @@ other			.
 					else if (!slashstar)
 						slashstar = dashdash;
 					if (slashstar)
-						nchars = slashstar - ((char*)yytext);
+						nchars = slashstar - yytext;
 					/*
 					 * For SQL92 compatibility, '+' and '-' cannot be the
@ -437,15 +461,15 @@ other			.
 					}
 					/* Convert "!=" operator to "<>" for compatibility */
-					if (strcmp((char*)yytext, "!=") == 0)
+					if (strcmp(yytext, "!=") == 0)
 						yylval.str = pstrdup("<>");
 					else
-						yylval.str = pstrdup((char*)yytext);
+						yylval.str = pstrdup(yytext);
 					return Op;
 				}
 {param}			{
-					yylval.ival = atol((char*)&yytext[1]);
+					yylval.ival = atol(yytext + 1);
 					return PARAM;
 				}
@ -454,7 +478,7 @@ other			.
 					char* endptr;
 					errno = 0;
-					val = strtol((char *)yytext, &endptr, 10);
+					val = strtol(yytext, &endptr, 10);
 					if (*endptr != '\0' || errno == ERANGE
 #ifdef HAVE_LONG_INT_64
 						/* if long > 32 bits, check for overflow of int4 */
@ -463,28 +487,29 @@ other			.
 						)
 					{
 						/* integer too large, treat it as a float */
-						yylval.str = pstrdup((char*)yytext);
+						yylval.str = pstrdup(yytext);
 						return FCONST;
 					}
 					yylval.ival = val;
 					return ICONST;
 				}
 {decimal}		{
-					yylval.str = pstrdup((char*)yytext);
+					yylval.str = pstrdup(yytext);
 					return FCONST;
 				}
 {real}			{
-					yylval.str = pstrdup((char*)yytext);
+					yylval.str = pstrdup(yytext);
 					return FCONST;
 				}
 {identifier}	{
 					ScanKeyword	   *keyword;
 					char		   *ident;
 					int				i;
 					/* Is it a keyword? */
-					keyword = ScanKeywordLookup((char*) yytext);
+					keyword = ScanKeywordLookup(yytext);
 					if (keyword != NULL)
 						return keyword->value;
@ -496,26 +521,25 @@ other			.
 					 * which seems appropriate under SQL99 rules, whereas
 					 * the keyword comparison was NOT locale-dependent.
 					 */
-					for (i = 0; yytext[i]; i++)
+					ident = pstrdup(yytext);
 					for (i = 0; ident[i]; i++)
 					{
-						if (isupper((unsigned char) yytext[i]))
+						if (isupper((unsigned char) ident[i]))
-							yytext[i] = tolower((unsigned char) yytext[i]);
+							ident[i] = tolower((unsigned char) ident[i]);
 					}
 					if (i >= NAMEDATALEN)
                    {
 #ifdef MULTIBYTE
 						int len;
-						len = pg_mbcliplen(yytext,i,NAMEDATALEN-1);
+#ifdef MULTIBYTE
-                        elog(WARNING, "identifier \"%s\" will be truncated to \"%.*s\"",
+						len = pg_mbcliplen(ident, i, NAMEDATALEN-1);
                             yytext, len, yytext);
 						yytext[len] = '\0';
 #else
-                        elog(WARNING, "identifier \"%s\" will be truncated to \"%.*s\"",
+						len = NAMEDATALEN-1;
                             yytext, NAMEDATALEN-1, yytext);
 						yytext[NAMEDATALEN-1] = '\0';
 #endif
                        elog(NOTICE, "identifier \"%s\" will be truncated to \"%.*s\"",
                             ident, len, ident);
 						ident[len] = '\0';
                    }
-					yylval.str = pstrdup((char*) yytext);
+					yylval.str = ident;
 					return IDENT;
 				}
@ -526,7 +550,8 @@ other			.
 void
 yyerror(const char *message)
 {
-	elog(ERROR, "parser: %s at or near \"%s\"", message, yytext);
+	elog(ERROR, "parser: %s at or near \"%s\"", message,
 		 token_start ? token_start : yytext);
 }
--- a/src/backend/po/nls.mk
+++ b/src/backend/po/nls.mk
@ -1,4 +1,4 @@
 CATALOG_NAME	:= postgres
 AVAIL_LANGUAGES	:= cs de hu ru zh_CN zh_TW
 GETTEXT_FILES	:= + gettext-files
-GETTEXT_TRIGGERS:= elog:2 postmaster_error
+GETTEXT_TRIGGERS:= elog:2 postmaster_error yyerror
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@ -17,7 +17,7 @@ SELECT 'first line'
 ' - next line' /* this comment is not allowed here */
 ' - third line'
 	AS "Illegal comment within continuation";
-ERROR:  parser: parse error at or near "'"
+ERROR:  parser: parse error at or near "' - third line'"
 --
 -- test conversions between various string types
 --