postgres/contrib/seg/segscan.l

%top{
/*
 * A scanner for EMP-style numeric ranges
 */
#include "postgres.h"

#include "nodes/miscnodes.h"

/*
 * NB: include segparse.h only AFTER including segdata.h, because segdata.h
 * contains the definition for SEG.
 */
#include "segdata.h"
#include "segparse.h"
}

%{
/* LCOV_EXCL_START */

/* No reason to constrain amount of data slurped */
#define YY_READ_BUF_SIZE 16777216

/* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */
#undef fprintf
#define fprintf(file, fmt, msg)  fprintf_to_ereport(fmt, msg)

static void
fprintf_to_ereport(const char *fmt, const char *msg)
{
	ereport(ERROR, (errmsg_internal("%s", msg)));
}

/* Handles to the buffer that the lexer uses internally */
static YY_BUFFER_STATE scanbufhandle;
static char *scanbuf;
%}

%option 8bit
%option never-interactive
%option nodefault
%option noinput
%option nounput
%option noyywrap
%option warn
%option prefix="seg_yy"


/* range: two dots, optionally followed by a third (".." or "...") */
range        (\.\.)(\.)?
/* plumin: the plus-or-minus marker, spelled either '+-' or (+-) */
plumin       (\'\+\-\')|(\(\+\-)\)
/* integer: optional sign followed by one or more decimal digits */
integer      [+-]?[0-9]+
/* real: optional sign, digits, a decimal point, then at least one digit */
real         [+-]?[0-9]+\.[0-9]+
/* float: an integer or a real, optionally followed by an e/E exponent */
float        ({integer}|{real})([eE]{integer})?

%%

{range}      seg_yylval.text = yytext; return RANGE; /* text points at yytext, not a copy */
{plumin}     seg_yylval.text = yytext; return PLUMIN; /* text points at yytext, not a copy */
{float}      seg_yylval.text = yytext; return SEGFLOAT; /* text points at yytext, not a copy */
\<           seg_yylval.text = "<"; return EXTENSION; /* text is a string constant */
\>           seg_yylval.text = ">"; return EXTENSION; /* text is a string constant */
\~           seg_yylval.text = "~"; return EXTENSION; /* text is a string constant */
[ \t\n\r\f\v]+ /* discard spaces */
.            return yytext[0]; /* alert parser of the garbage: any other single character is returned as its own token code */

%%

/* LCOV_EXCL_STOP */

/*
 * Report a syntax error in seg input through errsave().
 *
 * "message" is the description to embed in the error detail.  The error is
 * handed to "escontext", so whether it is thrown or merely recorded is up
 * to errsave().  "result" is not used in this function.
 */
void
seg_yyerror(SEG *result, struct Node *escontext, const char *message)
{
	/* An earlier soft error takes precedence; leave it untouched. */
	if (SOFT_ERROR_OCCURRED(escontext))
		return;

	/* Normal case: the scanner is sitting on some offending text. */
	if (*yytext != YY_END_OF_BUFFER_CHAR)
	{
		errsave(escontext,
				(errcode(ERRCODE_SYNTAX_ERROR),
				 errmsg("bad seg representation"),
				 /* translator: first %s is typically "syntax error" */
				 errdetail("%s at or near \"%s\"", message, yytext)));
		return;
	}

	/* Otherwise the scanner ran off the end of the input string. */
	errsave(escontext,
			(errcode(ERRCODE_SYNTAX_ERROR),
			 errmsg("bad seg representation"),
			 /* translator: %s is typically "syntax error" */
			 errdetail("%s at end of input", message)));
}


/*
 * Called before any actual parsing is done
 */
/*
 * Prepare the scanner to read the NUL-terminated string "str".
 *
 * Must be called before any actual parsing is done.  The input is copied
 * into a freshly palloc'd buffer (scanbuf), which is then handed to flex;
 * the resulting buffer handle is remembered in scanbufhandle.
 */
void
seg_scanner_init(const char *str)
{
	Size	input_len = strlen(str);

	/* A previous scan may have been abandoned by ereport(); drop its buffer. */
	if (YY_CURRENT_BUFFER)
		yy_delete_buffer(YY_CURRENT_BUFFER);

	/*
	 * Copy the input and append the two end-of-buffer marker bytes that
	 * flex requires at the tail of a scan buffer.
	 */
	scanbuf = palloc(input_len + 2);
	memcpy(scanbuf, str, input_len);
	scanbuf[input_len + 1] = YY_END_OF_BUFFER_CHAR;
	scanbuf[input_len] = YY_END_OF_BUFFER_CHAR;

	scanbufhandle = yy_scan_buffer(scanbuf, input_len + 2);

	/* Always start scanning in the initial start condition. */
	BEGIN(INITIAL);
}


/*
 * Called after parsing is done to clean up after seg_scanner_init()
 */
/*
 * Called after parsing is done to clean up after seg_scanner_init().
 *
 * Deletes the flex buffer handle and frees the scan buffer, then resets
 * both file-level variables so that no stale pointer to freed memory is
 * left behind between scans.
 */
void
seg_scanner_finish(void)
{
	yy_delete_buffer(scanbufhandle);
	scanbufhandle = NULL;

	pfree(scanbuf);
	scanbuf = NULL;
}
Build all Flex files standalone The proposed Meson build system will need a way to ignore certain generated files in order to coexist with the autoconf build system, and C files generated by Flex which are #include'd into .y files make this more difficult. In similar vein to 72b1e3a21, arrange for all Flex C files to compile to their own .o targets. Reviewed by Andres Freund Discussion: https://www.postgresql.org/message-id/20220810171935.7k5zgnjwqzalzmtm%40awork3.anarazel.de Discussion: https://www.postgresql.org/message-id/CAFBsxsF8Gc2StS3haXofshHCzqNMRXiSxvQEYGwnFsTmsdwNeg@mail.gmail.com 2022-09-04 07:33:31 +03:00			`%top{`
Remove useless whitespace at end of lines 2010-11-23 23:27:50 +03:00			`/*`
			`* A scanner for EMP-style numeric ranges`
			`*/`
Build all Flex files standalone The proposed Meson build system will need a way to ignore certain generated files in order to coexist with the autoconf build system, and C files generated by Flex which are #include'd into .y files make this more difficult. In similar vein to 72b1e3a21, arrange for all Flex C files to compile to their own .o targets. Reviewed by Andres Freund Discussion: https://www.postgresql.org/message-id/20220810171935.7k5zgnjwqzalzmtm%40awork3.anarazel.de Discussion: https://www.postgresql.org/message-id/CAFBsxsF8Gc2StS3haXofshHCzqNMRXiSxvQEYGwnFsTmsdwNeg@mail.gmail.com 2022-09-04 07:33:31 +03:00			`#include "postgres.h"`

Convert contrib/seg's input function to report errors softly Reviewed by Tom Lane Discussion: https://postgr.es/m/a8dc5700-c341-3ba8-0507-cc09881e6200@dunslane.net 2022-12-23 17:17:24 +03:00			`#include "nodes/miscnodes.h"`

Build all Flex files standalone The proposed Meson build system will need a way to ignore certain generated files in order to coexist with the autoconf build system, and C files generated by Flex which are #include'd into .y files make this more difficult. In similar vein to 72b1e3a21, arrange for all Flex C files to compile to their own .o targets. Reviewed by Andres Freund Discussion: https://www.postgresql.org/message-id/20220810171935.7k5zgnjwqzalzmtm%40awork3.anarazel.de Discussion: https://www.postgresql.org/message-id/CAFBsxsF8Gc2StS3haXofshHCzqNMRXiSxvQEYGwnFsTmsdwNeg@mail.gmail.com 2022-09-04 07:33:31 +03:00			`/*`
			`* NB: include segparse.h only AFTER including segdata.h, because segdata.h`
			`* contains the definition for SEG.`
			`*/`
			`#include "segdata.h"`
			`#include "segparse.h"`
			`}`
Gene Selkov's SEG datatype (GiST example code) 2000-12-11 23:40:33 +03:00
Build all Flex files standalone The proposed Meson build system will need a way to ignore certain generated files in order to coexist with the autoconf build system, and C files generated by Flex which are #include'd into .y files make this more difficult. In similar vein to 72b1e3a21, arrange for all Flex C files to compile to their own .o targets. Reviewed by Andres Freund Discussion: https://www.postgresql.org/message-id/20220810171935.7k5zgnjwqzalzmtm%40awork3.anarazel.de Discussion: https://www.postgresql.org/message-id/CAFBsxsF8Gc2StS3haXofshHCzqNMRXiSxvQEYGwnFsTmsdwNeg@mail.gmail.com 2022-09-04 07:33:31 +03:00			`%{`
Exclude flex-generated code from coverage testing Flex generates a lot of functions that are not actually used. In order to avoid coverage figures being ruined by that, mark up the part of the .l files where the generated code appears by lcov exclusion markers. That way, lcov will typically only reported on coverage for the .l file, which is under our control, but not for the .c file. Reviewed-by: Michael Paquier <michael.paquier@gmail.com> 2017-08-11 06:33:47 +03:00			`/* LCOV_EXCL_START */`

Make contrib/seg work with flex 2.5.31. Fix it up to have a real btree operator class, too, since in PG 7.4 you can't GROUP without one. 2003-09-14 06:18:49 +04:00			`/* No reason to constrain amount of data slurped */`
			`#define YY_READ_BUF_SIZE 16777216`
Gene Selkov's SEG datatype (GiST example code) 2000-12-11 23:40:33 +03:00
Ensure that in all flex lexers that are part of the backend, a yy_fatal_error() call results in elog(ERROR) not exit(). This was already fixed in the main lexer and plpgsql, but extend same technique to all the other dot-l files. Also, on review of the possible calls to yy_fatal_error(), it seems safe to use elog(ERROR) not elog(FATAL). 2003-05-30 02:30:02 +04:00			`/* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */`
Suppress warnings on platforms where fprintf is a macro (eg, recent Fedora). This was already done by somebody for the core flex files, but these contrib files seem to have been missed. 2005-10-16 00:37:36 +04:00			`#undef fprintf`
Improve handling of ereport(ERROR) and elog(ERROR). In commit 71450d7fd6c7cf7b3e38ac56e363bff6a681973c, we added code to inform suitably-intelligent compilers that ereport() doesn't return if the elevel is ERROR or higher. This patch extends that to elog(), and also fixes a double-evaluation hazard that the previous commit created in ereport(), as well as reducing the emitted code size. The elog() improvement requires the compiler to support __VA_ARGS__, which should be available in just about anything nowadays since it's required by C99. But our minimum language baseline is still C89, so add a configure test for that. The previous commit assumed that ereport's elevel could be evaluated twice, which isn't terribly safe --- there are already counterexamples in xlog.c. On compilers that have __builtin_constant_p, we can use that to protect the second test, since there's no possible optimization gain if the compiler doesn't know the value of elevel. Otherwise, use a local variable inside the macros to prevent double evaluation. The local-variable solution is inferior because (a) it leads to useless code being emitted when elevel isn't constant, and (b) it increases the optimization level needed for the compiler to recognize that subsequent code is unreachable. But it seems better than not teaching non-gcc compilers about unreachability at all. Lastly, if the compiler has __builtin_unreachable(), we can use that instead of abort(), resulting in a noticeable code savings since no function call is actually emitted. However, it seems wise to do this only in non-assert builds. In an assert build, continue to use abort(), so that the behavior will be predictable and debuggable if the "impossible" happens. These changes involve making the ereport and elog macros emit do-while statement blocks not just expressions, which forces small changes in a few call sites. 
Andres Freund, Tom Lane, Heikki Linnakangas 2013-01-14 03:39:20 +04:00			`#define fprintf(file, fmt, msg) fprintf_to_ereport(fmt, msg)`

			`static void`
			`fprintf_to_ereport(const char *fmt, const char *msg)`
			`{`
			`ereport(ERROR, (errmsg_internal("%s", msg)));`
			`}`
Ensure that in all flex lexers that are part of the backend, a yy_fatal_error() call results in elog(ERROR) not exit(). This was already fixed in the main lexer and plpgsql, but extend same technique to all the other dot-l files. Also, on review of the possible calls to yy_fatal_error(), it seems safe to use elog(ERROR) not elog(FATAL). 2003-05-30 02:30:02 +04:00
Make contrib/seg work with flex 2.5.31. Fix it up to have a real btree operator class, too, since in PG 7.4 you can't GROUP without one. 2003-09-14 06:18:49 +04:00			`/* Handles to the buffer that the lexer uses internally */`
			`static YY_BUFFER_STATE scanbufhandle;`
			`static char *scanbuf;`
Gene Selkov's SEG datatype (GiST example code) 2000-12-11 23:40:33 +03:00			`%}`

Since we're depending on %option noyywrap in the main scanner now, we may as well use it in all our flex files. Make all the flex files have a consistent set of options. 2002-07-30 20:33:08 +04:00			`%option 8bit`
			`%option never-interactive`
Add %option nodefault to all our flex lexers. Fix a couple of rule gaps exposed thereby. AFAICT these would not lead to any worse problems than junk emitted on the backend's stdout, but we should have the option to catch possible worse errors in future. 2004-02-25 01:06:32 +03:00			`%option nodefault`
Add %option noinput to contrib's flex scanners, to suppress gcc 4.3 warnings. Peter did this for core awhile ago but evidently missed contrib. 2008-08-26 03:12:45 +04:00			`%option noinput`
Since we're depending on %option noyywrap in the main scanner now, we may as well use it in all our flex files. Make all the flex files have a consistent set of options. 2002-07-30 20:33:08 +04:00			`%option nounput`
			`%option noyywrap`
Add "%option warn" to all flex input files that lacked it. This is recommended in the flex manual, and there seems no good reason not to use it everywhere. 2011-08-25 21:55:57 +04:00			`%option warn`
Make contrib/seg work with flex 2.5.31. Fix it up to have a real btree operator class, too, since in PG 7.4 you can't GROUP without one. 2003-09-14 06:18:49 +04:00			`%option prefix="seg_yy"`
Since we're depending on %option noyywrap in the main scanner now, we may as well use it in all our flex files. Make all the flex files have a consistent set of options. 2002-07-30 20:33:08 +04:00

Gene Selkov's SEG datatype (GiST example code) 2000-12-11 23:40:33 +03:00			`range (\.\.)(\.)?`
			`plumin (\'\+\-\')|(\(\+\-)\)`
			`integer [+-]?[0-9]+`
			`real [+-]?[0-9]+\.[0-9]+`
			`float ({integer}|{real})([eE]{integer})?`

			`%%`

Build all Flex files standalone The proposed Meson build system will need a way to ignore certain generated files in order to coexist with the autoconf build system, and C files generated by Flex which are #include'd into .y files make this more difficult. In similar vein to 72b1e3a21, arrange for all Flex C files to compile to their own .o targets. Reviewed by Andres Freund Discussion: https://www.postgresql.org/message-id/20220810171935.7k5zgnjwqzalzmtm%40awork3.anarazel.de Discussion: https://www.postgresql.org/message-id/CAFBsxsF8Gc2StS3haXofshHCzqNMRXiSxvQEYGwnFsTmsdwNeg@mail.gmail.com 2022-09-04 07:33:31 +03:00			`{range} seg_yylval.text = yytext; return RANGE;`
			`{plumin} seg_yylval.text = yytext; return PLUMIN;`
			`{float} seg_yylval.text = yytext; return SEGFLOAT;`
			`\< seg_yylval.text = "<"; return EXTENSION;`
			`\> seg_yylval.text = ">"; return EXTENSION;`
			`\~ seg_yylval.text = "~"; return EXTENSION;`
Handle \v as a whitespace character in parsers This commit comes as a continuation of the discussion that has led to d522b05, as \v was handled inconsistently when parsing array values or anything going through the parsers, and changing a parser behavior in stable branches is a scary thing to do. The parsing of array values now uses the more central scanner_isspace() and array_isspace() is removed. As pointing out by Peter Eisentraut, fix a confusing reference to horizontal space in the parsers with the term "horiz_space". \f was included in this set since 3cfdd8f from 2000, but it is not horizontal. "horiz_space" is renamed to "non_newline_space", to refer to all whitespace characters except newlines. The changes impact the parsers for the backend, psql, seg, cube, ecpg and replication commands. Note that JSON should not escape \v, as per RFC 7159, so these are not touched. Reviewed-by: Peter Eisentraut, Tom Lane Discussion: https://postgr.es/m/ZJKcjNwWHHvw9ksQ@paquier.xyz 2023-07-06 02:16:24 +03:00			`[ \t\n\r\f\v]+ /* discard spaces */`
Gene Selkov's SEG datatype (GiST example code) 2000-12-11 23:40:33 +03:00			`. return yytext[0]; /* alert parser of the garbage */`

			`%%`

Exclude flex-generated code from coverage testing Flex generates a lot of functions that are not actually used. In order to avoid coverage figures being ruined by that, mark up the part of the .l files where the generated code appears by lcov exclusion markers. That way, lcov will typically only reported on coverage for the .l file, which is under our control, but not for the .c file. Reviewed-by: Michael Paquier <michael.paquier@gmail.com> 2017-08-11 06:33:47 +03:00			`/* LCOV_EXCL_STOP */`

Add macros wrapping all usage of gcc's __attribute__. Until now __attribute__() was defined to be empty for all compilers but gcc. That's problematic because it prevents using it in other compilers; which is necessary e.g. for atomics portability. It's also just generally dubious to do so in a header as widely included as c.h. Instead add pg_attribute_format_arg, pg_attribute_printf, pg_attribute_noreturn macros which are implemented in the compilers that understand them. Also add pg_attribute_noreturn and pg_attribute_packed, but don't provide fallbacks, since they can affect functionality. This means that external code that, possibly unwittingly, relied on __attribute__ defined to be empty on !gcc compilers may now run into warnings or errors on those compilers. But there shouldn't be many occurances of that and it's hard to work around... Discussion: 54B58BA3.8040302@ohmu.fi Author: Oskari Saarenmaa, with some minor changes by me. 2015-03-11 16:19:54 +03:00			`void`
Convert contrib/seg's input function to report errors softly Reviewed by Tom Lane Discussion: https://postgr.es/m/a8dc5700-c341-3ba8-0507-cc09881e6200@dunslane.net 2022-12-23 17:17:24 +03:00			`seg_yyerror(SEG *result, struct Node *escontext, const char *message)`
Make contrib/seg work with flex 2.5.31. Fix it up to have a real btree operator class, too, since in PG 7.4 you can't GROUP without one. 2003-09-14 06:18:49 +04:00			`{`
Convert contrib/seg's input function to report errors softly Reviewed by Tom Lane Discussion: https://postgr.es/m/a8dc5700-c341-3ba8-0507-cc09881e6200@dunslane.net 2022-12-23 17:17:24 +03:00			`/* if we already reported an error, don't overwrite it */`
			`if (SOFT_ERROR_OCCURRED(escontext))`
			`return;`

Make contrib/seg work with flex 2.5.31. Fix it up to have a real btree operator class, too, since in PG 7.4 you can't GROUP without one. 2003-09-14 06:18:49 +04:00			`if (*yytext == YY_END_OF_BUFFER_CHAR)`
			`{`
Convert contrib/seg's input function to report errors softly Reviewed by Tom Lane Discussion: https://postgr.es/m/a8dc5700-c341-3ba8-0507-cc09881e6200@dunslane.net 2022-12-23 17:17:24 +03:00			`errsave(escontext,`
Make contrib/seg work with flex 2.5.31. Fix it up to have a real btree operator class, too, since in PG 7.4 you can't GROUP without one. 2003-09-14 06:18:49 +04:00			`(errcode(ERRCODE_SYNTAX_ERROR),`
			`errmsg("bad seg representation"),`
			`/* translator: %s is typically "syntax error" */`
			`errdetail("%s at end of input", message)));`
			`}`
			`else`
			`{`
Convert contrib/seg's input function to report errors softly Reviewed by Tom Lane Discussion: https://postgr.es/m/a8dc5700-c341-3ba8-0507-cc09881e6200@dunslane.net 2022-12-23 17:17:24 +03:00			`errsave(escontext,`
Make contrib/seg work with flex 2.5.31. Fix it up to have a real btree operator class, too, since in PG 7.4 you can't GROUP without one. 2003-09-14 06:18:49 +04:00			`(errcode(ERRCODE_SYNTAX_ERROR),`
			`errmsg("bad seg representation"),`
			`/* translator: first %s is typically "syntax error" */`
			`errdetail("%s at or near \"%s\"", message, yytext)));`
			`}`
			`}`


			`/*`
			`* Called before any actual parsing is done`
			`*/`
			`void`
			`seg_scanner_init(const char *str)`
			`{`
			`Size slen = strlen(str);`

			`/*`
			`* Might be left over after ereport()`
			`*/`
			`if (YY_CURRENT_BUFFER)`
			`yy_delete_buffer(YY_CURRENT_BUFFER);`

			`/*`
			`* Make a scan buffer with special termination needed by flex.`
			`*/`
			`scanbuf = palloc(slen + 2);`
			`memcpy(scanbuf, str, slen);`
			`scanbuf[slen] = scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR;`
			`scanbufhandle = yy_scan_buffer(scanbuf, slen + 2);`

			`BEGIN(INITIAL);`
			`}`

Gene Selkov's SEG datatype (GiST example code) 2000-12-11 23:40:33 +03:00
Make contrib/seg work with flex 2.5.31. Fix it up to have a real btree operator class, too, since in PG 7.4 you can't GROUP without one. 2003-09-14 06:18:49 +04:00			`/*`
			`* Called after parsing is done to clean up after seg_scanner_init()`
			`*/`
			`void`
			`seg_scanner_finish(void)`
			`{`
			`yy_delete_buffer(scanbufhandle);`
			`pfree(scanbuf);`
Gene Selkov's SEG datatype (GiST example code) 2000-12-11 23:40:33 +03:00			`}`