2021-10-29 00:51:43 +03:00
|
|
|
/* $NetBSD: parse.c,v 1.43 2021/10/28 21:51:43 rillig Exp $ */
|
1997-01-09 23:18:21 +03:00
|
|
|
|
2019-04-04 18:22:13 +03:00
|
|
|
/*-
|
|
|
|
* SPDX-License-Identifier: BSD-4-Clause
|
2003-08-07 15:13:06 +04:00
|
|
|
*
|
1997-10-18 20:04:21 +04:00
|
|
|
* Copyright (c) 1985 Sun Microsystems, Inc.
|
2019-04-04 18:22:13 +03:00
|
|
|
* Copyright (c) 1980, 1993
|
|
|
|
* The Regents of the University of California. All rights reserved.
|
1993-04-09 16:58:42 +04:00
|
|
|
* All rights reserved.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
* 3. All advertising materials mentioning features or use of this software
|
|
|
|
* must display the following acknowledgement:
|
|
|
|
* This product includes software developed by the University of
|
|
|
|
* California, Berkeley and its contributors.
|
|
|
|
* 4. Neither the name of the University nor the names of its contributors
|
|
|
|
* may be used to endorse or promote products derived from this software
|
|
|
|
* without specific prior written permission.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
1997-10-18 20:04:21 +04:00
|
|
|
#if 0
|
|
|
|
static char sccsid[] = "@(#)parse.c 8.1 (Berkeley) 6/6/93";
|
2019-04-04 18:22:13 +03:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#include <sys/cdefs.h>
|
|
|
|
#if defined(__NetBSD__)
|
|
|
|
__RCSID("$FreeBSD$");
|
1997-10-18 20:04:21 +04:00
|
|
|
#else
|
2019-04-04 18:22:13 +03:00
|
|
|
__FBSDID("$FreeBSD: head/usr.bin/indent/parse.c 337651 2018-08-11 19:20:06Z pstef $");
|
|
|
|
#endif
|
1993-04-09 16:58:42 +04:00
|
|
|
|
indent: split type token_type into 3 separate types
Previously, token_type was used for 3 different purposes:
1. symbol types from the lexer
2. symbol types on the parser stack
3. kind of control statement for 'if (expr)' and similar statements
Splitting the 41 constants into separate types makes it immediately
clear that the parser stack never handles comments, preprocessing lines,
newlines, form feeds, the inner structure of expressions.
Previously, the constant switch_expr was especially confusing since it
was used for 3 different purposes: when returned from lexi, it
represented the keyword 'switch', in the parser stack it represented
'switch (expr)', and it was used for a statement head as well.
The only overlap between the lexer symbols and the parser symbols are
'{' and '}', and the keywords 'do' and 'else'. To increase confusion,
the constants of the previous token_type were in apparently random
order and before 2021, they had cryptic, highly abbreviated names.
No functional change.
2021-10-25 03:54:37 +03:00
|
|
|
#include <assert.h>
|
2019-04-04 18:22:13 +03:00
|
|
|
#include <err.h>
|
1993-04-09 16:58:42 +04:00
|
|
|
#include <stdio.h>
|
2021-03-07 13:42:48 +03:00
|
|
|
|
2019-04-04 18:22:13 +03:00
|
|
|
#include "indent.h"
|
|
|
|
|
|
|
|
static void reduce(void);
|
1993-04-09 16:58:42 +04:00
|
|
|
|
indent: split type token_type into 3 separate types
Previously, token_type was used for 3 different purposes:
1. symbol types from the lexer
2. symbol types on the parser stack
3. kind of control statement for 'if (expr)' and similar statements
Splitting the 41 constants into separate types makes it immediately
clear that the parser stack never handles comments, preprocessing lines,
newlines, form feeds, the inner structure of expressions.
Previously, the constant switch_expr was especially confusing since it
was used for 3 different purposes: when returned from lexi, it
represented the keyword 'switch', in the parser stack it represented
'switch (expr)', and it was used for a statement head as well.
The only overlap between the lexer symbols and the parser symbols are
'{' and '}', and the keywords 'do' and 'else'. To increase confusion,
the constants of the previous token_type were in apparently random
order and before 2021, they had cryptic, highly abbreviated names.
No functional change.
2021-10-25 03:54:37 +03:00
|
|
|
#ifdef debug
|
2021-10-25 23:32:38 +03:00
|
|
|
static const char *
|
indent: split type token_type into 3 separate types
Previously, token_type was used for 3 different purposes:
1. symbol types from the lexer
2. symbol types on the parser stack
3. kind of control statement for 'if (expr)' and similar statements
Splitting the 41 constants into separate types makes it immediately
clear that the parser stack never handles comments, preprocessing lines,
newlines, form feeds, the inner structure of expressions.
Previously, the constant switch_expr was especially confusing since it
was used for 3 different purposes: when returned from lexi, it
represented the keyword 'switch', in the parser stack it represented
'switch (expr)', and it was used for a statement head as well.
The only overlap between the lexer symbols and the parser symbols are
'{' and '}', and the keywords 'do' and 'else'. To increase confusion,
the constants of the previous token_type were in apparently random
order and before 2021, they had cryptic, highly abbreviated names.
No functional change.
2021-10-25 03:54:37 +03:00
|
|
|
psym_name(parser_symbol psym)
|
|
|
|
{
|
|
|
|
static const char *const name[] = {
|
|
|
|
"semicolon",
|
|
|
|
"lbrace",
|
|
|
|
"rbrace",
|
|
|
|
"decl",
|
|
|
|
"stmt",
|
|
|
|
"stmt_list",
|
|
|
|
"for_exprs",
|
|
|
|
"if_expr",
|
|
|
|
"if_expr_stmt",
|
|
|
|
"if_expr_stmt_else",
|
|
|
|
"else",
|
|
|
|
"switch_expr",
|
|
|
|
"do",
|
|
|
|
"do_stmt",
|
|
|
|
"while_expr",
|
|
|
|
};
|
|
|
|
|
|
|
|
assert(array_length(name) == (int)psym_while_expr + 1);
|
|
|
|
|
|
|
|
return name[psym];
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2021-10-08 00:43:20 +03:00
|
|
|
/*
|
|
|
|
* Shift the token onto the parser stack, or reduce it by combining it with
|
|
|
|
* previous tokens.
|
|
|
|
*/
|
1997-10-19 07:17:12 +04:00
|
|
|
void
|
indent: split type token_type into 3 separate types
Previously, token_type was used for 3 different purposes:
1. symbol types from the lexer
2. symbol types on the parser stack
3. kind of control statement for 'if (expr)' and similar statements
Splitting the 41 constants into separate types makes it immediately
clear that the parser stack never handles comments, preprocessing lines,
newlines, form feeds, the inner structure of expressions.
Previously, the constant switch_expr was especially confusing since it
was used for 3 different purposes: when returned from lexi, it
represented the keyword 'switch', in the parser stack it represented
'switch (expr)', and it was used for a statement head as well.
The only overlap between the lexer symbols and the parser symbols are
'{' and '}', and the keywords 'do' and 'else'. To increase confusion,
the constants of the previous token_type were in apparently random
order and before 2021, they had cryptic, highly abbreviated names.
No functional change.
2021-10-25 03:54:37 +03:00
|
|
|
parse(parser_symbol psym)
|
1993-04-09 16:58:42 +04:00
|
|
|
{
|
2021-10-25 23:32:38 +03:00
|
|
|
debug_println("parse token: '%s'", psym_name(psym));
|
1993-04-09 16:58:42 +04:00
|
|
|
|
indent: split type token_type into 3 separate types
Previously, token_type was used for 3 different purposes:
1. symbol types from the lexer
2. symbol types on the parser stack
3. kind of control statement for 'if (expr)' and similar statements
Splitting the 41 constants into separate types makes it immediately
clear that the parser stack never handles comments, preprocessing lines,
newlines, form feeds, the inner structure of expressions.
Previously, the constant switch_expr was especially confusing since it
was used for 3 different purposes: when returned from lexi, it
represented the keyword 'switch', in the parser stack it represented
'switch (expr)', and it was used for a statement head as well.
The only overlap between the lexer symbols and the parser symbols are
'{' and '}', and the keywords 'do' and 'else'. To increase confusion,
the constants of the previous token_type were in apparently random
order and before 2021, they had cryptic, highly abbreviated names.
No functional change.
2021-10-25 03:54:37 +03:00
|
|
|
if (psym != psym_else) {
|
|
|
|
while (ps.s_sym[ps.tos] == psym_if_expr_stmt) {
|
|
|
|
ps.s_sym[ps.tos] = psym_stmt;
|
2021-10-09 00:48:33 +03:00
|
|
|
reduce();
|
|
|
|
}
|
2019-04-04 18:22:13 +03:00
|
|
|
}
|
1993-04-09 16:58:42 +04:00
|
|
|
|
indent: split type token_type into 3 separate types
Previously, token_type was used for 3 different purposes:
1. symbol types from the lexer
2. symbol types on the parser stack
3. kind of control statement for 'if (expr)' and similar statements
Splitting the 41 constants into separate types makes it immediately
clear that the parser stack never handles comments, preprocessing lines,
newlines, form feeds, the inner structure of expressions.
Previously, the constant switch_expr was especially confusing since it
was used for 3 different purposes: when returned from lexi, it
represented the keyword 'switch', in the parser stack it represented
'switch (expr)', and it was used for a statement head as well.
The only overlap between the lexer symbols and the parser symbols are
'{' and '}', and the keywords 'do' and 'else'. To increase confusion,
the constants of the previous token_type were in apparently random
order and before 2021, they had cryptic, highly abbreviated names.
No functional change.
2021-10-25 03:54:37 +03:00
|
|
|
switch (psym) {
|
1993-04-09 16:58:42 +04:00
|
|
|
|
indent: split type token_type into 3 separate types
Previously, token_type was used for 3 different purposes:
1. symbol types from the lexer
2. symbol types on the parser stack
3. kind of control statement for 'if (expr)' and similar statements
Splitting the 41 constants into separate types makes it immediately
clear that the parser stack never handles comments, preprocessing lines,
newlines, form feeds, the inner structure of expressions.
Previously, the constant switch_expr was especially confusing since it
was used for 3 different purposes: when returned from lexi, it
represented the keyword 'switch', in the parser stack it represented
'switch (expr)', and it was used for a statement head as well.
The only overlap between the lexer symbols and the parser symbols are
'{' and '}', and the keywords 'do' and 'else'. To increase confusion,
the constants of the previous token_type were in apparently random
order and before 2021, they had cryptic, highly abbreviated names.
No functional change.
2021-10-25 03:54:37 +03:00
|
|
|
case psym_decl:
|
2021-10-25 22:56:03 +03:00
|
|
|
ps.search_stmt = opt.brace_same_line;
|
2019-04-04 18:22:13 +03:00
|
|
|
/* indicate that following brace should be on same line */
|
2021-10-08 00:52:54 +03:00
|
|
|
|
indent: split type token_type into 3 separate types
Previously, token_type was used for 3 different purposes:
1. symbol types from the lexer
2. symbol types on the parser stack
3. kind of control statement for 'if (expr)' and similar statements
Splitting the 41 constants into separate types makes it immediately
clear that the parser stack never handles comments, preprocessing lines,
newlines, form feeds, the inner structure of expressions.
Previously, the constant switch_expr was especially confusing since it
was used for 3 different purposes: when returned from lexi, it
represented the keyword 'switch', in the parser stack it represented
'switch (expr)', and it was used for a statement head as well.
The only overlap between the lexer symbols and the parser symbols are
'{' and '}', and the keywords 'do' and 'else'. To increase confusion,
the constants of the previous token_type were in apparently random
order and before 2021, they had cryptic, highly abbreviated names.
No functional change.
2021-10-25 03:54:37 +03:00
|
|
|
if (ps.s_sym[ps.tos] != psym_decl) { /* only put one declaration
|
2019-04-04 18:22:13 +03:00
|
|
|
* onto stack */
|
|
|
|
break_comma = true; /* while in declaration, newline should be
|
|
|
|
* forced after comma */
|
indent: split type token_type into 3 separate types
Previously, token_type was used for 3 different purposes:
1. symbol types from the lexer
2. symbol types on the parser stack
3. kind of control statement for 'if (expr)' and similar statements
Splitting the 41 constants into separate types makes it immediately
clear that the parser stack never handles comments, preprocessing lines,
newlines, form feeds, the inner structure of expressions.
Previously, the constant switch_expr was especially confusing since it
was used for 3 different purposes: when returned from lexi, it
represented the keyword 'switch', in the parser stack it represented
'switch (expr)', and it was used for a statement head as well.
The only overlap between the lexer symbols and the parser symbols are
'{' and '}', and the keywords 'do' and 'else'. To increase confusion,
the constants of the previous token_type were in apparently random
order and before 2021, they had cryptic, highly abbreviated names.
No functional change.
2021-10-25 03:54:37 +03:00
|
|
|
ps.s_sym[++ps.tos] = psym_decl;
|
2021-10-20 08:26:46 +03:00
|
|
|
ps.s_ind_level[ps.tos] = ps.ind_level_follow;
|
2019-04-04 18:22:13 +03:00
|
|
|
|
2021-10-08 00:38:25 +03:00
|
|
|
if (opt.ljust_decl) {
|
2019-04-04 18:22:13 +03:00
|
|
|
ps.ind_level = 0;
|
2021-10-05 09:24:06 +03:00
|
|
|
for (int i = ps.tos - 1; i > 0; --i)
|
indent: split type token_type into 3 separate types
Previously, token_type was used for 3 different purposes:
1. symbol types from the lexer
2. symbol types on the parser stack
3. kind of control statement for 'if (expr)' and similar statements
Splitting the 41 constants into separate types makes it immediately
clear that the parser stack never handles comments, preprocessing lines,
newlines, form feeds, the inner structure of expressions.
Previously, the constant switch_expr was especially confusing since it
was used for 3 different purposes: when returned from lexi, it
represented the keyword 'switch', in the parser stack it represented
'switch (expr)', and it was used for a statement head as well.
The only overlap between the lexer symbols and the parser symbols are
'{' and '}', and the keywords 'do' and 'else'. To increase confusion,
the constants of the previous token_type were in apparently random
order and before 2021, they had cryptic, highly abbreviated names.
No functional change.
2021-10-25 03:54:37 +03:00
|
|
|
if (ps.s_sym[i] == psym_decl)
|
2019-04-04 18:22:13 +03:00
|
|
|
++ps.ind_level; /* indentation is number of
|
|
|
|
* declaration levels deep we are */
|
2021-09-25 23:56:53 +03:00
|
|
|
ps.ind_level_follow = ps.ind_level;
|
2019-04-04 18:22:13 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
indent: split type token_type into 3 separate types
Previously, token_type was used for 3 different purposes:
1. symbol types from the lexer
2. symbol types on the parser stack
3. kind of control statement for 'if (expr)' and similar statements
Splitting the 41 constants into separate types makes it immediately
clear that the parser stack never handles comments, preprocessing lines,
newlines, form feeds, the inner structure of expressions.
Previously, the constant switch_expr was especially confusing since it
was used for 3 different purposes: when returned from lexi, it
represented the keyword 'switch', in the parser stack it represented
'switch (expr)', and it was used for a statement head as well.
The only overlap between the lexer symbols and the parser symbols are
'{' and '}', and the keywords 'do' and 'else'. To increase confusion,
the constants of the previous token_type were in apparently random
order and before 2021, they had cryptic, highly abbreviated names.
No functional change.
2021-10-25 03:54:37 +03:00
|
|
|
case psym_if_expr: /* 'if' '(' <expr> ')' */
|
|
|
|
if (ps.s_sym[ps.tos] == psym_if_expr_stmt_else && opt.else_if) {
|
2021-03-09 22:14:39 +03:00
|
|
|
/*
|
2021-10-08 00:38:25 +03:00
|
|
|
* Reduce "else if" to "if". This saves a lot of stack space in
|
2021-09-26 22:37:11 +03:00
|
|
|
* case of a long "if-else-if ... else-if" sequence.
|
2021-03-09 22:14:39 +03:00
|
|
|
*/
|
2021-10-20 08:26:46 +03:00
|
|
|
ps.ind_level_follow = ps.s_ind_level[ps.tos--];
|
2021-03-09 22:14:39 +03:00
|
|
|
}
|
2019-04-04 18:22:13 +03:00
|
|
|
/* FALLTHROUGH */
|
indent: split type token_type into 3 separate types
Previously, token_type was used for 3 different purposes:
1. symbol types from the lexer
2. symbol types on the parser stack
3. kind of control statement for 'if (expr)' and similar statements
Splitting the 41 constants into separate types makes it immediately
clear that the parser stack never handles comments, preprocessing lines,
newlines, form feeds, the inner structure of expressions.
Previously, the constant switch_expr was especially confusing since it
was used for 3 different purposes: when returned from lexi, it
represented the keyword 'switch', in the parser stack it represented
'switch (expr)', and it was used for a statement head as well.
The only overlap between the lexer symbols and the parser symbols are
'{' and '}', and the keywords 'do' and 'else'. To increase confusion,
the constants of the previous token_type were in apparently random
order and before 2021, they had cryptic, highly abbreviated names.
No functional change.
2021-10-25 03:54:37 +03:00
|
|
|
case psym_do:
|
2021-10-26 22:36:30 +03:00
|
|
|
case psym_for_exprs: /* 'for' (...) */
|
indent: split type token_type into 3 separate types
Previously, token_type was used for 3 different purposes:
1. symbol types from the lexer
2. symbol types on the parser stack
3. kind of control statement for 'if (expr)' and similar statements
Splitting the 41 constants into separate types makes it immediately
clear that the parser stack never handles comments, preprocessing lines,
newlines, form feeds, the inner structure of expressions.
Previously, the constant switch_expr was especially confusing since it
was used for 3 different purposes: when returned from lexi, it
represented the keyword 'switch', in the parser stack it represented
'switch (expr)', and it was used for a statement head as well.
The only overlap between the lexer symbols and the parser symbols are
'{' and '}', and the keywords 'do' and 'else'. To increase confusion,
the constants of the previous token_type were in apparently random
order and before 2021, they had cryptic, highly abbreviated names.
No functional change.
2021-10-25 03:54:37 +03:00
|
|
|
ps.s_sym[++ps.tos] = psym;
|
2021-10-20 08:26:46 +03:00
|
|
|
ps.s_ind_level[ps.tos] = ps.ind_level = ps.ind_level_follow;
|
2021-09-25 23:56:53 +03:00
|
|
|
++ps.ind_level_follow; /* subsequent statements should be indented 1 */
|
2021-10-25 22:56:03 +03:00
|
|
|
ps.search_stmt = opt.brace_same_line;
|
2019-04-04 18:22:13 +03:00
|
|
|
break;
|
|
|
|
|
indent: split type token_type into 3 separate types
Previously, token_type was used for 3 different purposes:
1. symbol types from the lexer
2. symbol types on the parser stack
3. kind of control statement for 'if (expr)' and similar statements
Splitting the 41 constants into separate types makes it immediately
clear that the parser stack never handles comments, preprocessing lines,
newlines, form feeds, the inner structure of expressions.
Previously, the constant switch_expr was especially confusing since it
was used for 3 different purposes: when returned from lexi, it
represented the keyword 'switch', in the parser stack it represented
'switch (expr)', and it was used for a statement head as well.
The only overlap between the lexer symbols and the parser symbols are
'{' and '}', and the keywords 'do' and 'else'. To increase confusion,
the constants of the previous token_type were in apparently random
order and before 2021, they had cryptic, highly abbreviated names.
No functional change.
2021-10-25 03:54:37 +03:00
|
|
|
case psym_lbrace:
|
2021-10-09 02:47:40 +03:00
|
|
|
break_comma = false; /* don't break comma in an initializer list */
|
indent: split type token_type into 3 separate types
Previously, token_type was used for 3 different purposes:
1. symbol types from the lexer
2. symbol types on the parser stack
3. kind of control statement for 'if (expr)' and similar statements
Splitting the 41 constants into separate types makes it immediately
clear that the parser stack never handles comments, preprocessing lines,
newlines, form feeds, the inner structure of expressions.
Previously, the constant switch_expr was especially confusing since it
was used for 3 different purposes: when returned from lexi, it
represented the keyword 'switch', in the parser stack it represented
'switch (expr)', and it was used for a statement head as well.
The only overlap between the lexer symbols and the parser symbols are
'{' and '}', and the keywords 'do' and 'else'. To increase confusion,
the constants of the previous token_type were in apparently random
order and before 2021, they had cryptic, highly abbreviated names.
No functional change.
2021-10-25 03:54:37 +03:00
|
|
|
if (ps.s_sym[ps.tos] == psym_stmt || ps.s_sym[ps.tos] == psym_decl
|
|
|
|
|| ps.s_sym[ps.tos] == psym_stmt_list)
|
2021-10-24 22:14:33 +03:00
|
|
|
++ps.ind_level_follow; /* it is a random, isolated stmt group
|
|
|
|
* or a declaration */
|
2019-04-04 18:22:13 +03:00
|
|
|
else {
|
2021-09-25 10:55:24 +03:00
|
|
|
if (code.s == code.e) {
|
2021-10-08 00:43:20 +03:00
|
|
|
/* it is a group as part of a while, for, etc. */
|
2019-04-04 18:22:13 +03:00
|
|
|
--ps.ind_level;
|
2021-10-08 00:52:54 +03:00
|
|
|
|
2019-04-04 18:22:13 +03:00
|
|
|
/*
|
2021-10-08 00:43:20 +03:00
|
|
|
* for a switch, brace should be two levels out from the code
|
2019-04-04 18:22:13 +03:00
|
|
|
*/
|
indent: split type token_type into 3 separate types
Previously, token_type was used for 3 different purposes:
1. symbol types from the lexer
2. symbol types on the parser stack
3. kind of control statement for 'if (expr)' and similar statements
Splitting the 41 constants into separate types makes it immediately
clear that the parser stack never handles comments, preprocessing lines,
newlines, form feeds, the inner structure of expressions.
Previously, the constant switch_expr was especially confusing since it
was used for 3 different purposes: when returned from lexi, it
represented the keyword 'switch', in the parser stack it represented
'switch (expr)', and it was used for a statement head as well.
The only overlap between the lexer symbols and the parser symbols are
'{' and '}', and the keywords 'do' and 'else'. To increase confusion,
the constants of the previous token_type were in apparently random
order and before 2021, they had cryptic, highly abbreviated names.
No functional change.
2021-10-25 03:54:37 +03:00
|
|
|
if (ps.s_sym[ps.tos] == psym_switch_expr &&
|
|
|
|
opt.case_indent >= 1)
|
2019-04-04 18:22:13 +03:00
|
|
|
--ps.ind_level;
|
|
|
|
}
|
|
|
|
}
|
1997-10-19 07:17:12 +04:00
|
|
|
|
indent: split type token_type into 3 separate types
Previously, token_type was used for 3 different purposes:
1. symbol types from the lexer
2. symbol types on the parser stack
3. kind of control statement for 'if (expr)' and similar statements
Splitting the 41 constants into separate types makes it immediately
clear that the parser stack never handles comments, preprocessing lines,
newlines, form feeds, the inner structure of expressions.
Previously, the constant switch_expr was especially confusing since it
was used for 3 different purposes: when returned from lexi, it
represented the keyword 'switch', in the parser stack it represented
'switch (expr)', and it was used for a statement head as well.
The only overlap between the lexer symbols and the parser symbols are
'{' and '}', and the keywords 'do' and 'else'. To increase confusion,
the constants of the previous token_type were in apparently random
order and before 2021, they had cryptic, highly abbreviated names.
No functional change.
2021-10-25 03:54:37 +03:00
|
|
|
ps.s_sym[++ps.tos] = psym_lbrace;
|
2021-10-20 08:26:46 +03:00
|
|
|
ps.s_ind_level[ps.tos] = ps.ind_level;
|
indent: split type token_type into 3 separate types
Previously, token_type was used for 3 different purposes:
1. symbol types from the lexer
2. symbol types on the parser stack
3. kind of control statement for 'if (expr)' and similar statements
Splitting the 41 constants into separate types makes it immediately
clear that the parser stack never handles comments, preprocessing lines,
newlines, form feeds, the inner structure of expressions.
Previously, the constant switch_expr was especially confusing since it
was used for 3 different purposes: when returned from lexi, it
represented the keyword 'switch', in the parser stack it represented
'switch (expr)', and it was used for a statement head as well.
The only overlap between the lexer symbols and the parser symbols are
'{' and '}', and the keywords 'do' and 'else'. To increase confusion,
the constants of the previous token_type were in apparently random
order and before 2021, they had cryptic, highly abbreviated names.
No functional change.
2021-10-25 03:54:37 +03:00
|
|
|
ps.s_sym[++ps.tos] = psym_stmt;
|
2019-04-04 18:22:13 +03:00
|
|
|
/* allow null stmt between braces */
|
2021-10-20 08:26:46 +03:00
|
|
|
ps.s_ind_level[ps.tos] = ps.ind_level_follow;
|
2019-04-04 18:22:13 +03:00
|
|
|
break;
|
|
|
|
|
indent: split type token_type into 3 separate types
Previously, token_type was used for 3 different purposes:
1. symbol types from the lexer
2. symbol types on the parser stack
3. kind of control statement for 'if (expr)' and similar statements
Splitting the 41 constants into separate types makes it immediately
clear that the parser stack never handles comments, preprocessing lines,
newlines, form feeds, the inner structure of expressions.
Previously, the constant switch_expr was especially confusing since it
was used for 3 different purposes: when returned from lexi, it
represented the keyword 'switch', in the parser stack it represented
'switch (expr)', and it was used for a statement head as well.
The only overlap between the lexer symbols and the parser symbols are
'{' and '}', and the keywords 'do' and 'else'. To increase confusion,
the constants of the previous token_type were in apparently random
order and before 2021, they had cryptic, highly abbreviated names.
No functional change.
2021-10-25 03:54:37 +03:00
|
|
|
case psym_while_expr: /* 'while' '(' <expr> ')' */
|
|
|
|
if (ps.s_sym[ps.tos] == psym_do_stmt) {
|
2019-04-04 18:22:13 +03:00
|
|
|
/* it is matched with do stmt */
|
2021-10-20 08:26:46 +03:00
|
|
|
ps.ind_level = ps.ind_level_follow = ps.s_ind_level[ps.tos];
|
indent: split type token_type into 3 separate types
Previously, token_type was used for 3 different purposes:
1. symbol types from the lexer
2. symbol types on the parser stack
3. kind of control statement for 'if (expr)' and similar statements
Splitting the 41 constants into separate types makes it immediately
clear that the parser stack never handles comments, preprocessing lines,
newlines, form feeds, the inner structure of expressions.
Previously, the constant switch_expr was especially confusing since it
was used for 3 different purposes: when returned from lexi, it
represented the keyword 'switch', in the parser stack it represented
'switch (expr)', and it was used for a statement head as well.
The only overlap between the lexer symbols and the parser symbols are
'{' and '}', and the keywords 'do' and 'else'. To increase confusion,
the constants of the previous token_type were in apparently random
order and before 2021, they had cryptic, highly abbreviated names.
No functional change.
2021-10-25 03:54:37 +03:00
|
|
|
ps.s_sym[++ps.tos] = psym_while_expr;
|
2021-10-20 08:26:46 +03:00
|
|
|
ps.s_ind_level[ps.tos] = ps.ind_level = ps.ind_level_follow;
|
2021-10-08 00:52:54 +03:00
|
|
|
|
2021-03-13 02:10:18 +03:00
|
|
|
} else { /* it is a while loop */
|
indent: split type token_type into 3 separate types
Previously, token_type was used for 3 different purposes:
1. symbol types from the lexer
2. symbol types on the parser stack
3. kind of control statement for 'if (expr)' and similar statements
Splitting the 41 constants into separate types makes it immediately
clear that the parser stack never handles comments, preprocessing lines,
newlines, form feeds, the inner structure of expressions.
Previously, the constant switch_expr was especially confusing since it
was used for 3 different purposes: when returned from lexi, it
represented the keyword 'switch', in the parser stack it represented
'switch (expr)', and it was used for a statement head as well.
The only overlap between the lexer symbols and the parser symbols are
'{' and '}', and the keywords 'do' and 'else'. To increase confusion,
the constants of the previous token_type were in apparently random
order and before 2021, they had cryptic, highly abbreviated names.
No functional change.
2021-10-25 03:54:37 +03:00
|
|
|
ps.s_sym[++ps.tos] = psym_while_expr;
|
2021-10-20 08:26:46 +03:00
|
|
|
ps.s_ind_level[ps.tos] = ps.ind_level_follow;
|
2021-09-25 23:56:53 +03:00
|
|
|
++ps.ind_level_follow;
|
2021-10-25 22:56:03 +03:00
|
|
|
ps.search_stmt = opt.brace_same_line;
|
2019-04-04 18:22:13 +03:00
|
|
|
}
|
1997-10-19 07:17:12 +04:00
|
|
|
|
2019-04-04 18:22:13 +03:00
|
|
|
break;
|
1997-10-19 07:17:12 +04:00
|
|
|
|
indent: split type token_type into 3 separate types
Previously, token_type was used for 3 different purposes:
1. symbol types from the lexer
2. symbol types on the parser stack
3. kind of control statement for 'if (expr)' and similar statements
Splitting the 41 constants into separate types makes it immediately
clear that the parser stack never handles comments, preprocessing lines,
newlines, form feeds, the inner structure of expressions.
Previously, the constant switch_expr was especially confusing since it
was used for 3 different purposes: when returned from lexi, it
represented the keyword 'switch', in the parser stack it represented
'switch (expr)', and it was used for a statement head as well.
The only overlap between the lexer symbols and the parser symbols are
'{' and '}', and the keywords 'do' and 'else'. To increase confusion,
the constants of the previous token_type were in apparently random
order and before 2021, they had cryptic, highly abbreviated names.
No functional change.
2021-10-25 03:54:37 +03:00
|
|
|
case psym_else:
|
|
|
|
if (ps.s_sym[ps.tos] != psym_if_expr_stmt)
|
2019-10-19 18:44:31 +03:00
|
|
|
diag(1, "Unmatched 'else'");
|
2019-04-04 18:22:13 +03:00
|
|
|
else {
|
2021-10-20 08:26:46 +03:00
|
|
|
/* The indentation for 'else' should be the same as for 'if'. */
|
|
|
|
ps.ind_level = ps.s_ind_level[ps.tos];
|
2021-10-08 00:38:25 +03:00
|
|
|
ps.ind_level_follow = ps.ind_level + 1;
|
indent: split type token_type into 3 separate types
Previously, token_type was used for 3 different purposes:
1. symbol types from the lexer
2. symbol types on the parser stack
3. kind of control statement for 'if (expr)' and similar statements
Splitting the 41 constants into separate types makes it immediately
clear that the parser stack never handles comments, preprocessing lines,
newlines, form feeds, the inner structure of expressions.
Previously, the constant switch_expr was especially confusing since it
was used for 3 different purposes: when returned from lexi, it
represented the keyword 'switch', in the parser stack it represented
'switch (expr)', and it was used for a statement head as well.
The only overlap between the lexer symbols and the parser symbols are
'{' and '}', and the keywords 'do' and 'else'. To increase confusion,
the constants of the previous token_type were in apparently random
order and before 2021, they had cryptic, highly abbreviated names.
No functional change.
2021-10-25 03:54:37 +03:00
|
|
|
ps.s_sym[ps.tos] = psym_if_expr_stmt_else;
|
2019-04-04 18:22:13 +03:00
|
|
|
/* remember if with else */
|
2021-10-25 22:56:03 +03:00
|
|
|
ps.search_stmt = opt.brace_same_line || opt.else_if;
|
2019-04-04 18:22:13 +03:00
|
|
|
}
|
|
|
|
break;
|
1997-10-19 07:17:12 +04:00
|
|
|
|
indent: split type token_type into 3 separate types
Previously, token_type was used for 3 different purposes:
1. symbol types from the lexer
2. symbol types on the parser stack
3. kind of control statement for 'if (expr)' and similar statements
Splitting the 41 constants into separate types makes it immediately
clear that the parser stack never handles comments, preprocessing lines,
newlines, form feeds, the inner structure of expressions.
Previously, the constant switch_expr was especially confusing since it
was used for 3 different purposes: when returned from lexi, it
represented the keyword 'switch', in the parser stack it represented
'switch (expr)', and it was used for a statement head as well.
The only overlap between the lexer symbols and the parser symbols are
'{' and '}', and the keywords 'do' and 'else'. To increase confusion,
the constants of the previous token_type were in apparently random
order and before 2021, they had cryptic, highly abbreviated names.
No functional change.
2021-10-25 03:54:37 +03:00
|
|
|
case psym_rbrace:
|
2021-03-09 22:14:39 +03:00
|
|
|
/* stack should have <lbrace> <stmt> or <lbrace> <stmt_list> */
|
indent: split type token_type into 3 separate types
Previously, token_type was used for 3 different purposes:
1. symbol types from the lexer
2. symbol types on the parser stack
3. kind of control statement for 'if (expr)' and similar statements
Splitting the 41 constants into separate types makes it immediately
clear that the parser stack never handles comments, preprocessing lines,
newlines, form feeds, the inner structure of expressions.
Previously, the constant switch_expr was especially confusing since it
was used for 3 different purposes: when returned from lexi, it
represented the keyword 'switch', in the parser stack it represented
'switch (expr)', and it was used for a statement head as well.
The only overlap between the lexer symbols and the parser symbols are
'{' and '}', and the keywords 'do' and 'else'. To increase confusion,
the constants of the previous token_type were in apparently random
order and before 2021, they had cryptic, highly abbreviated names.
No functional change.
2021-10-25 03:54:37 +03:00
|
|
|
if (ps.tos > 0 && ps.s_sym[ps.tos - 1] == psym_lbrace) {
|
2021-10-20 08:26:46 +03:00
|
|
|
ps.ind_level = ps.ind_level_follow = ps.s_ind_level[--ps.tos];
|
indent: split type token_type into 3 separate types
Previously, token_type was used for 3 different purposes:
1. symbol types from the lexer
2. symbol types on the parser stack
3. kind of control statement for 'if (expr)' and similar statements
Splitting the 41 constants into separate types makes it immediately
clear that the parser stack never handles comments, preprocessing lines,
newlines, form feeds, the inner structure of expressions.
Previously, the constant switch_expr was especially confusing since it
was used for 3 different purposes: when returned from lexi, it
represented the keyword 'switch', in the parser stack it represented
'switch (expr)', and it was used for a statement head as well.
The only overlap between the lexer symbols and the parser symbols are
'{' and '}', and the keywords 'do' and 'else'. To increase confusion,
the constants of the previous token_type were in apparently random
order and before 2021, they had cryptic, highly abbreviated names.
No functional change.
2021-10-25 03:54:37 +03:00
|
|
|
ps.s_sym[ps.tos] = psym_stmt;
|
2021-03-13 02:10:18 +03:00
|
|
|
} else
|
2019-10-19 18:44:31 +03:00
|
|
|
diag(1, "Statement nesting error");
|
2019-04-04 18:22:13 +03:00
|
|
|
break;
|
|
|
|
|
indent: split type token_type into 3 separate types
Previously, token_type was used for 3 different purposes:
1. symbol types from the lexer
2. symbol types on the parser stack
3. kind of control statement for 'if (expr)' and similar statements
Splitting the 41 constants into separate types makes it immediately
clear that the parser stack never handles comments, preprocessing lines,
newlines, form feeds, the inner structure of expressions.
Previously, the constant switch_expr was especially confusing since it
was used for 3 different purposes: when returned from lexi, it
represented the keyword 'switch', in the parser stack it represented
'switch (expr)', and it was used for a statement head as well.
The only overlap between the lexer symbols and the parser symbols are
'{' and '}', and the keywords 'do' and 'else'. To increase confusion,
the constants of the previous token_type were in apparently random
order and before 2021, they had cryptic, highly abbreviated names.
No functional change.
2021-10-25 03:54:37 +03:00
|
|
|
case psym_switch_expr: /* had switch (...) */
|
|
|
|
ps.s_sym[++ps.tos] = psym_switch_expr;
|
2021-10-20 08:26:46 +03:00
|
|
|
ps.s_case_ind_level[ps.tos] = case_ind;
|
2019-04-04 18:22:13 +03:00
|
|
|
/* save current case indent level */
|
2021-10-20 08:26:46 +03:00
|
|
|
ps.s_ind_level[ps.tos] = ps.ind_level_follow;
|
2021-10-05 09:55:24 +03:00
|
|
|
/* cases should be one level deeper than the switch */
|
|
|
|
case_ind = (float)ps.ind_level_follow + opt.case_indent;
|
|
|
|
/* statements should be two levels deeper */
|
|
|
|
ps.ind_level_follow += (int)opt.case_indent + 1;
|
2021-10-25 22:56:03 +03:00
|
|
|
ps.search_stmt = opt.brace_same_line;
|
2019-04-04 18:22:13 +03:00
|
|
|
break;
|
|
|
|
|
indent: split type token_type into 3 separate types
Previously, token_type was used for 3 different purposes:
1. symbol types from the lexer
2. symbol types on the parser stack
3. kind of control statement for 'if (expr)' and similar statements
Splitting the 41 constants into separate types makes it immediately
clear that the parser stack never handles comments, preprocessing lines,
newlines, form feeds, the inner structure of expressions.
Previously, the constant switch_expr was especially confusing since it
was used for 3 different purposes: when returned from lexi, it
represented the keyword 'switch', in the parser stack it represented
'switch (expr)', and it was used for a statement head as well.
The only overlap between the lexer symbols and the parser symbols are
'{' and '}', and the keywords 'do' and 'else'. To increase confusion,
the constants of the previous token_type were in apparently random
order and before 2021, they had cryptic, highly abbreviated names.
No functional change.
2021-10-25 03:54:37 +03:00
|
|
|
case psym_semicolon: /* this indicates a simple stmt */
|
2019-04-04 18:22:13 +03:00
|
|
|
break_comma = false; /* turn off flag to break after commas in a
|
|
|
|
* declaration */
|
indent: split type token_type into 3 separate types
Previously, token_type was used for 3 different purposes:
1. symbol types from the lexer
2. symbol types on the parser stack
3. kind of control statement for 'if (expr)' and similar statements
Splitting the 41 constants into separate types makes it immediately
clear that the parser stack never handles comments, preprocessing lines,
newlines, form feeds, the inner structure of expressions.
Previously, the constant switch_expr was especially confusing since it
was used for 3 different purposes: when returned from lexi, it
represented the keyword 'switch', in the parser stack it represented
'switch (expr)', and it was used for a statement head as well.
The only overlap between the lexer symbols and the parser symbols are
'{' and '}', and the keywords 'do' and 'else'. To increase confusion,
the constants of the previous token_type were in apparently random
order and before 2021, they had cryptic, highly abbreviated names.
No functional change.
2021-10-25 03:54:37 +03:00
|
|
|
ps.s_sym[++ps.tos] = psym_stmt;
|
2021-10-20 08:26:46 +03:00
|
|
|
ps.s_ind_level[ps.tos] = ps.ind_level;
|
2019-04-04 18:22:13 +03:00
|
|
|
break;
|
|
|
|
|
2021-10-08 00:38:25 +03:00
|
|
|
default:
|
2019-10-19 18:44:31 +03:00
|
|
|
diag(1, "Unknown code to parser");
|
2019-04-04 18:22:13 +03:00
|
|
|
return;
|
2021-10-08 00:38:25 +03:00
|
|
|
}
|
2019-04-04 18:22:13 +03:00
|
|
|
|
|
|
|
if (ps.tos >= STACKSIZE - 1)
|
|
|
|
errx(1, "Parser stack overflow");
|
|
|
|
|
|
|
|
reduce(); /* see if any reduction can be done */
|
1993-04-09 16:58:42 +04:00
|
|
|
|
|
|
|
#ifdef debug
|
2021-03-07 14:32:05 +03:00
|
|
|
printf("parse stack:");
|
2021-10-05 09:24:06 +03:00
|
|
|
for (int i = 1; i <= ps.tos; ++i)
|
indent: split type token_type into 3 separate types
Previously, token_type was used for 3 different purposes:
1. symbol types from the lexer
2. symbol types on the parser stack
3. kind of control statement for 'if (expr)' and similar statements
Splitting the 41 constants into separate types makes it immediately
clear that the parser stack never handles comments, preprocessing lines,
newlines, form feeds, the inner structure of expressions.
Previously, the constant switch_expr was especially confusing since it
was used for 3 different purposes: when returned from lexi, it
represented the keyword 'switch', in the parser stack it represented
'switch (expr)', and it was used for a statement head as well.
The only overlap between the lexer symbols and the parser symbols are
'{' and '}', and the keywords 'do' and 'else'. To increase confusion,
the constants of the previous token_type were in apparently random
order and before 2021, they had cryptic, highly abbreviated names.
No functional change.
2021-10-25 03:54:37 +03:00
|
|
|
printf(" ('%s' at %d)", psym_name(ps.s_sym[i]), ps.s_ind_level[i]);
|
2021-03-07 14:32:05 +03:00
|
|
|
if (ps.tos == 0)
|
2021-09-26 22:37:11 +03:00
|
|
|
printf(" empty");
|
2019-04-04 18:22:13 +03:00
|
|
|
printf("\n");
|
1993-04-09 16:58:42 +04:00
|
|
|
#endif
|
|
|
|
}
|
2019-04-04 18:22:13 +03:00
|
|
|
|
indent: split type token_type into 3 separate types
Previously, token_type was used for 3 different purposes:
1. symbol types from the lexer
2. symbol types on the parser stack
3. kind of control statement for 'if (expr)' and similar statements
Splitting the 41 constants into separate types makes it immediately
clear that the parser stack never handles comments, preprocessing lines,
newlines, form feeds, the inner structure of expressions.
Previously, the constant switch_expr was especially confusing since it
was used for 3 different purposes: when returned from lexi, it
represented the keyword 'switch', in the parser stack it represented
'switch (expr)', and it was used for a statement head as well.
The only overlap between the lexer symbols and the parser symbols are
'{' and '}', and the keywords 'do' and 'else'. To increase confusion,
the constants of the previous token_type were in apparently random
order and before 2021, they had cryptic, highly abbreviated names.
No functional change.
2021-10-25 03:54:37 +03:00
|
|
|
void
|
|
|
|
parse_hd(stmt_head hd)
|
|
|
|
{
|
|
|
|
static const parser_symbol psym[] = {
|
|
|
|
[hd_for] = psym_for_exprs,
|
|
|
|
[hd_if] = psym_if_expr,
|
|
|
|
[hd_switch] = psym_switch_expr,
|
|
|
|
[hd_while] = psym_while_expr
|
|
|
|
};
|
|
|
|
parse(psym[hd]);
|
|
|
|
}
|
|
|
|
|
2021-03-09 21:21:01 +03:00
|
|
|
/*----------------------------------------------*\
|
|
|
|
| REDUCTION PHASE |
|
|
|
|
\*----------------------------------------------*/
|
|
|
|
|
1993-04-09 16:58:42 +04:00
|
|
|
/*
|
2021-03-09 21:21:01 +03:00
|
|
|
* Try to combine the statement on the top of the parse stack with the symbol
|
|
|
|
* directly below it, replacing these two symbols with a single symbol.
|
|
|
|
*/
|
2021-09-25 20:36:51 +03:00
|
|
|
static bool
|
2021-03-09 21:21:01 +03:00
|
|
|
reduce_stmt(void)
|
|
|
|
{
|
indent: split type token_type into 3 separate types
Previously, token_type was used for 3 different purposes:
1. symbol types from the lexer
2. symbol types on the parser stack
3. kind of control statement for 'if (expr)' and similar statements
Splitting the 41 constants into separate types makes it immediately
clear that the parser stack never handles comments, preprocessing lines,
newlines, form feeds, the inner structure of expressions.
Previously, the constant switch_expr was especially confusing since it
was used for 3 different purposes: when returned from lexi, it
represented the keyword 'switch', in the parser stack it represented
'switch (expr)', and it was used for a statement head as well.
The only overlap between the lexer symbols and the parser symbols are
'{' and '}', and the keywords 'do' and 'else'. To increase confusion,
the constants of the previous token_type were in apparently random
order and before 2021, they had cryptic, highly abbreviated names.
No functional change.
2021-10-25 03:54:37 +03:00
|
|
|
switch (ps.s_sym[ps.tos - 1]) {
|
2021-03-09 21:21:01 +03:00
|
|
|
|
2021-10-29 00:51:43 +03:00
|
|
|
case psym_stmt:
|
|
|
|
case psym_stmt_list:
|
indent: split type token_type into 3 separate types
Previously, token_type was used for 3 different purposes:
1. symbol types from the lexer
2. symbol types on the parser stack
3. kind of control statement for 'if (expr)' and similar statements
Splitting the 41 constants into separate types makes it immediately
clear that the parser stack never handles comments, preprocessing lines,
newlines, form feeds, the inner structure of expressions.
Previously, the constant switch_expr was especially confusing since it
was used for 3 different purposes: when returned from lexi, it
represented the keyword 'switch', in the parser stack it represented
'switch (expr)', and it was used for a statement head as well.
The only overlap between the lexer symbols and the parser symbols are
'{' and '}', and the keywords 'do' and 'else'. To increase confusion,
the constants of the previous token_type were in apparently random
order and before 2021, they had cryptic, highly abbreviated names.
No functional change.
2021-10-25 03:54:37 +03:00
|
|
|
ps.s_sym[--ps.tos] = psym_stmt_list;
|
2021-03-09 21:21:01 +03:00
|
|
|
return true;
|
|
|
|
|
2021-10-29 00:51:43 +03:00
|
|
|
case psym_do:
|
indent: split type token_type into 3 separate types
Previously, token_type was used for 3 different purposes:
1. symbol types from the lexer
2. symbol types on the parser stack
3. kind of control statement for 'if (expr)' and similar statements
Splitting the 41 constants into separate types makes it immediately
clear that the parser stack never handles comments, preprocessing lines,
newlines, form feeds, the inner structure of expressions.
Previously, the constant switch_expr was especially confusing since it
was used for 3 different purposes: when returned from lexi, it
represented the keyword 'switch', in the parser stack it represented
'switch (expr)', and it was used for a statement head as well.
The only overlap between the lexer symbols and the parser symbols are
'{' and '}', and the keywords 'do' and 'else'. To increase confusion,
the constants of the previous token_type were in apparently random
order and before 2021, they had cryptic, highly abbreviated names.
No functional change.
2021-10-25 03:54:37 +03:00
|
|
|
ps.s_sym[--ps.tos] = psym_do_stmt;
|
2021-10-20 08:26:46 +03:00
|
|
|
ps.ind_level_follow = ps.s_ind_level[ps.tos];
|
2021-03-09 21:21:01 +03:00
|
|
|
return true;
|
|
|
|
|
2021-10-29 00:51:43 +03:00
|
|
|
case psym_if_expr:
|
indent: split type token_type into 3 separate types
Previously, token_type was used for 3 different purposes:
1. symbol types from the lexer
2. symbol types on the parser stack
3. kind of control statement for 'if (expr)' and similar statements
Splitting the 41 constants into separate types makes it immediately
clear that the parser stack never handles comments, preprocessing lines,
newlines, form feeds, the inner structure of expressions.
Previously, the constant switch_expr was especially confusing since it
was used for 3 different purposes: when returned from lexi, it
represented the keyword 'switch', in the parser stack it represented
'switch (expr)', and it was used for a statement head as well.
The only overlap between the lexer symbols and the parser symbols are
'{' and '}', and the keywords 'do' and 'else'. To increase confusion,
the constants of the previous token_type were in apparently random
order and before 2021, they had cryptic, highly abbreviated names.
No functional change.
2021-10-25 03:54:37 +03:00
|
|
|
ps.s_sym[--ps.tos] = psym_if_expr_stmt;
|
2021-03-09 21:21:01 +03:00
|
|
|
int i = ps.tos - 1;
|
indent: split type token_type into 3 separate types
Previously, token_type was used for 3 different purposes:
1. symbol types from the lexer
2. symbol types on the parser stack
3. kind of control statement for 'if (expr)' and similar statements
Splitting the 41 constants into separate types makes it immediately
clear that the parser stack never handles comments, preprocessing lines,
newlines, form feeds, the inner structure of expressions.
Previously, the constant switch_expr was especially confusing since it
was used for 3 different purposes: when returned from lexi, it
represented the keyword 'switch', in the parser stack it represented
'switch (expr)', and it was used for a statement head as well.
The only overlap between the lexer symbols and the parser symbols are
'{' and '}', and the keywords 'do' and 'else'. To increase confusion,
the constants of the previous token_type were in apparently random
order and before 2021, they had cryptic, highly abbreviated names.
No functional change.
2021-10-25 03:54:37 +03:00
|
|
|
while (ps.s_sym[i] != psym_stmt &&
|
2021-10-26 22:36:30 +03:00
|
|
|
ps.s_sym[i] != psym_stmt_list &&
|
|
|
|
ps.s_sym[i] != psym_lbrace)
|
2021-03-09 21:21:01 +03:00
|
|
|
--i;
|
2021-10-20 08:26:46 +03:00
|
|
|
ps.ind_level_follow = ps.s_ind_level[i];
|
2021-03-09 21:21:01 +03:00
|
|
|
/*
|
2021-10-29 00:51:43 +03:00
|
|
|
* For the time being, assume that there is no 'else' on this 'if',
|
|
|
|
* and set the indentation level accordingly. If an 'else' is
|
|
|
|
* scanned, it will be fixed up later.
|
2021-03-09 21:21:01 +03:00
|
|
|
*/
|
|
|
|
return true;
|
|
|
|
|
2021-10-29 00:51:43 +03:00
|
|
|
case psym_switch_expr:
|
2021-10-20 08:26:46 +03:00
|
|
|
case_ind = ps.s_case_ind_level[ps.tos - 1];
|
2021-03-09 21:21:01 +03:00
|
|
|
/* FALLTHROUGH */
|
indent: split type token_type into 3 separate types
Previously, token_type was used for 3 different purposes:
1. symbol types from the lexer
2. symbol types on the parser stack
3. kind of control statement for 'if (expr)' and similar statements
Splitting the 41 constants into separate types makes it immediately
clear that the parser stack never handles comments, preprocessing lines,
newlines, form feeds, the inner structure of expressions.
Previously, the constant switch_expr was especially confusing since it
was used for 3 different purposes: when returned from lexi, it
represented the keyword 'switch', in the parser stack it represented
'switch (expr)', and it was used for a statement head as well.
The only overlap between the lexer symbols and the parser symbols are
'{' and '}', and the keywords 'do' and 'else'. To increase confusion,
the constants of the previous token_type were in apparently random
order and before 2021, they had cryptic, highly abbreviated names.
No functional change.
2021-10-25 03:54:37 +03:00
|
|
|
case psym_decl: /* finish of a declaration */
|
2021-10-29 00:51:43 +03:00
|
|
|
case psym_if_expr_stmt_else:
|
|
|
|
case psym_for_exprs:
|
|
|
|
case psym_while_expr:
|
indent: split type token_type into 3 separate types
Previously, token_type was used for 3 different purposes:
1. symbol types from the lexer
2. symbol types on the parser stack
3. kind of control statement for 'if (expr)' and similar statements
Splitting the 41 constants into separate types makes it immediately
clear that the parser stack never handles comments, preprocessing lines,
newlines, form feeds, the inner structure of expressions.
Previously, the constant switch_expr was especially confusing since it
was used for 3 different purposes: when returned from lexi, it
represented the keyword 'switch', in the parser stack it represented
'switch (expr)', and it was used for a statement head as well.
The only overlap between the lexer symbols and the parser symbols are
'{' and '}', and the keywords 'do' and 'else'. To increase confusion,
the constants of the previous token_type were in apparently random
order and before 2021, they had cryptic, highly abbreviated names.
No functional change.
2021-10-25 03:54:37 +03:00
|
|
|
ps.s_sym[--ps.tos] = psym_stmt;
|
2021-10-20 08:26:46 +03:00
|
|
|
ps.ind_level_follow = ps.s_ind_level[ps.tos];
|
2021-03-09 21:21:01 +03:00
|
|
|
return true;
|
|
|
|
|
2021-10-29 00:51:43 +03:00
|
|
|
default:
|
2021-03-09 21:21:01 +03:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Repeatedly try to reduce the top two symbols on the parse stack to a
|
|
|
|
* single symbol, until no more reductions are possible.
|
1997-10-19 07:17:12 +04:00
|
|
|
*
|
1993-04-09 16:58:42 +04:00
|
|
|
* On each reduction, ps.i_l_follow (the indentation for the following line)
|
|
|
|
* is set to the indentation level associated with the old TOS.
|
|
|
|
*/
|
2019-04-04 18:22:13 +03:00
|
|
|
static void
|
2002-05-27 02:53:38 +04:00
|
|
|
reduce(void)
|
1993-04-09 16:58:42 +04:00
|
|
|
{
|
2021-03-09 21:21:01 +03:00
|
|
|
again:
|
indent: split type token_type into 3 separate types
Previously, token_type was used for 3 different purposes:
1. symbol types from the lexer
2. symbol types on the parser stack
3. kind of control statement for 'if (expr)' and similar statements
Splitting the 41 constants into separate types makes it immediately
clear that the parser stack never handles comments, preprocessing lines,
newlines, form feeds, the inner structure of expressions.
Previously, the constant switch_expr was especially confusing since it
was used for 3 different purposes: when returned from lexi, it
represented the keyword 'switch', in the parser stack it represented
'switch (expr)', and it was used for a statement head as well.
The only overlap between the lexer symbols and the parser symbols are
'{' and '}', and the keywords 'do' and 'else'. To increase confusion,
the constants of the previous token_type were in apparently random
order and before 2021, they had cryptic, highly abbreviated names.
No functional change.
2021-10-25 03:54:37 +03:00
|
|
|
if (ps.s_sym[ps.tos] == psym_stmt) {
|
2021-03-09 21:21:01 +03:00
|
|
|
if (reduce_stmt())
|
|
|
|
goto again;
|
indent: split type token_type into 3 separate types
Previously, token_type was used for 3 different purposes:
1. symbol types from the lexer
2. symbol types on the parser stack
3. kind of control statement for 'if (expr)' and similar statements
Splitting the 41 constants into separate types makes it immediately
clear that the parser stack never handles comments, preprocessing lines,
newlines, form feeds, the inner structure of expressions.
Previously, the constant switch_expr was especially confusing since it
was used for 3 different purposes: when returned from lexi, it
represented the keyword 'switch', in the parser stack it represented
'switch (expr)', and it was used for a statement head as well.
The only overlap between the lexer symbols and the parser symbols are
'{' and '}', and the keywords 'do' and 'else'. To increase confusion,
the constants of the previous token_type were in apparently random
order and before 2021, they had cryptic, highly abbreviated names.
No functional change.
2021-10-25 03:54:37 +03:00
|
|
|
} else if (ps.s_sym[ps.tos] == psym_while_expr) {
|
|
|
|
if (ps.s_sym[ps.tos - 1] == psym_do_stmt) {
|
2021-03-09 21:21:01 +03:00
|
|
|
ps.tos -= 2;
|
|
|
|
goto again;
|
1993-04-09 16:58:42 +04:00
|
|
|
}
|
2019-04-04 18:22:13 +03:00
|
|
|
}
|
1993-04-09 16:58:42 +04:00
|
|
|
}
|