From f391d9dc93a24923c57bb0e044161d3f0b840770 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Mon, 7 Oct 2024 12:22:10 -0400 Subject: [PATCH] Convert tab-complete's long else-if chain to a switch statement. Rename tab-complete.c to tab-complete.in.c, create the preprocessor script gen_tabcomplete.pl, and install Makefile/meson.build rules to create tab-complete.c from tab-complete.in.c. The preprocessor converts match_previous_words' else-if chain into a switch and populates tcpatterns[] with the data needed by the driver loop. The initial HeadMatches/TailMatches/Matches test in each else-if arm is now performed in a table-driven loop. Where we get a match, the corresponding switch case is invoked to see if the match succeeds. (It might not, if there were additional conditions in the original else-if test.) The total number of string comparisons done is just about the same as it was in the previous coding; however, now that we have table-driven logic underlying the handmade rules, there is room to improve that. For now I haven't bothered because tab completion is still plenty fast enough for human use. If the number of rules keeps increasing, we might someday need to do more in that area. The immediate benefit of all this thrashing is that C compilers frequently don't deal well with long else-if chains. On gcc 8.5.0, this reduces the compile time of tab-complete.c by about a factor of four, while MSVC is reported to crash outright with the previous coding. 
Discussion: https://postgr.es/m/2208466.1720729502@sss.pgh.pa.us --- src/bin/psql/.gitignore | 1 + src/bin/psql/Makefile | 5 +- src/bin/psql/gen_tabcomplete.pl | 306 ++++++++++++++++++ src/bin/psql/meson.build | 12 +- .../{tab-complete.c => tab-complete.in.c} | 2 +- 5 files changed, 323 insertions(+), 3 deletions(-) create mode 100644 src/bin/psql/gen_tabcomplete.pl rename src/bin/psql/{tab-complete.c => tab-complete.in.c} (99%) diff --git a/src/bin/psql/.gitignore b/src/bin/psql/.gitignore index 10b6dd3a6b..7272f6e35d 100644 --- a/src/bin/psql/.gitignore +++ b/src/bin/psql/.gitignore @@ -1,4 +1,5 @@ /psqlscanslash.c +/tab-complete.c /sql_help.h /sql_help.c /psql diff --git a/src/bin/psql/Makefile b/src/bin/psql/Makefile index 374c4c3ab8..62636d2663 100644 --- a/src/bin/psql/Makefile +++ b/src/bin/psql/Makefile @@ -62,6 +62,9 @@ psqlscanslash.c: FLEXFLAGS = -Cfe -p -p psqlscanslash.c: FLEX_NO_BACKUP=yes psqlscanslash.c: FLEX_FIX_WARNING=yes +tab-complete.c: gen_tabcomplete.pl tab-complete.in.c + $(PERL) $^ --outfile $@ + install: all installdirs $(INSTALL_PROGRAM) psql$(X) '$(DESTDIR)$(bindir)/psql$(X)' $(INSTALL_DATA) $(srcdir)/psqlrc.sample '$(DESTDIR)$(datadir)/psqlrc.sample' @@ -75,7 +78,7 @@ uninstall: clean distclean: rm -f psql$(X) $(OBJS) lex.backup rm -rf tmp_check - rm -f sql_help.h sql_help.c psqlscanslash.c + rm -f sql_help.h sql_help.c psqlscanslash.c tab-complete.c check: $(prove_check) diff --git a/src/bin/psql/gen_tabcomplete.pl b/src/bin/psql/gen_tabcomplete.pl new file mode 100644 index 0000000000..9e4c977cc5 --- /dev/null +++ b/src/bin/psql/gen_tabcomplete.pl @@ -0,0 +1,306 @@ +#---------------------------------------------------------------------- +# +# gen_tabcomplete.pl +# Perl script that transforms tab-complete.in.c to tab-complete.c. +# +# This script converts a C else-if chain into a switch statement. 
+# The else-if statements to be processed must appear at single-tab-stop +# indentation between lines reading +# /* BEGIN GEN_TABCOMPLETE */ +# /* END GEN_TABCOMPLETE */ +# The first clause in each if-condition must be a call of one of the +# functions Matches, HeadMatches, TailMatches, MatchesCS, HeadMatchesCS, +# or TailMatchesCS. Its argument(s) must be string literals or macros +# that expand to string literals or NULL. These clauses are removed from +# the code and replaced by "break; case N:", where N is a unique number +# for each such case label. +# The BEGIN GEN_TABCOMPLETE and END GEN_TABCOMPLETE lines are replaced +# by "switch (pattern_id) {" and "}" wrapping to make a valid switch. +# The remainder of the code is copied verbatim. +# +# An if-condition can also be an OR ("||") of several *Matches function +# calls, or it can be an AND ("&&") of a *Matches call with some other +# condition. For example, +# +# else if (HeadMatches("DROP", "DATABASE") && ends_with(prev_wd, '(')) +# +# will be transformed to +# +# break; +# case N: +# if (ends_with(prev_wd, '(')) +# +# In addition, there must be one input line that reads +# /* Insert tab-completion pattern data here. */ +# This line is replaced in the output file by macro calls, one for each +# replaced match condition. The output for the above example would be +# TCPAT(N, HeadMatch, "DROP", "DATABASE"), +# where N is the replacement case label, "HeadMatch" is the original +# function name minus "es", and the rest are the function arguments. +# The tab-completion data line must appear before BEGIN GEN_TABCOMPLETE. 
+# +# +# Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group +# Portions Copyright (c) 1994, Regents of the University of California +# +# src/bin/psql/gen_tabcomplete.pl +# +#---------------------------------------------------------------------- + +use strict; +use warnings FATAL => 'all'; +use Getopt::Long; + +my $outfile = ''; + +GetOptions('outfile=s' => \$outfile) or die "$0: wrong arguments"; + +open my $infh, '<', $ARGV[0] + or die "$0: could not open input file '$ARGV[0]': $!\n"; + +my $outfh; +if ($outfile) +{ + open $outfh, '>', $outfile + or die "$0: could not open output file '$outfile': $!\n"; +} +else +{ + $outfh = *STDOUT; +} + +# Opening boilerplate for output file. +printf $outfh <) +{ + chomp; + last if m|^\s*/\* Insert tab-completion pattern data here\. \*/\s*$|; + print $outfh "$_\n"; +} + +# $table_data collects what we will substitute for the "pattern data" line. +my $table_data = ''; +# $output_code collects code that we can't emit till after $table_data. +my $output_code = ''; +# last case label assigned +my $last_case_label = 0; + +# We emit #line directives to keep the output file's line numbering in sync +# with the line numbering of the original, to simplify compiler error message +# reading and debugging. +my $next_line_no = $. + 1; +$output_code .= "#line ${next_line_no} \"tab-complete.in.c\"\n"; + +# Scan until we find the BEGIN GEN_TABCOMPLETE line. +# Add the scanned code to $output_code verbatim. +while (<$infh>) +{ + chomp; + last if m|^\s*/\* BEGIN GEN_TABCOMPLETE \*/\s*$|; + $output_code .= $_ . "\n"; +} + +# Emit the switch-starting lines. +$output_code .= "\tswitch (pattern_id)\n"; +$output_code .= "\t{\n"; + +# Keep line numbering in sync. +$next_line_no = $. + 1; +$output_code .= "#line ${next_line_no} \"tab-complete.in.c\"\n"; + +# Scan input file, collecting outer-level else-if conditions +# to pass to process_else_if. +# Lines that aren't else-if conditions go to $output_code verbatim. 
+# State for stitching together an else-if condition that is spread over
+# several input lines.
+# True if we're handling a multiline else-if condition
+my $in_else_if = 0;
+# The accumulated line
+my $else_if_line;
+# Input line number on which the accumulated condition started
+my $else_if_lineno;
+
+while (<$infh>)
+{
+	chomp;
+	last if m|^\s*/\* END GEN_TABCOMPLETE \*/\s*$|;
+
+	if ($in_else_if)
+	{
+		# Continuation of a condition: append it to what we have so far,
+		# collapsing its leading whitespace to a single space.
+		my $rest = $_;
+		$rest =~ s/^\s+//;
+		$else_if_line .= ' ' . $rest;
+	}
+	elsif (m/^\telse if \(/)
+	{
+		# An outer-level (single-tab-indented) else-if starts here.
+		$else_if_line = $_;
+		$else_if_lineno = $.;
+	}
+	else
+	{
+		# Anything else is copied to the output code untouched.
+		$output_code .= $_ . "\n";
+		next;
+	}
+
+	# Double right paren is currently sufficient to detect completion
+	if ($else_if_line =~ m/\)\)$/)
+	{
+		process_else_if($else_if_line, $else_if_lineno, $.);
+		$in_else_if = 0;
+	}
+	else
+	{
+		$in_else_if = 1;
+	}
+}
+
+# Hitting END GEN_TABCOMPLETE (or EOF) in the middle of a condition means
+# the completion heuristic above never fired; say where the condition began.
+die "$0: unfinished else-if starting at input line $else_if_lineno\n"
+  if $in_else_if;
+
+# Emit the switch-ending lines.
+$output_code .= "\tbreak;\n";
+$output_code .= "\tdefault:\n";
+$output_code .= "\t\tAssert(false);\n";
+$output_code .= "\t\tbreak;\n";
+$output_code .= "\t}\n";
+
+# Keep line numbering in sync.
+$next_line_no = $. + 1;
+$output_code .= "#line ${next_line_no} \"tab-complete.in.c\"\n";
+
+# Scan the rest, adding it to $output_code verbatim.
+while (<$infh>)
+{
+	chomp;
+	$output_code .= $_ . "\n";
+}
+
+# Dump out the table data.
+print $outfh $table_data;
+
+# Dump out the modified code, and we're done!
+print $outfh $output_code;
+
+close($infh);
+
+# Output is buffered, so a failed write (a full disk, say) may not be
+# reported until the handle is closed.  Check the result so that such a
+# failure stops the run instead of passing silently.
+close($outfh)
+  or die "$0: could not write output file: $!\n";
+
+# Disassemble an else-if condition.
+# Add the generated table-contents macro(s) to $table_data,
+# and add the replacement case label(s) to $output_code.
+sub process_else_if
+{
+	# $else_if_line   - the complete condition, starting with "\telse if ("
+	#                   (continuation lines already joined with single spaces)
+	# $else_if_lineno - input line number where the condition starts
+	# $end_lineno     - input line number where the condition ends
+	my ($else_if_line, $else_if_lineno, $end_lineno) = @_;
+
+	# One *Matches(...) call.  Capture 1 is "Head", "Tail" or empty,
+	# capture 2 is "CS" or empty, capture 3 is the argument list.
+	# This is built inside the sub on purpose: the sub is called from the
+	# main code before any file-level statement placed after that code
+	# would have been executed.
+	my $match_call =
+	  qr/(Head|Tail|)Matches(CS|)\((("[^"]*"|MatchAnyExcept\("[^"]*"\)|[A-Za-z,\s])+)\)/;
+
+	# Strip the initial "else if (", which we know is there
+	$else_if_line =~ s/^\telse if \(//;
+
+	# Handle OR'd conditions: peel off each leading "XMatches(...) ||".
+	# All patterns of one OR group share a single case label.
+	my $isfirst = 1;
+	while ($else_if_line =~ s/^${match_call}\s*\|\|\s*//)
+	{
+		my ($typ, $cs, $args) = ($1, $2, $3);
+		process_match($typ, $cs, $args, $else_if_lineno, $isfirst);
+		$isfirst = 0;
+	}
+
+	if ($else_if_line =~ s/^${match_call}\s*&&\s*//)
+	{
+		# AND'd condition: the match call goes into the table, and what is
+		# left of the condition becomes a plain "if" under the case label.
+		my ($typ, $cs, $args) = ($1, $2, $3);
+		warn
+		  "could not process OR/ANDed if condition at line $else_if_lineno\n"
+		  if !$isfirst;
+		process_match($typ, $cs, $args, $else_if_lineno, $isfirst);
+		# approximate line positioning of AND'd condition
+		$output_code .= "#line ${end_lineno} \"tab-complete.in.c\"\n";
+		$output_code .= "\tif ($else_if_line\n";
+	}
+	elsif ($else_if_line =~ s/^${match_call}\)$//)
+	{
+		# The match call was the entire (rest of the) condition.
+		my ($typ, $cs, $args) = ($1, $2, $3);
+		process_match($typ, $cs, $args, $else_if_lineno, $isfirst);
+	}
+	else
+	{
+		# Not a form we understand: complain and pass the text through.
+		warn
+		  "could not process if condition at line $else_if_lineno: the rest looks like $else_if_line\n";
+		$output_code .= "\telse if ($else_if_line\n";
+	}
+
+	# Keep line numbering in sync.
+	if ($end_lineno != $else_if_lineno)
+	{
+		my $next_lineno = $end_lineno + 1;
+		$output_code .= "#line ${next_lineno} \"tab-complete.in.c\"\n";
+	}
+}
+
+# Record one *Matches call.
+# $typ is "Head", "Tail" or empty and $cs is "CS" or empty; together with
+# $args (the call's argument text) they form the TCPAT() entry appended to
+# $table_data.  $isfirst is true for the first pattern of an OR group, in
+# which case a new case label is also appended to $output_code.
+# $lineno is accepted but currently unused.
+sub process_match
+{
+	my ($typ, $cs, $args, $lineno, $isfirst) = @_;
+
+	# Assign a new case label only for the first pattern in an OR group.
+	if ($isfirst)
+	{
+		$last_case_label++;
+
+		# We intentionally keep the "break;" and the "case" on one line, so
+		# that they have the same line number as the original "else if"'s
+		# first line.  This avoids misleading displays in, e.g., lcov.
+		# The very first case has no preceding case to break out of.
+		my $break = $last_case_label > 1 ? "break; " : "";
+		$output_code .= "\t${break}case $last_case_label:\n";
+	}
+
+	$table_data .=
+	  "\tTCPAT(${last_case_label}, ${typ}Match${cs}, ${args}),\n";
+}
+
+
+# Print a usage summary and exit by way of die.
+sub usage
+{
+	die <<EOM;
+Usage: gen_tabcomplete.pl [--outfile <path>] input_file
+    --outfile       Output file (default is stdout)
+
+gen_tabcomplete.pl transforms tab-complete.in.c to tab-complete.c.
+EOM
+}
diff --git a/src/bin/psql/meson.build b/src/bin/psql/meson.build
index f3a6392138..b7c026c900 100644
--- a/src/bin/psql/meson.build
+++ b/src/bin/psql/meson.build
@@ -13,7 +13,6 @@ psql_sources = files(
   'prompt.c',
   'startup.c',
   'stringutils.c',
-  'tab-complete.c',
   'variables.c',
 )
 
@@ -24,6 +23,17 @@ psqlscanslash = custom_target('psqlscanslash',
 generated_sources += psqlscanslash
 psql_sources += psqlscanslash
 
+tabcomplete = custom_target('tabcomplete',
+  input: 'tab-complete.in.c',
+  output: 'tab-complete.c',
+  command: [
+    perl, files('gen_tabcomplete.pl'), files('tab-complete.in.c'),
+    '--outfile', '@OUTPUT@', '@INPUT@',
+  ],
+)
+generated_sources += tabcomplete
+psql_sources += tabcomplete
+
 sql_help = custom_target('psql_help',
   output: ['sql_help.c', 'sql_help.h'],
   depfile: 'sql_help.dep',
diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.in.c
similarity index 99%
rename from src/bin/psql/tab-complete.c
rename to src/bin/psql/tab-complete.in.c
index cc3a8b7607..b4efb127dc 100644
--- a/src/bin/psql/tab-complete.c
+++ b/src/bin/psql/tab-complete.in.c
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 2000-2024, PostgreSQL Global Development Group
  *
- * src/bin/psql/tab-complete.c
+ * src/bin/psql/tab-complete.in.c
  *
  * Note: this will compile and work as-is if SWITCH_CONVERSION_APPLIED
  * is not defined. However, the expected usage is that it's first run