From f5a227895e178bf528b18f82bbe554435fb3e64f Mon Sep 17 00:00:00 2001 From: Masahiko Sawada Date: Mon, 1 Apr 2024 15:25:25 +0900 Subject: [PATCH] Add new COPY option LOG_VERBOSITY. This commit adds a new COPY option LOG_VERBOSITY, which controls the amount of messages emitted during processing. Valid values are 'default' and 'verbose'. This is currently used in COPY FROM when ON_ERROR option is set to ignore. If 'verbose' is specified, a NOTICE message is emitted for each discarded row, providing additional information such as line number, column name, and the malformed value. This helps users to identify problematic rows that failed to load. Author: Bharath Rupireddy Reviewed-by: Michael Paquier, Atsushi Torikoshi, Masahiko Sawada Discussion: https://www.postgresql.org/message-id/CALj2ACUk700cYhx1ATRQyRw-fBM%2BaRo6auRAitKGff7XNmYfqQ%40mail.gmail.com --- doc/src/sgml/ref/copy.sgml | 25 +++++++++++++++-- src/backend/commands/copy.c | 32 ++++++++++++++++++++++ src/backend/commands/copyfrom.c | 10 +++---- src/backend/commands/copyfromparse.c | 35 ++++++++++++++++++++++++ src/backend/parser/gram.y | 1 + src/bin/psql/tab-complete.c | 6 +++- src/include/commands/copy.h | 11 ++++++++ src/test/regress/expected/copy2.out | 41 +++++++++++++++++++++++++++- src/test/regress/sql/copy2.sql | 24 +++++++++++++++- src/tools/pgindent/typedefs.list | 1 + 10 files changed, 175 insertions(+), 11 deletions(-) diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml index 6c83e30ed0..33ce7c4ea6 100644 --- a/doc/src/sgml/ref/copy.sgml +++ b/doc/src/sgml/ref/copy.sgml @@ -45,6 +45,7 @@ COPY { table_name [ ( column_name [, ...] ) | * } ON_ERROR 'error_action' ENCODING 'encoding_name' + LOG_VERBOSITY mode @@ -400,8 +401,12 @@ COPY { table_name [ ( FORMAT is text or csv. - A NOTICE message containing the ignored row count is emitted at the end - of the COPY FROM if at least one row was discarded. 
+ A NOTICE message containing the ignored row count is + emitted at the end of the COPY FROM if at least one + row was discarded. When the LOG_VERBOSITY option is set to + verbose, a NOTICE message + containing the line of the input file and the column name whose input + conversion has failed is emitted for each discarded row. @@ -418,6 +423,22 @@ COPY { table_name [ ( + + LOG_VERBOSITY + + + Specify the amount of messages emitted by a COPY + command: default or verbose. If + verbose is specified, additional messages are emitted + during processing. + + + This is currently used in the COPY FROM command when + the ON_ERROR option is set to ignore. + + + + WHERE diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 28cf8b040a..f75e1d700d 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -422,6 +422,30 @@ defGetCopyOnErrorChoice(DefElem *def, ParseState *pstate, bool is_from) return COPY_ON_ERROR_STOP; /* keep compiler quiet */ } +/* + * Extract a CopyLogVerbosityChoice value from a DefElem. + */ +static CopyLogVerbosityChoice +defGetCopyLogVerbosityChoice(DefElem *def, ParseState *pstate) +{ + char *sval; + + /* + * Allow "default" or "verbose" values. + */ + sval = defGetString(def); + if (pg_strcasecmp(sval, "default") == 0) + return COPY_LOG_VERBOSITY_DEFAULT; + if (pg_strcasecmp(sval, "verbose") == 0) + return COPY_LOG_VERBOSITY_VERBOSE; + + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("COPY LOG_VERBOSITY \"%s\" not recognized", sval), + parser_errposition(pstate, def->location))); + return COPY_LOG_VERBOSITY_DEFAULT; /* keep compiler quiet */ +} + /* * Process the statement option list for COPY. 
* @@ -448,6 +472,7 @@ ProcessCopyOptions(ParseState *pstate, bool freeze_specified = false; bool header_specified = false; bool on_error_specified = false; + bool log_verbosity_specified = false; ListCell *option; /* Support external use for option sanity checking */ @@ -607,6 +632,13 @@ ProcessCopyOptions(ParseState *pstate, on_error_specified = true; opts_out->on_error = defGetCopyOnErrorChoice(defel, pstate, is_from); } + else if (strcmp(defel->defname, "log_verbosity") == 0) + { + if (log_verbosity_specified) + errorConflictingDefElem(defel, pstate); + log_verbosity_specified = true; + opts_out->log_verbosity = defGetCopyLogVerbosityChoice(defel, pstate); + } else ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c index b673636977..9d2900041e 100644 --- a/src/backend/commands/copyfrom.c +++ b/src/backend/commands/copyfrom.c @@ -101,8 +101,6 @@ typedef struct CopyMultiInsertInfo /* non-export function prototypes */ -static char *limit_printout_length(const char *str); - static void ClosePipeFromProgram(CopyFromState cstate); /* @@ -141,7 +139,7 @@ CopyFromErrorCallback(void *arg) /* error is relevant to a particular column */ char *attval; - attval = limit_printout_length(cstate->cur_attval); + attval = CopyLimitPrintoutLength(cstate->cur_attval); errcontext("COPY %s, line %llu, column %s: \"%s\"", cstate->cur_relname, (unsigned long long) cstate->cur_lineno, @@ -168,7 +166,7 @@ CopyFromErrorCallback(void *arg) { char *lineval; - lineval = limit_printout_length(cstate->line_buf.data); + lineval = CopyLimitPrintoutLength(cstate->line_buf.data); errcontext("COPY %s, line %llu: \"%s\"", cstate->cur_relname, (unsigned long long) cstate->cur_lineno, lineval); @@ -189,8 +187,8 @@ CopyFromErrorCallback(void *arg) * * Returns a pstrdup'd copy of the input. 
*/ -static char * -limit_printout_length(const char *str) +char * +CopyLimitPrintoutLength(const char *str) { #define MAX_COPY_DATA_DISPLAY 100 diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c index 5682d5d054..7ddd27f5c6 100644 --- a/src/backend/commands/copyfromparse.c +++ b/src/backend/commands/copyfromparse.c @@ -967,7 +967,42 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext, (Node *) cstate->escontext, &values[m])) { + Assert(cstate->opts.on_error != COPY_ON_ERROR_STOP); + cstate->num_errors++; + + if (cstate->opts.log_verbosity == COPY_LOG_VERBOSITY_VERBOSE) + { + /* + * Since we emit line number and column info in the below + * notice message, we suppress error context information + * other than the relation name. + */ + Assert(!cstate->relname_only); + cstate->relname_only = true; + + if (cstate->cur_attval) + { + char *attval; + + attval = CopyLimitPrintoutLength(cstate->cur_attval); + ereport(NOTICE, + errmsg("skipping row due to data type incompatibility at line %llu for column %s: \"%s\"", + (unsigned long long) cstate->cur_lineno, + cstate->cur_attname, + attval)); + pfree(attval); + } + else + ereport(NOTICE, + errmsg("skipping row due to data type incompatibility at line %llu for column %s: null input", + (unsigned long long) cstate->cur_lineno, + cstate->cur_attname)); + + /* reset relname_only */ + cstate->relname_only = false; + } + return true; } diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 682748eb4b..f1af6147c3 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -3530,6 +3530,7 @@ copy_generic_opt_arg: opt_boolean_or_string { $$ = (Node *) makeString($1); } | NumericOnly { $$ = (Node *) $1; } | '*' { $$ = (Node *) makeNode(A_Star); } + | DEFAULT { $$ = (Node *) makeString("default"); } | '(' copy_generic_opt_arg_list ')' { $$ = (Node *) $2; } | /* EMPTY */ { $$ = NULL; } ; diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c 
index fc6865fc70..82eb3955ab 100644 --- a/src/bin/psql/tab-complete.c +++ b/src/bin/psql/tab-complete.c @@ -2904,7 +2904,7 @@ psql_completion(const char *text, int start, int end) COMPLETE_WITH("FORMAT", "FREEZE", "DELIMITER", "NULL", "HEADER", "QUOTE", "ESCAPE", "FORCE_QUOTE", "FORCE_NOT_NULL", "FORCE_NULL", "ENCODING", "DEFAULT", - "ON_ERROR"); + "ON_ERROR", "LOG_VERBOSITY"); /* Complete COPY FROM|TO filename WITH (FORMAT */ else if (Matches("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "WITH", "(", "FORMAT")) @@ -2914,6 +2914,10 @@ psql_completion(const char *text, int start, int end) else if (Matches("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "WITH", "(", "ON_ERROR")) COMPLETE_WITH("stop", "ignore"); + /* Complete COPY FROM|TO filename WITH (LOG_VERBOSITY */ + else if (Matches("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "WITH", "(", "LOG_VERBOSITY")) + COMPLETE_WITH("default", "verbose"); + /* Complete COPY FROM WITH () */ else if (Matches("COPY|\\copy", MatchAny, "FROM", MatchAny, "WITH", MatchAny)) COMPLETE_WITH("WHERE"); diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h index b3da3cb0be..141fd48dc1 100644 --- a/src/include/commands/copy.h +++ b/src/include/commands/copy.h @@ -40,6 +40,15 @@ typedef enum CopyOnErrorChoice COPY_ON_ERROR_IGNORE, /* ignore errors */ } CopyOnErrorChoice; +/* + * Represents the verbosity of messages logged by the COPY command. + */ +typedef enum CopyLogVerbosityChoice +{ + COPY_LOG_VERBOSITY_DEFAULT = 0, /* logs no additional messages, default */ + COPY_LOG_VERBOSITY_VERBOSE, /* logs additional messages */ +} CopyLogVerbosityChoice; + /* * A struct to hold COPY options, in a parsed form. All of these are related * to formatting, except for 'freeze', which doesn't really belong here, but @@ -73,6 +82,7 @@ typedef struct CopyFormatOptions bool *force_null_flags; /* per-column CSV FN flags */ bool convert_selectively; /* do selective binary conversion? 
*/ CopyOnErrorChoice on_error; /* what to do when error happened */ + CopyLogVerbosityChoice log_verbosity; /* verbosity of logged messages */ List *convert_select; /* list of column names (can be NIL) */ } CopyFormatOptions; @@ -97,6 +107,7 @@ extern bool NextCopyFrom(CopyFromState cstate, ExprContext *econtext, extern bool NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields); extern void CopyFromErrorCallback(void *arg); +extern char *CopyLimitPrintoutLength(const char *str); extern uint64 CopyFrom(CopyFromState cstate); diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out index f98c2d1c4e..931542f268 100644 --- a/src/test/regress/expected/copy2.out +++ b/src/test/regress/expected/copy2.out @@ -81,6 +81,10 @@ COPY x from stdin (on_error ignore, on_error ignore); ERROR: conflicting or redundant options LINE 1: COPY x from stdin (on_error ignore, on_error ignore); ^ +COPY x from stdin (log_verbosity default, log_verbosity verbose); +ERROR: conflicting or redundant options +LINE 1: COPY x from stdin (log_verbosity default, log_verbosity verb... 
+ ^ -- incorrect options COPY x to stdin (format BINARY, delimiter ','); ERROR: cannot specify DELIMITER in BINARY mode @@ -108,6 +112,10 @@ COPY x to stdin (format BINARY, on_error unsupported); ERROR: COPY ON_ERROR cannot be used with COPY TO LINE 1: COPY x to stdin (format BINARY, on_error unsupported); ^ +COPY x to stdout (log_verbosity unsupported); +ERROR: COPY LOG_VERBOSITY "unsupported" not recognized +LINE 1: COPY x to stdout (log_verbosity unsupported); + ^ -- too many columns in column list: should fail COPY x (a, b, c, d, e, d, c) from stdin; ERROR: column "d" specified more than once @@ -729,8 +737,31 @@ CREATE TABLE check_ign_err (n int, m int[], k int); COPY check_ign_err FROM STDIN WITH (on_error stop); ERROR: invalid input syntax for type integer: "a" CONTEXT: COPY check_ign_err, line 2, column n: "a" -COPY check_ign_err FROM STDIN WITH (on_error ignore); +-- want context for notices +\set SHOW_CONTEXT always +COPY check_ign_err FROM STDIN WITH (on_error ignore, log_verbosity verbose); +NOTICE: skipping row due to data type incompatibility at line 2 for column n: "a" +CONTEXT: COPY check_ign_err +NOTICE: skipping row due to data type incompatibility at line 3 for column k: "3333333333" +CONTEXT: COPY check_ign_err +NOTICE: skipping row due to data type incompatibility at line 4 for column m: "{a, 4}" +CONTEXT: COPY check_ign_err +NOTICE: skipping row due to data type incompatibility at line 5 for column n: "" +CONTEXT: COPY check_ign_err +NOTICE: skipping row due to data type incompatibility at line 7 for column m: "a" +CONTEXT: COPY check_ign_err +NOTICE: skipping row due to data type incompatibility at line 8 for column k: "a" +CONTEXT: COPY check_ign_err NOTICE: 6 rows were skipped due to data type incompatibility +-- tests for on_error option with log_verbosity and null constraint via domain +CREATE DOMAIN dcheck_ign_err2 varchar(15) NOT NULL; +CREATE TABLE check_ign_err2 (n int, m int[], k int, l dcheck_ign_err2); +COPY check_ign_err2 FROM 
STDIN WITH (on_error ignore, log_verbosity verbose); +NOTICE: skipping row due to data type incompatibility at line 2 for column l: null input +CONTEXT: COPY check_ign_err2 +NOTICE: 1 row was skipped due to data type incompatibility +-- reset context choice +\set SHOW_CONTEXT errors SELECT * FROM check_ign_err; n | m | k ---+-----+--- @@ -739,6 +770,12 @@ SELECT * FROM check_ign_err; 8 | {8} | 8 (3 rows) +SELECT * FROM check_ign_err2; + n | m | k | l +---+-----+---+------- + 1 | {1} | 1 | 'foo' +(1 row) + -- test datatype error that can't be handled as soft: should fail CREATE TABLE hard_err(foo widget); COPY hard_err FROM STDIN WITH (on_error ignore); @@ -767,6 +804,8 @@ DROP VIEW instead_of_insert_tbl_view; DROP VIEW instead_of_insert_tbl_view_2; DROP FUNCTION fun_instead_of_insert_tbl(); DROP TABLE check_ign_err; +DROP TABLE check_ign_err2; +DROP DOMAIN dcheck_ign_err2; DROP TABLE hard_err; -- -- COPY FROM ... DEFAULT diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql index afaaa37e52..8b14962194 100644 --- a/src/test/regress/sql/copy2.sql +++ b/src/test/regress/sql/copy2.sql @@ -67,6 +67,7 @@ COPY x from stdin (force_null (a), force_null (b)); COPY x from stdin (convert_selectively (a), convert_selectively (b)); COPY x from stdin (encoding 'sql_ascii', encoding 'sql_ascii'); COPY x from stdin (on_error ignore, on_error ignore); +COPY x from stdin (log_verbosity default, log_verbosity verbose); -- incorrect options COPY x to stdin (format BINARY, delimiter ','); @@ -80,6 +81,7 @@ COPY x to stdin (format CSV, force_not_null(a)); COPY x to stdout (format TEXT, force_null(a)); COPY x to stdin (format CSV, force_null(a)); COPY x to stdin (format BINARY, on_error unsupported); +COPY x to stdout (log_verbosity unsupported); -- too many columns in column list: should fail COPY x (a, b, c, d, e, d, c) from stdin; @@ -508,7 +510,11 @@ a {2} 2 5 {5} 5 \. 
-COPY check_ign_err FROM STDIN WITH (on_error ignore); + +-- want context for notices +\set SHOW_CONTEXT always + +COPY check_ign_err FROM STDIN WITH (on_error ignore, log_verbosity verbose); 1 {1} 1 a {2} 2 3 {3} 3333333333 @@ -519,8 +525,22 @@ a {2} 2 7 {7} a 8 {8} 8 \. + +-- tests for on_error option with log_verbosity and null constraint via domain +CREATE DOMAIN dcheck_ign_err2 varchar(15) NOT NULL; +CREATE TABLE check_ign_err2 (n int, m int[], k int, l dcheck_ign_err2); +COPY check_ign_err2 FROM STDIN WITH (on_error ignore, log_verbosity verbose); +1 {1} 1 'foo' +2 {2} 2 \N +\. + +-- reset context choice +\set SHOW_CONTEXT errors + SELECT * FROM check_ign_err; +SELECT * FROM check_ign_err2; + -- test datatype error that can't be handled as soft: should fail CREATE TABLE hard_err(foo widget); COPY hard_err FROM STDIN WITH (on_error ignore); @@ -552,6 +572,8 @@ DROP VIEW instead_of_insert_tbl_view; DROP VIEW instead_of_insert_tbl_view_2; DROP FUNCTION fun_instead_of_insert_tbl(); DROP TABLE check_ign_err; +DROP TABLE check_ign_err2; +DROP DOMAIN dcheck_ign_err2; DROP TABLE hard_err; -- diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index a8d7bed411..9add48f992 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -480,6 +480,7 @@ CopyFromState CopyFromStateData CopyHeaderChoice CopyInsertMethod +CopyLogVerbosityChoice CopyMultiInsertBuffer CopyMultiInsertInfo CopyOnErrorChoice