From 729439607ad210dbb446e31754e8627d7e3f7dda Mon Sep 17 00:00:00 2001
From: Masahiko Sawada <msawada@postgresql.org>
Date: Thu, 25 Jan 2024 10:57:41 +0900
Subject: [PATCH] Add progress reporting of skipped tuples during COPY FROM.

9e2d870119 enabled the COPY command to skip malformed data, however
there was no visibility into how many tuples were actually skipped
during the COPY FROM.

This commit adds a new "tuples_skipped" column to
pg_stat_progress_copy view to report the number of tuples that were
skipped because they contain malformed data.

Bump catalog version.

Author: Atsushi Torikoshi
Reviewed-by: Masahiko Sawada
Discussion: https://postgr.es/m/d12fd8c99adcae2744212cb23feff6ed%40oss.nttdata.com
---
 doc/src/sgml/monitoring.sgml         | 12 ++++++++++++
 src/backend/catalog/system_views.sql |  3 ++-
 src/backend/commands/copyfrom.c      |  5 +++++
 src/include/catalog/catversion.h     |  2 +-
 src/include/commands/progress.h      |  1 +
 src/test/regress/expected/rules.out  |  3 ++-
 6 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 6e74138a69..d9b8b37585 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -5780,6 +5780,18 @@ FROM pg_stat_get_backend_idset() AS backendid;
        <command>WHERE</command> clause of the <command>COPY</command> command.
       </para></entry>
      </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>tuples_skipped</structfield> <type>bigint</type>
+      </para>
+      <para>
+       Number of tuples skipped because they contain malformed data.
+       This counter only advances when a value other than
+       <literal>stop</literal> is specified to the <literal>ON_ERROR</literal>
+       option.
+      </para></entry>
+     </row>
     </tbody>
    </tgroup>
   </table>
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index e43e36f5ac..6288270e2b 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -1318,7 +1318,8 @@ CREATE VIEW pg_stat_progress_copy AS
         S.param1 AS bytes_processed,
         S.param2 AS bytes_total,
         S.param3 AS tuples_processed,
-        S.param4 AS tuples_excluded
+        S.param4 AS tuples_excluded,
+        S.param7 AS tuples_skipped
     FROM pg_stat_get_progress_info('COPY') AS S
         LEFT JOIN pg_database D ON S.datid = D.oid;
 
diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index 173a736ad5..1fe70b9133 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -650,6 +650,7 @@ CopyFrom(CopyFromState cstate)
 	CopyMultiInsertInfo multiInsertInfo = {0};	/* pacify compiler */
 	int64		processed = 0;
 	int64		excluded = 0;
+	int64		skipped = 0;
 	bool		has_before_insert_row_trig;
 	bool		has_instead_insert_row_trig;
 	bool		leafpart_use_multi_insert = false;
@@ -1012,6 +1013,10 @@ CopyFrom(CopyFromState cstate)
 				 */
 				cstate->escontext->error_occurred = false;
 
+			/* Report that this tuple was skipped by the ON_ERROR clause */
+			pgstat_progress_update_param(PROGRESS_COPY_TUPLES_SKIPPED,
+										 ++skipped);
+
 			continue;
 		}
 
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 43d83672a6..23944db9e6 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -57,6 +57,6 @@
  */
 
 /*							yyyymmddN */
-#define CATALOG_VERSION_NO	202401241
+#define CATALOG_VERSION_NO	202401251
 
 #endif
diff --git a/src/include/commands/progress.h b/src/include/commands/progress.h
index a458c8c50a..73afa77a9c 100644
--- a/src/include/commands/progress.h
+++ b/src/include/commands/progress.h
@@ -142,6 +142,7 @@
 #define PROGRESS_COPY_TUPLES_EXCLUDED 3
 #define PROGRESS_COPY_COMMAND 4
 #define PROGRESS_COPY_TYPE 5
+#define PROGRESS_COPY_TUPLES_SKIPPED 6
 
 /* Commands of COPY (as advertised via PROGRESS_COPY_COMMAND) */
 #define PROGRESS_COPY_COMMAND_FROM 1
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index 55f2e95352..5e846b01e6 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1988,7 +1988,8 @@ pg_stat_progress_copy| SELECT s.pid,
     s.param1 AS bytes_processed,
     s.param2 AS bytes_total,
     s.param3 AS tuples_processed,
-    s.param4 AS tuples_excluded
+    s.param4 AS tuples_excluded,
+    s.param7 AS tuples_skipped
    FROM (pg_stat_get_progress_info('COPY'::text) s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20)
      LEFT JOIN pg_database d ON ((s.datid = d.oid)));
 pg_stat_progress_create_index| SELECT s.pid,