Add header matching mode to COPY FROM
COPY FROM supports the HEADER option to silently discard the header line from a CSV or text file. It is possible to load by mistake a file that matches the expected format, for example, if two text columns have been swapped, resulting in garbage in the database. This adds a new option value HEADER MATCH that checks the column names in the header line against the actual column names and errors out if they do not match. Author: Rémi Lapeyre <remi.lapeyre@lenstra.fr> Reviewed-by: Daniel Verite <daniel@manitou-mail.org> Reviewed-by: Peter Eisentraut <peter.eisentraut@enterprisedb.com> Discussion: https://www.postgresql.org/message-id/flat/CAF1-J-0PtCWMeLtswwGV2M70U26n4g33gpe1rcKQqe6wVQDrFA@mail.gmail.com
This commit is contained in:
parent
edcedcc2c7
commit
072132f04e
@ -113,6 +113,21 @@ CREATE FOREIGN TABLE agg_bad (
|
|||||||
) SERVER file_server
|
) SERVER file_server
|
||||||
OPTIONS (format 'csv', filename :'filename', header 'true', delimiter ';', quote '@', escape '"', null '');
|
OPTIONS (format 'csv', filename :'filename', header 'true', delimiter ';', quote '@', escape '"', null '');
|
||||||
ALTER FOREIGN TABLE agg_bad ADD CHECK (a >= 0);
|
ALTER FOREIGN TABLE agg_bad ADD CHECK (a >= 0);
|
||||||
|
-- test header matching
|
||||||
|
\set filename :abs_srcdir '/data/list1.csv'
|
||||||
|
CREATE FOREIGN TABLE header_match ("1" int, foo text) SERVER file_server
|
||||||
|
OPTIONS (format 'csv', filename :'filename', delimiter ',', header 'match');
|
||||||
|
SELECT * FROM header_match;
|
||||||
|
1 | foo
|
||||||
|
---+-----
|
||||||
|
1 | bar
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
CREATE FOREIGN TABLE header_doesnt_match (a int, foo text) SERVER file_server
|
||||||
|
OPTIONS (format 'csv', filename :'filename', delimiter ',', header 'match');
|
||||||
|
SELECT * FROM header_doesnt_match; -- ERROR
|
||||||
|
ERROR: column name mismatch in header line field 1: got "1", expected "a"
|
||||||
|
CONTEXT: COPY header_doesnt_match, line 1: "1,foo"
|
||||||
-- per-column options tests
|
-- per-column options tests
|
||||||
\set filename :abs_srcdir '/data/text.csv'
|
\set filename :abs_srcdir '/data/text.csv'
|
||||||
CREATE FOREIGN TABLE text_csv (
|
CREATE FOREIGN TABLE text_csv (
|
||||||
@ -464,12 +479,14 @@ SET ROLE regress_file_fdw_superuser;
|
|||||||
-- cleanup
|
-- cleanup
|
||||||
RESET ROLE;
|
RESET ROLE;
|
||||||
DROP EXTENSION file_fdw CASCADE;
|
DROP EXTENSION file_fdw CASCADE;
|
||||||
NOTICE: drop cascades to 7 other objects
|
NOTICE: drop cascades to 9 other objects
|
||||||
DETAIL: drop cascades to server file_server
|
DETAIL: drop cascades to server file_server
|
||||||
drop cascades to user mapping for regress_file_fdw_superuser on server file_server
|
drop cascades to user mapping for regress_file_fdw_superuser on server file_server
|
||||||
drop cascades to user mapping for regress_no_priv_user on server file_server
|
drop cascades to user mapping for regress_no_priv_user on server file_server
|
||||||
drop cascades to foreign table agg_text
|
drop cascades to foreign table agg_text
|
||||||
drop cascades to foreign table agg_csv
|
drop cascades to foreign table agg_csv
|
||||||
drop cascades to foreign table agg_bad
|
drop cascades to foreign table agg_bad
|
||||||
|
drop cascades to foreign table header_match
|
||||||
|
drop cascades to foreign table header_doesnt_match
|
||||||
drop cascades to foreign table text_csv
|
drop cascades to foreign table text_csv
|
||||||
DROP ROLE regress_file_fdw_superuser, regress_file_fdw_user, regress_no_priv_user;
|
DROP ROLE regress_file_fdw_superuser, regress_file_fdw_user, regress_no_priv_user;
|
||||||
|
@ -103,6 +103,15 @@ CREATE FOREIGN TABLE agg_bad (
|
|||||||
OPTIONS (format 'csv', filename :'filename', header 'true', delimiter ';', quote '@', escape '"', null '');
|
OPTIONS (format 'csv', filename :'filename', header 'true', delimiter ';', quote '@', escape '"', null '');
|
||||||
ALTER FOREIGN TABLE agg_bad ADD CHECK (a >= 0);
|
ALTER FOREIGN TABLE agg_bad ADD CHECK (a >= 0);
|
||||||
|
|
||||||
|
-- test header matching
|
||||||
|
\set filename :abs_srcdir '/data/list1.csv'
|
||||||
|
CREATE FOREIGN TABLE header_match ("1" int, foo text) SERVER file_server
|
||||||
|
OPTIONS (format 'csv', filename :'filename', delimiter ',', header 'match');
|
||||||
|
SELECT * FROM header_match;
|
||||||
|
CREATE FOREIGN TABLE header_doesnt_match (a int, foo text) SERVER file_server
|
||||||
|
OPTIONS (format 'csv', filename :'filename', delimiter ',', header 'match');
|
||||||
|
SELECT * FROM header_doesnt_match; -- ERROR
|
||||||
|
|
||||||
-- per-column options tests
|
-- per-column options tests
|
||||||
\set filename :abs_srcdir '/data/text.csv'
|
\set filename :abs_srcdir '/data/text.csv'
|
||||||
CREATE FOREIGN TABLE text_csv (
|
CREATE FOREIGN TABLE text_csv (
|
||||||
|
@ -36,7 +36,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
|
|||||||
FREEZE [ <replaceable class="parameter">boolean</replaceable> ]
|
FREEZE [ <replaceable class="parameter">boolean</replaceable> ]
|
||||||
DELIMITER '<replaceable class="parameter">delimiter_character</replaceable>'
|
DELIMITER '<replaceable class="parameter">delimiter_character</replaceable>'
|
||||||
NULL '<replaceable class="parameter">null_string</replaceable>'
|
NULL '<replaceable class="parameter">null_string</replaceable>'
|
||||||
HEADER [ <replaceable class="parameter">boolean</replaceable> ]
|
HEADER [ <replaceable class="parameter">boolean</replaceable> | <literal>match</literal> ]
|
||||||
QUOTE '<replaceable class="parameter">quote_character</replaceable>'
|
QUOTE '<replaceable class="parameter">quote_character</replaceable>'
|
||||||
ESCAPE '<replaceable class="parameter">escape_character</replaceable>'
|
ESCAPE '<replaceable class="parameter">escape_character</replaceable>'
|
||||||
FORCE_QUOTE { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * }
|
FORCE_QUOTE { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * }
|
||||||
@ -276,7 +276,11 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
|
|||||||
<para>
|
<para>
|
||||||
Specifies that the file contains a header line with the names of each
|
Specifies that the file contains a header line with the names of each
|
||||||
column in the file. On output, the first line contains the column
|
column in the file. On output, the first line contains the column
|
||||||
names from the table, and on input, the first line is ignored.
|
names from the table. On input, the first line is discarded when this
|
||||||
|
option is set to <literal>true</literal> (or equivalent Boolean value).
|
||||||
|
If this option is set to <literal>match</literal>, the number and names
|
||||||
|
of the columns in the header line must match the actual column names of
|
||||||
|
the table, otherwise an error is raised.
|
||||||
This option is not allowed when using <literal>binary</literal> format.
|
This option is not allowed when using <literal>binary</literal> format.
|
||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
@ -313,6 +313,64 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt,
|
|||||||
table_close(rel, NoLock);
|
table_close(rel, NoLock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Extract a CopyHeaderChoice value from a DefElem. This is like
|
||||||
|
* defGetBoolean() but also accepts the special value "match".
|
||||||
|
*/
|
||||||
|
static CopyHeaderChoice
|
||||||
|
defGetCopyHeaderChoice(DefElem *def)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* If no parameter given, assume "true" is meant.
|
||||||
|
*/
|
||||||
|
if (def->arg == NULL)
|
||||||
|
return COPY_HEADER_TRUE;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Allow 0, 1, "true", "false", "on", "off", or "match".
|
||||||
|
*/
|
||||||
|
switch (nodeTag(def->arg))
|
||||||
|
{
|
||||||
|
case T_Integer:
|
||||||
|
switch (intVal(def->arg))
|
||||||
|
{
|
||||||
|
case 0:
|
||||||
|
return COPY_HEADER_FALSE;
|
||||||
|
case 1:
|
||||||
|
return COPY_HEADER_TRUE;
|
||||||
|
default:
|
||||||
|
/* otherwise, error out below */
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
{
|
||||||
|
char *sval = defGetString(def);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The set of strings accepted here should match up with the
|
||||||
|
* grammar's opt_boolean_or_string production.
|
||||||
|
*/
|
||||||
|
if (pg_strcasecmp(sval, "true") == 0)
|
||||||
|
return COPY_HEADER_TRUE;
|
||||||
|
if (pg_strcasecmp(sval, "false") == 0)
|
||||||
|
return COPY_HEADER_FALSE;
|
||||||
|
if (pg_strcasecmp(sval, "on") == 0)
|
||||||
|
return COPY_HEADER_TRUE;
|
||||||
|
if (pg_strcasecmp(sval, "off") == 0)
|
||||||
|
return COPY_HEADER_FALSE;
|
||||||
|
if (pg_strcasecmp(sval, "match") == 0)
|
||||||
|
return COPY_HEADER_MATCH;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||||
|
errmsg("%s requires a Boolean value or \"match\"",
|
||||||
|
def->defname)));
|
||||||
|
return COPY_HEADER_FALSE; /* keep compiler quiet */
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Process the statement option list for COPY.
|
* Process the statement option list for COPY.
|
||||||
*
|
*
|
||||||
@ -394,7 +452,7 @@ ProcessCopyOptions(ParseState *pstate,
|
|||||||
if (header_specified)
|
if (header_specified)
|
||||||
errorConflictingDefElem(defel, pstate);
|
errorConflictingDefElem(defel, pstate);
|
||||||
header_specified = true;
|
header_specified = true;
|
||||||
opts_out->header_line = defGetBoolean(defel);
|
opts_out->header_line = defGetCopyHeaderChoice(defel);
|
||||||
}
|
}
|
||||||
else if (strcmp(defel->defname, "quote") == 0)
|
else if (strcmp(defel->defname, "quote") == 0)
|
||||||
{
|
{
|
||||||
|
@ -72,6 +72,7 @@
|
|||||||
#include "miscadmin.h"
|
#include "miscadmin.h"
|
||||||
#include "pgstat.h"
|
#include "pgstat.h"
|
||||||
#include "port/pg_bswap.h"
|
#include "port/pg_bswap.h"
|
||||||
|
#include "utils/builtins.h"
|
||||||
#include "utils/memutils.h"
|
#include "utils/memutils.h"
|
||||||
#include "utils/rel.h"
|
#include "utils/rel.h"
|
||||||
|
|
||||||
@ -758,12 +759,58 @@ NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields)
|
|||||||
/* only available for text or csv input */
|
/* only available for text or csv input */
|
||||||
Assert(!cstate->opts.binary);
|
Assert(!cstate->opts.binary);
|
||||||
|
|
||||||
/* on input just throw the header line away */
|
/* on input check that the header line is correct if needed */
|
||||||
if (cstate->cur_lineno == 0 && cstate->opts.header_line)
|
if (cstate->cur_lineno == 0 && cstate->opts.header_line)
|
||||||
{
|
{
|
||||||
|
ListCell *cur;
|
||||||
|
TupleDesc tupDesc;
|
||||||
|
|
||||||
|
tupDesc = RelationGetDescr(cstate->rel);
|
||||||
|
|
||||||
cstate->cur_lineno++;
|
cstate->cur_lineno++;
|
||||||
if (CopyReadLine(cstate))
|
done = CopyReadLine(cstate);
|
||||||
return false; /* done */
|
|
||||||
|
if (cstate->opts.header_line == COPY_HEADER_MATCH)
|
||||||
|
{
|
||||||
|
int fldnum;
|
||||||
|
|
||||||
|
if (cstate->opts.csv_mode)
|
||||||
|
fldct = CopyReadAttributesCSV(cstate);
|
||||||
|
else
|
||||||
|
fldct = CopyReadAttributesText(cstate);
|
||||||
|
|
||||||
|
if (fldct != list_length(cstate->attnumlist))
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
|
||||||
|
errmsg("wrong number of fields in header line: field count is %d, expected %d",
|
||||||
|
fldct, list_length(cstate->attnumlist))));
|
||||||
|
|
||||||
|
fldnum = 0;
|
||||||
|
foreach(cur, cstate->attnumlist)
|
||||||
|
{
|
||||||
|
int attnum = lfirst_int(cur);
|
||||||
|
char *colName = cstate->raw_fields[attnum - 1];
|
||||||
|
Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);
|
||||||
|
|
||||||
|
fldnum++;
|
||||||
|
|
||||||
|
if (colName == NULL)
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
|
||||||
|
errmsg("column name mismatch in header line field %d: got null value (\"%s\"), expected \"%s\"",
|
||||||
|
fldnum, cstate->opts.null_print, NameStr(attr->attname))));
|
||||||
|
|
||||||
|
if (namestrcmp(&attr->attname, colName) != 0) {
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
|
||||||
|
errmsg("column name mismatch in header line field %d: got \"%s\", expected \"%s\"",
|
||||||
|
fldnum, colName, NameStr(attr->attname))));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (done)
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
cstate->cur_lineno++;
|
cstate->cur_lineno++;
|
||||||
|
@ -19,6 +19,17 @@
|
|||||||
#include "parser/parse_node.h"
|
#include "parser/parse_node.h"
|
||||||
#include "tcop/dest.h"
|
#include "tcop/dest.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Represents whether a header line should be present, and whether it must
|
||||||
|
* match the actual names (which implies "true").
|
||||||
|
*/
|
||||||
|
typedef enum CopyHeaderChoice
|
||||||
|
{
|
||||||
|
COPY_HEADER_FALSE = 0,
|
||||||
|
COPY_HEADER_TRUE,
|
||||||
|
COPY_HEADER_MATCH,
|
||||||
|
} CopyHeaderChoice;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* A struct to hold COPY options, in a parsed form. All of these are related
|
* A struct to hold COPY options, in a parsed form. All of these are related
|
||||||
* to formatting, except for 'freeze', which doesn't really belong here, but
|
* to formatting, except for 'freeze', which doesn't really belong here, but
|
||||||
@ -32,7 +43,7 @@ typedef struct CopyFormatOptions
|
|||||||
bool binary; /* binary format? */
|
bool binary; /* binary format? */
|
||||||
bool freeze; /* freeze rows on loading? */
|
bool freeze; /* freeze rows on loading? */
|
||||||
bool csv_mode; /* Comma Separated Value format? */
|
bool csv_mode; /* Comma Separated Value format? */
|
||||||
bool header_line; /* header line? */
|
CopyHeaderChoice header_line; /* header line? */
|
||||||
char *null_print; /* NULL marker string (server encoding!) */
|
char *null_print; /* NULL marker string (server encoding!) */
|
||||||
int null_print_len; /* length of same */
|
int null_print_len; /* length of same */
|
||||||
char *null_print_client; /* same converted to file encoding */
|
char *null_print_client; /* same converted to file encoding */
|
||||||
|
@ -176,3 +176,26 @@ INFO: progress: {"type": "FILE", "command": "COPY FROM", "relname": "tab_progre
|
|||||||
drop trigger check_after_tab_progress_reporting on tab_progress_reporting;
|
drop trigger check_after_tab_progress_reporting on tab_progress_reporting;
|
||||||
drop function notice_after_tab_progress_reporting();
|
drop function notice_after_tab_progress_reporting();
|
||||||
drop table tab_progress_reporting;
|
drop table tab_progress_reporting;
|
||||||
|
-- Test header matching feature
|
||||||
|
create table header_copytest (
|
||||||
|
a int,
|
||||||
|
b int,
|
||||||
|
c text
|
||||||
|
);
|
||||||
|
copy header_copytest from stdin with (header wrong_choice);
|
||||||
|
ERROR: header requires a Boolean value or "match"
|
||||||
|
copy header_copytest from stdin with (header match);
|
||||||
|
copy header_copytest from stdin with (header match);
|
||||||
|
ERROR: column name mismatch in header line field 3: got null value ("\N"), expected "c"
|
||||||
|
CONTEXT: COPY header_copytest, line 1: "a b \N"
|
||||||
|
copy header_copytest from stdin with (header match);
|
||||||
|
ERROR: wrong number of fields in header line: field count is 2, expected 3
|
||||||
|
CONTEXT: COPY header_copytest, line 1: "a b"
|
||||||
|
copy header_copytest from stdin with (header match);
|
||||||
|
ERROR: wrong number of fields in header line: field count is 4, expected 3
|
||||||
|
CONTEXT: COPY header_copytest, line 1: "a b c d"
|
||||||
|
copy header_copytest from stdin with (header match);
|
||||||
|
ERROR: column name mismatch in header line field 3: got "d", expected "c"
|
||||||
|
CONTEXT: COPY header_copytest, line 1: "a b d"
|
||||||
|
copy header_copytest from stdin with (header match, format csv);
|
||||||
|
drop table header_copytest;
|
||||||
|
@ -197,3 +197,36 @@ copy tab_progress_reporting from :'filename'
|
|||||||
drop trigger check_after_tab_progress_reporting on tab_progress_reporting;
|
drop trigger check_after_tab_progress_reporting on tab_progress_reporting;
|
||||||
drop function notice_after_tab_progress_reporting();
|
drop function notice_after_tab_progress_reporting();
|
||||||
drop table tab_progress_reporting;
|
drop table tab_progress_reporting;
|
||||||
|
|
||||||
|
-- Test header matching feature
|
||||||
|
create table header_copytest (
|
||||||
|
a int,
|
||||||
|
b int,
|
||||||
|
c text
|
||||||
|
);
|
||||||
|
copy header_copytest from stdin with (header wrong_choice);
|
||||||
|
copy header_copytest from stdin with (header match);
|
||||||
|
a b c
|
||||||
|
1 2 foo
|
||||||
|
\.
|
||||||
|
copy header_copytest from stdin with (header match);
|
||||||
|
a b \N
|
||||||
|
1 2 foo
|
||||||
|
\.
|
||||||
|
copy header_copytest from stdin with (header match);
|
||||||
|
a b
|
||||||
|
1 2
|
||||||
|
\.
|
||||||
|
copy header_copytest from stdin with (header match);
|
||||||
|
a b c d
|
||||||
|
1 2 foo bar
|
||||||
|
\.
|
||||||
|
copy header_copytest from stdin with (header match);
|
||||||
|
a b d
|
||||||
|
1 2 foo
|
||||||
|
\.
|
||||||
|
copy header_copytest from stdin with (header match, format csv);
|
||||||
|
a,b,c
|
||||||
|
1,2,foo
|
||||||
|
\.
|
||||||
|
drop table header_copytest;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user