Provide a FORCE NULL option to COPY in CSV mode.

This forces an input field containing the quoted null string to be
returned as a NULL. Without this option, only unquoted null strings
behave this way. This helps where some CSV producers insist on quoting
every field, whether or not it is needed. The option takes a list of
fields, and only applies to those columns. There is an equivalent
column-level option added to file_fdw.

Ian Barwick, with some tweaking by Andrew Dunstan, reviewed by Payal
Singh.
This commit is contained in:
Andrew Dunstan 2014-03-04 17:31:59 -05:00
parent e2a0fc5363
commit 3b5e03dca2
10 changed files with 308 additions and 35 deletions

View File

@ -1,4 +1,5 @@
AAA,aaa
XYZ,xyz
NULL,NULL
ABC,abc
AAA,aaa,123,""
XYZ,xyz,"",321
NULL,NULL,NULL,NULL
NULL,NULL,"NULL",NULL
ABC,abc,"",""

1 AAA aaa 123
2 XYZ xyz 321
3 NULL NULL NULL NULL
4 ABC NULL abc NULL NULL NULL
5 ABC abc

View File

@ -48,9 +48,9 @@ struct FileFdwOption
/*
* Valid options for file_fdw.
* These options are based on the options for COPY FROM command.
* But note that force_not_null is handled as a boolean option attached to
* each column, not as a table option.
* These options are based on the options for the COPY FROM command.
* But note that force_not_null and force_null are handled as boolean options
* attached to a column, not as table options.
*
* Note: If you are adding new option for user mapping, you need to modify
* fileGetOptions(), which currently doesn't bother to look at user mappings.
@ -69,7 +69,7 @@ static const struct FileFdwOption valid_options[] = {
{"null", ForeignTableRelationId},
{"encoding", ForeignTableRelationId},
{"force_not_null", AttributeRelationId},
{"force_null", AttributeRelationId},
/*
* force_quote is not supported by file_fdw because it's for COPY TO.
*/
@ -187,6 +187,7 @@ file_fdw_validator(PG_FUNCTION_ARGS)
Oid catalog = PG_GETARG_OID(1);
char *filename = NULL;
DefElem *force_not_null = NULL;
DefElem *force_null = NULL;
List *other_options = NIL;
ListCell *cell;
@ -243,10 +244,10 @@ file_fdw_validator(PG_FUNCTION_ARGS)
}
/*
* Separate out filename and force_not_null, since ProcessCopyOptions
* won't accept them. (force_not_null only comes in a boolean
* per-column flavor here.)
* Separate out filename and column-specific options, since
* ProcessCopyOptions won't accept them.
*/
if (strcmp(def->defname, "filename") == 0)
{
if (filename)
@ -255,16 +256,42 @@ file_fdw_validator(PG_FUNCTION_ARGS)
errmsg("conflicting or redundant options")));
filename = defGetString(def);
}
/*
* force_not_null is a boolean option; after validation we can discard
* it - it will be retrieved later in get_file_fdw_attribute_options()
*/
else if (strcmp(def->defname, "force_not_null") == 0)
{
if (force_not_null)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("conflicting or redundant options")));
errmsg("conflicting or redundant options"),
errhint("option \"force_not_null\" supplied more than once for a column")));
if(force_null)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("conflicting or redundant options"),
errhint("option \"force_not_null\" cannot be used together with \"force_null\"")));
force_not_null = def;
/* Don't care what the value is, as long as it's a legal boolean */
(void) defGetBoolean(def);
}
/* See comments for force_not_null above */
else if (strcmp(def->defname, "force_null") == 0)
{
if (force_null)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("conflicting or redundant options"),
errhint("option \"force_null\" supplied more than once for a column")));
if(force_not_null)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("conflicting or redundant options"),
errhint("option \"force_null\" cannot be used together with \"force_not_null\"")));
force_null = def;
(void) defGetBoolean(def);
}
else
other_options = lappend(other_options, def);
}
@ -369,8 +396,9 @@ fileGetOptions(Oid foreigntableid,
* Retrieve per-column generic options from pg_attribute and construct a list
* of DefElems representing them.
*
* At the moment we only have "force_not_null", which should be combined into
* a single DefElem listing all such columns, since that's what COPY expects.
* At the moment we only have "force_not_null", and "force_null",
* which should each be combined into a single DefElem listing all such
* columns, since that's what COPY expects.
*/
static List *
get_file_fdw_attribute_options(Oid relid)
@ -380,6 +408,9 @@ get_file_fdw_attribute_options(Oid relid)
AttrNumber natts;
AttrNumber attnum;
List *fnncolumns = NIL;
List *fncolumns = NIL;
List *options = NIL;
rel = heap_open(relid, AccessShareLock);
tupleDesc = RelationGetDescr(rel);
@ -410,17 +441,29 @@ get_file_fdw_attribute_options(Oid relid)
fnncolumns = lappend(fnncolumns, makeString(attname));
}
}
else if (strcmp(def->defname, "force_null") == 0)
{
if (defGetBoolean(def))
{
char *attname = pstrdup(NameStr(attr->attname));
fncolumns = lappend(fncolumns, makeString(attname));
}
}
/* maybe in future handle other options here */
}
}
heap_close(rel, AccessShareLock);
/* Return DefElem only when some column(s) have force_not_null */
/* Return DefElem only when some column(s) have force_not_null / force_null options set */
if (fnncolumns != NIL)
return list_make1(makeDefElem("force_not_null", (Node *) fnncolumns));
else
return NIL;
options = lappend(options, makeDefElem("force_not_null", (Node *) fnncolumns));
if (fncolumns != NIL)
options = lappend(options,makeDefElem("force_null", (Node *) fncolumns));
return options;
}
/*

View File

@ -81,11 +81,14 @@ OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.bad', header 'true', deli
-- per-column options tests
CREATE FOREIGN TABLE text_csv (
word1 text OPTIONS (force_not_null 'true'),
word2 text OPTIONS (force_not_null 'off')
word2 text OPTIONS (force_not_null 'off'),
word3 text OPTIONS (force_null 'true'),
word4 text OPTIONS (force_null 'off')
) SERVER file_server
OPTIONS (format 'text', filename '@abs_srcdir@/data/text.csv', null 'NULL');
SELECT * FROM text_csv; -- ERROR
ALTER FOREIGN TABLE text_csv OPTIONS (SET format 'csv');
\pset null _null_
SELECT * FROM text_csv;
-- force_not_null is not allowed to be specified at any foreign object level:
@ -94,6 +97,18 @@ ALTER SERVER file_server OPTIONS (ADD force_not_null '*'); -- ERROR
CREATE USER MAPPING FOR public SERVER file_server OPTIONS (force_not_null '*'); -- ERROR
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (force_not_null '*'); -- ERROR
-- force_not_null cannot be specified together with force_null
ALTER FOREIGN TABLE text_csv ALTER COLUMN word1 OPTIONS (force_null 'true'); --ERROR
-- force_null is not allowed to be specified at any foreign object level:
ALTER FOREIGN DATA WRAPPER file_fdw OPTIONS (ADD force_null '*'); -- ERROR
ALTER SERVER file_server OPTIONS (ADD force_null '*'); -- ERROR
CREATE USER MAPPING FOR public SERVER file_server OPTIONS (force_null '*'); -- ERROR
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (force_null '*'); -- ERROR
-- force_null cannot be specified together with force_not_null
ALTER FOREIGN TABLE text_csv ALTER COLUMN word3 OPTIONS (force_not_null 'true'); --ERROR
-- basic query tests
SELECT * FROM agg_text WHERE b > 10.0 ORDER BY a;
SELECT * FROM agg_csv ORDER BY a;

View File

@ -96,20 +96,24 @@ OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.bad', header 'true', deli
-- per-column options tests
CREATE FOREIGN TABLE text_csv (
word1 text OPTIONS (force_not_null 'true'),
word2 text OPTIONS (force_not_null 'off')
word2 text OPTIONS (force_not_null 'off'),
word3 text OPTIONS (force_null 'true'),
word4 text OPTIONS (force_null 'off')
) SERVER file_server
OPTIONS (format 'text', filename '@abs_srcdir@/data/text.csv', null 'NULL');
SELECT * FROM text_csv; -- ERROR
ERROR: COPY force not null available only in CSV mode
ALTER FOREIGN TABLE text_csv OPTIONS (SET format 'csv');
\pset null _null_
SELECT * FROM text_csv;
word1 | word2
-------+-------
AAA | aaa
XYZ | xyz
NULL |
ABC | abc
(4 rows)
word1 | word2 | word3 | word4
-------+--------+--------+--------
AAA | aaa | 123 |
XYZ | xyz | | 321
NULL | _null_ | _null_ | _null_
NULL | _null_ | _null_ | _null_
ABC | abc | |
(5 rows)
-- force_not_null is not allowed to be specified at any foreign object level:
ALTER FOREIGN DATA WRAPPER file_fdw OPTIONS (ADD force_not_null '*'); -- ERROR
@ -124,6 +128,27 @@ HINT: There are no valid options in this context.
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (force_not_null '*'); -- ERROR
ERROR: invalid option "force_not_null"
HINT: Valid options in this context are: filename, format, header, delimiter, quote, escape, null, encoding
-- force_not_null cannot be specified together with force_null
ALTER FOREIGN TABLE text_csv ALTER COLUMN word1 OPTIONS (force_null 'true'); --ERROR
ERROR: conflicting or redundant options
HINT: option "force_null" cannot be used together with "force_not_null"
-- force_null is not allowed to be specified at any foreign object level:
ALTER FOREIGN DATA WRAPPER file_fdw OPTIONS (ADD force_null '*'); -- ERROR
ERROR: invalid option "force_null"
HINT: There are no valid options in this context.
ALTER SERVER file_server OPTIONS (ADD force_null '*'); -- ERROR
ERROR: invalid option "force_null"
HINT: There are no valid options in this context.
CREATE USER MAPPING FOR public SERVER file_server OPTIONS (force_null '*'); -- ERROR
ERROR: invalid option "force_null"
HINT: There are no valid options in this context.
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (force_null '*'); -- ERROR
ERROR: invalid option "force_null"
HINT: Valid options in this context are: filename, format, header, delimiter, quote, escape, null, encoding
-- force_null cannot be specified together with force_not_null
ALTER FOREIGN TABLE text_csv ALTER COLUMN word3 OPTIONS (force_not_null 'true'); --ERROR
ERROR: conflicting or redundant options
HINT: option "force_not_null" cannot be used together with "force_null"
-- basic query tests
SELECT * FROM agg_text WHERE b > 10.0 ORDER BY a;
a | b

View File

@ -112,11 +112,11 @@
</variablelist>
<para>
Note that while <command>COPY</> allows options such as OIDS and HEADER
Note that while <command>COPY</> allows options such as OIDS and HEADER
to be specified without a corresponding value, the foreign data wrapper
syntax requires a value to be present in all cases. To activate
syntax requires a value to be present in all cases. To activate
<command>COPY</> options normally supplied without a value, you can
instead pass the value TRUE.
instead pass the value TRUE.
</para>
<para>
@ -140,6 +140,21 @@
</listitem>
</varlistentry>
<varlistentry>
<term><literal>force_null</literal></term>
<listitem>
<para>
This is a Boolean option. If true, it specifies that values of the
column which match the null string are returned as <literal>NULL</>
even if the value is quoted. Without this option, only unquoted
values matching the null string are returned as <literal>NULL</>.
This has the same effect as listing the column in
<command>COPY</>'s <literal>FORCE_NULL</literal> option.
</para>
</listitem>
</varlistentry>
</variablelist>
<para>

View File

@ -42,6 +42,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
ESCAPE '<replaceable class="parameter">escape_character</replaceable>'
FORCE_QUOTE { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * }
FORCE_NOT_NULL ( <replaceable class="parameter">column_name</replaceable> [, ...] )
FORCE_NULL ( <replaceable class="parameter">column_name</replaceable> [, ...] )
ENCODING '<replaceable class="parameter">encoding_name</replaceable>'
</synopsis>
</refsynopsisdiv>
@ -328,6 +329,20 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
</listitem>
</varlistentry>
<varlistentry>
<term><literal>FORCE_NULL</></term>
<listitem>
<para>
Match the specified columns' values against the null string, even
if it has been quoted, and if a match is found set the value to
<literal>NULL</>. In the default case where the null string is empty,
this converts a quoted empty string into NULL.
This option is allowed only in <command>COPY FROM</>, and only when
using <literal>CSV</> format.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><literal>ENCODING</></term>
<listitem>
@ -637,7 +652,9 @@ COPY <replaceable class="parameter">count</replaceable>
string, while an empty string data value is written with double quotes
(<literal>""</>). Reading values follows similar rules. You can
use <literal>FORCE_NOT_NULL</> to prevent <literal>NULL</> input
comparisons for specific columns.
comparisons for specific columns. You can also use
<literal>FORCE_NULL</> to convert quoted null string data values to
<literal>NULL</>.
</para>
<para>

View File

@ -125,6 +125,8 @@ typedef struct CopyStateData
bool *force_quote_flags; /* per-column CSV FQ flags */
List *force_notnull; /* list of column names */
bool *force_notnull_flags; /* per-column CSV FNN flags */
List *force_null; /* list of column names */
bool *force_null_flags; /* per-column CSV FN flags */
bool convert_selectively; /* do selective binary conversion? */
List *convert_select; /* list of column names (can be NIL) */
bool *convert_select_flags; /* per-column CSV/TEXT CS flags */
@ -1019,6 +1021,20 @@ ProcessCopyOptions(CopyState cstate,
errmsg("argument to option \"%s\" must be a list of column names",
defel->defname)));
}
else if (strcmp(defel->defname, "force_null") == 0)
{
if (cstate->force_null)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("conflicting or redundant options")));
if (defel->arg && IsA(defel->arg, List))
cstate->force_null = (List *) defel->arg;
else
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("argument to option \"%s\" must be a list of column names",
defel->defname)));
}
else if (strcmp(defel->defname, "convert_selectively") == 0)
{
/*
@ -1178,6 +1194,17 @@ ProcessCopyOptions(CopyState cstate,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY force not null only available using COPY FROM")));
/* Check force_null */
if (!cstate->csv_mode && cstate->force_null != NIL)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY force null available only in CSV mode")));
if (cstate->force_null != NIL && !is_from)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY force null only available using COPY FROM")));
/* Don't allow the delimiter to appear in the null string. */
if (strchr(cstate->null_print, cstate->delim[0]) != NULL)
ereport(ERROR,
@ -1385,6 +1412,28 @@ BeginCopy(bool is_from,
}
}
/* Convert FORCE NULL name list to per-column flags, check validity */
cstate->force_null_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool));
if (cstate->force_null)
{
List *attnums;
ListCell *cur;
attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->force_null);
foreach(cur, attnums)
{
int attnum = lfirst_int(cur);
if (!list_member_int(cstate->attnumlist, attnum))
ereport(ERROR,
(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
errmsg("FORCE NULL column \"%s\" not referenced by COPY",
NameStr(tupDesc->attrs[attnum - 1]->attname))));
cstate->force_null_flags[attnum - 1] = true;
}
}
/* Convert convert_selectively name list to per-column flags */
if (cstate->convert_selectively)
{
@ -2810,11 +2859,28 @@ NextCopyFrom(CopyState cstate, ExprContext *econtext,
continue;
}
if (cstate->csv_mode && string == NULL &&
cstate->force_notnull_flags[m])
if (cstate->csv_mode)
{
/* Go ahead and read the NULL string */
string = cstate->null_print;
if(string == NULL &&
cstate->force_notnull_flags[m])
{
/*
* FORCE_NOT_NULL option is set and column is NULL -
* convert it to the NULL string.
*/
string = cstate->null_print;
}
else if(string != NULL && cstate->force_null_flags[m]
&& strcmp(string,cstate->null_print) == 0 )
{
/*
* FORCE_NULL option is set and column matches the NULL string.
* It must have been quoted, or otherwise the string would already
* have been set to NULL.
* Convert it to NULL as specified.
*/
string = NULL;
}
}
cstate->cur_attname = NameStr(attr[m]->attname);

View File

@ -2548,6 +2548,10 @@ copy_opt_item:
{
$$ = makeDefElem("force_not_null", (Node *)$4);
}
| FORCE NULL_P columnList
{
$$ = makeDefElem("force_null", (Node *)$3);
}
| ENCODING Sconst
{
$$ = makeDefElem("encoding", (Node *)makeString($2));

View File

@ -382,6 +382,54 @@ SELECT * FROM vistest;
e
(2 rows)
-- Test FORCE_NOT_NULL and FORCE_NULL options
-- should succeed with "b" set to an empty string and "c" set to NULL
CREATE TEMP TABLE forcetest (
a INT NOT NULL,
b TEXT NOT NULL,
c TEXT,
d TEXT,
e TEXT
);
\pset null NULL
BEGIN;
COPY forcetest (a, b, c) FROM STDIN WITH (FORMAT csv, FORCE_NOT_NULL(b), FORCE_NULL(c));
COMMIT;
SELECT b, c FROM forcetest WHERE a = 1;
b | c
---+------
| NULL
(1 row)
-- should succeed with no effect ("b" remains an empty string, "c" remains NULL)
BEGIN;
COPY forcetest (a, b, c) FROM STDIN WITH (FORMAT csv, FORCE_NOT_NULL(b), FORCE_NULL(c));
COMMIT;
SELECT b, c FROM forcetest WHERE a = 2;
b | c
---+------
| NULL
(1 row)
-- should fail with not-null constraint violation
BEGIN;
COPY forcetest (a, b, c) FROM STDIN WITH (FORMAT csv, FORCE_NULL(b), FORCE_NOT_NULL(c));
ERROR: null value in column "b" violates not-null constraint
DETAIL: Failing row contains (3, null, , null, null).
CONTEXT: COPY forcetest, line 1: "3,,"""
ROLLBACK;
-- should fail with "not referenced by COPY" error
BEGIN;
COPY forcetest (d, e) FROM STDIN WITH (FORMAT csv, FORCE_NOT_NULL(b));
ERROR: FORCE NOT NULL column "b" not referenced by COPY
ROLLBACK;
-- should fail with "not referenced by COPY" error
BEGIN;
COPY forcetest (d, e) FROM STDIN WITH (FORMAT csv, FORCE_NULL(b));
ERROR: FORCE NULL column "b" not referenced by COPY
ROLLBACK;
\pset null ''
DROP TABLE forcetest;
DROP TABLE vistest;
DROP FUNCTION truncate_in_subxact();
DROP TABLE x, y;

View File

@ -270,6 +270,45 @@ e
SELECT * FROM vistest;
COMMIT;
SELECT * FROM vistest;
-- Test FORCE_NOT_NULL and FORCE_NULL options
-- should succeed with "b" set to an empty string and "c" set to NULL
CREATE TEMP TABLE forcetest (
a INT NOT NULL,
b TEXT NOT NULL,
c TEXT,
d TEXT,
e TEXT
);
\pset null NULL
BEGIN;
COPY forcetest (a, b, c) FROM STDIN WITH (FORMAT csv, FORCE_NOT_NULL(b), FORCE_NULL(c));
1,,""
\.
COMMIT;
SELECT b, c FROM forcetest WHERE a = 1;
-- should succeed with no effect ("b" remains an empty string, "c" remains NULL)
BEGIN;
COPY forcetest (a, b, c) FROM STDIN WITH (FORMAT csv, FORCE_NOT_NULL(b), FORCE_NULL(c));
2,,""
\.
COMMIT;
SELECT b, c FROM forcetest WHERE a = 2;
-- should fail with not-null constraint violation
BEGIN;
COPY forcetest (a, b, c) FROM STDIN WITH (FORMAT csv, FORCE_NULL(b), FORCE_NOT_NULL(c));
3,,""
\.
ROLLBACK;
-- should fail with "not referenced by COPY" error
BEGIN;
COPY forcetest (d, e) FROM STDIN WITH (FORMAT csv, FORCE_NOT_NULL(b));
ROLLBACK;
-- should fail with "not referenced by COPY" error
BEGIN;
COPY forcetest (d, e) FROM STDIN WITH (FORMAT csv, FORCE_NULL(b));
ROLLBACK;
\pset null ''
DROP TABLE forcetest;
DROP TABLE vistest;
DROP FUNCTION truncate_in_subxact();
DROP TABLE x, y;