diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 6b5bcd83c5..95fec8d386 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -3243,7 +3243,17 @@ CopyReadAttributesText(CopyState cstate) start_ptr = cur_ptr; cstate->raw_fields[fieldno] = output_ptr; - /* Scan data for field */ + /* + * Scan data for field. + * + * Note that in this loop, we are scanning to locate the end of field + * and also speculatively performing de-escaping. Once we find the + * end-of-field, we can match the raw field contents against the null + * marker string. Only after that comparison fails do we know that + * de-escaping is actually the right thing to do; therefore we *must + * not* throw any syntax errors before we've done the null-marker + * check. + */ for (;;) { char c; @@ -3356,26 +3366,29 @@ CopyReadAttributesText(CopyState cstate) *output_ptr++ = c; } - /* Terminate attribute value in output area */ - *output_ptr++ = '\0'; - - /* - * If we de-escaped a non-7-bit-ASCII char, make sure we still have - * valid data for the db encoding. Avoid calling strlen here for the - * sake of efficiency. - */ - if (saw_non_ascii) - { - char *fld = cstate->raw_fields[fieldno]; - - pg_verifymbstr(fld, output_ptr - (fld + 1), false); - } - /* Check whether raw input matched null marker */ input_len = end_ptr - start_ptr; if (input_len == cstate->null_print_len && strncmp(start_ptr, cstate->null_print, input_len) == 0) cstate->raw_fields[fieldno] = NULL; + else + { + /* + * At this point we know the field is supposed to contain data. + * + * If we de-escaped any non-7-bit-ASCII chars, make sure the + * resulting string is valid data for the db encoding. + */ + if (saw_non_ascii) + { + char *fld = cstate->raw_fields[fieldno]; + + pg_verifymbstr(fld, output_ptr - fld, false); + } + } + + /* Terminate attribute value in output area */ + *output_ptr++ = '\0'; fieldno++; /* Done if we hit EOL instead of a delim */ diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out index 8e2bc0c250..2449eefb16 100644 --- a/src/test/regress/expected/copy2.out +++ b/src/test/regress/expected/copy2.out @@ -239,6 +239,22 @@ a\. \.b c\.d "\." +-- test handling of nonstandard null marker that violates escaping rules +CREATE TEMP TABLE testnull(a int, b text); +INSERT INTO testnull VALUES (1, E'\\0'), (NULL, NULL); +COPY testnull TO stdout WITH NULL AS E'\\0'; +1 \\0 +\0 \0 +COPY testnull FROM stdin WITH NULL AS E'\\0'; +SELECT * FROM testnull; + a | b +----+---- + 1 | \0 + | + 42 | \0 + | +(4 rows) + DROP TABLE x, y; DROP FUNCTION fn_x_before(); DROP FUNCTION fn_x_after(); diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql index 6322c8fba4..1961446fdb 100644 --- a/src/test/regress/sql/copy2.sql +++ b/src/test/regress/sql/copy2.sql @@ -164,6 +164,21 @@ c\.d COPY testeoc TO stdout CSV; +-- test handling of nonstandard null marker that violates escaping rules + +CREATE TEMP TABLE testnull(a int, b text); +INSERT INTO testnull VALUES (1, E'\\0'), (NULL, NULL); + +COPY testnull TO stdout WITH NULL AS E'\\0'; + +COPY testnull FROM stdin WITH NULL AS E'\\0'; +42 \\0 +\0 \0 +\. + +SELECT * FROM testnull; + + DROP TABLE x, y; DROP FUNCTION fn_x_before(); DROP FUNCTION fn_x_after();