Refactor CopyReadAttributes{CSV,Text}() to use a callback in COPY FROM

CopyReadAttributes{CSV,Text}() are used to parse lines for the text and
CSV formats.  Selecting one of them once, as a callback stored in the
COPY FROM state, reduces the number of "if" branches that need to be
checked when parsing fields in CSV and text mode, something that can
become more noticeable with more attributes and more lines to process.

Extracted from a larger patch by the same author.

Author: Sutou Kouhei
Discussion: https://postgr.es/m/20231204.153548.2126325458835528809.kou@clear-code.com
This commit is contained in:
Michael Paquier 2024-02-05 09:46:02 +09:00
parent 774bcffe4a
commit 95fb5b4902
3 changed files with 30 additions and 16 deletions

View File

@ -1776,6 +1776,11 @@ BeginCopyFrom(ParseState *pstate,
cstate->max_fields = attr_count; cstate->max_fields = attr_count;
cstate->raw_fields = (char **) palloc(attr_count * sizeof(char *)); cstate->raw_fields = (char **) palloc(attr_count * sizeof(char *));
if (cstate->opts.csv_mode)
cstate->copy_read_attributes = CopyReadAttributesCSV;
else
cstate->copy_read_attributes = CopyReadAttributesText;
} }
MemoryContextSwitchTo(oldcontext); MemoryContextSwitchTo(oldcontext);

View File

@ -25,10 +25,10 @@
* is copied into 'line_buf', with quotes and escape characters still * is copied into 'line_buf', with quotes and escape characters still
* intact. * intact.
* *
* 4. CopyReadAttributesText/CSV() function takes the input line from * 4. CopyReadAttributesText/CSV() function (via copy_read_attributes) takes
* 'line_buf', and splits it into fields, unescaping the data as required. * the input line from 'line_buf', and splits it into fields, unescaping
* The fields are stored in 'attribute_buf', and 'raw_fields' array holds * the data as required. The fields are stored in 'attribute_buf', and
* pointers to each field. * 'raw_fields' array holds pointers to each field.
* *
* If encoding conversion is not required, a shortcut is taken in step 2 to * If encoding conversion is not required, a shortcut is taken in step 2 to
* avoid copying the data unnecessarily. The 'input_buf' pointer is set to * avoid copying the data unnecessarily. The 'input_buf' pointer is set to
@ -152,8 +152,6 @@ static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0";
/* non-export function prototypes */ /* non-export function prototypes */
static bool CopyReadLine(CopyFromState cstate); static bool CopyReadLine(CopyFromState cstate);
static bool CopyReadLineText(CopyFromState cstate); static bool CopyReadLineText(CopyFromState cstate);
static int CopyReadAttributesText(CopyFromState cstate);
static int CopyReadAttributesCSV(CopyFromState cstate);
static Datum CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo, static Datum CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo,
Oid typioparam, int32 typmod, Oid typioparam, int32 typmod,
bool *isnull); bool *isnull);
@ -775,10 +773,7 @@ NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields)
{ {
int fldnum; int fldnum;
if (cstate->opts.csv_mode) fldct = cstate->copy_read_attributes(cstate);
fldct = CopyReadAttributesCSV(cstate);
else
fldct = CopyReadAttributesText(cstate);
if (fldct != list_length(cstate->attnumlist)) if (fldct != list_length(cstate->attnumlist))
ereport(ERROR, ereport(ERROR,
@ -830,10 +825,7 @@ NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields)
return false; return false;
/* Parse the line into de-escaped field values */ /* Parse the line into de-escaped field values */
if (cstate->opts.csv_mode) fldct = cstate->copy_read_attributes(cstate);
fldct = CopyReadAttributesCSV(cstate);
else
fldct = CopyReadAttributesText(cstate);
*fields = cstate->raw_fields; *fields = cstate->raw_fields;
*nfields = fldct; *nfields = fldct;
@ -1502,7 +1494,7 @@ GetDecimalFromHex(char hex)
* *
* The return value is the number of fields actually read. * The return value is the number of fields actually read.
*/ */
static int int
CopyReadAttributesText(CopyFromState cstate) CopyReadAttributesText(CopyFromState cstate)
{ {
char delimc = cstate->opts.delim[0]; char delimc = cstate->opts.delim[0];
@ -1756,7 +1748,7 @@ CopyReadAttributesText(CopyFromState cstate)
* CopyReadAttributesText, except we parse the fields according to * CopyReadAttributesText, except we parse the fields according to
* "standard" (i.e. common) CSV usage. * "standard" (i.e. common) CSV usage.
*/ */
static int int
CopyReadAttributesCSV(CopyFromState cstate) CopyReadAttributesCSV(CopyFromState cstate)
{ {
char delimc = cstate->opts.delim[0]; char delimc = cstate->opts.delim[0];

View File

@ -52,6 +52,13 @@ typedef enum CopyInsertMethod
* ExecForeignBatchInsert only if valid */ * ExecForeignBatchInsert only if valid */
} CopyInsertMethod; } CopyInsertMethod;
/*
* Per-format callback to parse a line into separate fields.
*
* CopyReadAttributesCSV() and CopyReadAttributesText() are the two
* implementations.  One of them is stored in the copy_read_attributes
* field of CopyFromStateData, and NextCopyFromRawFields() calls it
* through that pointer instead of testing the format itself.
*
* Returns the number of fields read.
*/
typedef int (*CopyReadAttributes) (CopyFromState cstate);
/* /*
* This struct contains all the state variables used throughout a COPY FROM * This struct contains all the state variables used throughout a COPY FROM
* operation. * operation.
@ -130,6 +137,12 @@ typedef struct CopyFromStateData
int max_fields; int max_fields;
char **raw_fields; char **raw_fields;
/*
* Per-format callback to parse lines, then fill raw_fields and
* attribute_buf.
*/
CopyReadAttributes copy_read_attributes;
/* /*
* Similarly, line_buf holds the whole input line being processed. The * Similarly, line_buf holds the whole input line being processed. The
* input cycle is first to read the whole line into line_buf, and then * input cycle is first to read the whole line into line_buf, and then
@ -183,4 +196,8 @@ typedef struct CopyFromStateData
extern void ReceiveCopyBegin(CopyFromState cstate); extern void ReceiveCopyBegin(CopyFromState cstate);
extern void ReceiveCopyBinaryHeader(CopyFromState cstate); extern void ReceiveCopyBinaryHeader(CopyFromState cstate);
/*
* Callbacks for copy_read_attributes: the CSV and text line parsers.
* Both take the COPY FROM state and return an int, matching the
* CopyReadAttributes callback type.
*/
extern int CopyReadAttributesCSV(CopyFromState cstate);
extern int CopyReadAttributesText(CopyFromState cstate);
#endif /* COPYFROM_INTERNAL_H */ #endif /* COPYFROM_INTERNAL_H */