Fix some corner-case bugs in _sendSQLLine's parsing of SQL commands
found in a pg_dump archive.  It had problems with dollar-quote tags
broken across bufferload boundaries (this may explain bug report from
Rod Taylor), also with dollar-quote literals of the form $a$a$...,
and was also confused about the rules for backslash in double quoted
identifiers (ie, they're not special).  Also put in placeholder support
for E'...' literals --- this will need more work later.
This commit is contained in:
Tom Lane 2005-09-11 04:10:25 +00:00
parent e1a7d1b9f3
commit c7d1a8d428
2 changed files with 181 additions and 207 deletions

View File

@ -17,7 +17,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/bin/pg_dump/pg_backup_archiver.h,v 1.66 2005/07/27 12:44:10 neilc Exp $ * $PostgreSQL: pgsql/src/bin/pg_dump/pg_backup_archiver.h,v 1.67 2005/09/11 04:10:25 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -136,22 +136,24 @@ typedef struct _outputContext
typedef enum typedef enum
{ {
SQL_SCAN = 0, SQL_SCAN = 0, /* normal */
SQL_IN_SQL_COMMENT, SQL_IN_SQL_COMMENT, /* -- comment */
SQL_IN_EXT_COMMENT, SQL_IN_EXT_COMMENT, /* slash-star comment */
SQL_IN_QUOTE, SQL_IN_SINGLE_QUOTE, /* '...' literal */
SQL_IN_DOLLARTAG, SQL_IN_E_QUOTE, /* E'...' literal */
SQL_IN_DOLLARQUOTE SQL_IN_DOUBLE_QUOTE, /* "..." identifier */
SQL_IN_DOLLAR_TAG, /* possible dollar-quote starting tag */
SQL_IN_DOLLAR_QUOTE /* body of dollar quote */
} sqlparseState; } sqlparseState;
typedef struct typedef struct
{ {
int backSlash; sqlparseState state; /* see above */
sqlparseState state; char lastChar; /* preceding char, or '\0' initially */
char lastChar; bool backSlash; /* next char is backslash quoted? */
char quoteChar; int braceDepth; /* parenthesis nesting depth */
int braceDepth; PQExpBuffer tagBuf; /* dollar quote tag (NULL if not created) */
PQExpBuffer tagBuf; int minTagEndPos; /* first possible end position of $-quote */
} sqlparseInfo; } sqlparseInfo;
typedef enum typedef enum

View File

@ -5,7 +5,7 @@
* Implements the basic DB functions used by the archiver. * Implements the basic DB functions used by the archiver.
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/bin/pg_dump/pg_backup_db.c,v 1.64 2005/07/27 05:14:12 neilc Exp $ * $PostgreSQL: pgsql/src/bin/pg_dump/pg_backup_db.c,v 1.65 2005/09/11 04:10:25 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -36,8 +36,8 @@ static void notice_processor(void *arg, const char *message);
static char *_sendSQLLine(ArchiveHandle *AH, char *qry, char *eos); static char *_sendSQLLine(ArchiveHandle *AH, char *qry, char *eos);
static char *_sendCopyLine(ArchiveHandle *AH, char *qry, char *eos); static char *_sendCopyLine(ArchiveHandle *AH, char *qry, char *eos);
static int _isIdentChar(unsigned char c); static bool _isIdentChar(unsigned char c);
static int _isDQChar(unsigned char c, int atStart); static bool _isDQChar(unsigned char c, bool atStart);
#define DB_MAX_ERR_STMT 128 #define DB_MAX_ERR_STMT 128
@ -410,215 +410,187 @@ _sendCopyLine(ArchiveHandle *AH, char *qry, char *eos)
} }
/* /*
* Used by ExecuteSqlCommandBuf to send one buffered line of SQL (not data for the copy command). * Used by ExecuteSqlCommandBuf to send one buffered line of SQL
* (not data for the copy command).
*/ */
static char * static char *
_sendSQLLine(ArchiveHandle *AH, char *qry, char *eos) _sendSQLLine(ArchiveHandle *AH, char *qry, char *eos)
{ {
int pos = 0; /* Current position */
char *sqlPtr;
int consumed;
int startDT = 0;
/* /*
* The following is a mini state machine to assess the end of an SQL * The following is a mini state machine to assess the end of an SQL
* statement. It really only needs to parse good SQL, or at least * statement. It really only needs to parse good SQL, or at least
* that's the theory... End-of-statement is assumed to be an unquoted, * that's the theory... End-of-statement is assumed to be an unquoted,
* un commented semi-colon. * un-commented semi-colon that's not within any parentheses.
*
* Note: the input can be split into bufferloads at arbitrary boundaries.
* Therefore all state must be kept in AH->sqlparse, not in local
* variables of this routine. We assume that AH->sqlparse was
* filled with zeroes when created.
*/ */
for (; qry < eos; qry++)
/*
* fprintf(stderr, "Buffer at start is: '%s'\n\n", AH->sqlBuf->data);
*/
for (pos = 0; pos < (eos - qry); pos++)
{ {
appendPQExpBufferChar(AH->sqlBuf, qry[pos]); switch (AH->sqlparse.state)
/* fprintf(stderr, " %c",qry[pos]); */
/* Loop until character consumed */
do
{ {
/* case SQL_SCAN: /* Default state == 0, set in _allocAH */
* If a character needs to be scanned in a different state, if (*qry == ';' && AH->sqlparse.braceDepth == 0)
* consumed can be set to 0 to avoid advancing. Care must be {
* taken to ensure internal state is not damaged. /*
*/ * We've found the end of a statement. Send it and
consumed = 1; * reset the buffer.
*/
switch (AH->sqlparse.state) appendPQExpBufferChar(AH->sqlBuf, ';'); /* inessential */
{ ExecuteSqlCommand(AH, AH->sqlBuf,
"could not execute query");
case SQL_SCAN: /* Default state == 0, set in _allocAH */ resetPQExpBuffer(AH->sqlBuf);
if (qry[pos] == ';' && AH->sqlparse.braceDepth == 0) AH->sqlparse.lastChar = '\0';
{
/*
* We've got the end of a statement. Send It &
* reset the buffer.
*/
/*
* fprintf(stderr, " sending: '%s'\n\n",
* AH->sqlBuf->data);
*/
ExecuteSqlCommand(AH, AH->sqlBuf, "could not execute query");
resetPQExpBuffer(AH->sqlBuf);
AH->sqlparse.lastChar = '\0';
/*
* Remove any following newlines - so that
* embedded COPY commands don't get a starting
* newline.
*/
pos++;
for (; pos < (eos - qry) && qry[pos] == '\n'; pos++);
/* We've got our line, so exit */
return qry + pos;
}
else
{
/*
* Look for normal boring quote chars, or
* dollar-quotes. We make the assumption that
* $-quotes will not have an ident character
* before them in all pg_dump output.
*/
if (qry[pos] == '"'
|| qry[pos] == '\''
|| (qry[pos] == '$' && _isIdentChar(AH->sqlparse.lastChar) == 0)
)
{
/* fprintf(stderr,"[startquote]\n"); */
AH->sqlparse.state = SQL_IN_QUOTE;
AH->sqlparse.quoteChar = qry[pos];
AH->sqlparse.backSlash = 0;
if (qry[pos] == '$')
{
/* override the state */
AH->sqlparse.state = SQL_IN_DOLLARTAG;
/* Used for checking first char of tag */
startDT = 1;
/* We store the tag for later comparison. */
AH->sqlparse.tagBuf = createPQExpBuffer();
/* Get leading $ */
appendPQExpBufferChar(AH->sqlparse.tagBuf, qry[pos]);
}
}
else if (qry[pos] == '-' && AH->sqlparse.lastChar == '-')
AH->sqlparse.state = SQL_IN_SQL_COMMENT;
else if (qry[pos] == '*' && AH->sqlparse.lastChar == '/')
AH->sqlparse.state = SQL_IN_EXT_COMMENT;
else if (qry[pos] == '(')
AH->sqlparse.braceDepth++;
else if (qry[pos] == ')')
AH->sqlparse.braceDepth--;
AH->sqlparse.lastChar = qry[pos];
}
break;
case SQL_IN_DOLLARTAG:
/* /*
* Like a quote, we look for a closing char *but* we * Remove any following newlines - so that
* only allow a very limited set of contained chars, * embedded COPY commands don't get a starting newline.
* and no escape chars. If invalid chars are found, we
* abort tag processing.
*/ */
qry++;
while (qry < eos && *qry == '\n')
qry++;
if (qry[pos] == '$') /* We've finished one line, so exit */
{ return qry;
/* fprintf(stderr,"[endquote]\n"); */ }
/* Get trailing $ */ else if (*qry == '\'')
appendPQExpBufferChar(AH->sqlparse.tagBuf, qry[pos]); {
AH->sqlparse.state = SQL_IN_DOLLARQUOTE; if (AH->sqlparse.lastChar == 'E')
} AH->sqlparse.state = SQL_IN_E_QUOTE;
else else
{ AH->sqlparse.state = SQL_IN_SINGLE_QUOTE;
if (_isDQChar(qry[pos], startDT)) AH->sqlparse.backSlash = false;
{ }
/* Valid, so add */ else if (*qry == '"')
appendPQExpBufferChar(AH->sqlparse.tagBuf, qry[pos]); {
} AH->sqlparse.state = SQL_IN_DOUBLE_QUOTE;
else }
{ /*
/* * Look for dollar-quotes. We make the assumption that
* Jump back to 'scan' state, we're not really * $-quotes will not have an ident character just
* in a tag, and valid tag chars do not * before them in pg_dump output. XXX is this
* include the various chars we look for in * good enough?
* this state machine, so it's safe to just */
* jump from this state back to SCAN. We set else if (*qry == '$' && !_isIdentChar(AH->sqlparse.lastChar))
* consumed = 0 so that this char gets {
* rescanned in new state. AH->sqlparse.state = SQL_IN_DOLLAR_TAG;
*/ /* initialize separate buffer with possible tag */
destroyPQExpBuffer(AH->sqlparse.tagBuf); if (AH->sqlparse.tagBuf == NULL)
AH->sqlparse.state = SQL_SCAN; AH->sqlparse.tagBuf = createPQExpBuffer();
consumed = 0; else
} resetPQExpBuffer(AH->sqlparse.tagBuf);
} appendPQExpBufferChar(AH->sqlparse.tagBuf, *qry);
startDT = 0; }
break; else if (*qry == '-' && AH->sqlparse.lastChar == '-')
AH->sqlparse.state = SQL_IN_SQL_COMMENT;
else if (*qry == '*' && AH->sqlparse.lastChar == '/')
AH->sqlparse.state = SQL_IN_EXT_COMMENT;
else if (*qry == '(')
AH->sqlparse.braceDepth++;
else if (*qry == ')')
AH->sqlparse.braceDepth--;
break;
case SQL_IN_SQL_COMMENT:
if (*qry == '\n')
AH->sqlparse.state = SQL_SCAN;
break;
case SQL_IN_DOLLARQUOTE: case SQL_IN_EXT_COMMENT:
/*
* This isn't fully correct, because we don't account for
* nested slash-stars, but pg_dump never emits such.
*/
if (AH->sqlparse.lastChar == '*' && *qry == '/')
AH->sqlparse.state = SQL_SCAN;
break;
case SQL_IN_SINGLE_QUOTE:
/* We needn't handle '' specially */
if (*qry == '\'' && !AH->sqlparse.backSlash)
AH->sqlparse.state = SQL_SCAN;
else if (*qry == '\\')
AH->sqlparse.backSlash = !AH->sqlparse.backSlash;
else
AH->sqlparse.backSlash = false;
break;
case SQL_IN_E_QUOTE:
/*
* Eventually we will need to handle '' specially, because
* after E'...''... we should still be in E_QUOTE state.
*
* XXX problem: how do we tell whether the dump was made
* by a version that thinks backslashes aren't special
* in non-E literals??
*/
if (*qry == '\'' && !AH->sqlparse.backSlash)
AH->sqlparse.state = SQL_SCAN;
else if (*qry == '\\')
AH->sqlparse.backSlash = !AH->sqlparse.backSlash;
else
AH->sqlparse.backSlash = false;
break;
case SQL_IN_DOUBLE_QUOTE:
/* We needn't handle "" specially */
if (*qry == '"')
AH->sqlparse.state = SQL_SCAN;
break;
case SQL_IN_DOLLAR_TAG:
if (*qry == '$')
{
/* Do not add the closing $ to tagBuf */
AH->sqlparse.state = SQL_IN_DOLLAR_QUOTE;
AH->sqlparse.minTagEndPos = AH->sqlBuf->len + AH->sqlparse.tagBuf->len + 1;
}
else if (_isDQChar(*qry, (AH->sqlparse.tagBuf->len == 1)))
{
/* Valid, so add to tag */
appendPQExpBufferChar(AH->sqlparse.tagBuf, *qry);
}
else
{
/* /*
* Comparing the entire string backwards each time is * Ooops, we're not really in a dollar-tag. Valid tag
* NOT efficient, but dollar quotes in pg_dump are * chars do not include the various chars we look for
* small and the code is a lot simpler. * in this state machine, so it's safe to just jump
* from this state back to SCAN. We have to back up
* the qry pointer so that the current character gets
* rescanned in SCAN state; and then "continue" so that
* the bottom-of-loop actions aren't done yet.
*/ */
sqlPtr = AH->sqlBuf->data + AH->sqlBuf->len - AH->sqlparse.tagBuf->len; AH->sqlparse.state = SQL_SCAN;
qry--;
continue;
}
break;
if (strncmp(AH->sqlparse.tagBuf->data, sqlPtr, AH->sqlparse.tagBuf->len) == 0) case SQL_IN_DOLLAR_QUOTE:
{ /*
/* End of $-quote */ * If we are at a $, see whether what precedes it matches
AH->sqlparse.state = SQL_SCAN; * tagBuf. (Remember that the trailing $ of the tag was
destroyPQExpBuffer(AH->sqlparse.tagBuf); * not added to tagBuf.) However, don't compare until we
} * have enough data to be a possible match --- this is
break; * needed to avoid false match on '$a$a$...'
*/
if (*qry == '$' &&
AH->sqlBuf->len >= AH->sqlparse.minTagEndPos &&
strcmp(AH->sqlparse.tagBuf->data,
AH->sqlBuf->data + AH->sqlBuf->len - AH->sqlparse.tagBuf->len) == 0)
AH->sqlparse.state = SQL_SCAN;
break;
}
case SQL_IN_SQL_COMMENT: appendPQExpBufferChar(AH->sqlBuf, *qry);
if (qry[pos] == '\n') AH->sqlparse.lastChar = *qry;
AH->sqlparse.state = SQL_SCAN;
break;
case SQL_IN_EXT_COMMENT:
if (AH->sqlparse.lastChar == '*' && qry[pos] == '/')
AH->sqlparse.state = SQL_SCAN;
break;
case SQL_IN_QUOTE:
if (!AH->sqlparse.backSlash && AH->sqlparse.quoteChar == qry[pos])
{
/* fprintf(stderr,"[endquote]\n"); */
AH->sqlparse.state = SQL_SCAN;
}
else
{
if (qry[pos] == '\\')
{
if (AH->sqlparse.lastChar == '\\')
AH->sqlparse.backSlash = !AH->sqlparse.backSlash;
else
AH->sqlparse.backSlash = 1;
}
else
AH->sqlparse.backSlash = 0;
}
break;
}
} while (consumed == 0);
AH->sqlparse.lastChar = qry[pos];
/* fprintf(stderr, "\n"); */
} }
/* /*
* If we get here, we've processed entire string with no complete SQL * If we get here, we've processed entire bufferload with no complete SQL
* stmt * stmt
*/ */
return eos; return eos;
@ -673,7 +645,7 @@ CommitTransaction(ArchiveHandle *AH)
destroyPQExpBuffer(qry); destroyPQExpBuffer(qry);
} }
static int static bool
_isIdentChar(unsigned char c) _isIdentChar(unsigned char c)
{ {
if ((c >= 'a' && c <= 'z') if ((c >= 'a' && c <= 'z')
@ -684,22 +656,22 @@ _isIdentChar(unsigned char c)
|| (c >= (unsigned char) '\200') /* no need to check <= || (c >= (unsigned char) '\200') /* no need to check <=
* \377 */ * \377 */
) )
return 1; return true;
else else
return 0; return false;
} }
static int static bool
_isDQChar(unsigned char c, int atStart) _isDQChar(unsigned char c, bool atStart)
{ {
if ((c >= 'a' && c <= 'z') if ((c >= 'a' && c <= 'z')
|| (c >= 'A' && c <= 'Z') || (c >= 'A' && c <= 'Z')
|| (c == '_') || (c == '_')
|| (atStart == 0 && c >= '0' && c <= '9') || (!atStart && c >= '0' && c <= '9')
|| (c >= (unsigned char) '\200') /* no need to check <= || (c >= (unsigned char) '\200') /* no need to check <=
* \377 */ * \377 */
) )
return 1; return true;
else else
return 0; return false;
} }