Fix some corner-case bugs in _sendSQLLine's parsing of SQL commands
> found in a pg_dump archive. It had problems with dollar-quote tags broken across bufferload boundaries (this may explain bug report from Rod Taylor), also with dollar-quote literals of the form $a$a$..., and was also confused about the rules for backslash in double quoted identifiers (ie, they're not special). Also put in placeholder support for E'...' literals --- this will need more work later.
This commit is contained in:
parent
e1a7d1b9f3
commit
c7d1a8d428
@ -17,7 +17,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/bin/pg_dump/pg_backup_archiver.h,v 1.66 2005/07/27 12:44:10 neilc Exp $
|
||||
* $PostgreSQL: pgsql/src/bin/pg_dump/pg_backup_archiver.h,v 1.67 2005/09/11 04:10:25 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -136,22 +136,24 @@ typedef struct _outputContext
|
||||
|
||||
typedef enum
|
||||
{
|
||||
SQL_SCAN = 0,
|
||||
SQL_IN_SQL_COMMENT,
|
||||
SQL_IN_EXT_COMMENT,
|
||||
SQL_IN_QUOTE,
|
||||
SQL_IN_DOLLARTAG,
|
||||
SQL_IN_DOLLARQUOTE
|
||||
SQL_SCAN = 0, /* normal */
|
||||
SQL_IN_SQL_COMMENT, /* -- comment */
|
||||
SQL_IN_EXT_COMMENT, /* slash-star comment */
|
||||
SQL_IN_SINGLE_QUOTE, /* '...' literal */
|
||||
SQL_IN_E_QUOTE, /* E'...' literal */
|
||||
SQL_IN_DOUBLE_QUOTE, /* "..." identifier */
|
||||
SQL_IN_DOLLAR_TAG, /* possible dollar-quote starting tag */
|
||||
SQL_IN_DOLLAR_QUOTE /* body of dollar quote */
|
||||
} sqlparseState;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int backSlash;
|
||||
sqlparseState state;
|
||||
char lastChar;
|
||||
char quoteChar;
|
||||
int braceDepth;
|
||||
PQExpBuffer tagBuf;
|
||||
sqlparseState state; /* see above */
|
||||
char lastChar; /* preceding char, or '\0' initially */
|
||||
bool backSlash; /* next char is backslash quoted? */
|
||||
int braceDepth; /* parenthesis nesting depth */
|
||||
PQExpBuffer tagBuf; /* dollar quote tag (NULL if not created) */
|
||||
int minTagEndPos; /* first possible end position of $-quote */
|
||||
} sqlparseInfo;
|
||||
|
||||
typedef enum
|
||||
|
@ -5,7 +5,7 @@
|
||||
* Implements the basic DB functions used by the archiver.
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/bin/pg_dump/pg_backup_db.c,v 1.64 2005/07/27 05:14:12 neilc Exp $
|
||||
* $PostgreSQL: pgsql/src/bin/pg_dump/pg_backup_db.c,v 1.65 2005/09/11 04:10:25 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -36,8 +36,8 @@ static void notice_processor(void *arg, const char *message);
|
||||
static char *_sendSQLLine(ArchiveHandle *AH, char *qry, char *eos);
|
||||
static char *_sendCopyLine(ArchiveHandle *AH, char *qry, char *eos);
|
||||
|
||||
static int _isIdentChar(unsigned char c);
|
||||
static int _isDQChar(unsigned char c, int atStart);
|
||||
static bool _isIdentChar(unsigned char c);
|
||||
static bool _isDQChar(unsigned char c, bool atStart);
|
||||
|
||||
#define DB_MAX_ERR_STMT 128
|
||||
|
||||
@ -410,215 +410,187 @@ _sendCopyLine(ArchiveHandle *AH, char *qry, char *eos)
|
||||
}
|
||||
|
||||
/*
|
||||
* Used by ExecuteSqlCommandBuf to send one buffered line of SQL (not data for the copy command).
|
||||
* Used by ExecuteSqlCommandBuf to send one buffered line of SQL
|
||||
* (not data for the copy command).
|
||||
*/
|
||||
static char *
|
||||
_sendSQLLine(ArchiveHandle *AH, char *qry, char *eos)
|
||||
{
|
||||
int pos = 0; /* Current position */
|
||||
char *sqlPtr;
|
||||
int consumed;
|
||||
int startDT = 0;
|
||||
|
||||
/*
|
||||
* The following is a mini state machine to assess the end of an SQL
|
||||
* statement. It really only needs to parse good SQL, or at least
|
||||
* that's the theory... End-of-statement is assumed to be an unquoted,
|
||||
* un commented semi-colon.
|
||||
* un-commented semi-colon that's not within any parentheses.
|
||||
*
|
||||
* Note: the input can be split into bufferloads at arbitrary boundaries.
|
||||
* Therefore all state must be kept in AH->sqlparse, not in local
|
||||
* variables of this routine. We assume that AH->sqlparse was
|
||||
* filled with zeroes when created.
|
||||
*/
|
||||
|
||||
/*
|
||||
* fprintf(stderr, "Buffer at start is: '%s'\n\n", AH->sqlBuf->data);
|
||||
*/
|
||||
|
||||
for (pos = 0; pos < (eos - qry); pos++)
|
||||
for (; qry < eos; qry++)
|
||||
{
|
||||
appendPQExpBufferChar(AH->sqlBuf, qry[pos]);
|
||||
/* fprintf(stderr, " %c",qry[pos]); */
|
||||
|
||||
/* Loop until character consumed */
|
||||
do
|
||||
switch (AH->sqlparse.state)
|
||||
{
|
||||
/*
|
||||
* If a character needs to be scanned in a different state,
|
||||
* consumed can be set to 0 to avoid advancing. Care must be
|
||||
* taken to ensure internal state is not damaged.
|
||||
*/
|
||||
consumed = 1;
|
||||
|
||||
switch (AH->sqlparse.state)
|
||||
{
|
||||
|
||||
case SQL_SCAN: /* Default state == 0, set in _allocAH */
|
||||
if (qry[pos] == ';' && AH->sqlparse.braceDepth == 0)
|
||||
{
|
||||
/*
|
||||
* We've got the end of a statement. Send It &
|
||||
* reset the buffer.
|
||||
*/
|
||||
|
||||
/*
|
||||
* fprintf(stderr, " sending: '%s'\n\n",
|
||||
* AH->sqlBuf->data);
|
||||
*/
|
||||
ExecuteSqlCommand(AH, AH->sqlBuf, "could not execute query");
|
||||
resetPQExpBuffer(AH->sqlBuf);
|
||||
AH->sqlparse.lastChar = '\0';
|
||||
|
||||
/*
|
||||
* Remove any following newlines - so that
|
||||
* embedded COPY commands don't get a starting
|
||||
* newline.
|
||||
*/
|
||||
pos++;
|
||||
for (; pos < (eos - qry) && qry[pos] == '\n'; pos++);
|
||||
|
||||
/* We've got our line, so exit */
|
||||
return qry + pos;
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* Look for normal boring quote chars, or
|
||||
* dollar-quotes. We make the assumption that
|
||||
* $-quotes will not have an ident character
|
||||
* before them in all pg_dump output.
|
||||
*/
|
||||
if (qry[pos] == '"'
|
||||
|| qry[pos] == '\''
|
||||
|| (qry[pos] == '$' && _isIdentChar(AH->sqlparse.lastChar) == 0)
|
||||
)
|
||||
{
|
||||
/* fprintf(stderr,"[startquote]\n"); */
|
||||
AH->sqlparse.state = SQL_IN_QUOTE;
|
||||
AH->sqlparse.quoteChar = qry[pos];
|
||||
AH->sqlparse.backSlash = 0;
|
||||
if (qry[pos] == '$')
|
||||
{
|
||||
/* override the state */
|
||||
AH->sqlparse.state = SQL_IN_DOLLARTAG;
|
||||
/* Used for checking first char of tag */
|
||||
startDT = 1;
|
||||
/* We store the tag for later comparison. */
|
||||
AH->sqlparse.tagBuf = createPQExpBuffer();
|
||||
/* Get leading $ */
|
||||
appendPQExpBufferChar(AH->sqlparse.tagBuf, qry[pos]);
|
||||
}
|
||||
}
|
||||
else if (qry[pos] == '-' && AH->sqlparse.lastChar == '-')
|
||||
AH->sqlparse.state = SQL_IN_SQL_COMMENT;
|
||||
else if (qry[pos] == '*' && AH->sqlparse.lastChar == '/')
|
||||
AH->sqlparse.state = SQL_IN_EXT_COMMENT;
|
||||
else if (qry[pos] == '(')
|
||||
AH->sqlparse.braceDepth++;
|
||||
else if (qry[pos] == ')')
|
||||
AH->sqlparse.braceDepth--;
|
||||
|
||||
AH->sqlparse.lastChar = qry[pos];
|
||||
}
|
||||
break;
|
||||
|
||||
case SQL_IN_DOLLARTAG:
|
||||
case SQL_SCAN: /* Default state == 0, set in _allocAH */
|
||||
if (*qry == ';' && AH->sqlparse.braceDepth == 0)
|
||||
{
|
||||
/*
|
||||
* We've found the end of a statement. Send it and
|
||||
* reset the buffer.
|
||||
*/
|
||||
appendPQExpBufferChar(AH->sqlBuf, ';'); /* inessential */
|
||||
ExecuteSqlCommand(AH, AH->sqlBuf,
|
||||
"could not execute query");
|
||||
resetPQExpBuffer(AH->sqlBuf);
|
||||
AH->sqlparse.lastChar = '\0';
|
||||
|
||||
/*
|
||||
* Like a quote, we look for a closing char *but* we
|
||||
* only allow a very limited set of contained chars,
|
||||
* and no escape chars. If invalid chars are found, we
|
||||
* abort tag processing.
|
||||
* Remove any following newlines - so that
|
||||
* embedded COPY commands don't get a starting newline.
|
||||
*/
|
||||
qry++;
|
||||
while (qry < eos && *qry == '\n')
|
||||
qry++;
|
||||
|
||||
if (qry[pos] == '$')
|
||||
{
|
||||
/* fprintf(stderr,"[endquote]\n"); */
|
||||
/* Get trailing $ */
|
||||
appendPQExpBufferChar(AH->sqlparse.tagBuf, qry[pos]);
|
||||
AH->sqlparse.state = SQL_IN_DOLLARQUOTE;
|
||||
}
|
||||
/* We've finished one line, so exit */
|
||||
return qry;
|
||||
}
|
||||
else if (*qry == '\'')
|
||||
{
|
||||
if (AH->sqlparse.lastChar == 'E')
|
||||
AH->sqlparse.state = SQL_IN_E_QUOTE;
|
||||
else
|
||||
{
|
||||
if (_isDQChar(qry[pos], startDT))
|
||||
{
|
||||
/* Valid, so add */
|
||||
appendPQExpBufferChar(AH->sqlparse.tagBuf, qry[pos]);
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* Jump back to 'scan' state, we're not really
|
||||
* in a tag, and valid tag chars do not
|
||||
* include the various chars we look for in
|
||||
* this state machine, so it's safe to just
|
||||
* jump from this state back to SCAN. We set
|
||||
* consumed = 0 so that this char gets
|
||||
* rescanned in new state.
|
||||
*/
|
||||
destroyPQExpBuffer(AH->sqlparse.tagBuf);
|
||||
AH->sqlparse.state = SQL_SCAN;
|
||||
consumed = 0;
|
||||
}
|
||||
}
|
||||
startDT = 0;
|
||||
break;
|
||||
AH->sqlparse.state = SQL_IN_SINGLE_QUOTE;
|
||||
AH->sqlparse.backSlash = false;
|
||||
}
|
||||
else if (*qry == '"')
|
||||
{
|
||||
AH->sqlparse.state = SQL_IN_DOUBLE_QUOTE;
|
||||
}
|
||||
/*
|
||||
* Look for dollar-quotes. We make the assumption that
|
||||
* $-quotes will not have an ident character just
|
||||
* before them in pg_dump output. XXX is this
|
||||
* good enough?
|
||||
*/
|
||||
else if (*qry == '$' && !_isIdentChar(AH->sqlparse.lastChar))
|
||||
{
|
||||
AH->sqlparse.state = SQL_IN_DOLLAR_TAG;
|
||||
/* initialize separate buffer with possible tag */
|
||||
if (AH->sqlparse.tagBuf == NULL)
|
||||
AH->sqlparse.tagBuf = createPQExpBuffer();
|
||||
else
|
||||
resetPQExpBuffer(AH->sqlparse.tagBuf);
|
||||
appendPQExpBufferChar(AH->sqlparse.tagBuf, *qry);
|
||||
}
|
||||
else if (*qry == '-' && AH->sqlparse.lastChar == '-')
|
||||
AH->sqlparse.state = SQL_IN_SQL_COMMENT;
|
||||
else if (*qry == '*' && AH->sqlparse.lastChar == '/')
|
||||
AH->sqlparse.state = SQL_IN_EXT_COMMENT;
|
||||
else if (*qry == '(')
|
||||
AH->sqlparse.braceDepth++;
|
||||
else if (*qry == ')')
|
||||
AH->sqlparse.braceDepth--;
|
||||
break;
|
||||
|
||||
case SQL_IN_SQL_COMMENT:
|
||||
if (*qry == '\n')
|
||||
AH->sqlparse.state = SQL_SCAN;
|
||||
break;
|
||||
|
||||
case SQL_IN_DOLLARQUOTE:
|
||||
case SQL_IN_EXT_COMMENT:
|
||||
/*
|
||||
* This isn't fully correct, because we don't account for
|
||||
* nested slash-stars, but pg_dump never emits such.
|
||||
*/
|
||||
if (AH->sqlparse.lastChar == '*' && *qry == '/')
|
||||
AH->sqlparse.state = SQL_SCAN;
|
||||
break;
|
||||
|
||||
case SQL_IN_SINGLE_QUOTE:
|
||||
/* We needn't handle '' specially */
|
||||
if (*qry == '\'' && !AH->sqlparse.backSlash)
|
||||
AH->sqlparse.state = SQL_SCAN;
|
||||
else if (*qry == '\\')
|
||||
AH->sqlparse.backSlash = !AH->sqlparse.backSlash;
|
||||
else
|
||||
AH->sqlparse.backSlash = false;
|
||||
break;
|
||||
|
||||
case SQL_IN_E_QUOTE:
|
||||
/*
|
||||
* Eventually we will need to handle '' specially, because
|
||||
* after E'...''... we should still be in E_QUOTE state.
|
||||
*
|
||||
* XXX problem: how do we tell whether the dump was made
|
||||
* by a version that thinks backslashes aren't special
|
||||
* in non-E literals??
|
||||
*/
|
||||
if (*qry == '\'' && !AH->sqlparse.backSlash)
|
||||
AH->sqlparse.state = SQL_SCAN;
|
||||
else if (*qry == '\\')
|
||||
AH->sqlparse.backSlash = !AH->sqlparse.backSlash;
|
||||
else
|
||||
AH->sqlparse.backSlash = false;
|
||||
break;
|
||||
|
||||
case SQL_IN_DOUBLE_QUOTE:
|
||||
/* We needn't handle "" specially */
|
||||
if (*qry == '"')
|
||||
AH->sqlparse.state = SQL_SCAN;
|
||||
break;
|
||||
|
||||
case SQL_IN_DOLLAR_TAG:
|
||||
if (*qry == '$')
|
||||
{
|
||||
/* Do not add the closing $ to tagBuf */
|
||||
AH->sqlparse.state = SQL_IN_DOLLAR_QUOTE;
|
||||
AH->sqlparse.minTagEndPos = AH->sqlBuf->len + AH->sqlparse.tagBuf->len + 1;
|
||||
}
|
||||
else if (_isDQChar(*qry, (AH->sqlparse.tagBuf->len == 1)))
|
||||
{
|
||||
/* Valid, so add to tag */
|
||||
appendPQExpBufferChar(AH->sqlparse.tagBuf, *qry);
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* Comparing the entire string backwards each time is
|
||||
* NOT efficient, but dollar quotes in pg_dump are
|
||||
* small and the code is a lot simpler.
|
||||
* Ooops, we're not really in a dollar-tag. Valid tag
|
||||
* chars do not include the various chars we look for
|
||||
* in this state machine, so it's safe to just jump
|
||||
* from this state back to SCAN. We have to back up
|
||||
* the qry pointer so that the current character gets
|
||||
* rescanned in SCAN state; and then "continue" so that
|
||||
* the bottom-of-loop actions aren't done yet.
|
||||
*/
|
||||
sqlPtr = AH->sqlBuf->data + AH->sqlBuf->len - AH->sqlparse.tagBuf->len;
|
||||
AH->sqlparse.state = SQL_SCAN;
|
||||
qry--;
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
|
||||
if (strncmp(AH->sqlparse.tagBuf->data, sqlPtr, AH->sqlparse.tagBuf->len) == 0)
|
||||
{
|
||||
/* End of $-quote */
|
||||
AH->sqlparse.state = SQL_SCAN;
|
||||
destroyPQExpBuffer(AH->sqlparse.tagBuf);
|
||||
}
|
||||
break;
|
||||
case SQL_IN_DOLLAR_QUOTE:
|
||||
/*
|
||||
* If we are at a $, see whether what precedes it matches
|
||||
* tagBuf. (Remember that the trailing $ of the tag was
|
||||
* not added to tagBuf.) However, don't compare until we
|
||||
* have enough data to be a possible match --- this is
|
||||
* needed to avoid false match on '$a$a$...'
|
||||
*/
|
||||
if (*qry == '$' &&
|
||||
AH->sqlBuf->len >= AH->sqlparse.minTagEndPos &&
|
||||
strcmp(AH->sqlparse.tagBuf->data,
|
||||
AH->sqlBuf->data + AH->sqlBuf->len - AH->sqlparse.tagBuf->len) == 0)
|
||||
AH->sqlparse.state = SQL_SCAN;
|
||||
break;
|
||||
}
|
||||
|
||||
case SQL_IN_SQL_COMMENT:
|
||||
if (qry[pos] == '\n')
|
||||
AH->sqlparse.state = SQL_SCAN;
|
||||
break;
|
||||
|
||||
case SQL_IN_EXT_COMMENT:
|
||||
if (AH->sqlparse.lastChar == '*' && qry[pos] == '/')
|
||||
AH->sqlparse.state = SQL_SCAN;
|
||||
break;
|
||||
|
||||
case SQL_IN_QUOTE:
|
||||
|
||||
if (!AH->sqlparse.backSlash && AH->sqlparse.quoteChar == qry[pos])
|
||||
{
|
||||
/* fprintf(stderr,"[endquote]\n"); */
|
||||
AH->sqlparse.state = SQL_SCAN;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (qry[pos] == '\\')
|
||||
{
|
||||
if (AH->sqlparse.lastChar == '\\')
|
||||
AH->sqlparse.backSlash = !AH->sqlparse.backSlash;
|
||||
else
|
||||
AH->sqlparse.backSlash = 1;
|
||||
}
|
||||
else
|
||||
AH->sqlparse.backSlash = 0;
|
||||
}
|
||||
break;
|
||||
|
||||
}
|
||||
|
||||
} while (consumed == 0);
|
||||
|
||||
AH->sqlparse.lastChar = qry[pos];
|
||||
/* fprintf(stderr, "\n"); */
|
||||
appendPQExpBufferChar(AH->sqlBuf, *qry);
|
||||
AH->sqlparse.lastChar = *qry;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we get here, we've processed entire string with no complete SQL
|
||||
* If we get here, we've processed entire bufferload with no complete SQL
|
||||
* stmt
|
||||
*/
|
||||
return eos;
|
||||
@ -673,7 +645,7 @@ CommitTransaction(ArchiveHandle *AH)
|
||||
destroyPQExpBuffer(qry);
|
||||
}
|
||||
|
||||
static int
|
||||
static bool
|
||||
_isIdentChar(unsigned char c)
|
||||
{
|
||||
if ((c >= 'a' && c <= 'z')
|
||||
@ -684,22 +656,22 @@ _isIdentChar(unsigned char c)
|
||||
|| (c >= (unsigned char) '\200') /* no need to check <=
|
||||
* \377 */
|
||||
)
|
||||
return 1;
|
||||
return true;
|
||||
else
|
||||
return 0;
|
||||
return false;
|
||||
}
|
||||
|
||||
static int
|
||||
_isDQChar(unsigned char c, int atStart)
|
||||
static bool
|
||||
_isDQChar(unsigned char c, bool atStart)
|
||||
{
|
||||
if ((c >= 'a' && c <= 'z')
|
||||
|| (c >= 'A' && c <= 'Z')
|
||||
|| (c == '_')
|
||||
|| (atStart == 0 && c >= '0' && c <= '9')
|
||||
|| (!atStart && c >= '0' && c <= '9')
|
||||
|| (c >= (unsigned char) '\200') /* no need to check <=
|
||||
* \377 */
|
||||
)
|
||||
return 1;
|
||||
return true;
|
||||
else
|
||||
return 0;
|
||||
return false;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user