From c7d1a8d428eb4270e3d51ecbf61b88bd96dc0f6a Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sun, 11 Sep 2005 04:10:25 +0000 Subject: [PATCH] Fix some corner-case bugs in _sendSQLLine's parsing of SQL commands > found in a pg_dump archive. It had problems with dollar-quote tags broken across bufferload boundaries (this may explain bug report from Rod Taylor), also with dollar-quote literals of the form $a$a$..., and was also confused about the rules for backslash in double quoted identifiers (ie, they're not special). Also put in placeholder support for E'...' literals --- this will need more work later. --- src/bin/pg_dump/pg_backup_archiver.h | 28 ++- src/bin/pg_dump/pg_backup_db.c | 360 ++++++++++++--------------- 2 files changed, 181 insertions(+), 207 deletions(-) diff --git a/src/bin/pg_dump/pg_backup_archiver.h b/src/bin/pg_dump/pg_backup_archiver.h index d9a09e3285..e7a05283d7 100644 --- a/src/bin/pg_dump/pg_backup_archiver.h +++ b/src/bin/pg_dump/pg_backup_archiver.h @@ -17,7 +17,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/bin/pg_dump/pg_backup_archiver.h,v 1.66 2005/07/27 12:44:10 neilc Exp $ + * $PostgreSQL: pgsql/src/bin/pg_dump/pg_backup_archiver.h,v 1.67 2005/09/11 04:10:25 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -136,22 +136,24 @@ typedef struct _outputContext typedef enum { - SQL_SCAN = 0, - SQL_IN_SQL_COMMENT, - SQL_IN_EXT_COMMENT, - SQL_IN_QUOTE, - SQL_IN_DOLLARTAG, - SQL_IN_DOLLARQUOTE + SQL_SCAN = 0, /* normal */ + SQL_IN_SQL_COMMENT, /* -- comment */ + SQL_IN_EXT_COMMENT, /* slash-star comment */ + SQL_IN_SINGLE_QUOTE, /* '...' literal */ + SQL_IN_E_QUOTE, /* E'...' literal */ + SQL_IN_DOUBLE_QUOTE, /* "..." identifier */ + SQL_IN_DOLLAR_TAG, /* possible dollar-quote starting tag */ + SQL_IN_DOLLAR_QUOTE /* body of dollar quote */ } sqlparseState; typedef struct { - int backSlash; - sqlparseState state; - char lastChar; - char quoteChar; - int braceDepth; - PQExpBuffer tagBuf; + sqlparseState state; /* see above */ + char lastChar; /* preceding char, or '\0' initially */ + bool backSlash; /* next char is backslash quoted? */ + int braceDepth; /* parenthesis nesting depth */ + PQExpBuffer tagBuf; /* dollar quote tag (NULL if not created) */ + int minTagEndPos; /* first possible end position of $-quote */ } sqlparseInfo; typedef enum diff --git a/src/bin/pg_dump/pg_backup_db.c b/src/bin/pg_dump/pg_backup_db.c index 66833ebdb1..2065f59a4d 100644 --- a/src/bin/pg_dump/pg_backup_db.c +++ b/src/bin/pg_dump/pg_backup_db.c @@ -5,7 +5,7 @@ * Implements the basic DB functions used by the archiver. * * IDENTIFICATION - * $PostgreSQL: pgsql/src/bin/pg_dump/pg_backup_db.c,v 1.64 2005/07/27 05:14:12 neilc Exp $ + * $PostgreSQL: pgsql/src/bin/pg_dump/pg_backup_db.c,v 1.65 2005/09/11 04:10:25 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -36,8 +36,8 @@ static void notice_processor(void *arg, const char *message); static char *_sendSQLLine(ArchiveHandle *AH, char *qry, char *eos); static char *_sendCopyLine(ArchiveHandle *AH, char *qry, char *eos); -static int _isIdentChar(unsigned char c); -static int _isDQChar(unsigned char c, int atStart); +static bool _isIdentChar(unsigned char c); +static bool _isDQChar(unsigned char c, bool atStart); #define DB_MAX_ERR_STMT 128 @@ -410,215 +410,187 @@ _sendCopyLine(ArchiveHandle *AH, char *qry, char *eos) } /* - * Used by ExecuteSqlCommandBuf to send one buffered line of SQL (not data for the copy command). + * Used by ExecuteSqlCommandBuf to send one buffered line of SQL + * (not data for the copy command). */ static char * _sendSQLLine(ArchiveHandle *AH, char *qry, char *eos) { - int pos = 0; /* Current position */ - char *sqlPtr; - int consumed; - int startDT = 0; - /* * The following is a mini state machine to assess the end of an SQL * statement. It really only needs to parse good SQL, or at least * that's the theory... End-of-statement is assumed to be an unquoted, - * un commented semi-colon. + * un-commented semi-colon that's not within any parentheses. + * + * Note: the input can be split into bufferloads at arbitrary boundaries. + * Therefore all state must be kept in AH->sqlparse, not in local + * variables of this routine. We assume that AH->sqlparse was + * filled with zeroes when created. */ - - /* - * fprintf(stderr, "Buffer at start is: '%s'\n\n", AH->sqlBuf->data); - */ - - for (pos = 0; pos < (eos - qry); pos++) + for (; qry < eos; qry++) { - appendPQExpBufferChar(AH->sqlBuf, qry[pos]); - /* fprintf(stderr, " %c",qry[pos]); */ - - /* Loop until character consumed */ - do + switch (AH->sqlparse.state) { - /* - * If a character needs to be scanned in a different state, - * consumed can be set to 0 to avoid advancing. Care must be - * taken to ensure internal state is not damaged. - */ - consumed = 1; - - switch (AH->sqlparse.state) - { - - case SQL_SCAN: /* Default state == 0, set in _allocAH */ - if (qry[pos] == ';' && AH->sqlparse.braceDepth == 0) - { - /* - * We've got the end of a statement. Send It & - * reset the buffer. - */ - - /* - * fprintf(stderr, " sending: '%s'\n\n", - * AH->sqlBuf->data); - */ - ExecuteSqlCommand(AH, AH->sqlBuf, "could not execute query"); - resetPQExpBuffer(AH->sqlBuf); - AH->sqlparse.lastChar = '\0'; - - /* - * Remove any following newlines - so that - * embedded COPY commands don't get a starting - * newline. - */ - pos++; - for (; pos < (eos - qry) && qry[pos] == '\n'; pos++); - - /* We've got our line, so exit */ - return qry + pos; - } - else - { - /* - * Look for normal boring quote chars, or - * dollar-quotes. We make the assumption that - * $-quotes will not have an ident character - * before them in all pg_dump output. - */ - if (qry[pos] == '"' - || qry[pos] == '\'' - || (qry[pos] == '$' && _isIdentChar(AH->sqlparse.lastChar) == 0) - ) - { - /* fprintf(stderr,"[startquote]\n"); */ - AH->sqlparse.state = SQL_IN_QUOTE; - AH->sqlparse.quoteChar = qry[pos]; - AH->sqlparse.backSlash = 0; - if (qry[pos] == '$') - { - /* override the state */ - AH->sqlparse.state = SQL_IN_DOLLARTAG; - /* Used for checking first char of tag */ - startDT = 1; - /* We store the tag for later comparison. */ - AH->sqlparse.tagBuf = createPQExpBuffer(); - /* Get leading $ */ - appendPQExpBufferChar(AH->sqlparse.tagBuf, qry[pos]); - } - } - else if (qry[pos] == '-' && AH->sqlparse.lastChar == '-') - AH->sqlparse.state = SQL_IN_SQL_COMMENT; - else if (qry[pos] == '*' && AH->sqlparse.lastChar == '/') - AH->sqlparse.state = SQL_IN_EXT_COMMENT; - else if (qry[pos] == '(') - AH->sqlparse.braceDepth++; - else if (qry[pos] == ')') - AH->sqlparse.braceDepth--; - - AH->sqlparse.lastChar = qry[pos]; - } - break; - - case SQL_IN_DOLLARTAG: + case SQL_SCAN: /* Default state == 0, set in _allocAH */ + if (*qry == ';' && AH->sqlparse.braceDepth == 0) + { + /* + * We've found the end of a statement. Send it and + * reset the buffer. + */ + appendPQExpBufferChar(AH->sqlBuf, ';'); /* inessential */ + ExecuteSqlCommand(AH, AH->sqlBuf, + "could not execute query"); + resetPQExpBuffer(AH->sqlBuf); + AH->sqlparse.lastChar = '\0'; /* - * Like a quote, we look for a closing char *but* we - * only allow a very limited set of contained chars, - * and no escape chars. If invalid chars are found, we - * abort tag processing. + * Remove any following newlines - so that + * embedded COPY commands don't get a starting newline. */ + qry++; + while (qry < eos && *qry == '\n') + qry++; - if (qry[pos] == '$') - { - /* fprintf(stderr,"[endquote]\n"); */ - /* Get trailing $ */ - appendPQExpBufferChar(AH->sqlparse.tagBuf, qry[pos]); - AH->sqlparse.state = SQL_IN_DOLLARQUOTE; - } + /* We've finished one line, so exit */ + return qry; + } + else if (*qry == '\'') + { + if (AH->sqlparse.lastChar == 'E') + AH->sqlparse.state = SQL_IN_E_QUOTE; else - { - if (_isDQChar(qry[pos], startDT)) - { - /* Valid, so add */ - appendPQExpBufferChar(AH->sqlparse.tagBuf, qry[pos]); - } - else - { - /* - * Jump back to 'scan' state, we're not really - * in a tag, and valid tag chars do not - * include the various chars we look for in - * this state machine, so it's safe to just - * jump from this state back to SCAN. We set - * consumed = 0 so that this char gets - * rescanned in new state. - */ - destroyPQExpBuffer(AH->sqlparse.tagBuf); - AH->sqlparse.state = SQL_SCAN; - consumed = 0; - } - } - startDT = 0; - break; + AH->sqlparse.state = SQL_IN_SINGLE_QUOTE; + AH->sqlparse.backSlash = false; + } + else if (*qry == '"') + { + AH->sqlparse.state = SQL_IN_DOUBLE_QUOTE; + } + /* + * Look for dollar-quotes. We make the assumption that + * $-quotes will not have an ident character just + * before them in pg_dump output. XXX is this + * good enough? + */ + else if (*qry == '$' && !_isIdentChar(AH->sqlparse.lastChar)) + { + AH->sqlparse.state = SQL_IN_DOLLAR_TAG; + /* initialize separate buffer with possible tag */ + if (AH->sqlparse.tagBuf == NULL) + AH->sqlparse.tagBuf = createPQExpBuffer(); + else + resetPQExpBuffer(AH->sqlparse.tagBuf); + appendPQExpBufferChar(AH->sqlparse.tagBuf, *qry); + } + else if (*qry == '-' && AH->sqlparse.lastChar == '-') + AH->sqlparse.state = SQL_IN_SQL_COMMENT; + else if (*qry == '*' && AH->sqlparse.lastChar == '/') + AH->sqlparse.state = SQL_IN_EXT_COMMENT; + else if (*qry == '(') + AH->sqlparse.braceDepth++; + else if (*qry == ')') + AH->sqlparse.braceDepth--; + break; + case SQL_IN_SQL_COMMENT: + if (*qry == '\n') + AH->sqlparse.state = SQL_SCAN; + break; - case SQL_IN_DOLLARQUOTE: + case SQL_IN_EXT_COMMENT: + /* + * This isn't fully correct, because we don't account for + * nested slash-stars, but pg_dump never emits such. + */ + if (AH->sqlparse.lastChar == '*' && *qry == '/') + AH->sqlparse.state = SQL_SCAN; + break; + case SQL_IN_SINGLE_QUOTE: + /* We needn't handle '' specially */ + if (*qry == '\'' && !AH->sqlparse.backSlash) + AH->sqlparse.state = SQL_SCAN; + else if (*qry == '\\') + AH->sqlparse.backSlash = !AH->sqlparse.backSlash; + else + AH->sqlparse.backSlash = false; + break; + + case SQL_IN_E_QUOTE: + /* + * Eventually we will need to handle '' specially, because + * after E'...''... we should still be in E_QUOTE state. + * + * XXX problem: how do we tell whether the dump was made + * by a version that thinks backslashes aren't special + * in non-E literals?? + */ + if (*qry == '\'' && !AH->sqlparse.backSlash) + AH->sqlparse.state = SQL_SCAN; + else if (*qry == '\\') + AH->sqlparse.backSlash = !AH->sqlparse.backSlash; + else + AH->sqlparse.backSlash = false; + break; + + case SQL_IN_DOUBLE_QUOTE: + /* We needn't handle "" specially */ + if (*qry == '"') + AH->sqlparse.state = SQL_SCAN; + break; + + case SQL_IN_DOLLAR_TAG: + if (*qry == '$') + { + /* Do not add the closing $ to tagBuf */ + AH->sqlparse.state = SQL_IN_DOLLAR_QUOTE; + AH->sqlparse.minTagEndPos = AH->sqlBuf->len + AH->sqlparse.tagBuf->len + 1; + } + else if (_isDQChar(*qry, (AH->sqlparse.tagBuf->len == 1))) + { + /* Valid, so add to tag */ + appendPQExpBufferChar(AH->sqlparse.tagBuf, *qry); + } + else + { /* - * Comparing the entire string backwards each time is - * NOT efficient, but dollar quotes in pg_dump are - * small and the code is a lot simpler. + * Ooops, we're not really in a dollar-tag. Valid tag + * chars do not include the various chars we look for + * in this state machine, so it's safe to just jump + * from this state back to SCAN. We have to back up + * the qry pointer so that the current character gets + * rescanned in SCAN state; and then "continue" so that + * the bottom-of-loop actions aren't done yet. */ - sqlPtr = AH->sqlBuf->data + AH->sqlBuf->len - AH->sqlparse.tagBuf->len; + AH->sqlparse.state = SQL_SCAN; + qry--; + continue; + } + break; - if (strncmp(AH->sqlparse.tagBuf->data, sqlPtr, AH->sqlparse.tagBuf->len) == 0) - { - /* End of $-quote */ - AH->sqlparse.state = SQL_SCAN; - destroyPQExpBuffer(AH->sqlparse.tagBuf); - } - break; + case SQL_IN_DOLLAR_QUOTE: + /* + * If we are at a $, see whether what precedes it matches + * tagBuf. (Remember that the trailing $ of the tag was + * not added to tagBuf.) However, don't compare until we + * have enough data to be a possible match --- this is + * needed to avoid false match on '$a$a$...' + */ + if (*qry == '$' && + AH->sqlBuf->len >= AH->sqlparse.minTagEndPos && + strcmp(AH->sqlparse.tagBuf->data, + AH->sqlBuf->data + AH->sqlBuf->len - AH->sqlparse.tagBuf->len) == 0) + AH->sqlparse.state = SQL_SCAN; + break; + } - case SQL_IN_SQL_COMMENT: - if (qry[pos] == '\n') - AH->sqlparse.state = SQL_SCAN; - break; - - case SQL_IN_EXT_COMMENT: - if (AH->sqlparse.lastChar == '*' && qry[pos] == '/') - AH->sqlparse.state = SQL_SCAN; - break; - - case SQL_IN_QUOTE: - - if (!AH->sqlparse.backSlash && AH->sqlparse.quoteChar == qry[pos]) - { - /* fprintf(stderr,"[endquote]\n"); */ - AH->sqlparse.state = SQL_SCAN; - } - else - { - if (qry[pos] == '\\') - { - if (AH->sqlparse.lastChar == '\\') - AH->sqlparse.backSlash = !AH->sqlparse.backSlash; - else - AH->sqlparse.backSlash = 1; - } - else - AH->sqlparse.backSlash = 0; - } - break; - - } - - } while (consumed == 0); - - AH->sqlparse.lastChar = qry[pos]; - /* fprintf(stderr, "\n"); */ + appendPQExpBufferChar(AH->sqlBuf, *qry); + AH->sqlparse.lastChar = *qry; } /* - * If we get here, we've processed entire string with no complete SQL + * If we get here, we've processed entire bufferload with no complete SQL * stmt */ return eos; @@ -673,7 +645,7 @@ CommitTransaction(ArchiveHandle *AH) destroyPQExpBuffer(qry); } -static int +static bool _isIdentChar(unsigned char c) { if ((c >= 'a' && c <= 'z') @@ -684,22 +656,22 @@ _isIdentChar(unsigned char c) || (c >= (unsigned char) '\200') /* no need to check <= * \377 */ ) - return 1; + return true; else - return 0; + return false; } -static int -_isDQChar(unsigned char c, int atStart) +static bool +_isDQChar(unsigned char c, bool atStart) { if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c == '_') - || (atStart == 0 && c >= '0' && c <= '9') + || (!atStart && c >= '0' && c <= '9') || (c >= (unsigned char) '\200') /* no need to check <= * \377 */ ) - return 1; + return true; else - return 0; + return false; }