From 636a939fe54d317099e8574997d96975a54c792b Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 29 Nov 2001 21:02:41 +0000 Subject: [PATCH] Fix array_out's failure to backslash backslashes, per bug# 524. Also, remove brain-dead rule that double quotes are needed if and only if the datatype is pass-by-reference; neither direction of the implication holds water. Instead, examine the actual data string to see if it contains any characters that force us to quote it. Add some documentation about quoting of array values, which was previously explained nowhere AFAICT. --- doc/src/sgml/array.sgml | 34 +++++++++- src/backend/utils/adt/arrayfuncs.c | 96 +++++++++++++++++----------- src/test/regress/expected/arrays.out | 38 +++++------ 3 files changed, 109 insertions(+), 59 deletions(-) diff --git a/doc/src/sgml/array.sgml b/doc/src/sgml/array.sgml index 99cfde85f2..1696d61c25 100644 --- a/doc/src/sgml/array.sgml +++ b/doc/src/sgml/array.sgml @@ -1,4 +1,4 @@ - + Arrays @@ -248,4 +248,36 @@ SELECT * FROM sal_emp WHERE pay_by_quarter **= 10000; + + Quoting array elements. + + As shown above, when writing an array literal value you may write double + quotes around any individual array + element. You must do so if the element value would otherwise + confuse the array-value parser. For example, elements containing curly + braces, commas, double quotes, backslashes, or white space must be + double-quoted. To put a double quote or backslash in an array element + value, precede it with a backslash. + + + + + + Remember that what you write in an SQL query will first be interpreted + as a string literal, and then as an array. This doubles the number of + backslashes you need. For example, to insert a text array + value containing a backslash and a double quote, you'd need to write + +INSERT ... VALUES ('{"\\\\","\\""}'); + + The string-literal processor removes one level of backslashes, so that + what arrives at the array-value parser looks like {"\\","\""}. + In turn, the strings fed to the text datatype's input routine + become \ and " respectively. (If we were working + with a datatype whose input routine also treated backslashes specially, + bytea for example, we might need as many as eight backslashes + in the query to get one backslash into the stored array element.) + + + diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c index f668784714..0cfa371c99 100644 --- a/src/backend/utils/adt/arrayfuncs.c +++ b/src/backend/utils/adt/arrayfuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/adt/arrayfuncs.c,v 1.71 2001/10/25 05:49:43 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/adt/arrayfuncs.c,v 1.72 2001/11/29 21:02:41 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -346,6 +346,7 @@ ArrayCount(char *str, int *dim, int typdelim) * If element type is pass-by-ref, the Datums point to palloc'd values. * *nbytes is set to the amount of data space needed for the array, * including alignment padding but not including array header overhead. + * CAUTION: the contents of "arrayStr" may be modified! *--------------------------------------------------------------------------- */ static Datum * @@ -564,16 +565,13 @@ array_out(PG_FUNCTION_ARGS) char *p, *tmp, *retval, - **values, - delim[2]; + **values; + bool *needquotes; int nitems, overall_length, i, j, k, -#ifndef TCL_ARRAYS - l, -#endif indx[MAXDIM]; int ndim, *dim; @@ -581,26 +579,29 @@ array_out(PG_FUNCTION_ARGS) system_cache_lookup(element_type, false, &typlen, &typbyval, &typdelim, &typelem, &typoutput, &typalign); fmgr_info(typoutput, &outputproc); - sprintf(delim, "%c", typdelim); ndim = ARR_NDIM(v); dim = ARR_DIMS(v); nitems = ArrayGetNItems(ndim, dim); if (nitems == 0) { - retval = (char *) palloc(3); - retval[0] = '{'; - retval[1] = '}'; - retval[2] = '\0'; + retval = pstrdup("{}"); PG_RETURN_CSTRING(retval); } + /* + * Convert all values to string form, count total space needed + * (including any overhead such as escaping backslashes), + * and detect whether each item needs double quotes. + */ + values = (char **) palloc(nitems * sizeof(char *)); + needquotes = (bool *) palloc(nitems * sizeof(bool)); p = ARR_DATA_PTR(v); overall_length = 1; /* [TRH] don't forget to count \0 at end. */ - values = (char **) palloc(nitems * sizeof(char *)); for (i = 0; i < nitems; i++) { Datum itemvalue; + bool nq; itemvalue = fetch_att(p, typbyval, typlen); values[i] = DatumGetCString(FunctionCall3(&outputproc, @@ -612,20 +613,32 @@ array_out(PG_FUNCTION_ARGS) else p += INTALIGN(*(int32 *) p); - /* - * For the pair of double quotes - */ - if (!typbyval) - overall_length += 2; - + /* count data plus backslashes; detect chars needing quotes */ + nq = (values[i][0] == '\0'); /* force quotes for empty string */ for (tmp = values[i]; *tmp; tmp++) { + char ch = *tmp; + overall_length += 1; + if (ch == '"' || ch == '\\') + { + nq = true; #ifndef TCL_ARRAYS - if (*tmp == '"') overall_length += 1; #endif + } + else if (ch == '{' || ch == '}' || ch == typdelim || + isspace((unsigned char) ch)) + nq = true; } + + needquotes[i] = nq; + + /* Count the pair of double quotes, if needed */ + if (nq) + overall_length += 2; + + /* and the comma */ overall_length += 1; } @@ -634,41 +647,41 @@ array_out(PG_FUNCTION_ARGS) */ for (i = j = 0, k = 1; i < ndim; k *= dim[i++], j += k); - p = (char *) palloc(overall_length + 2 * j); - retval = p; + retval = (char *) palloc(overall_length + 2 * j); + p = retval; - strcpy(p, "{"); +#define APPENDSTR(str) (strcpy(p, (str)), p += strlen(p)) +#define APPENDCHAR(ch) (*p++ = (ch), *p = '\0') + + APPENDCHAR('{'); for (i = 0; i < ndim; indx[i++] = 0); j = 0; k = 0; do { for (i = j; i < ndim - 1; i++) - strcat(p, "{"); + APPENDCHAR('{'); - /* - * Surround anything that is not passed by value in double quotes. - * See above for more details. - */ - if (!typbyval) + if (needquotes[k]) { - strcat(p, "\""); + APPENDCHAR('"'); #ifndef TCL_ARRAYS - l = strlen(p); for (tmp = values[k]; *tmp; tmp++) { - if (*tmp == '"') - p[l++] = '\\'; - p[l++] = *tmp; + char ch = *tmp; + + if (ch == '"' || ch == '\\') + *p++ = '\\'; + *p++ = ch; } - p[l] = '\0'; + *p = '\0'; #else - strcat(p, values[k]); + APPENDSTR(values[k]); #endif - strcat(p, "\""); + APPENDCHAR('"'); } else - strcat(p, values[k]); + APPENDSTR(values[k]); pfree(values[k++]); for (i = ndim - 1; i >= 0; i--) @@ -676,16 +689,21 @@ array_out(PG_FUNCTION_ARGS) indx[i] = (indx[i] + 1) % dim[i]; if (indx[i]) { - strcat(p, delim); + APPENDCHAR(typdelim); break; } else - strcat(p, "}"); + APPENDCHAR('}'); } j = i; } while (j != -1); +#undef APPENDSTR +#undef APPENDCHAR + pfree(values); + pfree(needquotes); + PG_RETURN_CSTRING(retval); } diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out index 74c1008d90..f729fe1ed4 100644 --- a/src/test/regress/expected/arrays.out +++ b/src/test/regress/expected/arrays.out @@ -28,11 +28,11 @@ INSERT INTO arrtest (a, b[2][2][1], c, d, e, f, g) INSERT INTO arrtest (a, b[1][2][2], c, d[2][1]) VALUES ('{}', '{3,4}', '{foo,bar}', '{bar,foo}'); SELECT * FROM arrtest; - a | b | c | d | e | f | g --------------+-----------------+---------------+-------------------+---------------+-------------------+----------------- - {1,2,3,4,5} | {{{0,0},{1,2}}} | {} | {} | | {} | {} - {11,12,23} | {{3,4},{4,5}} | {"foobar"} | {{"elt1","elt2"}} | {"3.4","6.7"} | {"abc ","abcde"} | {"abc","abcde"} - {} | {3,4} | {"foo","bar"} | {"bar","foo"} | | | + a | b | c | d | e | f | g +-------------+-----------------+-----------+---------------+-----------+-----------------+------------- + {1,2,3,4,5} | {{{0,0},{1,2}}} | {} | {} | | {} | {} + {11,12,23} | {{3,4},{4,5}} | {foobar} | {{elt1,elt2}} | {3.4,6.7} | {"abc ",abcde} | {abc,abcde} + {} | {3,4} | {foo,bar} | {bar,foo} | | | (3 rows) SELECT arrtest.a[1], @@ -62,11 +62,11 @@ SELECT a[1:3], c[1:2], d[1:1][1:2] FROM arrtest; - a | b | c | d -------------+-----------------+---------------+------------------- - {1,2,3} | {{{0,0},{1,2}}} | | - {11,12,23} | | {"foobar"} | {{"elt1","elt2"}} - | | {"foo","bar"} | + a | b | c | d +------------+-----------------+-----------+--------------- + {1,2,3} | {{{0,0},{1,2}}} | | + {11,12,23} | | {foobar} | {{elt1,elt2}} + | | {foo,bar} | (3 rows) SELECT array_dims(a) AS a,array_dims(b) AS b,array_dims(c) AS c @@ -98,11 +98,11 @@ UPDATE arrtest SET c[2:2] = '{"new_word"}' WHERE array_dims(c) is not null; SELECT a,b,c FROM arrtest; - a | b | c ----------------+-----------------------+----------------------- + a | b | c +---------------+-----------------------+------------------- {16,25,3,4,5} | {{{113,142},{1,147}}} | {} - {} | {3,4} | {"foo","new_word"} - {16,25,23} | {{3,4},{4,5}} | {"foobar","new_word"} + {} | {3,4} | {foo,new_word} + {16,25,23} | {{3,4},{4,5}} | {foobar,new_word} (3 rows) SELECT a[1:3], @@ -110,10 +110,10 @@ SELECT a[1:3], c[1:2], d[1:1][2:2] FROM arrtest; - a | b | c | d -------------+-----------------------+-----------------------+------------ - {16,25,3} | {{{113,142},{1,147}}} | | - | | {"foo","new_word"} | - {16,25,23} | | {"foobar","new_word"} | {{"elt2"}} + a | b | c | d +------------+-----------------------+-------------------+---------- + {16,25,3} | {{{113,142},{1,147}}} | | + | | {foo,new_word} | + {16,25,23} | | {foobar,new_word} | {{elt2}} (3 rows)