pg_clean_ascii(): escape bytes rather than lose them

Rather than replace each unprintable byte with a '?' character, replace
it with a hex escape instead. The API now allocates a copy rather than
modifying the input in place.

Author: Jacob Champion <jchampion@timescale.com>
Discussion: https://www.postgresql.org/message-id/CAAWbhmgsvHrH9wLU2kYc3pOi1KSenHSLAHBbCVmmddW6-mc_=w@mail.gmail.com
This commit is contained in:
Peter Eisentraut 2022-09-13 16:10:44 +02:00
parent da5d4ea5aa
commit 45b1a67a0f
4 changed files with 67 additions and 19 deletions

View File

@ -2280,11 +2280,7 @@ retry1:
*/ */
if (strcmp(nameptr, "application_name") == 0) if (strcmp(nameptr, "application_name") == 0)
{ {
char *tmp_app_name = pstrdup(valptr); port->application_name = pg_clean_ascii(valptr, 0);
pg_clean_ascii(tmp_app_name);
port->application_name = tmp_app_name;
} }
} }
offset = valoffset + strlen(valptr) + 1; offset = valoffset + strlen(valptr) + 1;

View File

@ -12921,9 +12921,18 @@ assign_maintenance_io_concurrency(int newval, void *extra)
/*
 * Check hook for application_name.
 *
 * Replaces *newval with a copy in which pg_clean_ascii() has escaped every
 * byte that is not printable ASCII.  Returns false, leaving *newval
 * untouched, if either of the two allocations fails.
 */
static bool
check_application_name(char **newval, void **extra, GucSource source)
{
	char	   *clean;
	char	   *ret;

	/* Only allow clean ASCII chars in the application name */
	clean = pg_clean_ascii(*newval, MCXT_ALLOC_NO_OOM);
	if (!clean)
		return false;

	/*
	 * The cleaned string is only an intermediate: the value handed back
	 * through *newval comes from guc_strdup().  Release the intermediate on
	 * both the success and the failure path so it is not leaked.
	 */
	ret = guc_strdup(WARNING, clean);
	pfree(clean);
	if (!ret)
		return false;

	/*
	 * NOTE(review): the string previously referenced by *newval is not
	 * released here; confirm whether this hook or its caller owns it.
	 */
	*newval = ret;
	return true;
}
@ -12937,9 +12946,18 @@ assign_application_name(const char *newval, void *extra)
/*
 * Check hook for cluster_name.
 *
 * Replaces *newval with a copy in which pg_clean_ascii() has escaped every
 * byte that is not printable ASCII.  Returns false, leaving *newval
 * untouched, if either of the two allocations fails.
 */
static bool
check_cluster_name(char **newval, void **extra, GucSource source)
{
	char	   *clean;
	char	   *ret;

	/* Only allow clean ASCII chars in the cluster name */
	clean = pg_clean_ascii(*newval, MCXT_ALLOC_NO_OOM);
	if (!clean)
		return false;

	/*
	 * The cleaned string is only an intermediate: the value handed back
	 * through *newval comes from guc_strdup().  Release the intermediate on
	 * both the success and the failure path so it is not leaked.
	 */
	ret = guc_strdup(WARNING, clean);
	pfree(clean);
	if (!ret)
		return false;

	/*
	 * NOTE(review): the string previously referenced by *newval is not
	 * released here; confirm whether this hook or its caller owns it.
	 */
	*newval = ret;
	return true;
}

View File

@ -22,6 +22,7 @@
#endif #endif
#include "common/string.h" #include "common/string.h"
#include "lib/stringinfo.h"
/* /*
@ -59,9 +60,12 @@ strtoint(const char *pg_restrict str, char **pg_restrict endptr, int base)
/* /*
* pg_clean_ascii -- Replace any non-ASCII chars with a '?' char * pg_clean_ascii -- Replace any non-ASCII chars with a "\xXX" string
* *
* Modifies the string passed in which must be '\0'-terminated. * Makes a newly allocated copy of the string passed in, which must be
* '\0'-terminated. In the backend, additional alloc_flags may be provided and
* will be passed as-is to palloc_extended(); in the frontend, alloc_flags is
* ignored and the copy is malloc'd.
* *
* This function exists specifically to deal with filtering out * This function exists specifically to deal with filtering out
* non-ASCII characters in a few places where the client can provide an almost * non-ASCII characters in a few places where the client can provide an almost
@ -73,22 +77,52 @@ strtoint(const char *pg_restrict str, char **pg_restrict endptr, int base)
* In general, this function should NOT be used- instead, consider how to handle * In general, this function should NOT be used- instead, consider how to handle
* the string without needing to filter out the non-ASCII characters. * the string without needing to filter out the non-ASCII characters.
* *
* Ultimately, we'd like to improve the situation to not require stripping out * Ultimately, we'd like to improve the situation to not require replacing all
* all non-ASCII but perform more intelligent filtering which would allow UTF or * non-ASCII but perform more intelligent filtering which would allow UTF or
* similar, but it's unclear exactly what we should allow, so stick to ASCII only * similar, but it's unclear exactly what we should allow, so stick to ASCII only
* for now. * for now.
*/ */
char *
pg_clean_ascii(const char *str, int alloc_flags)
{
	/*
	 * Build and return a newly allocated copy of str.  Bytes in the range
	 * 32..126 are copied unchanged; every other byte is written out as the
	 * four characters "\xNN", NN being two lowercase hex digits.  Returns
	 * NULL when the allocator returns NULL.
	 */
	size_t		bufsize;
	size_t		used = 0;
	const char *src = str;
	char	   *buf;

	/* An input byte expands to at most four output bytes; add the NUL. */
	bufsize = strlen(str) * 4 + 1;

#ifdef FRONTEND
	buf = malloc(bufsize);
#else
	buf = palloc_extended(bufsize, alloc_flags);
#endif

	if (buf == NULL)
		return NULL;

	while (*src != '\0')
	{
		if (*src >= 32 && *src <= 126)
		{
			/* Printable ASCII passes through as-is. */
			Assert(used < bufsize);
			buf[used] = *src;
			used++;
		}
		else
		{
			/* Everything else is replaced by its hex escape. */
			Assert(used < (bufsize - 3));
			snprintf(&buf[used], bufsize - used, "\\x%02x", (unsigned char) *src);
			used += 4;
		}

		src++;
	}

	Assert(used < bufsize);
	buf[used] = '\0';

	return buf;
}

View File

@ -24,7 +24,7 @@ typedef struct PromptInterruptContext
extern bool pg_str_endswith(const char *str, const char *end); extern bool pg_str_endswith(const char *str, const char *end);
extern int strtoint(const char *pg_restrict str, char **pg_restrict endptr, extern int strtoint(const char *pg_restrict str, char **pg_restrict endptr,
int base); int base);
extern void pg_clean_ascii(char *str); extern char *pg_clean_ascii(const char *str, int alloc_flags);
extern int pg_strip_crlf(char *str); extern int pg_strip_crlf(char *str);
extern bool pg_is_ascii(const char *str); extern bool pg_is_ascii(const char *str);